modify scripts
@@ -246,13 +246,13 @@ def fetch_performers_detail():
     limit_count = 5 if debug else 100
     performers_list = []
     last_performer_id = 0
-    abnormal_codes = [scraper.http_code_404, scraper.http_code_login]
+    abnormal_codes = [scraper.http_code_404, scraper.http_code_redirect]
 
     def get_performers(**kwargs):
         if scan_mode == 1:
             kwargs["from_actor_list"] = 1
+            kwargs["uncensored"] = 1
         elif scan_mode == 0:
             kwargs["from_actor_list"] = 0
+            kwargs["uncensored"] = 0
         else:
             logging.debug(f"scan all records")
         kwargs["order_by"] = 'id asc'
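Both fetchers keep an abnormal_codes list so records whose last fetch ended in a terminal state can be excluded from normal re-scans. The constants appear only by name in this diff; a plausible shape for them in scraper, assuming they mirror the HTTP statuses they are named after, is:

    # Assumed definitions; only the names are visible in the diff.
    http_code_404 = 404       # page permanently missing
    http_code_redirect = 302  # redirected away, e.g. to a login page (replaces http_code_login)
    http_code_local = 0       # sentinel for pages served from a local cache; the value is a guess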
@@ -278,29 +278,29 @@ def fetch_performers_detail():
     for performer in performers_list:
         url = performer['href']
         person = performer['name']
         pic = ''
         alias = []
+        uncensored = int(performer['uncensored'])
+        avatar = None
 
         next_url = url
         all_movies = []
         need_insert = True
         while next_url:
             logging.debug(f"Fetching data for actor ({person}), url {next_url} ...")
-            soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="span", identifier="actor-section-name", attr_type="class"))
+            soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="div", identifier="alert alert-success alert-common", attr_type="class"))
             if soup:
                 data, next_url = scraper.parse_actor_detail(soup, next_url)
                 if data:
                     pic = data.get('pic', '')
                     alias = data.get('alias', [])
+                    if not avatar:
+                        avatar = data.get('avatar')
                     all_movies.extend(data.get('movies', []))
 
             elif status_code and status_code == scraper.http_code_404:
-                actor_id = db_tools.insert_or_update_actor_404(name=person, href=url, is_full_data=scraper.http_code_404)
+                actor_id = db_tools.update_actor_detail_404({'href': url, 'is_full_data': scraper.http_code_404})
                 logging.warning(f'404 page. id: {actor_id}, name: ({person}), url: {url}, Skipping...')
                 need_insert = False
                 break
-            elif status_code and status_code == scraper.http_code_login:
-                actor_id = db_tools.insert_or_update_actor_404(name=person, href=url, is_full_data=scraper.http_code_login)
+            elif status_code and status_code == scraper.http_code_redirect:
+                actor_id = db_tools.update_actor_detail_404({'href': url, 'is_full_data': scraper.http_code_redirect})
                 logging.warning(f'401 page (need login). id: {actor_id}, name: ({person}), url: {url}, Skipping...')
                 need_insert = False
                 break
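scraper.fetch_page is called with the target URL plus a validator built via functools.partial; the validator decides whether the fetched document is a real detail page (rather than an error or placeholder) by checking that one expected element exists. The internals of generic_validator are not part of this commit; a minimal sketch consistent with the call sites, assuming BeautifulSoup, could be:

    from functools import partial
    from bs4 import BeautifulSoup

    def generic_validator(soup, tag, identifier, attr_type="class"):
        # True when e.g. <span class="actor-section-name"> is present in the page.
        return soup.find(tag, attrs={attr_type: identifier}) is not None

    # Pin the expected element the way the actor loop does.
    validator = partial(generic_validator, tag="span",
                        identifier="actor-section-name", attr_type="class")
    page = BeautifulSoup("<span class='actor-section-name'>x</span>", "html.parser")
    assert validator(page)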
@@ -311,16 +311,20 @@ def fetch_performers_detail():
         if not need_insert:
             continue
 
         #utils.pretty_print_json(avatar)
         #utils.pretty_print_json(all_movies)
         #continue
 
         # All of this performer's movies fetched; start inserting the data
-        performer_id = db_tools.insert_or_update_actor({
+        performer_id = db_tools.update_actor_detail({
             'href': url,
             'name': person,
             'pic': pic,
             'alias': alias,
-            'credits': all_movies
+            'avatar': avatar,
+            'credits': all_movies,
+            'uncensored': uncensored
         })
         if performer_id:
-            logging.debug(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
+            logging.debug(f'insert/update one person, id: {performer_id}, person: ({person}), url: {url}')
             last_performer_id = performer_id
             succ_rows += 1
         else:
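The commit also switches the actor writer to a single dict payload keyed by href. The db_tools layer is outside this diff; one way such an upsert could look, with a hypothetical actors table and column set, is:

    import json
    import sqlite3

    def update_actor_detail(conn: sqlite3.Connection, data: dict):
        # Hypothetical schema: actors(href UNIQUE, name, pic, alias, avatar, credits, uncensored).
        row = {
            'href': data['href'],
            'name': data['name'],
            'pic': data.get('pic', ''),
            'alias': json.dumps(data.get('alias', []), ensure_ascii=False),
            'avatar': json.dumps(data.get('avatar'), ensure_ascii=False),
            'credits': json.dumps(data.get('credits', []), ensure_ascii=False),
            'uncensored': int(data.get('uncensored', 0)),
        }
        cur = conn.execute(
            'INSERT INTO actors (href, name, pic, alias, avatar, credits, uncensored) '
            'VALUES (:href, :name, :pic, :alias, :avatar, :credits, :uncensored) '
            'ON CONFLICT(href) DO UPDATE SET name=excluded.name, pic=excluded.pic, '
            'alias=excluded.alias, avatar=excluded.avatar, credits=excluded.credits, '
            'uncensored=excluded.uncensored',
            row,
        )
        conn.commit()
        # On the update path lastrowid is not meaningful; a real implementation
        # would SELECT the id by href instead.
        return cur.lastrowid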
@@ -334,10 +338,10 @@ def fetch_performers_detail():
 
 # Update movie details
 def fetch_movies_detail():
-    limit_count = 10 if debug else 100
+    limit_count = 2 if debug else 100
     movies_list = []
     last_movie_id = 0
-    abnormal_codes = [scraper.http_code_404, scraper.http_code_login]
+    abnormal_codes = [scraper.http_code_404, scraper.http_code_redirect]
 
     def get_movies(**kwargs):
         if scan_mode == 1:
@@ -347,7 +351,7 @@ def fetch_movies_detail():
         else:
             logging.debug(f"scan all records.")
         kwargs["order_by"] = 'id asc'
-        return db_tools.query_movie_hrefs(limit=limit_count, **kwargs)
+        return db_tools.query_movies(limit=limit_count, **kwargs)
 
     while True:
         if update_mode == 0:  # only scan new records
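The surrounding loop is a keyset-paginated batch scan: each get_movies call returns up to limit_count rows past the last processed id, and the scan ends when a batch comes back empty. The exact kwargs query_movies accepts are not shown in this diff; the pattern, with an assumed id filter, is roughly:

    # Assumed batch-scan skeleton; the id-filter kwarg name is a guess.
    last_movie_id = 0
    while True:
        movies_list = get_movies(id_greater_than=last_movie_id)
        if not movies_list:
            break
        for movie in movies_list:
            ...                          # fetch and store the detail page
            last_movie_id = movie['id']  # resume point for the next batch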
@@ -370,10 +374,11 @@ def fetch_movies_detail():
             url = movie['href']
             title = movie['title']
             curr_id = movie['id']
+            uncensored = int(movie['uncensored'])
             logging.debug(f"Fetching data for movie ({title}), url {url} ...")
-            soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="video-meta-panel", attr_type="class"))
+            soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="container", attr_type="class"))
             # File was read from the local cache; skip it
             if skip_local and status_code == scraper.http_code_local:
                 last_movie_id = curr_id
                 succ_count += 1
                 continue
@@ -381,6 +386,9 @@ def fetch_movies_detail():
             if soup:
                 movie_data = scraper.parse_movie_detail(soup, url, title)
                 if movie_data:
+                    #utils.pretty_print_json(movie_data)
+                    #continue
+                    movie_data['uncensored'] = uncensored
                     movie_id = db_tools.insert_or_update_movie(movie_data)
                     if movie_id:
                         logging.debug(f'insert one movie, id: {movie_id}, title: ({title}) url: {url}')
@@ -391,11 +399,11 @@ def fetch_movies_detail():
                 else:
                     logging.warning(f'parse_page_movie error. url: {url}')
 
             elif status_code and status_code == scraper.http_code_404:
-                movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=scraper.http_code_404)
+                movie_id = db_tools.insert_or_update_movie_404({'href': url, 'is_full_data': scraper.http_code_404})
                 logging.warning(f'404 page. id: {movie_id}, title: ({title}), url: {url}, Skipping...')
-            elif status_code and status_code == scraper.http_code_login:
-                movie_id = db_tools.insert_or_update_movie_404(title=title, href=url, is_full_data=scraper.http_code_login)
+            elif status_code and status_code == scraper.http_code_redirect:
+                movie_id = db_tools.insert_or_update_movie_404({'href': url, 'is_full_data': scraper.http_code_redirect})
                 logging.warning(f'401 page (need login). id: {movie_id}, title: ({title}), url: {url}, Skipping...')
             else:
                 logging.warning(f'fetch_page error. url: {url}')