modify scripts
This commit is contained in:
@@ -274,7 +274,8 @@ def fetch_performers_detail_once(perfomers_list):
|
|||||||
logging.warning(f'601 page(wrong url). id: {performer_id}, name: {person}, url: {url}, Skiping...')
|
logging.warning(f'601 page(wrong url). id: {performer_id}, name: {person}, url: {url}, Skiping...')
|
||||||
else:
|
else:
|
||||||
logging.warning(f'fetch_page error. person: ({person}), url: {url}')
|
logging.warning(f'fetch_page error. person: ({person}), url: {url}')
|
||||||
time.sleep(1)
|
if status_code != 99: # 从网站上获取的数据,需要控制频率
|
||||||
|
time.sleep(0.5)
|
||||||
return last_performer_id
|
return last_performer_id
|
||||||
|
|
||||||
# 更新演员信息
|
# 更新演员信息
|
||||||
@@ -357,7 +358,8 @@ def fetch_movies_detail():
|
|||||||
logging.warning(f'601 page(wrong url). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
logging.warning(f'601 page(wrong url). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
|
||||||
else:
|
else:
|
||||||
logging.warning(f'fetch_page error. url: {url}')
|
logging.warning(f'fetch_page error. url: {url}')
|
||||||
time.sleep(1)
|
if status_code != 99: # 从网站上获取的数据,需要控制频率
|
||||||
|
time.sleep(0.5)
|
||||||
logging.info(f'total request: {len(movies_list)}, succ: {succ_count}. last movie id: {last_movie_id}')
|
logging.info(f'total request: {len(movies_list)}, succ: {succ_count}. last movie id: {last_movie_id}')
|
||||||
# 调试增加break
|
# 调试增加break
|
||||||
if debug:
|
if debug:
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
|
|||||||
|
|
||||||
soup = BeautifulSoup(html_text, parser)
|
soup = BeautifulSoup(html_text, parser)
|
||||||
if validator(soup): # 进行自定义页面检查
|
if validator(soup): # 进行自定义页面检查
|
||||||
return soup, 200
|
return soup, 99 # 返回一个小于100的错误码,表明是从本地返回的
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user