diff --git a/iafd/src/fetch.py b/iafd/src/fetch.py
index f356b94..88943af 100644
--- a/iafd/src/fetch.py
+++ b/iafd/src/fetch.py
@@ -274,7 +274,8 @@ def fetch_performers_detail_once(perfomers_list):
             logging.warning(f'601 page(wrong url). id: {performer_id}, name: {person}, url: {url}, Skiping...')
         else:
             logging.warning(f'fetch_page error. person: ({person}), url: {url}')
-        time.sleep(1)
+        if status_code != 99:  # data came from the website, so throttle the request rate
+            time.sleep(0.5)
     return last_performer_id
 
 # Update performer info
@@ -357,7 +358,8 @@ def fetch_movies_detail():
             logging.warning(f'601 page(wrong url). id: {movie_id}, title: ({title}), url: {url}, Skiping...')
         else:
             logging.warning(f'fetch_page error. url: {url}')
-        time.sleep(1)
+        if status_code != 99:  # data came from the website, so throttle the request rate
+            time.sleep(0.5)
     logging.info(f'total request: {len(movies_list)}, succ: {succ_count}. last movie id: {last_movie_id}')
     # break early when debugging
     if debug:
diff --git a/iafd/src/iafd_scraper.py b/iafd/src/iafd_scraper.py
index 501dbdf..ef7a778 100644
--- a/iafd/src/iafd_scraper.py
+++ b/iafd/src/iafd_scraper.py
@@ -49,7 +49,7 @@ def fetch_page(url, validator, max_retries=3, parser="html.parser", preprocessor
         soup = BeautifulSoup(html_text, parser)
         if validator(soup):  # run the caller-supplied page check
-            return soup, 200
+            return soup, 99  # a code below 100 means the page came from the local cache
 
     for attempt in range(max_retries):
         try:
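Note on the change: fetch_page now returns 99 when the validated page was served from local data without touching the site, and the callers in fetch.py only sleep when the status code is not 99, i.e. when a real network request was made. Below is a minimal, self-contained sketch of that pattern; the in-memory cache and the stubbed download are hypothetical stand-ins and not the actual iafd_scraper code.

import time

LOCAL_HIT = 99  # a code below 100: the page came from the local cache, no HTTP request happened

# Hypothetical in-memory cache standing in for the scraper's local HTML store.
_cache = {"https://example.com/cached": "<html>cached page</html>"}


def fetch_page(url, validator):
    """Return (content, status_code); status 99 marks a local cache hit."""
    cached = _cache.get(url)
    if cached is not None and validator(cached):
        return cached, LOCAL_HIT  # served locally, so the caller may skip throttling
    # Placeholder for the real network fetch (requests + BeautifulSoup in iafd_scraper.py).
    return f"<html>downloaded {url}</html>", 200


def crawl(urls, validator):
    for url in urls:
        _, status_code = fetch_page(url, validator)
        # Throttle only when the page actually came from the website.
        if status_code != LOCAL_HIT:
            time.sleep(0.5)


if __name__ == "__main__":
    crawl(["https://example.com/cached", "https://example.com/fresh"], lambda html: bool(html))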