diff --git a/u9a9/src/config.py b/u9a9/src/config.py
index b318d06..6ac9e3b 100644
--- a/u9a9/src/config.py
+++ b/u9a9/src/config.py
@@ -36,7 +36,7 @@ class RateLimitFilter(logging.Filter):
         if elapsed < 60:  # within the last 60 seconds
             log_count[message_key] += 1
             if log_count[message_key] > self.LOG_LIMIT:
-                print('reach limit.')
+                print('reach limit.\n')
                 return False  # drop the record outright
         else:
             log_count[message_key] = 1  # more than 60 seconds have passed; restart the count
diff --git a/u9a9/src/fetch.py b/u9a9/src/fetch.py
index 318d848..94a92cb 100644
--- a/u9a9/src/fetch.py
+++ b/u9a9/src/fetch.py
@@ -29,8 +29,8 @@ def fetch_list(start_p=1):
     total_results = []
     # back up any existing file
     utils.backup_existing_file(target_csv)
-    while True:
-        url = f"https://u001.25img.com/?p={p}"
+    url = f"https://u001.25img.com/?p={p}"
+    while url:
         logging.info(f"fetching url {url}")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="table-responsive", attr_type="class"))
         if soup:
@@ -42,15 +42,14 @@
                 if total_pages:
                     if p >= total_pages:
                         url = None
-                        break
                     else:
                         p += 1
+                        url = f"https://u001.25img.com/?p={p}"
                 if p % 10 == 0 :
-                    #utils.write_to_csv(total_results, target_csv)
                     lines = utils.append_to_csv(total_results, target_csv)
                     if lines:
                         logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
-                        total_results.clear()  # clear the buffer
+                    total_results.clear()  # clear after logging so the logged count stays accurate
                 time.sleep(1)
             else:
                 logging.warning(f"fetch_list failed. url: {url} ")
@@ -59,17 +58,14 @@
         else:
             logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')
 
-        if not url:
-            break
-
         if debug:
             break
 
     # write the remaining results to the CSV file
-    lines = utils.write_to_csv(total_results, target_csv)
+    lines = utils.append_to_csv(total_results, target_csv)
     total_results.clear()
     if lines:
-        logging.info(f"write to file succ. file: {target_csv}. total lines: {lines}")
+        logging.info(f"write to csv file succ. file: {target_csv}. total lines: {lines}")
 
     logging.info(f"fetch list finished. total pages: {p}")
 
@@ -143,11 +139,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
             url = next_url
             cnt += 1
             if cnt % 10 == 0 :
-                #utils.write_to_csv(total_results, target_csv_sis)
                 lines = utils.append_to_csv(total_results, target_csv_sis)
                 if lines:
                     logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
-                    total_results.clear()
+                total_results.clear()  # clear after logging so the logged count stays accurate
            time.sleep(1)
         else:
             logging.warning(f"fetch_list failed. url: {url} ")
@@ -160,10 +155,10 @@
             break
 
     # write the remaining results to the CSV file
-    lines = utils.write_to_csv(total_results, target_csv_sis)
+    lines = utils.append_to_csv(total_results, target_csv_sis)
     total_results.clear()
     if lines:
-        logging.info(f"write to file succ. file: {target_csv_sis}, total lines: {lines}")
+        logging.info(f"write to csv file succ. file: {target_csv_sis}, total lines: {lines}")
     logging.info(f"fetch list finished. total pages: {cnt}")
 
 def fetch_sis_all():
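
The core of this change is the pagination-loop rewrite in `fetch_list`: instead of `while True` with scattered `break`s, the next page URL is computed up front and the loop runs `while url:`, terminating when the URL is set to `None`; the periodic flush now also clears the buffer unconditionally so the same rows are never appended twice. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `append_rows` is a hypothetical stand-in for `utils.append_to_csv`, and the buffered rows are placeholders rather than parsed page content.

```python
import csv
import logging

logging.basicConfig(level=logging.INFO)

PAGE_URL = "https://u001.25img.com/?p={p}"  # same template fetch_list uses
FLUSH_EVERY = 10  # flush the buffer every 10 pages, as in the diff

def append_rows(rows, path):
    """Hypothetical stand-in for utils.append_to_csv: append rows to the
    CSV file and return the file's total line count afterwards."""
    with open(path, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)
    with open(path, newline="", encoding="utf-8") as f:
        return sum(1 for _ in f)

def crawl(start_p=1, total_pages=25, target_csv="list.csv"):
    p = start_p
    url = PAGE_URL.format(p=p)  # compute the first URL before entering the loop
    buffer = []
    while url:  # replaces `while True` plus scattered `break`s
        buffer.append([p, url])  # placeholder for rows parsed from the page
        # compute the next URL; a falsy URL ends the loop at the next check
        if p >= total_pages:
            url = None
        else:
            p += 1
            url = PAGE_URL.format(p=p)
        if p % FLUSH_EVERY == 0:
            total = append_rows(buffer, target_csv)
            if total:
                logging.info(f"write to csv file. new lines: {len(buffer)}, total lines: {total}")
            buffer.clear()  # clear unconditionally, after logging, so the count is accurate
    # final flush for whatever is still buffered when the loop ends
    total = append_rows(buffer, target_csv)
    buffer.clear()
    if total:
        logging.info(f"write to csv file succ. file: {target_csv}, total lines: {total}")

if __name__ == "__main__":
    crawl()
```

The one ordering constraint worth calling out is that the buffer must be cleared only after the log line reads its length; clearing first would make every flush report `new lines: 0`.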