modify scripts

Author: oscarz
Date: 2025-06-20 08:38:18 +08:00
Parent: 767858f7a4
Commit: 3a3bc1f5fd
2 changed files with 10 additions and 15 deletions


@@ -36,7 +36,7 @@ class RateLimitFilter(logging.Filter):
         if elapsed < 60:  # within 60 seconds
             log_count[message_key] += 1
             if log_count[message_key] > self.LOG_LIMIT:
-                print('reach limit.')
+                print('reach limit.\n')
                 return False  # drop the record directly
         else:
             log_count[message_key] = 1  # over 60 seconds, restart the count
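For context, the hunk above touches a fixed-window rate limiter implemented as a logging.Filter. A minimal runnable sketch of that pattern, reconstructed around the lines shown; window_start and the LOG_LIMIT value of 100 are assumptions, since only log_count, message_key, elapsed, and self.LOG_LIMIT appear in the diff:

    import logging
    import time

    # Sketch only: window_start and LOG_LIMIT = 100 are assumed; the real
    # module defines its own state and limit.
    log_count = {}
    window_start = {}

    class RateLimitFilter(logging.Filter):
        LOG_LIMIT = 100  # assumed value

        def filter(self, record):
            message_key = record.getMessage()
            elapsed = time.time() - window_start.get(message_key, 0.0)
            if elapsed < 60:  # within 60 seconds
                log_count[message_key] = log_count.get(message_key, 0) + 1
                if log_count[message_key] > self.LOG_LIMIT:
                    print('reach limit.\n')
                    return False  # drop the record directly
            else:
                window_start[message_key] = time.time()
                log_count[message_key] = 1  # over 60 seconds, restart the count
            return True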


@@ -29,8 +29,8 @@ def fetch_list(start_p=1):
     total_results = []
     # back up the existing file
     utils.backup_existing_file(target_csv)
-    while True:
-        url = f"https://u001.25img.com/?p={p}"
+    url = f"https://u001.25img.com/?p={p}"
+    while url:
         logging.info(f"fetching url {url}")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="table-responsive", attr_type="class"))
         if soup:
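The fetch_page call above takes a validator built with functools.partial, so the scraper can confirm the expected markup arrived before parsing. A hypothetical stand-in for that pattern; the real scraper.generic_validator lives in this repo and may differ:

    from functools import partial
    from bs4 import BeautifulSoup

    # Hypothetical: the page counts as valid only if the expected element
    # exists, e.g. <div class="table-responsive">.
    def generic_validator(soup, tag, identifier, attr_type):
        return soup.find(tag, attrs={attr_type: identifier}) is not None

    validator = partial(generic_validator, tag="div",
                        identifier="table-responsive", attr_type="class")

    html = '<div class="table-responsive"><table></table></div>'
    assert validator(BeautifulSoup(html, "html.parser"))  # container present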
@@ -42,15 +42,14 @@ def fetch_list(start_p=1):
             if total_pages:
                 if p >= total_pages:
                     url = None
-                    break
                 else:
                     p += 1
+                    url = f"https://u001.25img.com/?p={p}"
             if p % 10 == 0 :
-                #utils.write_to_csv(total_results, target_csv)
                 lines = utils.append_to_csv(total_results, target_csv)
-                total_results.clear()  # clear the buffer
                 if lines:
                     logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
+                total_results.clear()  # clear the buffer
             time.sleep(1)
         else:
             logging.warning(f"fetch_list failed. url: {url} ")
@@ -59,17 +58,14 @@ def fetch_list(start_p=1):
         else:
             logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')
-        if not url:
-            break
         if debug:
             break
     # write to the csv file
-    lines = utils.write_to_csv(total_results, target_csv)
+    lines = utils.append_to_csv(total_results, target_csv)
     total_results.clear()
     if lines:
-        logging.info(f"write to file succ. file: {target_csv}. total lines: {lines}")
+        logging.info(f"write to csv file succ. file: {target_csv}. total lines: {lines}")
     logging.info(f"fetch list finished. total pages: {p}")
@@ -143,11 +139,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
             url = next_url
             cnt += 1
             if cnt % 10 == 0 :
-                #utils.write_to_csv(total_results, target_csv_sis)
                 lines = utils.append_to_csv(total_results, target_csv_sis)
-                total_results.clear()
                 if lines:
                     logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
+                total_results.clear()
             time.sleep(1)
         else:
             logging.warning(f"fetch_list failed. url: {url} ")
@@ -160,10 +155,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
             break
     # write to the csv file
-    lines = utils.write_to_csv(total_results, target_csv_sis)
+    lines = utils.append_to_csv(total_results, target_csv_sis)
     total_results.clear()
     if lines:
-        logging.info(f"write to file succ. file: {target_csv_sis}, total lines: {lines}")
+        logging.info(f"write to csv file succ. file: {target_csv_sis}, total lines: {lines}")
     logging.info(f"fetch list finished. total pages: {cnt}")

 def fetch_sis_all():