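modify scripts: use append_to_csv for the final CSV write, clear the result buffer only after logging its size, and drive the fetch_list loop with `while url:`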

This commit is contained in:
oscarz
2025-06-20 08:38:18 +08:00
parent 767858f7a4
commit 3a3bc1f5fd
2 changed files with 10 additions and 15 deletions

View File

@ -36,7 +36,7 @@ class RateLimitFilter(logging.Filter):
if elapsed < 60: # 60 秒内
log_count[message_key] += 1
if log_count[message_key] > self.LOG_LIMIT:
print('reach limit.')
print('reach limit.\n')
return False # 直接丢弃
else:
log_count[message_key] = 1 # 超过 60 秒,重新计数

View File

@ -29,8 +29,8 @@ def fetch_list(start_p=1):
total_results = []
# 备份已有文件
utils.backup_existing_file(target_csv)
while True:
url = f"https://u001.25img.com/?p={p}"
url = f"https://u001.25img.com/?p={p}"
while url:
logging.info(f"fetching url {url}")
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="table-responsive", attr_type="class"))
if soup:
@ -42,15 +42,14 @@ def fetch_list(start_p=1):
if total_pages:
if p >= total_pages:
url = None
break
else:
p += 1
url = f"https://u001.25img.com/?p={p}"
if p % 10 == 0 :
#utils.write_to_csv(total_results, target_csv)
lines = utils.append_to_csv(total_results, target_csv)
total_results.clear() # 清空缓冲区
if lines:
logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
total_results.clear() # 清空缓冲区
time.sleep(1)
else:
logging.warning(f"fetch_list failed. url: {url} ")
@ -59,17 +58,14 @@ def fetch_list(start_p=1):
else:
logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')
if not url:
break
if debug:
break
# 写入csv文件
lines = utils.write_to_csv(total_results, target_csv)
lines = utils.append_to_csv(total_results, target_csv)
total_results.clear()
if lines:
logging.info(f"write to file succ. file: {target_csv}. total lines: {lines}")
logging.info(f"write to csv file succ. file: {target_csv}. total lines: {lines}")
logging.info(f"fetch list finished. total pages: {p}")
@ -143,11 +139,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
url = next_url
cnt += 1
if cnt % 10 == 0 :
#utils.write_to_csv(total_results, target_csv_sis)
lines = utils.append_to_csv(total_results, target_csv_sis)
total_results.clear()
if lines:
logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
total_results.clear()
time.sleep(1)
else:
logging.warning(f"fetch_list failed. url: {url} ")
@ -160,10 +155,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
break
# 写入csv文件
lines = utils.write_to_csv(total_results, target_csv_sis)
lines = utils.append_to_csv(total_results, target_csv_sis)
total_results.clear()
if lines:
logging.info(f"write to file succ. file: {target_csv_sis}, total lines: {lines}")
logging.info(f"write to csv file succ. file: {target_csv_sis}, total lines: {lines}")
logging.info(f"fetch list finished. total pages: {cnt}")
def fetch_sis_all():