modify scripts
@@ -36,7 +36,7 @@ class RateLimitFilter(logging.Filter):
         if elapsed < 60:  # within 60 seconds
             log_count[message_key] += 1
             if log_count[message_key] > self.LOG_LIMIT:
-                print('reach limit.')
+                print('reach limit.\n')
                 return False  # drop the record directly
         else:
             log_count[message_key] = 1  # more than 60 seconds elapsed, restart the count
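The filter above drops a log message once it has fired more than LOG_LIMIT times inside a 60-second window. The rest of the class is not part of this diff, so the following is only a minimal sketch of how such a filter might be assembled; the window bookkeeping (log_time), the LOG_LIMIT value, and keeping the state on the instance rather than at module level (as log_count appears to be in the real code) are assumptions.

import logging
import time

class RateLimitFilter(logging.Filter):
    LOG_LIMIT = 5  # assumed value; the real limit is defined elsewhere in the script

    def __init__(self):
        super().__init__()
        self.log_count = {}  # message -> occurrences inside the current window
        self.log_time = {}   # message -> start of the current window

    def filter(self, record):
        message_key = record.getMessage()
        now = time.time()
        started = self.log_time.setdefault(message_key, now)
        elapsed = now - started
        if elapsed < 60:  # within 60 seconds
            self.log_count[message_key] = self.log_count.get(message_key, 0) + 1
            if self.log_count[message_key] > self.LOG_LIMIT:
                return False  # drop the record
        else:
            # more than 60 seconds elapsed, start a new window
            self.log_time[message_key] = now
            self.log_count[message_key] = 1
        return True

Attaching it with logging.getLogger().addFilter(RateLimitFilter()) throttles records logged on the root logger, which is where the plain logging.info(...) calls in these scripts go.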
@@ -29,8 +29,8 @@ def fetch_list(start_p=1):
     total_results = []
     # back up the existing file
     utils.backup_existing_file(target_csv)
-    while True:
-        url = f"https://u001.25img.com/?p={p}"
+    url = f"https://u001.25img.com/?p={p}"
+    while url:
         logging.info(f"fetching url {url}")
         soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="table-responsive", attr_type="class"))
         if soup:
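This hunk replaces the unconditional while True: with a loop driven by url: the first page URL is built from the page counter before entering the loop, while url: keeps going as long as there is a page to fetch, and (per the next hunk) the last page sets url = None to stop. A runnable skeleton of that shape, where fetch_one stands in for scraper.fetch_page and the names crawl_pages/total_pages are only illustrative:

import logging

def crawl_pages(start_p=1, total_pages=3):
    def fetch_one(url):
        # stand-in for scraper.fetch_page: pretend every page succeeds
        logging.info(f"fetching url {url}")
        return {"page": url}

    p = start_p
    url = f"https://u001.25img.com/?p={p}"
    results = []
    while url:
        results.append(fetch_one(url))
        if p >= total_pages:
            url = None  # no next page: the while condition ends the loop
        else:
            p += 1
            url = f"https://u001.25img.com/?p={p}"
    return results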
@@ -42,15 +42,14 @@ def fetch_list(start_p=1):
             if total_pages:
                 if p >= total_pages:
                     url = None
-                    break
                 else:
                     p += 1
+                    url = f"https://u001.25img.com/?p={p}"
             if p % 10 == 0 :
-                #utils.write_to_csv(total_results, target_csv)
                 lines = utils.append_to_csv(total_results, target_csv)
-                total_results.clear()  # clear the buffer
                 if lines:
                     logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
+                total_results.clear()  # clear the buffer
                 time.sleep(1)
         else:
             logging.warning(f"fetch_list failed. url: {url} ")
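Two things change in the flush that runs every tenth page: the leftover commented-out write_to_csv call goes away, and total_results.clear() moves below the logging.info(...) call, so len(total_results) in the log still reports how many rows were just appended instead of the length of an already-emptied list. The same ordering, reduced to a standalone snippet; flush_if_due and append_rows are illustrative stand-ins for the inline code and utils.append_to_csv:

import logging

def flush_if_due(page_no, buffer, target_csv, append_rows):
    # flush the buffer every 10 pages; clear only after logging so the
    # "new lines" count in the message is still meaningful
    if page_no % 10 == 0 and buffer:
        lines = append_rows(buffer, target_csv)
        if lines:
            logging.info(f"write to csv file. new lines: {len(buffer)}, total lines: {lines}")
        buffer.clear()  # clear the buffer
        return True
    return False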
@@ -59,17 +58,14 @@ def fetch_list(start_p=1):
         else:
             logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')

-        if not url:
-            break
-
         if debug:
             break

     # write to the csv file
-    lines = utils.write_to_csv(total_results, target_csv)
+    lines = utils.append_to_csv(total_results, target_csv)
     total_results.clear()
     if lines:
-        logging.info(f"write to file succ. file: {target_csv}. total lines: {lines}")
+        logging.info(f"write to csv file succ. file: {target_csv}. total lines: {lines}")
     logging.info(f"fetch list finished. total pages: {p}")

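The final flush after the loop now also goes through utils.append_to_csv rather than utils.write_to_csv, so the rows collected since the last periodic flush are added to the file instead of replacing what the loop already wrote. The helper itself is not shown in this diff; the sketch below is only a guess at what an append-style writer with this interface could look like, assuming the buffered rows are dicts and that the return value is the resulting number of data lines (which is how {lines} is used in the log messages):

import csv
import os

def append_to_csv(rows, target_csv):
    # hypothetical sketch, not the project's actual utils.append_to_csv
    if not rows:
        return 0
    fieldnames = list(rows[0].keys())
    need_header = not os.path.exists(target_csv) or os.path.getsize(target_csv) == 0
    with open(target_csv, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if need_header:
            writer.writeheader()
        writer.writerows(rows)
    with open(target_csv, newline="", encoding="utf-8") as f:
        return sum(1 for _ in f) - 1  # total data lines, excluding the header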
@@ -143,11 +139,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
             url = next_url
             cnt += 1
             if cnt % 10 == 0 :
-                #utils.write_to_csv(total_results, target_csv_sis)
                 lines = utils.append_to_csv(total_results, target_csv_sis)
-                total_results.clear()
                 if lines:
                     logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
+                total_results.clear()
                 time.sleep(1)
         else:
             logging.warning(f"fetch_list failed. url: {url} ")
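fetch_sis_list gets the same treatment as fetch_list: the dead write_to_csv comment is dropped and the buffer is cleared only after the progress log. Here the pagination follows a next-page link rather than a page counter, so the overall traversal looks roughly like the sketch below; walk_forum, fetch_page_rows, and flush are illustrative names, not functions from the scripts:

import logging
import time

def walk_forum(first_url, fetch_page_rows, flush, max_pages=1000):
    # follow "next page" links, flushing the collected rows every 10 pages
    url, cnt, buffer = first_url, 0, []
    while url and cnt < max_pages:
        rows, next_url = fetch_page_rows(url)  # fetch + parse one forum page
        buffer.extend(rows)
        url = next_url
        cnt += 1
        if cnt % 10 == 0:
            flush(buffer)      # e.g. utils.append_to_csv(buffer, target_csv_sis)
            buffer.clear()
            time.sleep(1)      # pause between batches
    if buffer:
        flush(buffer)
    logging.info(f"fetch list finished. total pages: {cnt}")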
@@ -160,10 +155,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
             break

     # write to the csv file
-    lines = utils.write_to_csv(total_results, target_csv_sis)
+    lines = utils.append_to_csv(total_results, target_csv_sis)
     total_results.clear()
     if lines:
-        logging.info(f"write to file succ. file: {target_csv_sis}, total lines: {lines}")
+        logging.info(f"write to csv file succ. file: {target_csv_sis}, total lines: {lines}")
     logging.info(f"fetch list finished. total pages: {cnt}")

 def fetch_sis_all():