modify scripts
This commit is contained in:
@ -36,7 +36,7 @@ class RateLimitFilter(logging.Filter):
|
|||||||
if elapsed < 60: # 60 秒内
|
if elapsed < 60: # 60 秒内
|
||||||
log_count[message_key] += 1
|
log_count[message_key] += 1
|
||||||
if log_count[message_key] > self.LOG_LIMIT:
|
if log_count[message_key] > self.LOG_LIMIT:
|
||||||
print('reach limit.')
|
print('reach limit.\n')
|
||||||
return False # 直接丢弃
|
return False # 直接丢弃
|
||||||
else:
|
else:
|
||||||
log_count[message_key] = 1 # 超过 60 秒,重新计数
|
log_count[message_key] = 1 # 超过 60 秒,重新计数
|
||||||
|
|||||||
@ -29,8 +29,8 @@ def fetch_list(start_p=1):
|
|||||||
total_results = []
|
total_results = []
|
||||||
# 备份已有文件
|
# 备份已有文件
|
||||||
utils.backup_existing_file(target_csv)
|
utils.backup_existing_file(target_csv)
|
||||||
while True:
|
url = f"https://u001.25img.com/?p={p}"
|
||||||
url = f"https://u001.25img.com/?p={p}"
|
while url:
|
||||||
logging.info(f"fetching url {url}")
|
logging.info(f"fetching url {url}")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="table-responsive", attr_type="class"))
|
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="table-responsive", attr_type="class"))
|
||||||
if soup:
|
if soup:
|
||||||
@ -42,15 +42,14 @@ def fetch_list(start_p=1):
|
|||||||
if total_pages:
|
if total_pages:
|
||||||
if p >= total_pages:
|
if p >= total_pages:
|
||||||
url = None
|
url = None
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
p += 1
|
p += 1
|
||||||
|
url = f"https://u001.25img.com/?p={p}"
|
||||||
if p % 10 == 0 :
|
if p % 10 == 0 :
|
||||||
#utils.write_to_csv(total_results, target_csv)
|
|
||||||
lines = utils.append_to_csv(total_results, target_csv)
|
lines = utils.append_to_csv(total_results, target_csv)
|
||||||
|
total_results.clear() # 清空缓冲区
|
||||||
if lines:
|
if lines:
|
||||||
logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
|
logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
|
||||||
total_results.clear() # 清空缓冲区
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
else:
|
else:
|
||||||
logging.warning(f"fetch_list failed. url: {url} ")
|
logging.warning(f"fetch_list failed. url: {url} ")
|
||||||
@ -59,17 +58,14 @@ def fetch_list(start_p=1):
|
|||||||
else:
|
else:
|
||||||
logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')
|
logging.warning(f'fetch_page error. url: {url}, status_code: {status_code}')
|
||||||
|
|
||||||
if not url:
|
|
||||||
break
|
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
break
|
break
|
||||||
|
|
||||||
# 写入csv文件
|
# 写入csv文件
|
||||||
lines = utils.write_to_csv(total_results, target_csv)
|
lines = utils.append_to_csv(total_results, target_csv)
|
||||||
total_results.clear()
|
total_results.clear()
|
||||||
if lines:
|
if lines:
|
||||||
logging.info(f"write to file succ. file: {target_csv}. total lines: {lines}")
|
logging.info(f"write to csv file succ. file: {target_csv}. total lines: {lines}")
|
||||||
logging.info(f"fetch list finished. total pages: {p}")
|
logging.info(f"fetch list finished. total pages: {p}")
|
||||||
|
|
||||||
|
|
||||||
@ -143,11 +139,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
|
|||||||
url = next_url
|
url = next_url
|
||||||
cnt += 1
|
cnt += 1
|
||||||
if cnt % 10 == 0 :
|
if cnt % 10 == 0 :
|
||||||
#utils.write_to_csv(total_results, target_csv_sis)
|
|
||||||
lines = utils.append_to_csv(total_results, target_csv_sis)
|
lines = utils.append_to_csv(total_results, target_csv_sis)
|
||||||
|
total_results.clear()
|
||||||
if lines:
|
if lines:
|
||||||
logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
|
logging.info(f"write to csv file. new lines: {len(total_results)}, total lines: {lines}")
|
||||||
total_results.clear()
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
else:
|
else:
|
||||||
logging.warning(f"fetch_list failed. url: {url} ")
|
logging.warning(f"fetch_list failed. url: {url} ")
|
||||||
@ -160,10 +155,10 @@ def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_
|
|||||||
break
|
break
|
||||||
|
|
||||||
# 写入csv文件
|
# 写入csv文件
|
||||||
lines = utils.write_to_csv(total_results, target_csv_sis)
|
lines = utils.append_to_csv(total_results, target_csv_sis)
|
||||||
total_results.clear()
|
total_results.clear()
|
||||||
if lines:
|
if lines:
|
||||||
logging.info(f"write to file succ. file: {target_csv_sis}, total lines: {lines}")
|
logging.info(f"write to csv file succ. file: {target_csv_sis}, total lines: {lines}")
|
||||||
logging.info(f"fetch list finished. total pages: {cnt}")
|
logging.info(f"fetch list finished. total pages: {cnt}")
|
||||||
|
|
||||||
def fetch_sis_all():
|
def fetch_sis_all():
|
||||||
|
|||||||
Reference in New Issue
Block a user