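Modify scripts: detect dirty chapters by content match instead of an updated_at time window, and log fetch retries at warning level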

This commit is contained in:
oscarz
2025-03-24 10:19:33 +08:00
parent 20ef4b6842
commit 7b9b37092b
2 changed files with 14 additions and 9 deletions

View File

@ -48,20 +48,24 @@ def check_dirty_chapters():
for i in range(100):
table_name = f'{tbl_name_chapters_prefix}_{i}'
try:
cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
row = cursor.fetchone()
if row:
dirty_chapters_all += row[0]
#cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
#cursor.execute(f"SELECT count(*) FROM {table_name} WHERE 1=1 ")
#row = cursor.fetchone()
#if row:
# dirty_chapters_all += row[0]
cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' and content like '%aabook%' ")
#cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' and content like '%aabook%' ")
cursor.execute(f"SELECT count(*) FROM {table_name} WHERE 1=1 and (content like '%aabook%' or content like '%疯情%') ")
row = cursor.fetchone()
if row:
dirty_chapters += row[0]
curr_rows = row[0]
print(f"check {table_name}, dirty pages: {curr_rows}")
dirty_chapters += curr_rows
except sqlite3.Error as e:
print(f"query error: {e}")
print(f"all: {dirty_chapters_all}, count: {dirty_chapters}")
print(f"dirty pages: {dirty_chapters}")
# 检查脏数据
@ -71,7 +75,8 @@ def update_dirty_chapters():
for i in range(100):
table_name = f'{tbl_name_chapters_prefix}_{i}'
try:
cursor.execute(f"update {table_name} set has_content = 0 WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
#cursor.execute(f"update {table_name} set has_content = 0 WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
cursor.execute(f"update {table_name} set has_content = 0 WHERE 1=1 and (content like '%aabook%' or content like '%疯情%') ")
updated_rows = cursor.rowcount
total += updated_rows
print(f"update {table_name}, affected rows: {updated_rows}")

View File

@ -58,7 +58,7 @@ def fetch_page(url, validator, parser="html.parser", preprocessor=None, max_retr
logging.warning(f"Validation failed on attempt {attempt + 1} for {url}")
except requests.RequestException as e:
logging.info(f"Warn fetching page {url}: {e}. Retrying ...")
logging.warning(f"fetching page ({url}) error: {e}, Retrying ...")
time.sleep(sleep_time) # 休眠指定的时间,然后重试
logging.error(f'Fetching failed after max retries. {url}')