modify scripts
This commit is contained in:
@ -48,20 +48,24 @@ def check_dirty_chapters():
|
|||||||
for i in range(100):
|
for i in range(100):
|
||||||
table_name = f'{tbl_name_chapters_prefix}_{i}'
|
table_name = f'{tbl_name_chapters_prefix}_{i}'
|
||||||
try:
|
try:
|
||||||
cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
|
#cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
|
||||||
row = cursor.fetchone()
|
#cursor.execute(f"SELECT count(*) FROM {table_name} WHERE 1=1 ")
|
||||||
if row:
|
#row = cursor.fetchone()
|
||||||
dirty_chapters_all += row[0]
|
#if row:
|
||||||
|
# dirty_chapters_all += row[0]
|
||||||
|
|
||||||
cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' and content like '%aabook%' ")
|
#cursor.execute(f"SELECT count(*) FROM {table_name} WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' and content like '%aabook%' ")
|
||||||
|
cursor.execute(f"SELECT count(*) FROM {table_name} WHERE 1=1 and (content like '%aabook%' or content like '%疯情%') ")
|
||||||
row = cursor.fetchone()
|
row = cursor.fetchone()
|
||||||
if row:
|
if row:
|
||||||
dirty_chapters += row[0]
|
curr_rows = row[0]
|
||||||
|
print(f"check {table_name}, dirty pages: {curr_rows}")
|
||||||
|
dirty_chapters += curr_rows
|
||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
print(f"query error: {e}")
|
print(f"query error: {e}")
|
||||||
|
|
||||||
print(f"all: {dirty_chapters_all}, count: {dirty_chapters}")
|
print(f"dirty pages: {dirty_chapters}")
|
||||||
|
|
||||||
|
|
||||||
# 检查脏数据
|
# 检查脏数据
|
||||||
@ -71,7 +75,8 @@ def update_dirty_chapters():
|
|||||||
for i in range(100):
|
for i in range(100):
|
||||||
table_name = f'{tbl_name_chapters_prefix}_{i}'
|
table_name = f'{tbl_name_chapters_prefix}_{i}'
|
||||||
try:
|
try:
|
||||||
cursor.execute(f"update {table_name} set has_content = 0 WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
|
#cursor.execute(f"update {table_name} set has_content = 0 WHERE updated_at >= '2025-03-23 10:20:00' and updated_at <= '2025-03-23 11:20:00' ")
|
||||||
|
cursor.execute(f"update {table_name} set has_content = 0 WHERE 1=1 and (content like '%aabook%' or content like '%疯情%') ")
|
||||||
updated_rows = cursor.rowcount
|
updated_rows = cursor.rowcount
|
||||||
total += updated_rows
|
total += updated_rows
|
||||||
print(f"update {table_name}, affected rows: {updated_rows}")
|
print(f"update {table_name}, affected rows: {updated_rows}")
|
||||||
|
|||||||
@ -58,7 +58,7 @@ def fetch_page(url, validator, parser="html.parser", preprocessor=None, max_retr
|
|||||||
|
|
||||||
logging.warning(f"Validation failed on attempt {attempt + 1} for {url}")
|
logging.warning(f"Validation failed on attempt {attempt + 1} for {url}")
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
logging.info(f"Warn fetching page {url}: {e}. Retrying ...")
|
logging.warning(f"fetching page ({url}) error: {e}, Retrying ...")
|
||||||
time.sleep(sleep_time) # 休眠指定的时间,然后重试
|
time.sleep(sleep_time) # 休眠指定的时间,然后重试
|
||||||
|
|
||||||
logging.error(f'Fetching failed after max retries. {url}')
|
logging.error(f'Fetching failed after max retries. {url}')
|
||||||
|
|||||||
Reference in New Issue
Block a user