modify scripts

This commit is contained in:
oscarz
2025-03-19 08:34:30 +08:00
parent 8791348490
commit 2b1266bbd2
12 changed files with 22 additions and 10754 deletions

View File

@ -18,6 +18,7 @@ force = False
# 获取列表
def fetch_book_list():
url = scraper.list_url_update
updated_books = 0
while True:
logging.info(f'fetching book list. url: {url}')
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="list_main", attr_type="class"))
@ -25,14 +26,14 @@ def fetch_book_list():
# 获取书籍列表
list_data, next_url = scraper.parse_book_list(soup, url=url)
for item in list_data:
#row_id = db_tools.insert_or_update_common(item, db_tools.tbl_name_books)
row_id = db_tools.insert_books_index(item)
row_id, affected_rows = db_tools.insert_books_index(item)
if row_id:
logging.debug(f"insert one book. row_id: {row_id}, name: {item['name']}")
else:
logging.warning(f"insert book error. name: {item['name']}, href: {item['href']}")
updated_books = updated_books + (affected_rows if affected_rows else 0)
if next_url is None:
logging.info(f'get all pages.')
logging.info(f"get all pages. updated books: {updated_books}")
return True
else:
url = next_url

View File

@ -134,14 +134,14 @@ def insert_books_index(data):
if existing_book: # **如果演员已存在**
logging.debug(f"book {data['href']} already exist. id: {existing_book[0]}")
return existing_book[0]
return existing_book[0], 0
# 不存在,或者需要更新
data['is_latest'] = 0
return insert_or_update_common(data, tbl_name_books)
return insert_or_update_common(data, tbl_name_books), 1
except sqlite3.Error as e:
logging.error(f"Error inserting or updating data: {e}")
return None
return None, 0
# 更新详细信息
def update_book_detail(data):
@ -302,6 +302,7 @@ def get_statics():
all_chapters = 0
all_chapters_has_contents = 0
finished_books = 0
# 循环遍历 0 到 99 的数字(range(100) 不包含 100)
for i in range(100):
@ -313,11 +314,25 @@ def get_statics():
cursor.execute(f"SELECT COUNT(*) FROM {table_name} where has_content=1")
all_chapters_has_contents += cursor.fetchone()[0]
# 统计已经下载完的书籍总数
sql = f"""
SELECT COUNT(*)
FROM (
SELECT book_id
FROM {table_name}
GROUP BY book_id
HAVING SUM(CASE WHEN has_content = 1 THEN 1 ELSE 0 END) = COUNT(*)
)
"""
cursor.execute(sql)
finished_books += cursor.fetchone()[0]
except sqlite3.Error as e:
logging.debug(f"Error querying table {table_name}: {e}")
result['all_chapters'] = all_chapters
result['all_chapters_has_contents'] = all_chapters_has_contents
result['finished_books'] = finished_books
return result