diff --git a/u9a9/src/fetch.py b/u9a9/src/fetch.py index fc21901..cb7507f 100644 --- a/u9a9/src/fetch.py +++ b/u9a9/src/fetch.py @@ -121,16 +121,14 @@ def down_torrents(): # 获取演员列表 -def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_sis = f"{config.global_share_data_dir}/sis_asia_zt.csv", ident='forum_25'): +def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_sis = f"{config.global_share_data_dir}/sis_asia_zt.csv", ident='forum_25', plate_name='亚无转帖'): total_results = [] cnt = 0 - # 备份已有文件 - utils.backup_existing_file(target_csv_sis) while url: logging.info(f"fetching url {url}") soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="table", identifier=ident, attr_type="id")) if soup: - list_data, next_url = scraper.parse_sis_list(soup, url, ident) + list_data, next_url = scraper.parse_sis_list(soup, url, ident, plate_name) if list_data : total_results.extend(list_data) else: @@ -165,21 +163,25 @@ def fetch_sis_all(): sections = [ { 'plate' : 'sis_asia_yc', + 'plate_name' : '亚无原创', 'url' : 'https://sis001.com/forum/forum-143-1.html', 'ident' : 'forum_143' }, { 'plate' : 'sis_asia_zt', + 'plate_name' : '亚无转帖', 'url' : 'https://sis001.com/forum/forum-25-1.html', 'ident' : 'forum_25' }, { 'plate' : 'sis_oumei_yc', + 'plate_name' : '欧无原创', 'url' : 'https://sis001.com/forum/forum-229-1.html', 'ident' : 'forum_229' }, { 'plate' : 'sis_oumei_zt', + 'plate_name' : '欧无转帖', 'url' : 'https://sis001.com/forum/forum-77-1.html', 'ident' : 'forum_77' }, @@ -188,7 +190,10 @@ def fetch_sis_all(): section = item['plate'] url = item['url'] logging.info(f"---------------start fetching {section}, begin url: {url}") - csv_file = f"{config.global_share_data_dir}/{section}.csv" + #csv_file = f"{config.global_share_data_dir}/{section}.csv" + csv_file = f"{config.global_share_data_dir}/sis.csv" + # 备份已有文件 + utils.backup_existing_file(csv_file) fetch_sis_list(url=url, target_csv_sis=csv_file, ident=item['ident']) diff --git a/u9a9/src/scraper.py b/u9a9/src/scraper.py index 7c50599..6ee223f 100644 --- a/u9a9/src/scraper.py +++ b/u9a9/src/scraper.py @@ -248,7 +248,7 @@ def parse_size_format(size_text: str): logging.error(f"解析大小格式时出错: {e}") return 0.0, "未知格式" -def parse_sis_list(soup, curr_url, ident): +def parse_sis_list(soup, curr_url, ident, plate_name): """解析符合条件的表格""" tables = soup.find_all('table', {'id': ident}) if not tables: @@ -308,6 +308,7 @@ def parse_sis_list(soup, curr_url, ident): # 添加到结果 results.append({ + "plate": plate_name, "category": category, "title": title, "url": url, @@ -344,7 +345,7 @@ def test_chapter_page(url): def test_sis_page(url): soup, status_code = fetch_page(url, partial(generic_validator, tag="table", identifier="forum_25", attr_type="id")) if soup: - data, next_url = parse_sis_list(soup, url) + data, next_url = parse_sis_list(soup, url, 'forum_25', '亚无转帖') if data: print(data) if next_url :