add plate_name to the SIS list scrapers and write all sections to a single sis.csv.

oscarz
2025-06-21 10:17:01 +08:00
parent 9681477ee8
commit 81ff25536c
2 changed files with 13 additions and 7 deletions

View File

@@ -121,16 +121,14 @@ def down_torrents():
# Fetch the actor list
def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_sis = f"{config.global_share_data_dir}/sis_asia_zt.csv", ident='forum_25'):
def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_sis = f"{config.global_share_data_dir}/sis_asia_zt.csv", ident='forum_25', plate_name='亚无转帖'):
total_results = []
cnt = 0
# Back up the existing file
utils.backup_existing_file(target_csv_sis)
while url:
logging.info(f"fetching url {url}")
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="table", identifier=ident, attr_type="id"))
if soup:
list_data, next_url = scraper.parse_sis_list(soup, url, ident)
list_data, next_url = scraper.parse_sis_list(soup, url, ident, plate_name)
if list_data :
total_results.extend(list_data)
else:
@@ -165,21 +163,25 @@ def fetch_sis_all():
sections = [
{
'plate' : 'sis_asia_yc',
'plate_name' : '亚无原创',
'url' : 'https://sis001.com/forum/forum-143-1.html',
'ident' : 'forum_143'
},
{
'plate' : 'sis_asia_zt',
'plate_name' : '亚无转帖',
'url' : 'https://sis001.com/forum/forum-25-1.html',
'ident' : 'forum_25'
},
{
'plate' : 'sis_oumei_yc',
'plate_name' : '欧无原创',
'url' : 'https://sis001.com/forum/forum-229-1.html',
'ident' : 'forum_229'
},
{
'plate' : 'sis_oumei_zt',
'plate_name' : '欧无转帖',
'url' : 'https://sis001.com/forum/forum-77-1.html',
'ident' : 'forum_77'
},
@@ -188,7 +190,10 @@ def fetch_sis_all():
section = item['plate']
url = item['url']
logging.info(f"---------------start fetching {section}, begin url: {url}")
csv_file = f"{config.global_share_data_dir}/{section}.csv"
#csv_file = f"{config.global_share_data_dir}/{section}.csv"
csv_file = f"{config.global_share_data_dir}/sis.csv"
# Back up the existing file
utils.backup_existing_file(csv_file)
fetch_sis_list(url=url, target_csv_sis=csv_file, ident=item['ident'])
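Note that the loop above still calls fetch_sis_list without forwarding the new plate_name, so every section would fall back to the default '亚无转帖'. A minimal sketch of the call with the per-section name forwarded, assuming the section dicts defined earlier in fetch_sis_all and csv_file as set above:

for item in sections:
    # forward the per-section plate name so parse_sis_list can tag each row
    fetch_sis_list(url=item['url'],
                   target_csv_sis=csv_file,
                   ident=item['ident'],
                   plate_name=item['plate_name'])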

View File

@@ -248,7 +248,7 @@ def parse_size_format(size_text: str):
logging.error(f"解析大小格式时出错: {e}")
return 0.0, "未知格式"
def parse_sis_list(soup, curr_url, ident):
def parse_sis_list(soup, curr_url, ident, plate_name):
"""解析符合条件的表格"""
tables = soup.find_all('table', {'id': ident})
if not tables:
@@ -308,6 +308,7 @@ def parse_sis_list(soup, curr_url, ident):
# 添加到结果
results.append({
"plate": plate_name,
"category": category,
"title": title,
"url": url,
@@ -344,7 +345,7 @@ def test_chapter_page(url):
def test_sis_page(url):
soup, status_code = fetch_page(url, partial(generic_validator, tag="table", identifier="forum_25", attr_type="id"))
if soup:
data, next_url = parse_sis_list(soup, url)
data, next_url = parse_sis_list(soup, url, 'forum_25', '亚无转帖')
if data:
print(data)
if next_url :
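For illustration, a sketch of the row shape parse_sis_list now appends and of how such rows could land in the shared sis.csv; the values below and the append_rows helper are placeholders assumed for the example, not taken from the diff:

import csv

# Illustrative row in the shape parse_sis_list builds after this commit;
# only the keys visible in the diff are shown, values are placeholders.
row = {
    "plate": "亚无原创",             # new field added by this commit
    "category": "example category",
    "title": "example title",
    "url": "https://sis001.com/example",
}

# Assumed helper for appending parsed rows to the shared CSV.
def append_rows(path, rows):
    with open(path, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        if f.tell() == 0:            # empty file: write the header first
            writer.writeheader()
        writer.writerows(rows)

append_rows("sis.csv", [row])  # in the project this would be f"{config.global_share_data_dir}/sis.csv"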