Modify files: tag each parsed SIS list row with its plate name and write all sections to a single sis.csv.
This commit is contained in:
@ -121,16 +121,14 @@ def down_torrents():
|
|||||||
|
|
||||||
|
|
||||||
# Fetch the post list of an SIS forum section
|
# Fetch the post list of an SIS forum section
|
||||||
def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_sis = f"{config.global_share_data_dir}/sis_asia_zt.csv", ident='forum_25'):
|
def fetch_sis_list(url = 'https://sis001.com/forum/forum-25-1.html', target_csv_sis = f"{config.global_share_data_dir}/sis_asia_zt.csv", ident='forum_25', plate_name='亚无转帖'):
|
||||||
total_results = []
|
total_results = []
|
||||||
cnt = 0
|
cnt = 0
|
||||||
# 备份已有文件
|
|
||||||
utils.backup_existing_file(target_csv_sis)
|
|
||||||
while url:
|
while url:
|
||||||
logging.info(f"fetching url {url}")
|
logging.info(f"fetching url {url}")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="table", identifier=ident, attr_type="id"))
|
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="table", identifier=ident, attr_type="id"))
|
||||||
if soup:
|
if soup:
|
||||||
list_data, next_url = scraper.parse_sis_list(soup, url, ident)
|
list_data, next_url = scraper.parse_sis_list(soup, url, ident, plate_name)
|
||||||
if list_data :
|
if list_data :
|
||||||
total_results.extend(list_data)
|
total_results.extend(list_data)
|
||||||
else:
|
else:
|
||||||
@ -165,21 +163,25 @@ def fetch_sis_all():
|
|||||||
sections = [
|
sections = [
|
||||||
{
|
{
|
||||||
'plate' : 'sis_asia_yc',
|
'plate' : 'sis_asia_yc',
|
||||||
|
'plate_name' : '亚无原创',
|
||||||
'url' : 'https://sis001.com/forum/forum-143-1.html',
|
'url' : 'https://sis001.com/forum/forum-143-1.html',
|
||||||
'ident' : 'forum_143'
|
'ident' : 'forum_143'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'plate' : 'sis_asia_zt',
|
'plate' : 'sis_asia_zt',
|
||||||
|
'plate_name' : '亚无转帖',
|
||||||
'url' : 'https://sis001.com/forum/forum-25-1.html',
|
'url' : 'https://sis001.com/forum/forum-25-1.html',
|
||||||
'ident' : 'forum_25'
|
'ident' : 'forum_25'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'plate' : 'sis_oumei_yc',
|
'plate' : 'sis_oumei_yc',
|
||||||
|
'plate_name' : '欧无原创',
|
||||||
'url' : 'https://sis001.com/forum/forum-229-1.html',
|
'url' : 'https://sis001.com/forum/forum-229-1.html',
|
||||||
'ident' : 'forum_229'
|
'ident' : 'forum_229'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'plate' : 'sis_oumei_zt',
|
'plate' : 'sis_oumei_zt',
|
||||||
|
'plate_name' : '欧无转帖',
|
||||||
'url' : 'https://sis001.com/forum/forum-77-1.html',
|
'url' : 'https://sis001.com/forum/forum-77-1.html',
|
||||||
'ident' : 'forum_77'
|
'ident' : 'forum_77'
|
||||||
},
|
},
|
||||||
@ -188,7 +190,10 @@ def fetch_sis_all():
|
|||||||
section = item['plate']
|
section = item['plate']
|
||||||
url = item['url']
|
url = item['url']
|
||||||
logging.info(f"---------------start fetching {section}, begin url: {url}")
|
logging.info(f"---------------start fetching {section}, begin url: {url}")
|
||||||
csv_file = f"{config.global_share_data_dir}/{section}.csv"
|
#csv_file = f"{config.global_share_data_dir}/{section}.csv"
|
||||||
|
csv_file = f"{config.global_share_data_dir}/sis.csv"
|
||||||
|
# 备份已有文件
|
||||||
|
utils.backup_existing_file(csv_file)
|
||||||
fetch_sis_list(url=url, target_csv_sis=csv_file, ident=item['ident'])
|
fetch_sis_list(url=url, target_csv_sis=csv_file, ident=item['ident'])
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -248,7 +248,7 @@ def parse_size_format(size_text: str):
|
|||||||
logging.error(f"解析大小格式时出错: {e}")
|
logging.error(f"解析大小格式时出错: {e}")
|
||||||
return 0.0, "未知格式"
|
return 0.0, "未知格式"
|
||||||
|
|
||||||
def parse_sis_list(soup, curr_url, ident):
|
def parse_sis_list(soup, curr_url, ident, plate_name):
|
||||||
"""解析符合条件的表格"""
|
"""解析符合条件的表格"""
|
||||||
tables = soup.find_all('table', {'id': ident})
|
tables = soup.find_all('table', {'id': ident})
|
||||||
if not tables:
|
if not tables:
|
||||||
@ -308,6 +308,7 @@ def parse_sis_list(soup, curr_url, ident):
|
|||||||
|
|
||||||
# 添加到结果
|
# 添加到结果
|
||||||
results.append({
|
results.append({
|
||||||
|
"plate": plate_name,
|
||||||
"category": category,
|
"category": category,
|
||||||
"title": title,
|
"title": title,
|
||||||
"url": url,
|
"url": url,
|
||||||
@ -344,7 +345,7 @@ def test_chapter_page(url):
|
|||||||
def test_sis_page(url):
|
def test_sis_page(url):
|
||||||
soup, status_code = fetch_page(url, partial(generic_validator, tag="table", identifier="forum_25", attr_type="id"))
|
soup, status_code = fetch_page(url, partial(generic_validator, tag="table", identifier="forum_25", attr_type="id"))
|
||||||
if soup:
|
if soup:
|
||||||
data, next_url = parse_sis_list(soup, url)
|
data, next_url = parse_sis_list(soup, url, 'forum_25', '亚无转帖')
|
||||||
if data:
|
if data:
|
||||||
print(data)
|
print(data)
|
||||||
if next_url :
|
if next_url :
|
||||||
|
|||||||
Reference in New Issue
Block a user