Modify scripts: follow next_url when paging ethnic performer lists, and log total pages and performers fetched

This commit is contained in:
oscarz
2025-04-01 10:03:24 +08:00
parent 6fb9786601
commit 118501a4ef

View File

@ -103,10 +103,11 @@ def fetch_performers_by_ethnic():
url = row['href'] url = row['href']
ethnic = row['name'] ethnic = row['name']
next_url = url next_url = url
count = 0
pages = 0
while next_url: while next_url:
logging.info(f"Fetching data for {ethnic}, url {url} ...") logging.info(f"Fetching data for {ethnic}, url {next_url} ...")
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="row headshotrow", attr_type="class"), soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="div", identifier="row headshotrow", attr_type="class"),
parser="lxml", preprocessor=scraper.preprocess_html) parser="lxml", preprocessor=scraper.preprocess_html)
if soup: if soup:
list_data, next_url = scraper.parse_page_ethnic(soup, ethnic) list_data, next_url = scraper.parse_page_ethnic(soup, ethnic)
@ -115,21 +116,25 @@ def fetch_performers_by_ethnic():
# 写入演员数据表 # 写入演员数据表
perfomer_id = db_tools.insert_performer_index(name=row['person'], href=row.get('href', '').lower(), from_ethnic_list=1) perfomer_id = db_tools.insert_performer_index(name=row['person'], href=row.get('href', '').lower(), from_ethnic_list=1)
if perfomer_id: if perfomer_id:
count += 1
logging.debug(f'insert performer index to db. performer_id:{perfomer_id}, name: {row['person']}, href:{row['href']}') logging.debug(f'insert performer index to db. performer_id:{perfomer_id}, name: {row['person']}, href:{row['href']}')
else: else:
logging.warning(f'insert performer index failed. name: {row['person']}, href:{row['href']}') logging.warning(f'insert performer index failed. name: {row['person']}, href:{row['href']}')
else: else:
logging.warning(f'fetch astro error. {url} ...') logging.warning(f'fetch astro error. {next_url} ...')
elif status_code and status_code == 404: elif status_code and status_code == 404:
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}, Skiping...') logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}, Skiping...')
break break
else: else:
logging.warning(f'fetch astro error. {url} ...') logging.warning(f'fetch astro error. {next_url} ...')
pages +=1
# 调试添加break # 调试添加break
if debug: if debug:
return True return True
logging.info(f"fetched data for {ethnic} total pages: {pages}, total performers: {count}")
# 获取distributors列表 # 获取distributors列表
def fetch_distributors_list(): def fetch_distributors_list():
url = scraper.distributors_list_url url = scraper.distributors_list_url