modify scripts
This commit is contained in:
@ -103,10 +103,11 @@ def fetch_performers_by_ethnic():
|
||||
url = row['href']
|
||||
ethnic = row['name']
|
||||
next_url = url
|
||||
|
||||
count = 0
|
||||
pages = 0
|
||||
while next_url:
|
||||
logging.info(f"Fetching data for {ethnic}, url {url} ...")
|
||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="row headshotrow", attr_type="class"),
|
||||
logging.info(f"Fetching data for {ethnic}, url {next_url} ...")
|
||||
soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="div", identifier="row headshotrow", attr_type="class"),
|
||||
parser="lxml", preprocessor=scraper.preprocess_html)
|
||||
if soup:
|
||||
list_data, next_url = scraper.parse_page_ethnic(soup, ethnic)
|
||||
@ -115,20 +116,24 @@ def fetch_performers_by_ethnic():
|
||||
# 写入演员数据表
|
||||
perfomer_id = db_tools.insert_performer_index(name=row['person'], href=row.get('href', '').lower(), from_ethnic_list=1)
|
||||
if perfomer_id:
|
||||
count += 1
|
||||
logging.debug(f'insert performer index to db. performer_id:{perfomer_id}, name: {row['person']}, href:{row['href']}')
|
||||
else:
|
||||
logging.warning(f'insert performer index failed. name: {row['person']}, href:{row['href']}')
|
||||
else:
|
||||
logging.warning(f'fetch astro error. {url} ...')
|
||||
logging.warning(f'fetch astro error. {next_url} ...')
|
||||
elif status_code and status_code == 404:
|
||||
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}, Skiping...')
|
||||
break
|
||||
else:
|
||||
logging.warning(f'fetch astro error. {url} ...')
|
||||
logging.warning(f'fetch astro error. {next_url} ...')
|
||||
pages +=1
|
||||
|
||||
# 调试添加break
|
||||
if debug:
|
||||
return True
|
||||
|
||||
logging.info(f"fetched data for {ethnic} total pages: {pages}, total performers: {count}")
|
||||
|
||||
# 获取distributors列表
|
||||
def fetch_distributors_list():
|
||||
|
||||
Reference in New Issue
Block a user