modify scripts
This commit is contained in:
@ -103,10 +103,11 @@ def fetch_performers_by_ethnic():
|
|||||||
url = row['href']
|
url = row['href']
|
||||||
ethnic = row['name']
|
ethnic = row['name']
|
||||||
next_url = url
|
next_url = url
|
||||||
|
count = 0
|
||||||
|
pages = 0
|
||||||
while next_url:
|
while next_url:
|
||||||
logging.info(f"Fetching data for {ethnic}, url {url} ...")
|
logging.info(f"Fetching data for {ethnic}, url {next_url} ...")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="row headshotrow", attr_type="class"),
|
soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="div", identifier="row headshotrow", attr_type="class"),
|
||||||
parser="lxml", preprocessor=scraper.preprocess_html)
|
parser="lxml", preprocessor=scraper.preprocess_html)
|
||||||
if soup:
|
if soup:
|
||||||
list_data, next_url = scraper.parse_page_ethnic(soup, ethnic)
|
list_data, next_url = scraper.parse_page_ethnic(soup, ethnic)
|
||||||
@ -115,20 +116,24 @@ def fetch_performers_by_ethnic():
|
|||||||
# 写入演员数据表
|
# 写入演员数据表
|
||||||
perfomer_id = db_tools.insert_performer_index(name=row['person'], href=row.get('href', '').lower(), from_ethnic_list=1)
|
perfomer_id = db_tools.insert_performer_index(name=row['person'], href=row.get('href', '').lower(), from_ethnic_list=1)
|
||||||
if perfomer_id:
|
if perfomer_id:
|
||||||
|
count += 1
|
||||||
logging.debug(f'insert performer index to db. performer_id:{perfomer_id}, name: {row['person']}, href:{row['href']}')
|
logging.debug(f'insert performer index to db. performer_id:{perfomer_id}, name: {row['person']}, href:{row['href']}')
|
||||||
else:
|
else:
|
||||||
logging.warning(f'insert performer index failed. name: {row['person']}, href:{row['href']}')
|
logging.warning(f'insert performer index failed. name: {row['person']}, href:{row['href']}')
|
||||||
else:
|
else:
|
||||||
logging.warning(f'fetch astro error. {url} ...')
|
logging.warning(f'fetch astro error. {next_url} ...')
|
||||||
elif status_code and status_code == 404:
|
elif status_code and status_code == 404:
|
||||||
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}, Skiping...')
|
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}, Skiping...')
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
logging.warning(f'fetch astro error. {url} ...')
|
logging.warning(f'fetch astro error. {next_url} ...')
|
||||||
|
pages +=1
|
||||||
|
|
||||||
# 调试添加break
|
# 调试添加break
|
||||||
if debug:
|
if debug:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
logging.info(f"fetched data for {ethnic} total pages: {pages}, total performers: {count}")
|
||||||
|
|
||||||
# 获取distributors列表
|
# 获取distributors列表
|
||||||
def fetch_distributors_list():
|
def fetch_distributors_list():
|
||||||
|
|||||||
Reference in New Issue
Block a user