modify scripts
This commit is contained in:
@ -160,6 +160,8 @@ def fetch_performers_detail():
|
||||
if len(perfomers_list) < 1:
|
||||
logging.info(f'all performers fetched.')
|
||||
break
|
||||
|
||||
succ_rows = 0
|
||||
for performer in perfomers_list:
|
||||
url = performer['href']
|
||||
person = performer['name']
|
||||
@ -168,8 +170,9 @@ def fetch_performers_detail():
|
||||
|
||||
next_url = url
|
||||
all_movies = []
|
||||
need_insert = True
|
||||
while next_url:
|
||||
logging.info(f"Fetching data for actor ({person}), url {next_url} ...")
|
||||
logging.debug(f"Fetching data for actor ({person}), url {next_url} ...")
|
||||
soup, status_code = scraper.fetch_page(next_url, partial(scraper.generic_validator, tag="span", identifier="actor-section-name", attr_type="class"))
|
||||
if soup:
|
||||
data, next_url = scraper.parse_actor_detail(soup, next_url)
|
||||
@ -179,10 +182,21 @@ def fetch_performers_detail():
|
||||
all_movies.extend(data.get('movies', []))
|
||||
|
||||
elif status_code and status_code == 404:
|
||||
logging.warning(f'fetch page error. httpcode: {status_code}, url: {next_url}, Skiping...')
|
||||
actor_id = db_tools.insert_or_update_actor_404(name=person, href=url, is_full_data=2)
|
||||
logging.warning(f'404 page. id: {actor_id}, name: ({person}), url: {url}, Skiping...')
|
||||
need_insert = False
|
||||
break
|
||||
elif status_code and status_code == 401:
|
||||
actor_id = db_tools.insert_or_update_movie_404(name=person, href=url, is_full_data=3)
|
||||
logging.warning(f'401 page(need login). id: {actor_id}, name: ({person}), url: {url}, Skiping...')
|
||||
need_insert = False
|
||||
break
|
||||
else:
|
||||
logging.warning(f'fetch_page error. person: ({person}), url: {url}')
|
||||
logging.warning(f'fetch_page error. url: {url}')
|
||||
|
||||
# 如果出现了401或者404,已经处理,直接跳过
|
||||
if not need_insert:
|
||||
continue
|
||||
|
||||
# 获取完了个人的所有影片,开始插入数据
|
||||
performer_id = db_tools.insert_or_update_actor({
|
||||
@ -193,11 +207,14 @@ def fetch_performers_detail():
|
||||
'credits':all_movies
|
||||
})
|
||||
if performer_id:
|
||||
logging.info(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
|
||||
logging.debug(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
|
||||
last_perfomer_id = performer_id
|
||||
succ_rows += 1
|
||||
else:
|
||||
logging.warning(f'insert person: ({person}) {url} failed.')
|
||||
time.sleep(0.5)
|
||||
|
||||
logging.info(f'total request: {len(perfomers_list)}, succ: {succ_rows}, last performer id: {last_perfomer_id}')
|
||||
# 调试break
|
||||
if debug:
|
||||
return True
|
||||
|
||||
@ -172,6 +172,34 @@ def insert_or_update_actor(actor):
|
||||
logging.error(f"插入/更新演员 {actor['name']} 失败: {e}")
|
||||
conn.rollback()
|
||||
|
||||
def insert_or_update_actor_404(name, href, is_full_data=1):
    """Insert or update a minimal actor row for a page that could not be fetched.

    Used for abnormal actor URLs (for example a 404 link): only the name,
    the href and a status flag are stored, so the actor is recorded without
    any detail data.

    Args:
        name: Actor name to store.
        href: Actor page URL; this is the conflict key of the upsert.
        is_full_data: Status flag written to the ``is_full_data`` column.
            The meaning of each value is decided by the caller
            (NOTE(review): confirm the value convention against the callers).

    Returns:
        The value returned by ``get_id_by_href('javdb_actors', href)`` after
        the upsert is committed, or ``None`` if any step raised an exception.
    """
    try:
        # Upsert keyed on href: an existing row gets its name, flag and
        # timestamp refreshed instead of producing a duplicate.
        cursor.execute(
            """
            INSERT INTO javdb_actors (name, href, is_full_data, updated_at)
            VALUES (?, ?, ?, datetime('now', 'localtime'))
            ON CONFLICT(href) DO UPDATE SET
                name=excluded.name, is_full_data=excluded.is_full_data, updated_at = datetime('now', 'localtime')
            """,
            (name, href, is_full_data)
        )
        conn.commit()

        # Look the id up by href so the same path works for both the insert
        # and the update case; a missing row simply propagates as None.
        return get_id_by_href('javdb_actors', href)

    except Exception as e:
        # Best-effort helper: undo the partial write and report failure to
        # the caller through the None return value instead of raising.
        conn.rollback()
        logging.error("Error inserting actor (name: %s, href: %s): %s", name, href, e)
        return None
|
||||
|
||||
|
||||
# 删除演员
|
||||
def delete_actor_by_href(href):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user