modify scripts
This commit is contained in:
@ -13,11 +13,9 @@ import utils
|
|||||||
config.setup_logging()
|
config.setup_logging()
|
||||||
|
|
||||||
debug = False
|
debug = False
|
||||||
force = False
|
|
||||||
skip_local = False
|
skip_local = False
|
||||||
from_actor = False
|
|
||||||
abnormal_only = False
|
|
||||||
fast_mode = False
|
fast_mode = False
|
||||||
|
update_mode = 0
|
||||||
|
|
||||||
# 获取演员列表
|
# 获取演员列表
|
||||||
def fetch_actor_list():
|
def fetch_actor_list():
|
||||||
@ -90,16 +88,18 @@ def fetch_series_list():
|
|||||||
|
|
||||||
# 更新makers列表中的影片信息
|
# 更新makers列表中的影片信息
|
||||||
def fetch_movies_by_maker():
|
def fetch_movies_by_maker():
|
||||||
if fast_mode:
|
|
||||||
url_list = db_tools.query_maker_hrefs(from_list=1)
|
|
||||||
else:
|
|
||||||
url_list = db_tools.query_maker_hrefs()
|
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
url_list = db_tools.query_maker_hrefs(name='muramura')
|
url_list = db_tools.query_maker_hrefs(name='muramura')
|
||||||
|
else:
|
||||||
|
if fast_mode:
|
||||||
|
url_list = db_tools.query_maker_hrefs(from_list=1)
|
||||||
|
else:
|
||||||
|
url_list = db_tools.query_maker_hrefs()
|
||||||
|
|
||||||
for row in url_list:
|
for row in url_list:
|
||||||
url = row['href']
|
url = row['href']
|
||||||
row_id = row['id']
|
row_id = row['id']
|
||||||
|
uncensored = row['from_list'] if row['from_list'] > 0 else None
|
||||||
# 去掉可下载的标志(如果有)
|
# 去掉可下载的标志(如果有)
|
||||||
next_url = utils.remove_url_query(url)
|
next_url = utils.remove_url_query(url)
|
||||||
while next_url:
|
while next_url:
|
||||||
@ -109,7 +109,7 @@ def fetch_movies_by_maker():
|
|||||||
list_data, next_url = scraper.parse_maker_detail(soup, next_url)
|
list_data, next_url = scraper.parse_maker_detail(soup, next_url)
|
||||||
if list_data:
|
if list_data:
|
||||||
for movie in list_data:
|
for movie in list_data:
|
||||||
tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_makers=1, maker_id=row_id)
|
tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_makers=1, maker_id=row_id, uncensored=uncensored)
|
||||||
if tmp_id:
|
if tmp_id:
|
||||||
logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie['title']}, href: {movie['href']}')
|
logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie['title']}, href: {movie['href']}')
|
||||||
else:
|
else:
|
||||||
@ -127,16 +127,18 @@ def fetch_movies_by_maker():
|
|||||||
|
|
||||||
# 更新series列表中的影片信息
|
# 更新series列表中的影片信息
|
||||||
def fetch_movies_by_series():
|
def fetch_movies_by_series():
|
||||||
if fast_mode:
|
|
||||||
url_list = db_tools.query_series_hrefs(from_list=1)
|
|
||||||
else:
|
|
||||||
url_list = db_tools.query_series_hrefs()
|
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
url_list = db_tools.query_series_hrefs(name='10musume')
|
url_list = db_tools.query_series_hrefs(name='10musume')
|
||||||
|
else:
|
||||||
|
if fast_mode:
|
||||||
|
url_list = db_tools.query_series_hrefs(from_list=1)
|
||||||
|
else:
|
||||||
|
url_list = db_tools.query_series_hrefs()
|
||||||
|
|
||||||
for row in url_list:
|
for row in url_list:
|
||||||
url = row['href']
|
url = row['href']
|
||||||
row_id = row['id']
|
row_id = row['id']
|
||||||
|
uncensored = row['from_list'] if row['from_list'] > 0 else None
|
||||||
# 去掉可下载的标志(如果有)
|
# 去掉可下载的标志(如果有)
|
||||||
next_url = utils.remove_url_query(url)
|
next_url = utils.remove_url_query(url)
|
||||||
while next_url:
|
while next_url:
|
||||||
@ -146,7 +148,7 @@ def fetch_movies_by_series():
|
|||||||
list_data, next_url = scraper.parse_series_detail(soup, next_url)
|
list_data, next_url = scraper.parse_series_detail(soup, next_url)
|
||||||
if list_data:
|
if list_data:
|
||||||
for movie in list_data:
|
for movie in list_data:
|
||||||
tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_series=1, series_id=row_id)
|
tmp_id = db_tools.insert_movie_index(title=movie['title'], href=movie['href'], from_movie_series=1, series_id=row_id, uncensored=uncensored)
|
||||||
if tmp_id:
|
if tmp_id:
|
||||||
logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie['title']}, href: {movie['href']}')
|
logging.debug(f'insert one movie index to db. movie_id: {tmp_id}, title: {movie['title']}, href: {movie['href']}')
|
||||||
else:
|
else:
|
||||||
@ -163,13 +165,14 @@ def fetch_movies_by_series():
|
|||||||
|
|
||||||
# 更新series列表中的影片信息
|
# 更新series列表中的影片信息
|
||||||
def fetch_movies_by_publishers():
|
def fetch_movies_by_publishers():
|
||||||
if fast_mode:
|
|
||||||
url_list = db_tools.query_publishers_hrefs(from_list=1)
|
|
||||||
else:
|
|
||||||
url_list = db_tools.query_publishers_hrefs()
|
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
url_list = db_tools.query_publishers_hrefs(limit=1)
|
url_list = db_tools.query_publishers_hrefs(limit=1)
|
||||||
|
else:
|
||||||
|
if fast_mode:
|
||||||
|
url_list = db_tools.query_publishers_hrefs(from_list=1)
|
||||||
|
else:
|
||||||
|
url_list = db_tools.query_publishers_hrefs()
|
||||||
|
|
||||||
for row in url_list:
|
for row in url_list:
|
||||||
url = row['href']
|
url = row['href']
|
||||||
row_id = row['id']
|
row_id = row['id']
|
||||||
@ -201,30 +204,34 @@ def fetch_movies_by_publishers():
|
|||||||
# 更新演员信息
|
# 更新演员信息
|
||||||
def fetch_performers_detail():
|
def fetch_performers_detail():
|
||||||
limit_count = 5 if debug else 100
|
limit_count = 5 if debug else 100
|
||||||
perfomers_list = []
|
performers_list = []
|
||||||
last_perfomer_id = 0
|
last_performer_id = 0
|
||||||
abnormal_codes = [scraper.http_code_404, scraper.http_code_login]
|
abnormal_codes = [scraper.http_code_404, scraper.http_code_login]
|
||||||
|
|
||||||
|
def get_performers(**kwargs):
|
||||||
|
if fast_mode:
|
||||||
|
kwargs["from_actor_list"] = 1
|
||||||
|
kwargs["order_by"] = 'id asc'
|
||||||
|
return db_tools.query_actors(limit=limit_count, **kwargs)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# 每次从数据库中取一部分,避免一次全量获取
|
if update_mode == 0: # 只遍历新纪录
|
||||||
if force: # 从头逐个遍历
|
performers_list = get_performers(start_id=0, is_full_data=0)
|
||||||
if from_actor:
|
elif update_mode == 1: # 只遍历完整纪录
|
||||||
if abnormal_only:
|
performers_list = get_performers(start_id=last_performer_id, is_full_data=1)
|
||||||
perfomers_list = db_tools.query_actors(start_id=last_perfomer_id, is_full_data_in =abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=1)
|
elif update_mode == 2: # 0+1
|
||||||
else:
|
performers_list = get_performers(start_id=last_performer_id, is_full_data_not_in=abnormal_codes)
|
||||||
perfomers_list = db_tools.query_actors(start_id=last_perfomer_id, is_full_data_not_in=abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=1)
|
elif update_mode == 3: # 其他
|
||||||
else:
|
performers_list = get_performers(start_id=last_performer_id, is_full_data_in =abnormal_codes)
|
||||||
if abnormal_only:
|
else: # 全部
|
||||||
perfomers_list = db_tools.query_actors(start_id=last_perfomer_id, is_full_data_in =abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=0)
|
performers_list = get_performers(start_id=last_performer_id)
|
||||||
else:
|
|
||||||
perfomers_list = db_tools.query_actors(start_id=last_perfomer_id, is_full_data_not_in=abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=0)
|
if len(performers_list) < 1:
|
||||||
else: # 只做更新
|
|
||||||
perfomers_list = db_tools.query_actors(is_full_data=0, limit=limit_count)
|
|
||||||
if len(perfomers_list) < 1:
|
|
||||||
logging.info(f'all performers fetched.')
|
logging.info(f'all performers fetched.')
|
||||||
break
|
break
|
||||||
|
|
||||||
succ_rows = 0
|
succ_rows = 0
|
||||||
for performer in perfomers_list:
|
for performer in performers_list:
|
||||||
url = performer['href']
|
url = performer['href']
|
||||||
person = performer['name']
|
person = performer['name']
|
||||||
pic = ''
|
pic = ''
|
||||||
@ -249,7 +256,7 @@ def fetch_performers_detail():
|
|||||||
need_insert = False
|
need_insert = False
|
||||||
break
|
break
|
||||||
elif status_code and status_code == scraper.http_code_login:
|
elif status_code and status_code == scraper.http_code_login:
|
||||||
actor_id = db_tools.insert_or_update_movie_404(name=person, href=url, is_full_data=scraper.http_code_login)
|
actor_id = db_tools.insert_or_update_actor_404(name=person, href=url, is_full_data=scraper.http_code_login)
|
||||||
logging.warning(f'401 page(need login). id: {actor_id}, name: ({person}), url: {url}, Skiping...')
|
logging.warning(f'401 page(need login). id: {actor_id}, name: ({person}), url: {url}, Skiping...')
|
||||||
need_insert = False
|
need_insert = False
|
||||||
break
|
break
|
||||||
@ -270,13 +277,13 @@ def fetch_performers_detail():
|
|||||||
})
|
})
|
||||||
if performer_id:
|
if performer_id:
|
||||||
logging.debug(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
|
logging.debug(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
|
||||||
last_perfomer_id = performer_id
|
last_performer_id = performer_id
|
||||||
succ_rows += 1
|
succ_rows += 1
|
||||||
else:
|
else:
|
||||||
logging.warning(f'insert person: ({person}) {url} failed.')
|
logging.warning(f'insert person: ({person}) {url} failed.')
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
logging.info(f'total request: {len(perfomers_list)}, succ: {succ_rows}, last performer id: {last_perfomer_id}')
|
logging.info(f'total request: {len(performers_list)}, succ: {succ_rows}, last performer id: {last_performer_id}')
|
||||||
# 调试break
|
# 调试break
|
||||||
if debug:
|
if debug:
|
||||||
return True
|
return True
|
||||||
@ -287,23 +294,29 @@ def fetch_movies_detail():
|
|||||||
movies_list = []
|
movies_list = []
|
||||||
last_movie_id = 0
|
last_movie_id = 0
|
||||||
abnormal_codes = [scraper.http_code_404, scraper.http_code_login]
|
abnormal_codes = [scraper.http_code_404, scraper.http_code_login]
|
||||||
while True:
|
|
||||||
if force: # 从头逐个遍历
|
def get_movies(**kwargs):
|
||||||
if from_actor:
|
if fast_mode:
|
||||||
if abnormal_only:
|
kwargs["uncensored"] = 1
|
||||||
movies_list = db_tools.query_movie_hrefs(start_id=last_movie_id, is_full_data_in =abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=1)
|
kwargs["order_by"] = 'id asc'
|
||||||
else:
|
return db_tools.query_movie_hrefs(limit=limit_count, **kwargs)
|
||||||
movies_list = db_tools.query_movie_hrefs(start_id=last_movie_id, is_full_data_not_in=abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=1)
|
|
||||||
else:
|
while True:
|
||||||
if abnormal_only:
|
if update_mode == 0: # 只遍历新纪录
|
||||||
movies_list = db_tools.query_movie_hrefs(start_id=last_movie_id, is_full_data_in =abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=0)
|
movies_list = get_movies(start_id=0, is_full_data=0)
|
||||||
else:
|
elif update_mode == 1: # 只遍历完整纪录
|
||||||
movies_list = db_tools.query_movie_hrefs(start_id=last_movie_id, is_full_data_not_in=abnormal_codes, order_by='id asc', limit=limit_count, from_actor_list=0)
|
movies_list = get_movies(start_id=last_movie_id, is_full_data=1)
|
||||||
else: # 只做更新
|
elif update_mode == 2: # 0+1
|
||||||
movies_list = db_tools.query_movie_hrefs(is_full_data=0, limit=limit_count)
|
movies_list = get_movies(start_id=last_movie_id, is_full_data_not_in=abnormal_codes)
|
||||||
|
elif update_mode == 3: # 其他
|
||||||
|
movies_list = get_movies(start_id=last_movie_id, is_full_data_in =abnormal_codes)
|
||||||
|
else: # 全部
|
||||||
|
movies_list = get_movies(start_id=last_movie_id)
|
||||||
|
|
||||||
if len(movies_list) < 1:
|
if len(movies_list) < 1:
|
||||||
logging.info(f'all movies fetched.')
|
logging.info(f'all performers fetched.')
|
||||||
break
|
break
|
||||||
|
|
||||||
succ_count = 0
|
succ_count = 0
|
||||||
for movie in movies_list:
|
for movie in movies_list:
|
||||||
url = movie['href']
|
url = movie['href']
|
||||||
@ -399,33 +412,26 @@ def set_env(args):
|
|||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
logger.setLevel(logging.DEBUG)
|
logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
global force
|
|
||||||
force = args.force
|
|
||||||
|
|
||||||
global skip_local
|
global skip_local
|
||||||
skip_local = args.skip_local
|
skip_local = args.skip_local
|
||||||
|
|
||||||
global from_actor
|
|
||||||
from_actor = args.from_actor
|
|
||||||
|
|
||||||
global abnormal_only
|
|
||||||
abnormal_only = args.abnormal_only
|
|
||||||
|
|
||||||
global fast_mode
|
global fast_mode
|
||||||
fast_mode = args.fast_mode
|
fast_mode = args.fast_mode
|
||||||
|
|
||||||
|
global update_mode
|
||||||
|
if args.update:
|
||||||
|
update_mode = args.update
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# 命令行参数处理
|
# 命令行参数处理
|
||||||
keys_str = ",".join(function_map.keys())
|
keys_str = ",".join(function_map.keys())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='fetch javdb data.')
|
parser = argparse.ArgumentParser(description='fetch javdb data.')
|
||||||
parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
|
parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
|
||||||
|
parser.add_argument('--update', type=int, choices=[0, 1, 2, 3, 4], default=0, help='0-只遍历is_full_data=0, 1-只遍历is_full_data=1, 2-遍历is_full_data<=1, 3-只遍历is_full_data>1(异常数据), 4-遍历所有')
|
||||||
|
parser.add_argument('--fast_mode', action='store_true', help='只遍历所有 uncensored 的 makers/series/actors/movies')
|
||||||
|
parser.add_argument('--skip_local', action='store_true', help='如果本地缓存了页面,则跳过数据库操作')
|
||||||
parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
|
parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
|
||||||
parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
|
|
||||||
parser.add_argument('--skip_local', action='store_true', help='skip if cached html (true for skip)')
|
|
||||||
parser.add_argument('--from_actor', action='store_true', help='只遍历来自 actor_list 的 演员或者影片 (在force模式下有效)')
|
|
||||||
parser.add_argument('--abnormal_only', action='store_true', help='只遍历异常URL(404或者需要登陆查看等) 的 演员或影片 (在force模式下有效)')
|
|
||||||
parser.add_argument('--fast_mode', action='store_true', help='只遍历所有 uncensored 的 makers 和 series ')
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
set_env(args)
|
set_env(args)
|
||||||
|
|||||||
@ -64,7 +64,7 @@ def insert_actor_index(name, href, from_actor_list=None, from_movie_list=None):
|
|||||||
logging.error(f"未知错误: {e}")
|
logging.error(f"未知错误: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def insert_movie_index(title, href, from_actor_list=None, from_movie_makers=None, from_movie_series=None, maker_id=None, series_id=None, from_movie_publishers=None, pub_id=None):
|
def insert_movie_index_old(title, href, from_actor_list=None, from_movie_makers=None, from_movie_series=None, maker_id=None, series_id=None, from_movie_publishers=None, pub_id=None):
|
||||||
try:
|
try:
|
||||||
# **先检查数据库中是否已有该电影**
|
# **先检查数据库中是否已有该电影**
|
||||||
cursor.execute("SELECT id, from_actor_list, from_movie_makers, from_movie_series, maker_id, series_id, from_movie_publishers, pub_id FROM javdb_movies WHERE href = ?", (href,))
|
cursor.execute("SELECT id, from_actor_list, from_movie_makers, from_movie_series, maker_id, series_id, from_movie_publishers, pub_id FROM javdb_movies WHERE href = ?", (href,))
|
||||||
@ -114,6 +114,59 @@ def insert_movie_index(title, href, from_actor_list=None, from_movie_makers=None
|
|||||||
logging.error(f"Error inserting/updating movie: {e}")
|
logging.error(f"Error inserting/updating movie: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def insert_movie_index(title, href, **kwargs):
|
||||||
|
try:
|
||||||
|
# 先检查数据库中是否已有该电影
|
||||||
|
cursor.execute("SELECT * FROM javdb_movies WHERE href = ?", (href,))
|
||||||
|
existing_movie = cursor.fetchone()
|
||||||
|
|
||||||
|
# 获取列名
|
||||||
|
column_names = [description[0] for description in cursor.description]
|
||||||
|
|
||||||
|
fields = [
|
||||||
|
'from_actor_list', 'from_movie_makers', 'from_movie_series', 'from_movie_publishers',
|
||||||
|
'maker_id', 'series_id', 'pub_id', 'uncensored'
|
||||||
|
]
|
||||||
|
|
||||||
|
if existing_movie: # 如果电影已存在
|
||||||
|
existing_values = dict(zip(column_names, existing_movie))
|
||||||
|
movie_id = existing_values['id']
|
||||||
|
logging.debug(f"values in db: {existing_values}")
|
||||||
|
|
||||||
|
# 如果没有传入值,就用原来的值
|
||||||
|
for field in fields:
|
||||||
|
kwargs[field] = kwargs.get(field) if kwargs.get(field) is not None else existing_values[field]
|
||||||
|
|
||||||
|
set_clauses = ", ".join([f"{field} = ?" for field in fields])
|
||||||
|
sql = f"""
|
||||||
|
UPDATE javdb_movies
|
||||||
|
SET title = ?, {set_clauses}, updated_at = datetime('now', 'localtime')
|
||||||
|
WHERE href = ?
|
||||||
|
"""
|
||||||
|
values = [title] + [kwargs[field] for field in fields] + [href]
|
||||||
|
logging.debug(f"sql: {sql}, values: {values}")
|
||||||
|
cursor.execute(sql, values)
|
||||||
|
else: # 如果电影不存在,插入
|
||||||
|
columns = ', '.join(['title', 'href'] + fields)
|
||||||
|
placeholders = ', '.join(['?'] * (len(fields) + 2))
|
||||||
|
sql = f"INSERT INTO javdb_movies ({columns}) VALUES ({placeholders})"
|
||||||
|
values = [title, href] + [kwargs.get(field, 0) for field in fields]
|
||||||
|
logging.debug(f"sql: {sql}, values: {values}")
|
||||||
|
cursor.execute(sql, values)
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
movie_id = get_id_by_href('javdb_movies', href)
|
||||||
|
if movie_id:
|
||||||
|
logging.debug(f'Inserted/Updated movie index, id: {movie_id}, title: {title}, href: {href}')
|
||||||
|
|
||||||
|
return movie_id
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
conn.rollback()
|
||||||
|
logging.error(f"Error inserting/updating movie: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# 插入演员和电影的关联数据
|
# 插入演员和电影的关联数据
|
||||||
def insert_actor_movie(performer_id, movie_id, tags=''):
|
def insert_actor_movie(performer_id, movie_id, tags=''):
|
||||||
@ -145,10 +198,11 @@ def insert_or_update_actor(actor):
|
|||||||
ON CONFLICT(href) DO UPDATE SET name=excluded.name, pic=excluded.pic, is_full_data=1, updated_at=datetime('now', 'localtime')
|
ON CONFLICT(href) DO UPDATE SET name=excluded.name, pic=excluded.pic, is_full_data=1, updated_at=datetime('now', 'localtime')
|
||||||
''', (actor['name'], actor['href'], actor['pic']))
|
''', (actor['name'], actor['href'], actor['pic']))
|
||||||
|
|
||||||
cursor.execute('SELECT id FROM javdb_actors WHERE href = ?', (actor['href'],))
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
actor_id = get_id_by_href('javdb_actors', actor['href'])
|
# 查询刚插入的数据
|
||||||
|
cursor.execute('SELECT id, from_actor_list FROM javdb_actors WHERE href = ?', (actor['href'],))
|
||||||
|
actor_id, uncensored = cursor.fetchone()
|
||||||
if actor_id is None:
|
if actor_id is None:
|
||||||
logging.warning(f'insert data error. name: {actor['name']}, href: {actor['href']}')
|
logging.warning(f'insert data error. name: {actor['name']}, href: {actor['href']}')
|
||||||
return None
|
return None
|
||||||
@ -166,10 +220,10 @@ def insert_or_update_actor(actor):
|
|||||||
|
|
||||||
# 插入影片列表
|
# 插入影片列表
|
||||||
for movie in actor.get("credits") or []:
|
for movie in actor.get("credits") or []:
|
||||||
movie_id = get_id_by_href('javdb_movies', movie['href'])
|
# from_actor_list = 1 表示无码影星的,其他不处理
|
||||||
# 影片不存在,先插入
|
if uncensored and uncensored > 0:
|
||||||
if movie_id is None:
|
movie_id = insert_movie_index(movie['title'], movie['href'], from_actor_list=1, uncensored=uncensored)
|
||||||
# TODO: from_actor_list 只标记无码女优的话,这里要修改,暂时不动
|
else:
|
||||||
movie_id = insert_movie_index(movie['title'], movie['href'], from_actor_list=1)
|
movie_id = insert_movie_index(movie['title'], movie['href'], from_actor_list=1)
|
||||||
if movie_id:
|
if movie_id:
|
||||||
tmp_id = insert_actor_movie(actor_id, movie_id)
|
tmp_id = insert_actor_movie(actor_id, movie_id)
|
||||||
@ -227,47 +281,40 @@ def query_actors(**filters):
|
|||||||
sql = "SELECT href, name FROM javdb_actors WHERE 1=1"
|
sql = "SELECT href, name FROM javdb_actors WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
if "id" in filters:
|
conditions = {
|
||||||
sql += " AND id = ?"
|
"id": " AND id = ?",
|
||||||
params.append(filters["id"])
|
"href": " AND href = ?",
|
||||||
if "href" in filters:
|
"name": " AND name LIKE ?",
|
||||||
sql += " AND href = ?"
|
"is_full_data": " AND is_full_data = ?",
|
||||||
params.append(filters["href"])
|
"from_actor_list": " AND from_actor_list = ?",
|
||||||
if "name" in filters:
|
"before_updated_at": " AND updated_at <= ?",
|
||||||
sql += " AND name LIKE ?"
|
"after_updated_at": " AND updated_at >= ?",
|
||||||
params.append(f"%{filters['name']}%")
|
"start_id": " AND id > ?",
|
||||||
if "is_full_data" in filters:
|
}
|
||||||
sql += " AND is_full_data = ?"
|
|
||||||
params.append(filters["is_full_data"])
|
for key, condition in conditions.items():
|
||||||
if "from_actor_list" in filters:
|
if key in filters:
|
||||||
sql += " AND from_actor_list = ?"
|
sql += condition
|
||||||
params.append(filters["from_actor_list"])
|
if key == "name":
|
||||||
if "is_full_data_in" in filters:
|
params.append(f"%{filters[key]}%")
|
||||||
values = filters["is_full_data_in"]
|
else:
|
||||||
if values:
|
params.append(filters[key])
|
||||||
placeholders = ", ".join(["?"] * len(values))
|
|
||||||
sql += f" AND is_full_data IN ({placeholders})"
|
for key in ["is_full_data_in", "is_full_data_not_in"]:
|
||||||
params.extend(values)
|
if key in filters:
|
||||||
if "is_full_data_not_in" in filters:
|
values = filters[key]
|
||||||
values = filters["is_full_data_not_in"]
|
if values:
|
||||||
if values:
|
placeholders = ", ".join(["?"] * len(values))
|
||||||
placeholders = ", ".join(["?"] * len(values))
|
operator = "IN" if key == "is_full_data_in" else "NOT IN"
|
||||||
sql += f" AND is_full_data NOT IN ({placeholders})"
|
sql += f" AND is_full_data {operator} ({placeholders})"
|
||||||
params.extend(values)
|
params.extend(values)
|
||||||
if "before_updated_at" in filters:
|
|
||||||
sql += " AND updated_at <= ?"
|
|
||||||
params.append(filters["before_updated_at"])
|
|
||||||
if "after_updated_at" in filters:
|
|
||||||
sql += " AND updated_at >= ?"
|
|
||||||
params.append(filters["after_updated_at"])
|
|
||||||
if "start_id" in filters:
|
|
||||||
sql += " AND id > ?"
|
|
||||||
params.append(filters["start_id"])
|
|
||||||
if "order_by" in filters:
|
if "order_by" in filters:
|
||||||
sql += " order by ? asc"
|
# 注意:这里 order by 后面直接跟字段名,不能用占位符,否则会被当作字符串处理
|
||||||
params.append(filters["order_by"])
|
sql += f" ORDER BY {filters['order_by']} "
|
||||||
|
|
||||||
if 'limit' in filters:
|
if 'limit' in filters:
|
||||||
sql += " limit ?"
|
sql += " LIMIT ?"
|
||||||
params.append(filters["limit"])
|
params.append(filters["limit"])
|
||||||
|
|
||||||
cursor.execute(sql, params)
|
cursor.execute(sql, params)
|
||||||
@ -353,7 +400,7 @@ def query_maker(identifier):
|
|||||||
# 按条件查询 href 列表
|
# 按条件查询 href 列表
|
||||||
def query_maker_hrefs(**filters):
|
def query_maker_hrefs(**filters):
|
||||||
try:
|
try:
|
||||||
sql = "SELECT href, id FROM javdb_makers WHERE 1=1"
|
sql = "SELECT href, id, from_list FROM javdb_makers WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
@ -368,10 +415,13 @@ def query_maker_hrefs(**filters):
|
|||||||
if "name" in filters:
|
if "name" in filters:
|
||||||
sql += " AND name LIKE ?"
|
sql += " AND name LIKE ?"
|
||||||
params.append(f"%{filters['name']}%")
|
params.append(f"%{filters['name']}%")
|
||||||
|
if 'limit' in filters:
|
||||||
|
sql += " limit ?"
|
||||||
|
params.append(filters["limit"])
|
||||||
|
|
||||||
cursor.execute(sql, params)
|
cursor.execute(sql, params)
|
||||||
#return [row[0] for row in cursor.fetchall()] # 链接使用小写
|
#return [row[0] for row in cursor.fetchall()] # 链接使用小写
|
||||||
return [{'href': row[0], 'id': row[1]} for row in cursor.fetchall()]
|
return [{'href': row[0], 'id': row[1], 'from_list':row[2]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
logging.error(f"查询 href 失败: {e}")
|
logging.error(f"查询 href 失败: {e}")
|
||||||
@ -451,7 +501,7 @@ def query_series(identifier):
|
|||||||
# 按条件查询 href 列表
|
# 按条件查询 href 列表
|
||||||
def query_series_hrefs(**filters):
|
def query_series_hrefs(**filters):
|
||||||
try:
|
try:
|
||||||
sql = "SELECT href, id FROM javdb_series WHERE 1=1"
|
sql = "SELECT href, id, from_list FROM javdb_series WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
@ -466,10 +516,14 @@ def query_series_hrefs(**filters):
|
|||||||
if "name" in filters:
|
if "name" in filters:
|
||||||
sql += " AND name LIKE ?"
|
sql += " AND name LIKE ?"
|
||||||
params.append(f"%{filters['name']}%")
|
params.append(f"%{filters['name']}%")
|
||||||
|
if 'limit' in filters:
|
||||||
|
sql += " limit ?"
|
||||||
|
params.append(filters["limit"])
|
||||||
|
|
||||||
cursor.execute(sql, params)
|
cursor.execute(sql, params)
|
||||||
#return [row[0] for row in cursor.fetchall()] # 链接使用小写
|
#return [row[0] for row in cursor.fetchall()] # 链接使用小写
|
||||||
return [{'href': row[0], 'id': row[1]} for row in cursor.fetchall()]
|
#return [{'href': row[0], 'id': row[1]} for row in cursor.fetchall()]
|
||||||
|
return [{'href': row[0], 'id': row[1], 'from_list':row[2]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
logging.error(f"查询 href 失败: {e}")
|
logging.error(f"查询 href 失败: {e}")
|
||||||
@ -795,7 +849,7 @@ def query_movies(identifier):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# 按条件查询 href 列表
|
# 按条件查询 href 列表
|
||||||
def query_movie_hrefs(**filters):
|
def query_movie_hrefs_old(**filters):
|
||||||
try:
|
try:
|
||||||
sql = "SELECT href, title, id FROM javdb_movies WHERE 1=1"
|
sql = "SELECT href, title, id FROM javdb_movies WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
@ -850,7 +904,58 @@ def query_movie_hrefs(**filters):
|
|||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
logging.error(f"查询 href 失败: {e}")
|
logging.error(f"查询 href 失败: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# 查询
|
||||||
|
def query_movie_hrefs(**filters):
|
||||||
|
try:
|
||||||
|
sql = "SELECT href, name FROM javdb_movies WHERE 1=1"
|
||||||
|
params = []
|
||||||
|
|
||||||
|
conditions = {
|
||||||
|
"id": " AND id = ?",
|
||||||
|
"href": " AND href = ?",
|
||||||
|
"title": " AND title LIKE ?",
|
||||||
|
"is_full_data": " AND is_full_data = ?",
|
||||||
|
"uncensored": " AND uncensored = ?",
|
||||||
|
"from_actor_list": " AND from_actor_list = ?",
|
||||||
|
"before_updated_at": " AND updated_at <= ?",
|
||||||
|
"after_updated_at": " AND updated_at >= ?",
|
||||||
|
"start_id": " AND id > ?",
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, condition in conditions.items():
|
||||||
|
if key in filters:
|
||||||
|
sql += condition
|
||||||
|
if key == "title":
|
||||||
|
params.append(f"%{filters[key]}%")
|
||||||
|
else:
|
||||||
|
params.append(filters[key])
|
||||||
|
|
||||||
|
for key in ["is_full_data_in", "is_full_data_not_in"]:
|
||||||
|
if key in filters:
|
||||||
|
values = filters[key]
|
||||||
|
if values:
|
||||||
|
placeholders = ", ".join(["?"] * len(values))
|
||||||
|
operator = "IN" if key == "is_full_data_in" else "NOT IN"
|
||||||
|
sql += f" AND is_full_data {operator} ({placeholders})"
|
||||||
|
params.extend(values)
|
||||||
|
|
||||||
|
if "order_by" in filters:
|
||||||
|
# 注意:这里 order by 后面直接跟字段名,不能用占位符,否则会被当作字符串处理
|
||||||
|
sql += f" ORDER BY {filters['order_by']} "
|
||||||
|
|
||||||
|
if 'limit' in filters:
|
||||||
|
sql += " LIMIT ?"
|
||||||
|
params.append(filters["limit"])
|
||||||
|
|
||||||
|
cursor.execute(sql, params)
|
||||||
|
#return [row[0].lower() for row in cursor.fetchall()] # 返回小写
|
||||||
|
return [{'href': row[0], 'name': row[1]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
|
except sqlite3.Error as e:
|
||||||
|
logging.error(f"查询 href 失败: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
# 插入一条任务日志
|
# 插入一条任务日志
|
||||||
def insert_task_log():
|
def insert_task_log():
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user