modify scripts
This commit is contained in:
@ -14,6 +14,7 @@ config.setup_logging()
|
|||||||
|
|
||||||
debug = False
|
debug = False
|
||||||
force = False
|
force = False
|
||||||
|
skip_local = True
|
||||||
|
|
||||||
# 按星座获取演员列表,无翻页
|
# 按星座获取演员列表,无翻页
|
||||||
def fetch_performers_by_astro():
|
def fetch_performers_by_astro():
|
||||||
@ -242,8 +243,13 @@ def fetch_performers_detail_once(perfomers_list):
|
|||||||
for performer in perfomers_list:
|
for performer in perfomers_list:
|
||||||
url = performer['href']
|
url = performer['href']
|
||||||
person = performer['name']
|
person = performer['name']
|
||||||
|
curr_id = performer['id']
|
||||||
logging.debug(f"Fetching data for performer ({person}), url {url} ...")
|
logging.debug(f"Fetching data for performer ({person}), url {url} ...")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id"))
|
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="headshot", attr_type="id"))
|
||||||
|
# 从本地读取的文件,忽略
|
||||||
|
if skip_local and status_code == 99 :
|
||||||
|
last_performer_id = curr_id
|
||||||
|
continue
|
||||||
if soup:
|
if soup:
|
||||||
data = scraper.parse_page_performer(soup, url)
|
data = scraper.parse_page_performer(soup, url)
|
||||||
if data:
|
if data:
|
||||||
@ -326,8 +332,13 @@ def fetch_movies_detail():
|
|||||||
for movie in movies_list:
|
for movie in movies_list:
|
||||||
url = movie['href']
|
url = movie['href']
|
||||||
title = movie['title']
|
title = movie['title']
|
||||||
logging.debug(f"Fetching data for movie ({title}), url {url} ...")
|
curr_id = movie['id']
|
||||||
|
logging.debug(f"Fetching data for movie: {curr_id}: ({title}), url {url} ...")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="col-xs-12 col-sm-3", attr_type="class"))
|
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="col-xs-12 col-sm-3", attr_type="class"))
|
||||||
|
# 从本地读取的文件,忽略
|
||||||
|
if skip_local and status_code == 99 :
|
||||||
|
last_movie_id = curr_id
|
||||||
|
continue
|
||||||
if soup:
|
if soup:
|
||||||
movie_data = scraper.parse_page_movie(soup, url, title)
|
movie_data = scraper.parse_page_movie(soup, url, title)
|
||||||
if movie_data :
|
if movie_data :
|
||||||
@ -378,7 +389,7 @@ function_map = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 主函数
|
# 主函数
|
||||||
def main(cmd, args_debug, args_force):
|
def main(cmd, args_debug, args_force, args_skip_local):
|
||||||
global debug
|
global debug
|
||||||
debug = args_debug
|
debug = args_debug
|
||||||
if debug:
|
if debug:
|
||||||
@ -388,6 +399,9 @@ def main(cmd, args_debug, args_force):
|
|||||||
global force
|
global force
|
||||||
force = args_force
|
force = args_force
|
||||||
|
|
||||||
|
global skip_local
|
||||||
|
skip_local = args_skip_local
|
||||||
|
|
||||||
# 开启任务
|
# 开启任务
|
||||||
task_id = db_tools.insert_task_log()
|
task_id = db_tools.insert_task_log()
|
||||||
if task_id is None:
|
if task_id is None:
|
||||||
@ -431,6 +445,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
|
parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
|
||||||
parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
|
parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
|
||||||
parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
|
parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
|
||||||
|
parser.add_argument('--skip_local', action='store_true', help='skip if cached html (true for skip)')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
main(args.cmd, args.debug, args.force)
|
main(args.cmd, args.debug, args.force, args.skip_local)
|
||||||
|
|||||||
@ -329,7 +329,7 @@ def query_performer(identifier):
|
|||||||
# 按条件查询 href 列表
|
# 按条件查询 href 列表
|
||||||
def query_performer_hrefs(**filters):
|
def query_performer_hrefs(**filters):
|
||||||
try:
|
try:
|
||||||
sql = "SELECT href, name FROM iafd_performers WHERE 1=1"
|
sql = "SELECT href, name, id FROM iafd_performers WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
@ -374,7 +374,7 @@ def query_performer_hrefs(**filters):
|
|||||||
logging.debug(f"query sql: {sql}")
|
logging.debug(f"query sql: {sql}")
|
||||||
cursor.execute(sql, params)
|
cursor.execute(sql, params)
|
||||||
#return [row[0].lower() for row in cursor.fetchall()] # 返回小写
|
#return [row[0].lower() for row in cursor.fetchall()] # 返回小写
|
||||||
return [{'href': row[0], 'name': row[1]} for row in cursor.fetchall()]
|
return [{'href': row[0], 'name': row[1], 'id':row[2]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
logging.error(f"查询 href 失败: {e}")
|
logging.error(f"查询 href 失败: {e}")
|
||||||
@ -756,7 +756,7 @@ def query_movies(identifier):
|
|||||||
# 按条件查询 href 列表
|
# 按条件查询 href 列表
|
||||||
def query_movie_hrefs(**filters):
|
def query_movie_hrefs(**filters):
|
||||||
try:
|
try:
|
||||||
sql = "SELECT href, title FROM iafd_movies WHERE 1=1"
|
sql = "SELECT href, title, id FROM iafd_movies WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
@ -802,7 +802,7 @@ def query_movie_hrefs(**filters):
|
|||||||
logging.debug(f"query sql: {sql}")
|
logging.debug(f"query sql: {sql}")
|
||||||
cursor.execute(sql, params)
|
cursor.execute(sql, params)
|
||||||
#return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写
|
#return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写
|
||||||
return [{'href': row[0], 'title': row[1]} for row in cursor.fetchall()]
|
return [{'href': row[0], 'title': row[1], 'id':row[2]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
logging.error(f"查询 href 失败: {e}")
|
logging.error(f"查询 href 失败: {e}")
|
||||||
|
|||||||
@ -14,6 +14,7 @@ config.setup_logging()
|
|||||||
|
|
||||||
debug = False
|
debug = False
|
||||||
force = False
|
force = False
|
||||||
|
skip_local = True
|
||||||
|
|
||||||
# 获取演员列表
|
# 获取演员列表
|
||||||
def fetch_actor_list():
|
def fetch_actor_list():
|
||||||
@ -236,8 +237,14 @@ def fetch_movies_detail():
|
|||||||
for movie in movies_list:
|
for movie in movies_list:
|
||||||
url = movie['href']
|
url = movie['href']
|
||||||
title = movie['title']
|
title = movie['title']
|
||||||
|
curr_id = movie['id']
|
||||||
logging.debug(f"Fetching data for movie ({title}), url {url} ...")
|
logging.debug(f"Fetching data for movie ({title}), url {url} ...")
|
||||||
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="video-meta-panel", attr_type="class"))
|
soup, status_code = scraper.fetch_page(url, partial(scraper.generic_validator, tag="div", identifier="video-meta-panel", attr_type="class"))
|
||||||
|
# 从本地读取的文件,忽略
|
||||||
|
if skip_local and status_code == 99 :
|
||||||
|
last_movie_id = curr_id
|
||||||
|
continue
|
||||||
|
# 解析页面,写入数据库
|
||||||
if soup:
|
if soup:
|
||||||
movie_data = scraper.parse_movie_detail(soup, url, title)
|
movie_data = scraper.parse_movie_detail(soup, url, title)
|
||||||
if movie_data :
|
if movie_data :
|
||||||
@ -278,7 +285,7 @@ function_map = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 主函数
|
# 主函数
|
||||||
def main(cmd, args_debug, args_force):
|
def main(cmd, args_debug, args_force, args_skip_local):
|
||||||
global debug
|
global debug
|
||||||
debug = args_debug
|
debug = args_debug
|
||||||
if debug:
|
if debug:
|
||||||
@ -288,13 +295,16 @@ def main(cmd, args_debug, args_force):
|
|||||||
global force
|
global force
|
||||||
force = args_force
|
force = args_force
|
||||||
|
|
||||||
|
global skip_local
|
||||||
|
skip_local = args_skip_local
|
||||||
|
|
||||||
# 开启任务
|
# 开启任务
|
||||||
task_id = db_tools.insert_task_log()
|
task_id = db_tools.insert_task_log()
|
||||||
if task_id is None:
|
if task_id is None:
|
||||||
logging.warning(f'insert task log error.')
|
logging.warning(f'insert task log error.')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
logging.info(f'running task. id: {task_id}, debug: {debug}, force: {force}, cmd: {cmd}')
|
logging.info(f'running task. id: {task_id}, debug: {debug}, force: {force}, skip_local: {skip_local}, cmd: {cmd}')
|
||||||
|
|
||||||
# 执行指定的函数
|
# 执行指定的函数
|
||||||
if cmd:
|
if cmd:
|
||||||
@ -328,6 +338,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
|
parser.add_argument("--cmd", type=str, help=f"Comma-separated list of function shortcuts: {keys_str}")
|
||||||
parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
|
parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
|
||||||
parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
|
parser.add_argument('--force', action='store_true', help='force update (true for rewrite all)')
|
||||||
|
parser.add_argument('--skip_local', action='store_true', help='skip if cached html (true for skip)')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
main(args.cmd, args.debug, args.force)
|
main(args.cmd, args.debug, args.force, args.skip_local)
|
||||||
|
|||||||
@ -595,7 +595,7 @@ def query_movies(identifier):
|
|||||||
# 按条件查询 href 列表
|
# 按条件查询 href 列表
|
||||||
def query_movie_hrefs(**filters):
|
def query_movie_hrefs(**filters):
|
||||||
try:
|
try:
|
||||||
sql = "SELECT href, title FROM javdb_movies WHERE 1=1"
|
sql = "SELECT href, title, id FROM javdb_movies WHERE 1=1"
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
if "id" in filters:
|
if "id" in filters:
|
||||||
@ -643,7 +643,7 @@ def query_movie_hrefs(**filters):
|
|||||||
|
|
||||||
cursor.execute(sql, params)
|
cursor.execute(sql, params)
|
||||||
#return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写
|
#return [row[0].lower() for row in cursor.fetchall()] # 链接使用小写
|
||||||
return [{'href': row[0], 'title': row[1]} for row in cursor.fetchall()]
|
return [{'href': row[0], 'title': row[1], 'id':row[2]} for row in cursor.fetchall()]
|
||||||
|
|
||||||
except sqlite3.Error as e:
|
except sqlite3.Error as e:
|
||||||
logging.error(f"查询 href 失败: {e}")
|
logging.error(f"查询 href 失败: {e}")
|
||||||
|
|||||||
Reference in New Issue
Block a user