"""Load scraped JSON result files into the database via ``sqlite_utils``.

Run as a script, this loads the performer index lists, the movie index
lists and finally the performer detail file found under ``res_dir``.
"""
import json
import time
import csv
import argparse
import logging
from functools import partial

import config
import sqlite_utils as db_tools
import iafd_scraper as scraper
import utils

config.setup_logging()

res_dir = '/root/hostdir/scripts_data/iafd_202503'

# A progress line is logged every time this many rows have been processed.
_PROGRESS_INTERVAL = 10000


def _load_rows(file, insert_row, *, entity, name_key, fail_mark):
    """Insert every row of a JSON file and log per-row and summary results.

    Args:
        file: Path handed to ``utils.read_json``; a ``None`` result is
            treated as an empty list.
        insert_row: Callable taking one row dict and returning a truthy id
            on success or a falsy value on failure.
        entity: Word used in log messages ("person" or "movie").
        name_key: Row key holding the display name; also used as the
            label in the debug message.
        fail_mark: Punctuation placed after "failed" in the warning
            message, kept per caller so the log text stays unchanged.
    """
    rows = utils.read_json(file)
    if rows is None:
        rows = []

    total_rows = len(rows)
    succ = 0
    for loaded_rows, row in enumerate(rows, start=1):
        row_id = insert_row(row)

        # Use .get() so a row lacking a key cannot raise KeyError while we
        # are merely formatting a log message.
        name = row.get(name_key, '')
        href = row.get('href', '')

        if row_id:
            logging.debug('insert one %s, id: %s, %s: %s, url: %s',
                          entity, row_id, name_key, name, href)
            succ += 1
        else:
            logging.warning('insert %s failed%s %s, %s failed.',
                            entity, fail_mark, name, href)

        if loaded_rows % _PROGRESS_INTERVAL == 0:
            logging.info('loading file: %s, total rows: %s, loaded rows: %s, succ rows: %s',
                         file, total_rows, loaded_rows, succ)

    logging.info('load data succ. file: %s, rows: %s, succ rows: %s',
                 file, total_rows, succ)


def load_performer_list(file, **from_fields):
    """Load a performer index list.

    Each row's ``person`` and ``href`` values (empty string when absent)
    are passed to ``db_tools.insert_performer_index`` together with
    ``from_fields``, which is forwarded unchanged as keyword arguments
    (e.g. ``from_astro_list=1``).
    """
    def insert_row(row):
        return db_tools.insert_performer_index(
            name=row.get('person', ''),
            href=row.get('href', ''),
            **from_fields
        )

    _load_rows(file, insert_row, entity='person', name_key='person', fail_mark='.')


def load_movie_list(file, **from_fields):
    """Load a movie index list.

    Each row's ``title`` and ``href`` values (empty string when absent)
    and its ``year`` (converted with ``utils.to_number``) are passed to
    ``db_tools.insert_movie_index`` together with ``from_fields``, which
    is forwarded unchanged as keyword arguments (e.g. ``from_dist_list=1``).

    Raises:
        KeyError: If a row has no ``year`` key.
    """
    def insert_row(row):
        return db_tools.insert_movie_index(
            title=row.get('title', ''),
            href=row.get('href', ''),
            # 'year' stays a required key: how utils.to_number treats a
            # missing value is not visible here, so no default is invented.
            release_year=utils.to_number(row['year']),
            **from_fields
        )

    _load_rows(file, insert_row, entity='movie', name_key='title', fail_mark=':')


def load_performers(file):
    """Load performer detail records.

    Each row is passed as-is to ``db_tools.insert_or_update_performer``.
    """
    _load_rows(file, db_tools.insert_or_update_performer,
               entity='person', name_key='person', fail_mark='.')


if __name__ == "__main__":
    # The loads run sequentially with a 3-second pause between files.
    load_performer_list(f'{res_dir}/astro.json', from_astro_list=1)
    time.sleep(3)
    load_performer_list(f'{res_dir}/birth.json', from_birth_list=1)
    time.sleep(3)
    load_performer_list(f'{res_dir}/ethnic.json', from_ethnic_list=1)
    time.sleep(3)
    load_movie_list(f'{res_dir}/distributors.json', from_dist_list=1)
    time.sleep(3)
    load_movie_list(f'{res_dir}/studios.json', from_stu_list=1)
    time.sleep(3)
    load_performers(f'{res_dir}/performers.json')