108 lines
3.7 KiB
Python
108 lines
3.7 KiB
Python
|
|
import json
|
|
import time
|
|
import csv
|
|
import argparse
|
|
import logging
|
|
from functools import partial
|
|
import config
|
|
import sqlite_utils as db_tools
|
|
import iafd_scraper as scraper
|
|
import utils
|
|
|
|
config.setup_logging()
|
|
|
|
res_dir = '/root/hostdir/scripts_data/iafd_202503'
|
|
|
|
# Performer index list
|
|
def load_performer_list(file, **from_fields):
    """Load a performer index JSON file into the database.

    Reads ``file`` with ``utils.read_json`` and calls
    ``db_tools.insert_performer_index`` once per row, forwarding the row's
    ``person`` and ``href`` values (empty string when a key is absent)
    together with ``from_fields``.

    Args:
        file: Path of the JSON file; passed as-is to ``utils.read_json``.
        **from_fields: Extra keyword arguments forwarded unchanged to
            ``db_tools.insert_performer_index`` (the script entry point
            passes flags such as ``from_astro_list=1``).

    Returns:
        None. Progress and the final tally are reported through ``logging``.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        # read_json gave us nothing; treat it as zero rows so the summary
        # line below is still emitted.
        json_data = []

    total_rows = len(json_data)
    succ = 0
    for loaded_rows, row in enumerate(json_data, start=1):
        # Read each field once with .get() so the insert call and the log
        # lines agree, and a row lacking a key cannot raise KeyError while
        # merely being logged.
        name = row.get('person', '')
        href = row.get('href', '')

        row_id = db_tools.insert_performer_index(name=name, href=href, **from_fields)
        if row_id:
            logging.debug(f'insert one person, id: {row_id}, person: {name}, url: {href}')
            succ += 1
        else:
            logging.warning(f'insert person failed. {name}, {href} failed.')

        # Periodic progress report for large files.
        if loaded_rows % 10000 == 0:
            logging.info(f'loading file: {file}, total rows: {total_rows}, loaded rows: {loaded_rows}, succ rows: {succ}')

    logging.info(f'load data succ. file: {file}, rows: {total_rows}, succ rows: {succ}')
|
|
|
|
# Movie index list
|
|
def load_movie_list(file, **from_fields):
    """Load a movie index JSON file into the database.

    Reads ``file`` with ``utils.read_json`` and calls
    ``db_tools.insert_movie_index`` once per row with the row's ``title`` and
    ``href`` (empty string when a key is absent), the row's ``year`` converted
    by ``utils.to_number``, and ``from_fields``.

    Args:
        file: Path of the JSON file; passed as-is to ``utils.read_json``.
        **from_fields: Extra keyword arguments forwarded unchanged to
            ``db_tools.insert_movie_index`` (the script entry point passes
            flags such as ``from_dist_list=1``).

    Returns:
        None. Progress and the final tally are reported through ``logging``.

    Raises:
        KeyError: If a row has no ``year`` key.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        # read_json gave us nothing; treat it as zero rows so the summary
        # line below is still emitted.
        json_data = []

    total_rows = len(json_data)
    succ = 0
    for loaded_rows, row in enumerate(json_data, start=1):
        # Read each field once with .get() so the insert call and the log
        # lines agree, and a row lacking a key cannot raise KeyError while
        # merely being logged.
        title = row.get('title', '')
        href = row.get('href', '')

        # NOTE(review): 'year' is indexed directly, unlike title/href, so a
        # row without it aborts the load. Kept as-is because the behaviour of
        # utils.to_number on a missing value is not visible here - confirm.
        row_id = db_tools.insert_movie_index(
            title=title,
            href=href,
            release_year=utils.to_number(row['year']),
            **from_fields
        )
        if row_id:
            logging.debug(f'insert one movie, id: {row_id}, title: {title}, url: {href}')
            succ += 1
        else:
            logging.warning(f'insert movie failed: {title}, {href} failed.')

        # Periodic progress report for large files.
        if loaded_rows % 10000 == 0:
            logging.info(f'loading file: {file}, total rows: {total_rows}, loaded rows: {loaded_rows}, succ rows: {succ}')

    logging.info(f'load data succ. file: {file}, rows: {total_rows}, succ rows: {succ}')
|
|
|
|
|
|
# Performer details
|
|
def load_performers(file):
    """Load a performer detail JSON file into the database.

    Reads ``file`` with ``utils.read_json`` and passes each row, unchanged, to
    ``db_tools.insert_or_update_performer``. A truthy return value is counted
    as a success.

    Args:
        file: Path of the JSON file; passed as-is to ``utils.read_json``.

    Returns:
        None. Progress and the final tally are reported through ``logging``.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        # read_json gave us nothing; treat it as zero rows so the summary
        # line below is still emitted.
        json_data = []

    total_rows = len(json_data)
    succ = 0
    for loaded_rows, row in enumerate(json_data, start=1):
        performer_id = db_tools.insert_or_update_performer(row)

        # Only used for logging; .get() keeps a row that lacks one of these
        # keys from raising KeyError after the insert has already run.
        name = row.get('person', '')
        href = row.get('href', '')

        if performer_id:
            logging.debug(f'insert one person, id: {performer_id}, person: {name}, url: {href}')
            succ += 1
        else:
            logging.warning(f'insert person failed. {name}, {href} failed.')

        # Periodic progress report for large files.
        if loaded_rows % 10000 == 0:
            logging.info(f'loading file: {file}, total rows: {total_rows}, loaded rows: {loaded_rows}, succ rows: {succ}')

    logging.info(f'load data succ. file: {file}, rows: {total_rows}, succ rows: {succ}')
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the index files first, then the detail file,
    # pausing three seconds after every index load.

    # (path, source flag) pairs for the performer index files.
    performer_index_jobs = (
        (f'{res_dir}/astro.json', {'from_astro_list': 1}),
        (f'{res_dir}/birth.json', {'from_birth_list': 1}),
        (f'{res_dir}/ethnic.json', {'from_ethnic_list': 1}),
    )
    for index_path, source_flag in performer_index_jobs:
        load_performer_list(index_path, **source_flag)
        time.sleep(3)

    # (path, source flag) pairs for the movie index files.
    movie_index_jobs = (
        (f'{res_dir}/distributors.json', {'from_dist_list': 1}),
        (f'{res_dir}/studios.json', {'from_stu_list': 1}),
    )
    for index_path, source_flag in movie_index_jobs:
        load_movie_list(index_path, **source_flag)
        time.sleep(3)

    # Performer detail records are loaded last, with no trailing pause.
    load_performers(f'{res_dir}/performers.json')
|
|
|