Files
stock/scripts/iafd/src/load.py
2025-03-07 19:11:41 +08:00

108 lines
3.7 KiB
Python

import json
import time
import csv
import argparse
import logging
from functools import partial
import config
import sqlite_utils as db_tools
import iafd_scraper as scraper
import utils
config.setup_logging()
res_dir = '/root/hostdir/scripts_data/iafd_202503'
# Performer index list
def load_performer_list(file, **from_fields):
    """Load a performer index JSON file into the performer index table.

    Args:
        file: Path to a JSON file holding a list of dicts with
            'person' and 'href' keys.
        **from_fields: Extra flag columns (e.g. ``from_astro_list=1``)
            recording which source list each row came from; passed
            through to ``db_tools.insert_performer_index``.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        json_data = []
    total_rows = len(json_data)
    loaded_rows = 0
    succ = 0
    for row in json_data:
        # Read once with .get so a row missing 'person'/'href' cannot
        # raise KeyError in the log lines below while the insert call
        # tolerated it. This also avoids nesting same-quote f-string
        # expressions, which is a SyntaxError before Python 3.12.
        person = row.get('person', '')
        href = row.get('href', '')
        row_id = db_tools.insert_performer_index(name=person,
                                                 href=href,
                                                 **from_fields
                                                 )
        if row_id:
            logging.debug(f'insert one person, id: {row_id}, person: {person}, url: {href}')
            succ += 1
        else:
            logging.warning(f'insert person failed. {person}, {href} failed.')
        loaded_rows += 1
        if loaded_rows % 10000 == 0:
            logging.info(f'loading file: {file}, total rows: {total_rows}, loaded rows: {loaded_rows}, succ rows: {succ}')
    logging.info(f'load data succ. file: {file}, rows: {total_rows}, succ rows: {succ}')
# Movie index list
def load_movie_list(file, **from_fields):
    """Load a movie index JSON file into the movie index table.

    Args:
        file: Path to a JSON file holding a list of dicts with
            'title', 'href' and 'year' keys.
        **from_fields: Extra flag columns (e.g. ``from_dist_list=1``)
            passed through to ``db_tools.insert_movie_index``.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        json_data = []
    total_rows = len(json_data)
    loaded_rows = 0
    succ = 0
    for row in json_data:
        # Read once with .get: the original mixed row.get('title') with
        # row['year'] / row['title'] in logs, so a row missing a key
        # raised KeyError inconsistently. Also avoids same-quote nested
        # f-strings (SyntaxError before Python 3.12).
        title = row.get('title', '')
        href = row.get('href', '')
        row_id = db_tools.insert_movie_index(title=title,
                                             href=href,
                                             # NOTE(review): assumes utils.to_number
                                             # tolerates a missing year (None) —
                                             # previously this raised KeyError instead.
                                             release_year=utils.to_number(row.get('year')),
                                             **from_fields
                                             )
        if row_id:
            logging.debug(f'insert one movie, id: {row_id}, title: {title}, url: {href}')
            succ += 1
        else:
            logging.warning(f'insert movie failed: {title}, {href} failed.')
        loaded_rows += 1
        if loaded_rows % 10000 == 0:
            logging.info(f'loading file: {file}, total rows: {total_rows}, loaded rows: {loaded_rows}, succ rows: {succ}')
    # Use total_rows for consistency with load_performer_list.
    logging.info(f'load data succ. file: {file}, rows: {total_rows}, succ rows: {succ}')
# Performer details
def load_performers(file):
    """Load a performer detail JSON file, upserting each row.

    Args:
        file: Path to a JSON file holding a list of performer-detail
            dicts; each full dict is handed to
            ``db_tools.insert_or_update_performer``.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        json_data = []
    total_rows = len(json_data)
    loaded_rows = 0
    succ = 0
    for row in json_data:
        performer_id = db_tools.insert_or_update_performer(row)
        # .get keeps logging safe when a detail row lacks these keys,
        # and avoids nesting same-quote f-string expressions, which is
        # a SyntaxError before Python 3.12.
        person = row.get('person', '')
        href = row.get('href', '')
        if performer_id:
            logging.debug(f'insert one person, id: {performer_id}, person: {person}, url: {href}')
            succ += 1
        else:
            logging.warning(f'insert person failed. {person}, {href} failed.')
        loaded_rows += 1
        if loaded_rows % 10000 == 0:
            logging.info(f'loading file: {file}, total rows: {total_rows}, loaded rows: {loaded_rows}, succ rows: {succ}')
    # Use total_rows for consistency with load_performer_list.
    logging.info(f'load data succ. file: {file}, rows: {total_rows}, succ rows: {succ}')
if __name__ == "__main__":
    # Ordered ingest plan: (loader, file name under res_dir, extra flag columns).
    jobs = [
        (load_performer_list, 'astro.json', {'from_astro_list': 1}),
        (load_performer_list, 'birth.json', {'from_birth_list': 1}),
        (load_performer_list, 'ethnic.json', {'from_ethnic_list': 1}),
        (load_movie_list, 'distributors.json', {'from_dist_list': 1}),
        (load_movie_list, 'studios.json', {'from_stu_list': 1}),
        (load_performers, 'performers.json', {}),
    ]
    for idx, (loader, filename, flags) in enumerate(jobs):
        if idx:
            # Brief pause between files, matching the original pacing.
            time.sleep(3)
        loader(f'{res_dir}/{filename}', **flags)