modify some scripts.
This commit is contained in:
107
scripts/iafd/src/load.py
Normal file
107
scripts/iafd/src/load.py
Normal file
@ -0,0 +1,107 @@
|
||||
|
||||
import json
|
||||
import time
|
||||
import csv
|
||||
import argparse
|
||||
import logging
|
||||
from functools import partial
|
||||
import config
|
||||
import sqlite_utils as db_tools
|
||||
import iafd_scraper as scraper
|
||||
import utils
|
||||
|
||||
# Set up logging through the project's config module; this runs at import
# time, before any of the loader functions below can be called.
config.setup_logging()
|
||||
|
||||
# Directory that the __main__ block reads its input JSON files from.
res_dir = '/root/hostdir/scripts_data/iafd_202503'
|
||||
|
||||
# Performer list
|
||||
def load_performer_list(file, **from_fields):
    """Load a performer index JSON file into the database.

    The file is read with ``utils.read_json``; a ``None`` result is treated
    as an empty list. Each row is passed to
    ``db_tools.insert_performer_index`` and the outcome is logged. A progress
    line is logged every 10000 rows and a summary line at the end.

    Args:
        file: Path of the JSON file, handed to ``utils.read_json``.
        **from_fields: Extra keyword arguments forwarded unchanged to
            ``db_tools.insert_performer_index`` (the ``__main__`` block
            passes flags such as ``from_astro_list=1``).

    Returns:
        None.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        json_data = []

    total_rows = len(json_data)
    succ = 0
    for loaded_rows, row in enumerate(json_data, start=1):
        # Read each field once with a default, so a row that lacks a key
        # cannot raise KeyError while the log message is being built.
        person = row.get('person', '')
        href = row.get('href', '')

        row_id = db_tools.insert_performer_index(name=person, href=href, **from_fields)
        if row_id:
            logging.debug('insert one person, id: %s, person: %s, url: %s', row_id, person, href)
            succ += 1
        else:
            logging.warning('insert person failed. %s, %s failed.', person, href)

        if loaded_rows % 10000 == 0:
            logging.info(
                'loading file: %s, total rows: %s, loaded rows: %s, succ rows: %s',
                file, total_rows, loaded_rows, succ,
            )

    logging.info('load data succ. file: %s, rows: %s, succ rows: %s', file, total_rows, succ)
|
||||
|
||||
# Movie list
|
||||
def load_movie_list(file, **from_fields):
    """Load a movie index JSON file into the database.

    The file is read with ``utils.read_json``; a ``None`` result is treated
    as an empty list. Each row is passed to ``db_tools.insert_movie_index``
    and the outcome is logged. A progress line is logged every 10000 rows
    and a summary line at the end.

    Args:
        file: Path of the JSON file, handed to ``utils.read_json``.
        **from_fields: Extra keyword arguments forwarded unchanged to
            ``db_tools.insert_movie_index`` (the ``__main__`` block passes
            flags such as ``from_dist_list=1``).

    Returns:
        None.

    Raises:
        KeyError: If a row has no ``'year'`` key.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        json_data = []

    total_rows = len(json_data)
    succ = 0
    for loaded_rows, row in enumerate(json_data, start=1):
        # Read each field once with a default, so a row that lacks a key
        # cannot raise KeyError while the log message is being built.
        title = row.get('title', '')
        href = row.get('href', '')

        # NOTE(review): 'year' is still read with [] and raises KeyError when
        # absent, unlike title/href. Confirm how utils.to_number treats a
        # missing value before relaxing this.
        row_id = db_tools.insert_movie_index(
            title=title,
            href=href,
            release_year=utils.to_number(row['year']),
            **from_fields
        )
        if row_id:
            logging.debug('insert one movie, id: %s, title: %s, url: %s', row_id, title, href)
            succ += 1
        else:
            logging.warning('insert movie failed: %s, %s failed.', title, href)

        if loaded_rows % 10000 == 0:
            logging.info(
                'loading file: %s, total rows: %s, loaded rows: %s, succ rows: %s',
                file, total_rows, loaded_rows, succ,
            )

    logging.info('load data succ. file: %s, rows: %s, succ rows: %s', file, total_rows, succ)
|
||||
|
||||
|
||||
# Performer details
|
||||
def load_performers(file):
    """Load a performer detail JSON file into the database.

    The file is read with ``utils.read_json``; a ``None`` result is treated
    as an empty list. Each row is passed whole to
    ``db_tools.insert_or_update_performer`` and the outcome is logged. A
    progress line is logged every 10000 rows and a summary line at the end.

    Args:
        file: Path of the JSON file, handed to ``utils.read_json``.

    Returns:
        None.
    """
    json_data = utils.read_json(file)
    if json_data is None:
        json_data = []

    total_rows = len(json_data)
    succ = 0
    for loaded_rows, row in enumerate(json_data, start=1):
        performer_id = db_tools.insert_or_update_performer(row)

        # Use .get for the log fields so a row that lacks 'person' or 'href'
        # cannot raise KeyError while the outcome is being reported.
        person = row.get('person', '')
        href = row.get('href', '')
        if performer_id:
            logging.debug('insert one person, id: %s, person: %s, url: %s', performer_id, person, href)
            succ += 1
        else:
            logging.warning('insert person failed. %s, %s failed.', person, href)

        if loaded_rows % 10000 == 0:
            logging.info(
                'loading file: %s, total rows: %s, loaded rows: %s, succ rows: %s',
                file, total_rows, loaded_rows, succ,
            )

    logging.info('load data succ. file: %s, rows: %s, succ rows: %s', file, total_rows, succ)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Ordered load plan: performer index lists first, then movie index
    # lists, then the performer detail file. Each entry is
    # (loader, input path, keyword flags forwarded to the loader).
    load_plan = (
        (load_performer_list, f'{res_dir}/astro.json', {'from_astro_list': 1}),
        (load_performer_list, f'{res_dir}/birth.json', {'from_birth_list': 1}),
        (load_performer_list, f'{res_dir}/ethnic.json', {'from_ethnic_list': 1}),
        (load_movie_list, f'{res_dir}/distributors.json', {'from_dist_list': 1}),
        (load_movie_list, f'{res_dir}/studios.json', {'from_stu_list': 1}),
        (load_performers, f'{res_dir}/performers.json', {}),
    )

    final_step = len(load_plan) - 1
    for step, (loader, path, flags) in enumerate(load_plan):
        loader(path, **flags)
        # Pause 3 seconds between consecutive loads; nothing follows the
        # last load, so no pause is needed there.
        if step != final_step:
            time.sleep(3)
|
||||
|
||||
Reference in New Issue
Block a user