Review notes (patch tools ignore text before the first "diff --git" header):
- Line breaks and indentation were reconstructed from a whitespace-collapsed copy;
  apply with `git apply --ignore-whitespace` so context lines still match.
- Only added ('+') lines were edited. Context and removed lines are unchanged.
- commit.sh was revised in review, so the blob id on its "index" line is stale.

diff --git a/commit.sh b/commit.sh
new file mode 100755
index 0000000..6a5de8f
--- /dev/null
+++ b/commit.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# Stage the changes under the current directory, commit them with the given
+# message and push the checked-out branch to "origin".
+#
+# Usage: ./commit.sh ["commit message"]
+# Run once beforehand to make the script executable: chmod +x commit.sh
+
+# Abort early when the current directory is not inside a Git work tree.
+if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+    echo "❌ 当前目录不是 Git 仓库,请先执行 git init" >&2
+    exit 1
+fi
+
+# The commit message is taken from the first argument.
+commit_msg="$1"
+
+# Without an argument, prompt for the message; an empty answer is an error.
+if [ -z "$commit_msg" ]; then
+    # -r keeps backslashes in the typed message literal.
+    read -r -p "请输入 commit message: " commit_msg
+    if [ -z "$commit_msg" ]; then
+        echo "❌ 提交信息不能为空!" >&2
+        exit 1
+    fi
+fi
+
+# Stage every change under the current directory.
+if ! git add .; then
+    echo "❌ git add 失败!" >&2
+    exit 1
+fi
+
+# Commit the staged changes.
+if ! git commit -m "$commit_msg"; then
+    echo "❌ git commit 失败!" >&2
+    exit 1
+fi
+
+# Push the checked-out branch instead of a hard-coded "master"; a detached
+# HEAD has no branch name, so fall back to "master" as before.
+branch=$(git symbolic-ref --quiet --short HEAD || echo master)
+if ! git push -u origin "$branch"; then
+    echo "❌ git push 失败!请检查远程仓库设置。" >&2
+    exit 1
+fi
+
+echo "✅ 代码提交成功!"
diff --git a/scripts/iafd/src/fetch.py b/scripts/iafd/src/fetch.py
index c20894b..227af58 100644
--- a/scripts/iafd/src/fetch.py
+++ b/scripts/iafd/src/fetch.py
@@ -233,6 +233,7 @@ def fetch_movies_by_stu():
 
 # 更新演员信息,单次循环
 def fetch_performers_detail_once(perfomers_list):
+    last_performer_id = 0
     for performer in perfomers_list:
         url = performer['href']
         person = performer['name']
@@ -247,7 +248,8 @@ def fetch_performers_detail_once(perfomers_list):
                     **data
                 })
                 if performer_id:
-                    logging.info(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
+                    logging.debug(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
+                    last_performer_id = performer_id
                 else:
                     logging.warning(f'insert person: ({person}) {url} failed.')
 
@@ -264,10 +266,11 @@ def fetch_performers_detail_once(perfomers_list):
         else:
             logging.warning(f'fetch_page error. person: ({person}), url: {url}')
         time.sleep(1)
+    return last_performer_id
 
 # 更新演员信息
 def fetch_performers_detail():
-    limit_count = 5 if debug else 1000
+    limit_count = 5 if debug else 100
     perfomers_list = []
 
     # 获取新演员的列表
@@ -276,7 +279,8 @@ def fetch_performers_detail():
         if len(perfomers_list) < 1:
             logging.info(f'all new performers fetched. ')
             break
-        fetch_performers_detail_once(perfomers_list)
+        last_performer_id = fetch_performers_detail_once(perfomers_list)
+        logging.info(f'insert {len(perfomers_list)} person. last performer id: {last_performer_id}')
         if debug:
             break
 
@@ -286,19 +290,21 @@ def fetch_performers_detail():
         if len(perfomers_list) < 1:
             logging.info(f'all existed performers updated. ')
             break
-        fetch_performers_detail_once(perfomers_list)
+        last_performer_id = fetch_performers_detail_once(perfomers_list)
+        logging.info(f'insert {len(perfomers_list)} person. last performer id: {last_performer_id}')
         if debug:
             break
 
 # 更新影片信息
 def fetch_movies_detail():
-    limit_count = 10 if debug else 1000
+    limit_count = 10 if debug else 100
     movies_list = []
     while True:
         movies_list = db_tools.query_movie_hrefs(is_full_data=0, limit=limit_count)
         if len(movies_list) < 1:
             logging.info(f'all movies fetched.')
             break
+        last_movie_id = 0
         for movie in movies_list:
             url = movie['href']
             title = movie['title']
@@ -314,7 +320,8 @@ def fetch_movies_detail():
                     movie_data['StudioHref'] = utils.dist_stu_href_rewrite(movie_data['StudioHref'].lower())
                 movie_id = db_tools.insert_or_update_movie(movie_data)
                 if movie_id:
-                    logging.info(f'insert one movie, id: {movie_id}, title: ({title}) url: {url}')
+                    logging.debug(f'insert one movie, id: {movie_id}, title: ({title}) url: {url}')
+                    last_movie_id = movie_id
                 else:
                     logging.warning(f'insert movie {url} failed.')
 
@@ -327,6 +334,7 @@ def fetch_movies_detail():
             else:
                 logging.warning(f'fetch_page error. url: {url}')
             time.sleep(1)
+        logging.info(f'insert {len(movies_list)} movies. last movie id: {last_movie_id}')
         # 调试增加break
         if debug:
             return True
diff --git a/stockapp/reports_em/fetch.py b/stockapp/reports_em/fetch.py
index 68d9368..00f1572 100644
--- a/stockapp/reports_em/fetch.py
+++ b/stockapp/reports_em/fetch.py
@@ -29,12 +29,12 @@ map_pdf_page = {
     utils.tbl_industry : "https://data.eastmoney.com/report/zw_industry.jshtml?infocode={}"
 }
 
-map_pdf_path = {
-    utils.tbl_stock : f'{pdf_base_dir}/stock',
-    utils.tbl_new_stock : f'{pdf_base_dir}/newstock',
-    utils.tbl_strategy : f'{pdf_base_dir}/strategy',
-    utils.tbl_macresearch : f'{pdf_base_dir}/macresearch',
-    utils.tbl_industry : f'{pdf_base_dir}/industry'
+map_tbl_name = {
+    utils.tbl_stock : '个股研报',
+    utils.tbl_new_stock : '新股研报',
+    utils.tbl_strategy : '策略报告',
+    utils.tbl_macresearch : '宏观研究',
+    utils.tbl_industry : '行业研报'
 }
 
 current_date = datetime.now()
@@ -76,7 +76,7 @@ def fetch_reports_list_general(fetch_func, table_name, s_date, e_date, data_dir_
 
 
 # 股票所用的url
-def parse_func_stock(row, tbl_name):
+def parse_func_general(row, tbl_name):
     info_code = row['infoCode']
     title = row['title'].replace("/", "_").replace("\\", "_")
     org_sname = row['orgSName']
@@ -84,34 +84,28 @@ def parse_func_stock(row, tbl_name):
     industry_name = row['industryName']
     publish_date = row['publishDate'].split(" ")[0]
 
-    file_name = f"{publish_date}_{org_sname}_{stock_name}_{title}.pdf"
+    # The column defaults chosen at table creation are slightly off, so
+    # replace empty values with the literal 'None' before building the name.
+    if stock_name == '' or stock_name=="''":
+        stock_name = 'None'
+    if industry_name == '':
+        industry_name = 'None'
+    if org_sname == '':
+        org_sname = 'None'
+    report_type = map_tbl_name.get(tbl_name, 'None')
+
+    file_name = f"{publish_date}_{report_type}_{org_sname}_{industry_name}_{stock_name}_{title}.pdf"
     url = map_pdf_page.get(tbl_name, None)
     if url is None:
         logging.warning(f'wrong table name: {tbl_name}')
-        return None, None, None
+        return None, None
 
     url = url.format(info_code)
-    os.makedirs(map_pdf_path[tbl_name], exist_ok=True)
-    return url, os.path.join(map_pdf_path[tbl_name], file_name), None
+    # Build the target directory: <pdf_base_dir>/<year>/<report type>.
+    dir_year = publish_date[:4] if len(publish_date)>4 else ''
+    dir_path = f'{pdf_base_dir}/{dir_year}/{report_type}'
+    os.makedirs(dir_path, exist_ok=True)
+    return url, os.path.join(dir_path, file_name)
 
-# 其它所用的url
-def parse_func_other(row, tbl_name):
-    info_code = row['infoCode']
-    title = row['title'].replace("/", "_").replace("\\", "_")
-    org_sname = row['orgSName']
-    industry_name = row['industryName']
-    publish_date = row['publishDate'].split(" ")[0]
-
-    file_name = f"{publish_date}_{org_sname}_{industry_name}_{title}.pdf"
-    old_file_name = f"{publish_date}_{industry_name}_{org_sname}_{title}.pdf"
-    url = map_pdf_page.get(tbl_name, None)
-    if url is None:
-        logging.warning(f'wrong table name: {tbl_name}')
-        return None, None, None
-
-    url = url.format(info_code)
-    os.makedirs(map_pdf_path[tbl_name], exist_ok=True)
-    return url, os.path.join(map_pdf_path[tbl_name], file_name), os.path.join(map_pdf_path[tbl_name], old_file_name)
 
 # 通用下载函数
 def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_date, e_date=end_date, limit=None):
@@ -126,7 +120,7 @@ def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_d
         rows = []
 
     for row in rows:
-        url, file_path, old_file_path = parse_func(row, tbl_name)
+        url, file_path = parse_func(row, tbl_name)
         if url is None or file_path is None:
             logging.warning(f'wrong url or file_path. tbl_name: {tbl_name}')
             continue
@@ -134,11 +128,6 @@ def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_d
         if file_path and os.path.isfile(file_path):
             logging.info(f'{file_path} already exists. skipping...')
             continue
-        # 旧方式命名的,rename
-        if old_file_path and os.path.isfile(old_file_path):
-            shutil.move(old_file_path, file_path)
-            logging.info(f'rename existed file to {file_path}')
-            continue
         # 获取pdf链接地址
         if url:
             pdf_url = em.fetch_pdf_link(url)
@@ -175,19 +164,19 @@ def fetch_reports_list_strategy(s_date=start_date, e_date=end_date):
 
 # 下载股票pdf
 def download_pdf_stock(s_date=start_date, e_date=end_date):
-    download_pdf_stock_general(parse_func_stock, utils.tbl_stock, ' AND attachPages>=30', s_date, e_date, limit=2 if debug else None)
+    download_pdf_stock_general(parse_func_general, utils.tbl_stock, ' ', s_date, e_date, limit=2 if debug else None)
 
 def download_pdf_newstock(s_date=start_date, e_date=end_date):
-    download_pdf_stock_general(parse_func_stock, utils.tbl_new_stock, ' AND attachPages>=30', s_date, e_date, limit=2 if debug else None)
+    download_pdf_stock_general(parse_func_general, utils.tbl_new_stock, ' ', s_date, e_date, limit=2 if debug else None)
 
 def download_pdf_industry(s_date=start_date, e_date=end_date):
-    download_pdf_stock_general(parse_func_other, utils.tbl_industry, ' AND attachPages>=30', s_date, e_date, limit=2 if debug else None)
+    download_pdf_stock_general(parse_func_general, utils.tbl_industry, ' ', s_date, e_date, limit=2 if debug else None)
 
 def download_pdf_macresearch(s_date=start_date, e_date=end_date):
-    download_pdf_stock_general(parse_func_other, utils.tbl_macresearch, ' ', s_date, e_date, limit=2 if debug else None)
+    download_pdf_stock_general(parse_func_general, utils.tbl_macresearch, ' ', s_date, e_date, limit=2 if debug else None)
 
 def download_pdf_strategy(s_date=start_date, e_date=end_date):
-    download_pdf_stock_general(parse_func_other, utils.tbl_strategy, ' ', s_date, e_date, limit=2 if debug else None)
+    download_pdf_stock_general(parse_func_general, utils.tbl_strategy, ' ', s_date, e_date, limit=2 if debug else None)
 
 
 # 建立缩写到函数的映射
diff --git a/stockapp/reports_em/sqlite_utils.py b/stockapp/reports_em/sqlite_utils.py
index c61e704..2814832 100644
--- a/stockapp/reports_em/sqlite_utils.py
+++ b/stockapp/reports_em/sqlite_utils.py
@@ -82,10 +82,8 @@ def insert_or_update_common(data, tbl_name, uniq_key='infoCode'):
 # 查询数据
 def query_reports_comm(tbl_name, querystr='', limit=None ):
     try:
-        if tbl_name in [utils.tbl_stock, utils.tbl_new_stock] :
+        if tbl_name in [utils.tbl_stock, utils.tbl_new_stock, utils.tbl_industry, utils.tbl_macresearch, utils.tbl_strategy] :
             sql = f"SELECT id, infoCode, title, orgSName, industryName, stockName, publishDate FROM {tbl_name} WHERE 1=1 {querystr}"
-        elif tbl_name in [utils.tbl_industry, utils.tbl_macresearch, utils.tbl_strategy] :
-            sql = f"SELECT id, infoCode, title, orgSName, industryName, publishDate FROM {tbl_name} WHERE 1=1 {querystr}"
         else:
             logging.warning(f'wrong table name: {tbl_name}')
             return None