modify scripts
This commit is contained in:
45
commit.sh
Executable file
45
commit.sh
Executable file
@ -0,0 +1,45 @@
|
|||||||
|
#!/bin/bash
#
# commit.sh - stage, commit and push the current changes in one step.
#
# Usage:
#   ./commit.sh ["commit message"]
#
# When no message is passed as the first argument the script prompts for one
# and aborts if the answer is empty.
#
# The push target defaults to "origin master"; it can be overridden through
# the COMMIT_REMOTE and COMMIT_BRANCH environment variables.
#
# Make the script executable once before first use:
#   chmod +x commit.sh

# Push target (defaults keep the original "origin master" behaviour).
remote="${COMMIT_REMOTE:-origin}"
branch="${COMMIT_BRANCH:-master}"

# Abort early when the current directory is not inside a Git work tree.
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    echo "❌ 当前目录不是 Git 仓库,请先执行 git init"
    exit 1
fi

# Take the commit message from the first positional argument.
commit_msg="$1"

# No message supplied: ask for one interactively.
if [ -z "$commit_msg" ]; then
    # -r keeps backslashes in the typed message literal.
    read -r -p "请输入 commit message: " commit_msg
    if [ -z "$commit_msg" ]; then
        echo "❌ 提交信息不能为空!"
        exit 1
    fi
fi

# Stage the changes below the current directory.
if ! git add .; then
    echo "❌ git add 失败!"
    exit 1
fi

# Create the commit; this also fails when there is nothing to commit.
if ! git commit -m "$commit_msg"; then
    echo "❌ git commit 失败!"
    exit 1
fi

# Push to the remote and set the upstream tracking branch.
if ! git push -u "$remote" "$branch"; then
    echo "❌ git push 失败!请检查远程仓库设置。"
    exit 1
fi

echo "✅ 代码提交成功!"
|
||||||
@ -233,6 +233,7 @@ def fetch_movies_by_stu():
|
|||||||
|
|
||||||
# 更新演员信息,单次循环
|
# 更新演员信息,单次循环
|
||||||
def fetch_performers_detail_once(perfomers_list):
|
def fetch_performers_detail_once(perfomers_list):
|
||||||
|
last_performer_id = 0
|
||||||
for performer in perfomers_list:
|
for performer in perfomers_list:
|
||||||
url = performer['href']
|
url = performer['href']
|
||||||
person = performer['name']
|
person = performer['name']
|
||||||
@ -247,7 +248,8 @@ def fetch_performers_detail_once(perfomers_list):
|
|||||||
**data
|
**data
|
||||||
})
|
})
|
||||||
if performer_id:
|
if performer_id:
|
||||||
logging.info(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
|
logging.debug(f'insert one person, id: {performer_id}, person: ({person}), url: {url}')
|
||||||
|
last_performer_id = performer_id
|
||||||
else:
|
else:
|
||||||
logging.warning(f'insert person: ({person}) {url} failed.')
|
logging.warning(f'insert person: ({person}) {url} failed.')
|
||||||
|
|
||||||
@ -264,10 +266,11 @@ def fetch_performers_detail_once(perfomers_list):
|
|||||||
else:
|
else:
|
||||||
logging.warning(f'fetch_page error. person: ({person}), url: {url}')
|
logging.warning(f'fetch_page error. person: ({person}), url: {url}')
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
return last_performer_id
|
||||||
|
|
||||||
# 更新演员信息
|
# 更新演员信息
|
||||||
def fetch_performers_detail():
|
def fetch_performers_detail():
|
||||||
limit_count = 5 if debug else 1000
|
limit_count = 5 if debug else 100
|
||||||
perfomers_list = []
|
perfomers_list = []
|
||||||
|
|
||||||
# 获取新演员的列表
|
# 获取新演员的列表
|
||||||
@ -276,7 +279,8 @@ def fetch_performers_detail():
|
|||||||
if len(perfomers_list) < 1:
|
if len(perfomers_list) < 1:
|
||||||
logging.info(f'all new performers fetched. ')
|
logging.info(f'all new performers fetched. ')
|
||||||
break
|
break
|
||||||
fetch_performers_detail_once(perfomers_list)
|
last_perfomer_id = fetch_performers_detail_once(perfomers_list)
|
||||||
|
logging.info(f'insert {len(perfomers_list)} person. last performer id: {last_perfomer_id}')
|
||||||
if debug:
|
if debug:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -286,19 +290,21 @@ def fetch_performers_detail():
|
|||||||
if len(perfomers_list) < 1:
|
if len(perfomers_list) < 1:
|
||||||
logging.info(f'all existed performers updated. ')
|
logging.info(f'all existed performers updated. ')
|
||||||
break
|
break
|
||||||
fetch_performers_detail_once(perfomers_list)
|
last_perfomer_id = fetch_performers_detail_once(perfomers_list)
|
||||||
|
logging.info(f'insert {len(perfomers_list)} person. last performer id: {last_perfomer_id}')
|
||||||
if debug:
|
if debug:
|
||||||
break
|
break
|
||||||
|
|
||||||
# 更新影片信息
|
# 更新影片信息
|
||||||
def fetch_movies_detail():
|
def fetch_movies_detail():
|
||||||
limit_count = 10 if debug else 1000
|
limit_count = 10 if debug else 100
|
||||||
movies_list = []
|
movies_list = []
|
||||||
while True:
|
while True:
|
||||||
movies_list = db_tools.query_movie_hrefs(is_full_data=0, limit=limit_count)
|
movies_list = db_tools.query_movie_hrefs(is_full_data=0, limit=limit_count)
|
||||||
if len(movies_list) < 1:
|
if len(movies_list) < 1:
|
||||||
logging.info(f'all movies fetched.')
|
logging.info(f'all movies fetched.')
|
||||||
break
|
break
|
||||||
|
last_movie_id = 0
|
||||||
for movie in movies_list:
|
for movie in movies_list:
|
||||||
url = movie['href']
|
url = movie['href']
|
||||||
title = movie['title']
|
title = movie['title']
|
||||||
@ -314,7 +320,8 @@ def fetch_movies_detail():
|
|||||||
movie_data['StudioHref'] = utils.dist_stu_href_rewrite(movie_data['StudioHref'].lower())
|
movie_data['StudioHref'] = utils.dist_stu_href_rewrite(movie_data['StudioHref'].lower())
|
||||||
movie_id = db_tools.insert_or_update_movie(movie_data)
|
movie_id = db_tools.insert_or_update_movie(movie_data)
|
||||||
if movie_id:
|
if movie_id:
|
||||||
logging.info(f'insert one movie, id: {movie_id}, title: ({title}) url: {url}')
|
logging.debug(f'insert one movie, id: {movie_id}, title: ({title}) url: {url}')
|
||||||
|
last_movie_id = movie_id
|
||||||
else:
|
else:
|
||||||
logging.warning(f'insert movie {url} failed.')
|
logging.warning(f'insert movie {url} failed.')
|
||||||
|
|
||||||
@ -327,6 +334,7 @@ def fetch_movies_detail():
|
|||||||
else:
|
else:
|
||||||
logging.warning(f'fetch_page error. url: {url}')
|
logging.warning(f'fetch_page error. url: {url}')
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
logging.info(f'insert {len(movies_list)} movies. last movie id: {last_movie_id}')
|
||||||
# 调试增加break
|
# 调试增加break
|
||||||
if debug:
|
if debug:
|
||||||
return True
|
return True
|
||||||
|
|||||||
@ -29,12 +29,12 @@ map_pdf_page = {
|
|||||||
utils.tbl_industry : "https://data.eastmoney.com/report/zw_industry.jshtml?infocode={}"
|
utils.tbl_industry : "https://data.eastmoney.com/report/zw_industry.jshtml?infocode={}"
|
||||||
}
|
}
|
||||||
|
|
||||||
map_pdf_path = {
|
map_tbl_name = {
|
||||||
utils.tbl_stock : f'{pdf_base_dir}/stock',
|
utils.tbl_stock : '个股研报',
|
||||||
utils.tbl_new_stock : f'{pdf_base_dir}/newstock',
|
utils.tbl_new_stock : '新股研报',
|
||||||
utils.tbl_strategy : f'{pdf_base_dir}/strategy',
|
utils.tbl_strategy : '策略报告',
|
||||||
utils.tbl_macresearch : f'{pdf_base_dir}/macresearch',
|
utils.tbl_macresearch : '宏观研究',
|
||||||
utils.tbl_industry : f'{pdf_base_dir}/industry'
|
utils.tbl_industry : '行业研报'
|
||||||
}
|
}
|
||||||
|
|
||||||
current_date = datetime.now()
|
current_date = datetime.now()
|
||||||
@ -76,7 +76,7 @@ def fetch_reports_list_general(fetch_func, table_name, s_date, e_date, data_dir_
|
|||||||
|
|
||||||
|
|
||||||
# 股票所用的url
|
# 股票所用的url
|
||||||
def parse_func_stock(row, tbl_name):
|
def parse_func_general(row, tbl_name):
|
||||||
info_code = row['infoCode']
|
info_code = row['infoCode']
|
||||||
title = row['title'].replace("/", "_").replace("\\", "_")
|
title = row['title'].replace("/", "_").replace("\\", "_")
|
||||||
org_sname = row['orgSName']
|
org_sname = row['orgSName']
|
||||||
@ -84,34 +84,28 @@ def parse_func_stock(row, tbl_name):
|
|||||||
industry_name = row['industryName']
|
industry_name = row['industryName']
|
||||||
publish_date = row['publishDate'].split(" ")[0]
|
publish_date = row['publishDate'].split(" ")[0]
|
||||||
|
|
||||||
file_name = f"{publish_date}_{org_sname}_{stock_name}_{title}.pdf"
|
# 建表的时候默认值有点问题
|
||||||
|
if stock_name == '' or stock_name=="''":
|
||||||
|
stock_name = 'None'
|
||||||
|
if industry_name == '':
|
||||||
|
industry_name = 'None'
|
||||||
|
if org_sname == '':
|
||||||
|
org_sname = 'None'
|
||||||
|
report_type = map_tbl_name.get(tbl_name, 'None')
|
||||||
|
|
||||||
|
file_name = f"{publish_date}_{report_type}_{org_sname}_{industry_name}_{stock_name}_{title}.pdf"
|
||||||
url = map_pdf_page.get(tbl_name, None)
|
url = map_pdf_page.get(tbl_name, None)
|
||||||
if url is None:
|
if url is None:
|
||||||
logging.warning(f'wrong table name: {tbl_name}')
|
logging.warning(f'wrong table name: {tbl_name}')
|
||||||
return None, None, None
|
return None, None
|
||||||
|
|
||||||
url = url.format(info_code)
|
url = url.format(info_code)
|
||||||
os.makedirs(map_pdf_path[tbl_name], exist_ok=True)
|
# 拼目录
|
||||||
return url, os.path.join(map_pdf_path[tbl_name], file_name), None
|
dir_year = publish_date[:4] if len(publish_date)>4 else ''
|
||||||
|
dir_path = f'{pdf_base_dir}/{dir_year}/{map_tbl_name[tbl_name]}'
|
||||||
|
os.makedirs(dir_path, exist_ok=True)
|
||||||
|
return url, os.path.join(dir_path, file_name)
|
||||||
|
|
||||||
# 其它所用的url
|
|
||||||
def parse_func_other(row, tbl_name):
|
|
||||||
info_code = row['infoCode']
|
|
||||||
title = row['title'].replace("/", "_").replace("\\", "_")
|
|
||||||
org_sname = row['orgSName']
|
|
||||||
industry_name = row['industryName']
|
|
||||||
publish_date = row['publishDate'].split(" ")[0]
|
|
||||||
|
|
||||||
file_name = f"{publish_date}_{org_sname}_{industry_name}_{title}.pdf"
|
|
||||||
old_file_name = f"{publish_date}_{industry_name}_{org_sname}_{title}.pdf"
|
|
||||||
url = map_pdf_page.get(tbl_name, None)
|
|
||||||
if url is None:
|
|
||||||
logging.warning(f'wrong table name: {tbl_name}')
|
|
||||||
return None, None, None
|
|
||||||
|
|
||||||
url = url.format(info_code)
|
|
||||||
os.makedirs(map_pdf_path[tbl_name], exist_ok=True)
|
|
||||||
return url, os.path.join(map_pdf_path[tbl_name], file_name), os.path.join(map_pdf_path[tbl_name], old_file_name)
|
|
||||||
|
|
||||||
# 通用下载函数
|
# 通用下载函数
|
||||||
def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_date, e_date=end_date, limit=None):
|
def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_date, e_date=end_date, limit=None):
|
||||||
@ -126,7 +120,7 @@ def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_d
|
|||||||
rows = []
|
rows = []
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
url, file_path, old_file_path = parse_func(row, tbl_name)
|
url, file_path = parse_func(row, tbl_name)
|
||||||
if url is None or file_path is None:
|
if url is None or file_path is None:
|
||||||
logging.warning(f'wrong url or file_path. tbl_name: {tbl_name}')
|
logging.warning(f'wrong url or file_path. tbl_name: {tbl_name}')
|
||||||
continue
|
continue
|
||||||
@ -134,11 +128,6 @@ def download_pdf_stock_general(parse_func, tbl_name, querystr='', s_date=start_d
|
|||||||
if file_path and os.path.isfile(file_path):
|
if file_path and os.path.isfile(file_path):
|
||||||
logging.info(f'{file_path} already exists. skipping...')
|
logging.info(f'{file_path} already exists. skipping...')
|
||||||
continue
|
continue
|
||||||
# 旧方式命名的,rename
|
|
||||||
if old_file_path and os.path.isfile(old_file_path):
|
|
||||||
shutil.move(old_file_path, file_path)
|
|
||||||
logging.info(f'rename existed file to {file_path}')
|
|
||||||
continue
|
|
||||||
# 获取pdf链接地址
|
# 获取pdf链接地址
|
||||||
if url:
|
if url:
|
||||||
pdf_url = em.fetch_pdf_link(url)
|
pdf_url = em.fetch_pdf_link(url)
|
||||||
@ -175,19 +164,19 @@ def fetch_reports_list_strategy(s_date=start_date, e_date=end_date):
|
|||||||
|
|
||||||
# 下载股票pdf
|
# 下载股票pdf
|
||||||
def download_pdf_stock(s_date=start_date, e_date=end_date):
|
def download_pdf_stock(s_date=start_date, e_date=end_date):
|
||||||
download_pdf_stock_general(parse_func_stock, utils.tbl_stock, ' AND attachPages>=30', s_date, e_date, limit=2 if debug else None)
|
download_pdf_stock_general(parse_func_general, utils.tbl_stock, ' ', s_date, e_date, limit=2 if debug else None)
|
||||||
|
|
||||||
def download_pdf_newstock(s_date=start_date, e_date=end_date):
|
def download_pdf_newstock(s_date=start_date, e_date=end_date):
|
||||||
download_pdf_stock_general(parse_func_stock, utils.tbl_new_stock, ' AND attachPages>=30', s_date, e_date, limit=2 if debug else None)
|
download_pdf_stock_general(parse_func_general, utils.tbl_new_stock, ' ', s_date, e_date, limit=2 if debug else None)
|
||||||
|
|
||||||
def download_pdf_industry(s_date=start_date, e_date=end_date):
|
def download_pdf_industry(s_date=start_date, e_date=end_date):
|
||||||
download_pdf_stock_general(parse_func_other, utils.tbl_industry, ' AND attachPages>=30', s_date, e_date, limit=2 if debug else None)
|
download_pdf_stock_general(parse_func_general, utils.tbl_industry, ' ', s_date, e_date, limit=2 if debug else None)
|
||||||
|
|
||||||
def download_pdf_macresearch(s_date=start_date, e_date=end_date):
|
def download_pdf_macresearch(s_date=start_date, e_date=end_date):
|
||||||
download_pdf_stock_general(parse_func_other, utils.tbl_macresearch, ' ', s_date, e_date, limit=2 if debug else None)
|
download_pdf_stock_general(parse_func_general, utils.tbl_macresearch, ' ', s_date, e_date, limit=2 if debug else None)
|
||||||
|
|
||||||
def download_pdf_strategy(s_date=start_date, e_date=end_date):
|
def download_pdf_strategy(s_date=start_date, e_date=end_date):
|
||||||
download_pdf_stock_general(parse_func_other, utils.tbl_strategy, ' ', s_date, e_date, limit=2 if debug else None)
|
download_pdf_stock_general(parse_func_general, utils.tbl_strategy, ' ', s_date, e_date, limit=2 if debug else None)
|
||||||
|
|
||||||
|
|
||||||
# 建立缩写到函数的映射
|
# 建立缩写到函数的映射
|
||||||
|
|||||||
@ -82,10 +82,8 @@ def insert_or_update_common(data, tbl_name, uniq_key='infoCode'):
|
|||||||
# 查询数据
|
# 查询数据
|
||||||
def query_reports_comm(tbl_name, querystr='', limit=None ):
|
def query_reports_comm(tbl_name, querystr='', limit=None ):
|
||||||
try:
|
try:
|
||||||
if tbl_name in [utils.tbl_stock, utils.tbl_new_stock] :
|
if tbl_name in [utils.tbl_stock, utils.tbl_new_stock, utils.tbl_industry, utils.tbl_macresearch, utils.tbl_strategy] :
|
||||||
sql = f"SELECT id, infoCode, title, orgSName, industryName, stockName, publishDate FROM {tbl_name} WHERE 1=1 {querystr}"
|
sql = f"SELECT id, infoCode, title, orgSName, industryName, stockName, publishDate FROM {tbl_name} WHERE 1=1 {querystr}"
|
||||||
elif tbl_name in [utils.tbl_industry, utils.tbl_macresearch, utils.tbl_strategy] :
|
|
||||||
sql = f"SELECT id, infoCode, title, orgSName, industryName, publishDate FROM {tbl_name} WHERE 1=1 {querystr}"
|
|
||||||
else:
|
else:
|
||||||
logging.warning(f'wrong table name: {tbl_name}')
|
logging.warning(f'wrong table name: {tbl_name}')
|
||||||
return None
|
return None
|
||||||
|
|||||||
Reference in New Issue
Block a user