modify dirs

This commit is contained in:
oscarz
2025-03-21 21:21:18 +08:00
parent 7c8b27339c
commit 564c7bd442
23 changed files with 975 additions and 7 deletions

View File

@ -0,0 +1,116 @@
"""
Script Name:
Description: 统计hs300的成分股在区间内的涨幅。取前复权值
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import pandas as pd
import time
from datetime import datetime
import src.logger.logger as logger
import src.config.config as config
# 设置日志
logger.setup_logging()
logger = logger.getLogger()
# 数据库连接函数
def connect_to_db():
    """Open a new MySQL connection using the shared credentials from config."""
    params = dict(config.db_config)
    return pymysql.connect(**params)
# 获取 2024-09-23 对应的 close 值
def get_close_for_date(df, date):
    """Return the close price recorded at `date`, or None (with a warning) if absent."""
    matches = df[df['time_key'] == date]
    if matches.empty:
        logger.warning(f"No data found for date: {date}")
        return None
    return matches.iloc[0]['close']
# 获取年内涨幅的 c1, c3 值(最早和最晚的 close 值)
def get_first_last_close(df):
    """Return (earliest close, latest close) after ordering rows by time_key."""
    ordered = df.sort_values(by='time_key')
    first_close = ordered.iloc[0]['close']   # close of the earliest bar
    last_close = ordered.iloc[-1]['close']   # close of the latest bar
    return first_close, last_close
# 获取最大值和最小值的 close 值
def get_max_min_close(df):
    """Return the (maximum, minimum) of the close column."""
    closes = df['close']
    return closes.max(), closes.min()
# 主函数
def main():
    """Compute per-stock growth statistics for HS300 constituents.

    Loads forward-adjusted (qfq) daily closes since 2021-01-01 and, for each
    stock, derives:
      - year_growth_rate: growth from the earliest to the latest close
      - growth_since_2024_09_23: growth since the 2024-09-23 close
      - year_volatility: (max close / min close) - 1 over the period
    Results are printed and written to a CSV file.
    """
    # BUG FIX: pre-bind so the finally block cannot raise UnboundLocalError
    # when connect_to_db() itself fails.
    connection = None
    try:
        connection = connect_to_db()
        query = """
        SELECT code, name, time_key, close
        FROM hs300_qfq_his
        WHERE time_key >= '2021-01-01 00:00:00'
        """
        df = pd.read_sql(query, connection)
        # Reference date for the "growth since 2024-09-23" figure.
        target_date = '2024-09-23 00:00:00'
        df['time_key'] = pd.to_datetime(df['time_key'])
        results = []
        for code, group in df.groupby('code'):
            logger.info(f"Processing code: {code}")
            # c1: earliest close, c3: latest close in the period.
            c1, c3 = get_first_last_close(group)
            # c2: close on the reference date (None when that day is missing).
            c2 = get_close_for_date(group, target_date)
            if c1 is None or c2 is None or c3 is None:
                logger.warning(f"Skipping code {code} due to missing close values.")
                continue
            year_growth_rate = (c3 / c1 - 1) if c1 else None
            growth_since_2024_09_23 = (c3 / c2 - 1) if c2 else None
            # c4/c5: period max/min close, used as a crude volatility proxy.
            c4, c5 = get_max_min_close(group)
            year_volatility = (c4 / c5 - 1) if c4 and c5 else None
            results.append({
                'code': code,
                'name': group['name'].iloc[0],
                'year_growth_rate': year_growth_rate,
                'growth_since_2024_09_23': growth_since_2024_09_23,
                'year_volatility': year_volatility
            })
            time.sleep(1)
        result_df = pd.DataFrame(results)
        print(result_df)
        result_df.to_csv('./result/stat_grouth_rate_since2021.csv', index=False)
    except Exception as e:
        logger.error(f"Error occurred: {e}")
    finally:
        if connection:
            connection.close()


if __name__ == "__main__":
    main()

101
src/static/get_futu_rehb.py Normal file
View File

@ -0,0 +1,101 @@
"""
Script Name:
Description: 从富途获取复权因子数据。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/get-rehab.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import logging
import pymysql
import time
from futu import *
import pandas as pd
import config
# 设置日志
config.setup_logging()
# 连接 MySQL 数据库
def get_mysql_connection():
    """Create a fresh MySQL connection from the shared config."""
    cfg = config.db_config
    return pymysql.connect(**cfg)
# 获取股票代码列表
def get_stock_codes(table_name):
    """Return [{'code': ..., 'code_name': ...}, ...] rows from `table_name`.

    NOTE(review): `table_name` is interpolated into the SQL; it must only come
    from trusted constants ('hs300' / 'sp500' here), never from user input.
    """
    connection = get_mysql_connection()
    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            cursor.execute(f"SELECT code, code_name FROM {table_name} ")
            result = cursor.fetchall()
        return result
    finally:
        # BUG FIX: close the connection even when the query raises.
        connection.close()
# 插入或更新复权信息
def insert_or_update_rehab_data(connection, rehab_data, code, name):
    """Upsert one stock's adjustment-factor rows into futu_rehab.

    rehab_data: DataFrame with ex_div_date and the four adjustment factors.
    On MySQL error the partial batch is rolled back and the error is logged.
    """
    try:
        with connection.cursor() as cursor:
            sql = """
            INSERT INTO futu_rehab (code, name, ex_div_date, forward_adj_factorA, forward_adj_factorB, backward_adj_factorA, backward_adj_factorB)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            forward_adj_factorA = VALUES(forward_adj_factorA),
            forward_adj_factorB = VALUES(forward_adj_factorB),
            backward_adj_factorA = VALUES(backward_adj_factorA),
            backward_adj_factorB = VALUES(backward_adj_factorB)
            """
            for row in rehab_data.itertuples(index=False):
                cursor.execute(sql, (code, name, row.ex_div_date, row.forward_adj_factorA, row.forward_adj_factorB, row.backward_adj_factorA, row.backward_adj_factorB))
        connection.commit()
    except pymysql.MySQLError as e:
        # BUG FIX: undo the partial batch so a retry starts from a clean state.
        connection.rollback()
        logging.error(f"Error occurred while inserting or updating rehab data: {e}", exc_info=True)
# 从 Futu API 获取复权信息
def get_rehab_data(code):
    """Fetch adjustment-factor (rehab) data for `code` from the Futu API.

    Returns the result DataFrame on success, or None (after logging) on
    failure. The quote connection is always released.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        ret, data = quote_ctx.get_rehab(code)
    finally:
        # BUG FIX: close the connection even if get_rehab raises.
        quote_ctx.close()
    if ret == RET_OK:
        return data
    logging.error(f"Failed to get rehab data for {code}: {data}")
    return None
# 主函数
def process_stock_data(table_name, prefix=''):
    """Fetch and store rehab data for every stock listed in `table_name`.

    prefix: optional market prefix prepended to each code before calling the
    Futu API (e.g. 'US.' for sp500 codes).
    """
    stocks = get_stock_codes(table_name)
    connection = get_mysql_connection()
    try:
        for stock in stocks:
            code = stock['code']
            name = stock['code_name']
            # sp500 codes need the 'US.' market prefix for the Futu API.
            full_code = f"{prefix}{code}" if prefix else code
            logging.info(f"Processing {full_code} ({name})")
            rehab_data = get_rehab_data(full_code)
            if rehab_data is not None:
                insert_or_update_rehab_data(connection, rehab_data, full_code, name)
                logging.info(f"Inserted/Updated rehab data for {full_code} ({name})")
            # Throttle requests to stay within Futu API rate limits.
            time.sleep(3)
    finally:
        # BUG FIX: release the MySQL connection even when a fetch/insert raises.
        connection.close()
if __name__ == "__main__":
    # hs300 table: codes need no market prefix.
    process_stock_data("hs300")
    # sp500 table: Futu US codes require the 'US.' prefix.
    process_stock_data("sp500", prefix='US.')

View File

@ -0,0 +1,242 @@
import time
import logging
import pandas as pd
import os
import sys
import config
import crawling.stock_hist_em as his_em
file_selected_codes = './cursor/his_kline_em_codes.txt' # 指定拉取的代码列表,每行一个代码
file_done_codes = './cursor/his_kline_em_done_codes.txt' # 已完成拉取的代码列表,每行一个代码
dir_his_kline_em = '../data/his_kline_em'
config.setup_logging()
# 刷新代码列表,并返回
def flush_code_map():
    """Refresh the Eastmoney code-to-id map, echo it, and return it."""
    code_map = his_em.code_id_map_em()
    print(code_map)
    return code_map
# 获取历史K线如果失败就重试
def fetch_with_retry(code: str, adjust: str = '', max_retries: int = 20) -> pd.DataFrame:
    """Fetch daily history for `code`, retrying on empty results or errors.

    adjust: '' (raw), 'qfq' (forward-adjusted) or 'hfq' (backward-adjusted).
    Returns an empty DataFrame after `max_retries` failed attempts.
    """
    retries = 0
    while retries < max_retries:
        try:
            df = his_em.stock_zh_a_hist(
                symbol=code,
                period="daily",
                start_date="19000101",
                end_date="20241020",
                adjust=adjust,
            )
            if not df.empty:
                return df
            # Empty result: treat as a transient failure and retry.
            retries += 1
            logging.warning(f'empty his data, retrying. code: ({code}), adjust: ({adjust}), retry: {retries}')
            time.sleep(3)  # pause before the next attempt
        except Exception as e:
            # BUG FIX: log the swallowed exception instead of retrying silently.
            retries += 1
            logging.warning(f'fetch his data failed, retrying. code: ({code}), adjust: ({adjust}), retry: {retries}, err: {e}')
            time.sleep(3)  # pause before the next attempt
    return pd.DataFrame()
# 检查子目录是否存在,不存在则创建
def create_directory_if_not_exists(dir_name):
    """Create `dir_name` (including parents) if it does not already exist.

    Uses exist_ok=True so a concurrent creation between the existence check
    and makedirs cannot raise FileExistsError.
    """
    if not os.path.exists(dir_name):
        os.makedirs(dir_name, exist_ok=True)
        logging.info(f"Created directory: {dir_name}")
# 读取 code.txt 文件,并获取每个股票代码
def read_stock_codes(filename: str) -> list:
    """Read stock codes from `filename` (one per line); [] if the file is missing.

    Blank lines are skipped and surrounding whitespace is stripped.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # BUG FIX: include the path so the operator can tell which list failed.
        logging.error(f"文件 {filename} 未找到。")
        return []
# 从文件获取指定的代码并拉取历史K线
def fetch_parts_by_codes():
    """Fetch daily history for the codes listed in file_selected_codes.

    For each code all three adjust variants ('', 'qfq', 'hfq') are fetched and
    written to CSV; successfully completed codes are appended to
    file_done_codes so an interrupted run can resume.
    """
    codes = read_stock_codes(file_selected_codes)
    if not codes:
        logging.error("没有找到有效的股票代码,程序终止。")
        return
    # Codes finished in a previous run are skipped below.
    done_codes = []
    if os.path.exists(file_done_codes):
        with open(file_done_codes, 'r', encoding='utf-8') as f:
            done_codes = [line.strip() for line in f]
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for key in codes:
        val = code_id_map_em_df.get(key)
        if key in done_codes:
            logging.info(f'Skipping already code. code: ({key})')
            continue
        if val is None:
            # BUG FIX: the original logged `adjust_str` here, which is not yet
            # assigned at this point and raised NameError.
            logging.error(f'cannot find stock code. code: ({key})')
            continue
        succ = True
        lines = 0  # BUG FIX: pre-bind so the summary log below is always safe
        start_time = time.time()
        for adjust in adjust_values:
            adjust_str = adjust if adjust != '' else 'none'
            stock_zh_a_hist_df = fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.info(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
                succ = False
            else:
                # Write the DataFrame out under <dir>/<id>_<adjust>/.
                curr_dir = f'{dir_his_kline_em}/{val}_{adjust_str}'
                create_directory_if_not_exists(curr_dir)
                curr_file = f'{curr_dir}/{key}_{adjust_str}_his_data.csv'
                stock_zh_a_hist_df.to_csv(curr_file, index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), file: ({curr_file}) lines: ({lines})')
            time.sleep(5)  # throttle between adjust variants
        elapsed_time = int(time.time() - start_time)  # elapsed seconds for this code
        if succ:
            # Record completion so a rerun skips this code.
            with open(file_done_codes, 'a', encoding='utf-8') as done_list:
                done_list.write(f"{key}\n")
            logging.info(f"Downloaded and recorded: ({key}) total lines: {lines} time cost: {elapsed_time} s")
        time.sleep(10)  # throttle between codes
# 获取全量代码的历史K线
def fetch_all_by_codes():
    """Fetch daily history for every code in the full Eastmoney code map.

    Each code is fetched in the three adjust variants ('', 'qfq', 'hfq') and
    written to CSV; finished codes are recorded in file_done_codes so a rerun
    skips them.
    """
    # Load codes finished in earlier runs; they are skipped below.
    done_codes = []
    if os.path.exists(file_done_codes):
        with open(file_done_codes, 'r', encoding='utf-8') as f:
            done_codes = [line.strip() for line in f]  # strip newline/whitespace
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for key, val in code_id_map_em_df.items():
        if key in done_codes:
            logging.info(f'Skipping already code. code: ({key})')
            continue
        succ = True
        start_time = time.time()  # wall-clock start for this code
        for adjust in adjust_values:
            adjust_str = adjust if adjust != '' else 'none'
            stock_zh_a_hist_df =fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.error(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
                succ = False
            else:
                # Write the DataFrame out as CSV under <dir>/<id>_<adjust>/.
                curr_dir = f'{dir_his_kline_em}/{val}_{adjust_str}'
                create_directory_if_not_exists(curr_dir)
                curr_file = f'{curr_dir}/{key}_{adjust_str}_his_data.csv'
                stock_zh_a_hist_df.to_csv(curr_file, index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), file: ({curr_file}) lines: ({lines})')
            time.sleep(5)  # throttle between adjust variants
        end_time = time.time()  # wall-clock end for this code
        elapsed_time = int(end_time - start_time)  # elapsed seconds
        if succ:
            # Record completion so an interrupted run can resume.
            with open(file_done_codes, 'a', encoding='utf-8') as done_list:
                done_list.write(f"{key}\n")
            # NOTE(review): `lines` reflects only the last adjust variant; it is
            # bound whenever succ is True, but the "total" wording is misleading.
            logging.info(f"Downloaded and recorded: ({key}) total lines: {lines} time cost: {elapsed_time} s")
        time.sleep(10)  # throttle between codes
# 从文件获取指定的代码并拉取历史K线废弃
# Fetch history for the codes listed in file_selected_codes.
# Deprecated: superseded by fetch_parts_by_codes, which tracks completed codes
# and groups the three adjust variants per code.
def fetch_parts():
    codes = read_stock_codes(file_selected_codes)
    # Abort when the code list is missing or empty.
    if not codes:
        logging.error("没有找到有效的股票代码,程序终止。")
        return
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for adjust in adjust_values:
        adjust_str = adjust if adjust != '' else 'none'
        for key in codes:
            val = code_id_map_em_df.get(key)
            if val is None:
                logging.error(f'cannot find stock code. code: ({key}), adjust: ({adjust_str})')
                continue
            stock_zh_a_hist_df =fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.info(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
            else:
                # NOTE(review): writes into ../data/<id>/ and assumes that
                # directory already exists — confirm it is pre-created.
                stock_zh_a_hist_df.to_csv(f'../data/{val}/{key}_{adjust_str}_his_data.csv', index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), lines: ({lines})')
            time.sleep(5)  # throttle between codes
        time.sleep(10)  # throttle between adjust variants
# 获取全量代码的历史K线废弃
# Fetch history for every code in the code map.
# Deprecated: superseded by fetch_all_by_codes, which tracks completed codes
# and groups the three adjust variants per code.
def fetch_all():
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for adjust in adjust_values:
        adjust_str = adjust if adjust != '' else 'none'
        for key, val in code_id_map_em_df.items():
            stock_zh_a_hist_df =fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.error(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
            else:
                # NOTE(review): writes into ../data/<id>/ and assumes that
                # directory already exists — confirm it is pre-created.
                stock_zh_a_hist_df.to_csv(f'../data/{val}/{key}_{adjust_str}_his_data.csv', index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), lines: ({lines})')
            time.sleep(5)  # throttle between codes
        time.sleep(10)  # throttle between adjust variants
# 主函数
# CLI entry point: choose between full and partial download modes.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python script.py <cmd>")
        print("cmd: all, parts")
        sys.exit(1)
    cmd = sys.argv[1]
    if cmd == "all":
        fetch_all_by_codes()  # fetch every code in the code map
    elif cmd == "parts":
        fetch_parts_by_codes()  # fetch only the codes listed in the file
    elif cmd == "all_other":
        fetch_all()  # deprecated full fetch
    elif cmd == "parts_other":
        fetch_parts()  # deprecated partial fetch
    else:
        print(f"Unknown command: {cmd}")

View File

@ -0,0 +1,160 @@
"""
Script Name:
Description: 从富途获取历史K线。通过不同变量可以获取不复权、前复权、后复权等数据。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/request-history-kline.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import time
import logging
from futu import *
from datetime import datetime, timedelta
import config
config.setup_logging()
# 连接 MySQL
connection = pymysql.connect(**config.db_config)
# 复权类型,不复权
# selected_autype = AuType.NONE
# selected_table = "hs300_his_kline_none"
# 复权类型,后复权
selected_autype = AuType.HFQ
selected_table = "hs300_his_kline_hfq"
# 复权类型,默认为 AuType.QFQ ,即前复权
# selected_autype = AuType.QFQ
# selected_table = "hs300_qfq_his"
# 获取当前日期
end_date = datetime.now().strftime('%Y-%m-%d')
# 计算 start_date 为当前日期减去10年再加一天
start_date = (datetime.now() - timedelta(days=365*10-1)).strftime('%Y-%m-%d')
# 定义插入数据的函数
def insert_data(connection, data, s_table=selected_table):
    """Upsert one page of kline rows into `s_table`.

    data: DataFrame in Futu request_history_kline column layout.
    On MySQL error the partial page is rolled back, logged, and printed.

    NOTE(review): s_table is interpolated into the SQL; it must only come from
    the fixed table names declared in this module, never from user input.
    """
    # Build the statement once instead of rebuilding it on every row.
    sql = f"""
    INSERT INTO {s_table} (code, name, time_key, open, close, high, low, pe_ratio, turnover_rate, volume, turnover, change_rate, last_close)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    open = VALUES(open),
    close = VALUES(close),
    high = VALUES(high),
    low = VALUES(low),
    pe_ratio = VALUES(pe_ratio),
    turnover_rate = VALUES(turnover_rate),
    volume = VALUES(volume),
    turnover = VALUES(turnover),
    change_rate = VALUES(change_rate),
    last_close = VALUES(last_close)
    """
    try:
        with connection.cursor() as cursor:
            for index, row in data.iterrows():
                cursor.execute(sql, (
                    row['code'], row['name'], row['time_key'], row['open'], row['close'],
                    row['high'], row['low'], row['pe_ratio'], row['turnover_rate'],
                    row['volume'], row['turnover'], row['change_rate'], row['last_close']
                ))
        connection.commit()
    except pymysql.MySQLError as e:
        # BUG FIX: drop the partial page so a retry starts from a clean state.
        connection.rollback()
        logging.error(f"Error occurred while inserting data: {e}")
        print(f"Error occurred while inserting data: {e}")
# 获取 hs300 表中的所有股票代码
# Fetch every constituent code from the hs300 table.
def get_hs300_codes():
    # Uses the module-level `connection` opened at import time.
    with connection.cursor() as cursor:
        cursor.execute("SELECT code FROM hs300 ")
        return cursor.fetchall()
def stat_growth(s_autype = selected_autype, s_table = selected_table, s_start = start_date, s_end = end_date):
    """Download history klines for every hs300 code and upsert them into `s_table`.

    s_autype: Futu adjust type (AuType.NONE / QFQ / HFQ).
    s_table:  destination table matching the adjust type.
    s_start / s_end: 'yyyy-mm-dd' range bounds.

    NOTE(review): the finally block closes the module-level MySQL `connection`,
    so this function can only be called once per process.
    """
    # Connect to the local Futu OpenD quote gateway.
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        hs300_codes = get_hs300_codes()
        for code_row in hs300_codes:
            code = code_row[0]  # first column of the DB row is the code
            # First page of the paged request (up to 500 bars per page).
            ret, data, page_req_key = quote_ctx.request_history_kline(code, autype=s_autype, start=s_start, end=s_end, max_count=500)
            if ret == RET_OK:
                logging.info(f"成功获取 {code} 的第一页数据,共 {len(data)} 条")
                print(f"成功获取 {code} 的第一页数据,共 {len(data)} 条")
                # Persist the first page.
                insert_data(connection, data, s_table)
            else:
                logging.error(f"获取 {code} 的数据失败: {data}")
                print(f"获取 {code} 的数据失败: {data}")
            # Follow the pagination cursor until it is exhausted.
            while page_req_key is not None:
                time.sleep(1)  # NOTE(review): original comment said 5s; code sleeps 1s
                ret, data, page_req_key = quote_ctx.request_history_kline(code, autype=s_autype, start=s_start, end=s_end, max_count=500, page_req_key=page_req_key)
                if ret == RET_OK:
                    logging.info(f"成功获取 {code} 的分页数据,共 {len(data)} 条")
                    print(f"成功获取 {code} 的分页数据,共 {len(data)} 条")
                    # Persist this page.
                    insert_data(connection, data, s_table)
                else:
                    logging.error(f"分页数据获取失败: {data}")
                    print(f"分页数据获取失败: {data}")
            # Pause between stocks to respect API rate limits.
            time.sleep(2)
    finally:
        quote_ctx.close()  # release the Futu connection
        connection.close()  # close the module-level MySQL connection
def print_help():
    """Print CLI usage for this script."""
    usage_lines = (
        "Usage: python script.py type end_date start_date",
        "type: qfq|hfq|none, default for none",
        'start_date: yyyy-mm-dd, default for end_date - 10 years ',
        'end_date: yyyy-mm-dd, default for current date ',
    )
    for line in usage_lines:
        print(line)
def main():
    """Parse CLI args (adjust type, end date, start date) and run stat_growth."""
    # BUG FIX: `sys` was never imported at module level in this file (it only
    # worked if `from futu import *` happened to expose it) — import it here.
    import sys
    # Renamed from `type` to avoid shadowing the builtin.
    autype = selected_autype
    table = selected_table
    start = start_date
    end = end_date
    args_num = len(sys.argv)
    if args_num > 1:
        if sys.argv[1] == 'none':
            autype = AuType.NONE
            table = "hs300_his_kline_none"
        elif sys.argv[1] == 'qfq':
            autype = AuType.QFQ
            table = "hs300_qfq_his"
        elif sys.argv[1] == 'hfq':
            autype = AuType.HFQ
            table = "hs300_his_kline_hfq"
        else:
            print_help()
            sys.exit(1)  # sys.exit: `exit()` is an interactive-only helper
    if args_num > 2:
        end = sys.argv[2]
    if args_num > 3:
        start = sys.argv[3]
    print(f'fetching his kline... type: {autype}, table: {table}, start: {start}, end: {end}\n\n')
    stat_growth(autype, table, start, end)


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,78 @@
"""
Script Name:
Description: 从富途获取历史K线。通过不同变量可以获取不复权、前复权、后复权等数据。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/get-market-snapshot.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import pandas as pd
from futu import *
import logging
import config
# 设置日志
config.setup_logging()
logger = logging.getLogger()
# 数据库连接函数
def connect_to_db():
    """Return a new MySQL connection built from config.db_config."""
    params = dict(config.db_config)
    return pymysql.connect(**params)
# 从sp300表中获取所有股票代码
# Fetch all stock codes as a one-column DataFrame.
# NOTE(review): despite the "sp300" name, this reads the hs300 table —
# confirm which universe was intended.
def fetch_sp300_codes(connection):
    query = "SELECT code FROM hs300"
    return pd.read_sql(query, connection)
# 获取市场快照并保存到 CSV 文件
def get_market_snapshot_and_save_to_csv(stock_codes, output_file):
    """Fetch a Futu market snapshot for `stock_codes` and write it to CSV.

    On API failure the error is logged and no file is written. The quote
    connection is always released.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        ret, data = quote_ctx.get_market_snapshot(stock_codes)
        if ret == RET_OK:
            logger.info(f"Successfully fetched market snapshot for {len(stock_codes)} codes.")
            data.to_csv(output_file, index=False)
            logger.info(f"Snapshot data saved to {output_file}")
        else:
            logger.error(f"Error fetching market snapshot: {data}")
    finally:
        # BUG FIX: close the connection even when the request or write raises.
        quote_ctx.close()
# 主函数
def main():
    """Load hs300 codes from MySQL and dump their market snapshot to CSV."""
    # BUG FIX: pre-bind so the finally block cannot raise UnboundLocalError
    # when connect_to_db() itself fails.
    connection = None
    try:
        connection = connect_to_db()
        sp300_codes_df = fetch_sp300_codes(connection)
        stock_codes = sp300_codes_df['code'].tolist()
        if not stock_codes:
            logger.warning("No stock codes found in sp300 table.")
            return
        output_file = "./result/market_snapshot.csv"
        get_market_snapshot_and_save_to_csv(stock_codes, output_file)
    except Exception as e:
        logger.error(f"An error occurred: {e}")
    finally:
        if connection:
            connection.close()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,97 @@
"""
Script Name:
Description: 从富途获取板块列表。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/get-plate-list.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import time
from futu import *
import pymysql
from datetime import datetime
import logging
import config
# 市场映射
market_mapping = {
'US': Market.US,
'HK': Market.HK,
'SZ': Market.SZ,
'SH': Market.SH
}
# 设置日志
config.setup_logging()
# 板块集合类型映射
plat_mapping = {
'INDUSTRY': Plate.INDUSTRY,
'ALL': Plate.ALL,
'CONCEPT': Plate.CONCEPT
}
# 建立 MySQL 连接
connection = pymysql.connect(**config.db_config)
# 定义插入或更新函数
def insert_or_update_data(connection, data, market, plat):
    """Upsert plate rows for one (market, plate-type) pull into futu_plat_list.

    data: DataFrame with code / plate_name / plate_id columns.
    On MySQL error the partial batch is rolled back and the error is printed.
    """
    # Build the loop-invariant statement and timestamp once, not per row.
    sql = """
    INSERT INTO futu_plat_list (code, plate_name, plate_id, market, plat, up_date)
    VALUES (%s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
    plate_name = VALUES(plate_name),
    plate_id = VALUES(plate_id),
    market = VALUES(market),
    up_date = VALUES(up_date)
    """
    up_date = datetime.now().strftime('%Y-%m-%d')  # same date for the whole batch
    try:
        with connection.cursor() as cursor:
            for index, row in data.iterrows():
                cursor.execute(sql, (row['code'], row['plate_name'], row['plate_id'], market, plat, up_date))
        connection.commit()
    except pymysql.MySQLError as e:
        # BUG FIX: discard the partial batch so a retry starts from a clean state.
        connection.rollback()
        print(f"Error occurred while inserting/updating data for market {market}, plat {plat}: {e}")
# 初始化行情连接
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
try:
# 双重循环:市场和板块类型
for market in market_mapping:
for plat_name, plat_enum in plat_mapping.items():
# 拉取数据
ret, data = quote_ctx.get_plate_list(market_mapping[market], plat_enum)
if ret == RET_OK:
row_count = len(data) # 获取行数
print(f"成功获取 {market} 市场的 {plat_name} 板块数据,共 {row_count}")
# 插入或更新数据到 MySQL
insert_or_update_data(connection, data, market, plat_name)
else:
print(f"获取 {market} 市场的 {plat_name} 板块数据失败: {data}")
# 每次循环后休眠10秒
time.sleep(10)
finally:
quote_ctx.close() # 关闭行情连接
connection.close() # 关闭 MySQL 连接

View File

@ -0,0 +1,135 @@
"""
Script Name:
Description: 从yahoo获取美股股票的历史K线, 通过 auto_adjust 参数来控制是否获取前复权数据。默认为true, 如果设置为false, 那么结果中会自动带 adj Close.
参考地址: https://github.com/ranaroussi/yfinance
https://aroussi.com/post/python-yahoo-finance
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import yfinance as yf
import pymysql
import logging
import time
import sys
import os
from datetime import datetime
import config # 引入 config.py 中的配置
# 股票代码集合,如果属于这些股票,则使用 "max" 时间段
special_stock_codes = ('ABNB', 'CARR', 'CEG', 'GEHC', 'GEV', 'HUBB', 'KVUE', 'OTIS', 'PLTR', 'SOLV', 'VLTO')
# K线调整选项决定是否使用前复权价格
kline_adjust = True
# 根据 kline_adjust 决定使用的表名
table_name = 'sp500_qfq_his_202410' if kline_adjust else 'sp500_his_kline_none'
# 使用 config.py 中的日志配置
config.setup_logging()
logger = logging.getLogger()
# MySQL数据库连接
def connect_to_db():
    """Connect to MySQL with a DictCursor; log and return None on failure."""
    try:
        conn = pymysql.connect(
            **config.db_config,
            cursorclass=pymysql.cursors.DictCursor  # rows come back as dicts
        )
        return conn
    except pymysql.MySQLError as e:
        logger.error(f"Error connecting to the database: {e}", exc_info=True)
        return None
# 从MySQL读取sp500表中的股票代码和名称
def fetch_sp500_codes():
    """Return code_inner/code_name rows from the sp500 table ([] on DB failure)."""
    db = connect_to_db()
    if db is None:
        logger.error("Failed to connect to database.")
        return []
    try:
        with db.cursor() as cursor:
            cursor.execute("SELECT code_inner, code_name FROM sp500")
            return cursor.fetchall()
    finally:
        db.close()
# 插入数据到指定表名
def insert_stock_data_to_db(data, code, name):
    """Upsert one stock's yfinance history rows into `table_name`.

    data: yfinance history DataFrame indexed by timestamp. When
    auto_adjust=True the frame has no 'Adj Close' column, so 0 is stored.
    On MySQL error the partial batch is rolled back and the error is logged.
    """
    # BUG FIX: bind `db` before the try so the finally clause can never see an
    # unbound name, and roll back on error instead of committing nothing silently.
    db = connect_to_db()
    if db is None:
        return
    try:
        with db.cursor() as cursor:
            insert_query = f"""
            INSERT INTO {table_name} (time_key, open, high, low, close, adj_close, volume, dividends, stock_splits, code, name)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            open = VALUES(open),
            high = VALUES(high),
            low = VALUES(low),
            close = VALUES(close),
            adj_close = VALUES(adj_close),
            volume = VALUES(volume),
            dividends = VALUES(dividends),
            stock_splits = VALUES(stock_splits)
            """
            # auto_adjust=True:  Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
            # auto_adjust=False: Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
            for index, row in data.iterrows():
                time_key = index.strftime('%Y-%m-%d %H:%M:%S')
                # 'Adj Close' is absent when auto_adjust=True; store 0 then.
                adj_close = row['Adj Close'] if 'Adj Close' in row else 0
                values = (time_key, row['Open'], row['High'], row['Low'], row['Close'], adj_close, row['Volume'], row['Dividends'], row['Stock Splits'], code, name)
                cursor.execute(insert_query, values)
        db.commit()
    except pymysql.MySQLError as e:
        db.rollback()  # discard the partial batch
        logger.error(f"Error occurred while inserting data: {e}", exc_info=True)
    finally:
        db.close()
# 拉取股票的历史数据
def fetch_and_store_stock_data():
    """Download yfinance history for every sp500 code and store it in MySQL.

    Codes in special_stock_codes use period="max"; all others use "10y".
    """
    codes = fetch_sp500_codes()
    for row in codes:
        code_inner = row['code_inner']
        code_name = row['code_name']
        logger.info(f"Fetching data for {code_name} ({code_inner})...")
        # Special tickers get their full history ("max"); presumably they have
        # less than 10 years of data — TODO confirm.
        period = "max" if code_inner in special_stock_codes else "10y"
        try:
            stock = yf.Ticker(code_inner)
            # kline_adjust drives auto_adjust (True → adjusted prices, no Adj Close column).
            hist_data = stock.history(period=period, auto_adjust=kline_adjust)
            if not hist_data.empty:
                logger.info(f"Inserting data for {code_name} ({code_inner}) into {table_name}...")
                insert_stock_data_to_db(hist_data, code_inner, code_name)
            else:
                logger.warning(f"No data found for {code_name} ({code_inner})")
            # Throttle between tickers.
            time.sleep(3)
        except Exception as e:
            logger.error(f"Error fetching data for {code_name} ({code_inner}): {e}", exc_info=True)


if __name__ == "__main__":
    fetch_and_store_stock_data()

View File

@ -0,0 +1,218 @@
"""
Script Name: stat_adjust_kline.py
Description: 根据从数据源获取的历史K线计算股票的前复权和后复权值.
注意:
本程序只适合沪深300的前复权和后复权计算使用的是富途提供的不复权数据和复权因子。
处理sp500有问题yahoo 提供的不复权收据 + 富途提供的复权因子计算前复权与yahoo提供的前复权数据差别很大。
尚未找到原因所以目前只能用hs300
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import sys
import time
import config
import argparse
# 配置不同市场的表名管理
tables_mapping = {
'sp500': {
'none_his_kline': 'sp500_his_kline_none',
'adjust_his_kline': 'sp500_ajust_kline_202410',
'rehab_table': 'futu_rehab'
},
'hs300': {
'none_his_kline': 'hs300_his_kline_none',
'adjust_his_kline': 'hs300_ajust_kline_202410',
'rehab_table': 'futu_rehab'
}
}
# 日志配置
config.setup_logging()
logger = logging.getLogger()
# MySQL数据库连接
def connect_to_db():
    """Open a MySQL connection returning dict rows; log and return None on error."""
    try:
        connection = pymysql.connect(
            **config.db_config,
            cursorclass=pymysql.cursors.DictCursor
        )
    except pymysql.MySQLError as e:
        logger.error(f"Error connecting to the database: {e}", exc_info=True)
        return None
    return connection
# 从指定市场表中读取code和code_name字段
def fetch_codes_from_market_table(market, debug=False):
    """Return code/code_name rows from the `market` table ('sp500' or 'hs300').

    debug: restrict the result to a small fixed code set for quick runs.
    Raises ValueError for an unknown market so an arbitrary string (e.g. from
    the --market CLI flag) can never reach the interpolated SQL below.
    """
    # Whitelist the table name before interpolating it into SQL.
    if market not in tables_mapping:
        raise ValueError(f"Unknown market: {market!r}; expected one of {sorted(tables_mapping)}")
    db = connect_to_db()
    if db is None:
        logger.error("Failed to connect to database.")
        return []
    try:
        with db.cursor() as cursor:
            query = f"SELECT code, code_name FROM {market} "
            if debug:
                query += " where code in ('GE', 'WTW')"
            cursor.execute(query)
            return cursor.fetchall()
    finally:
        # BUG FIX: close the connection even when the query raises.
        db.close()
# 读取复权因子数据
def fetch_rehab_data(db, code):
    """Return adjustment-factor rows for `code`, newest ex-div date first ([] on error)."""
    sql = "SELECT ex_div_date, forward_adj_factorA, forward_adj_factorB, backward_adj_factorA, backward_adj_factorB FROM futu_rehab WHERE code = %s ORDER BY ex_div_date DESC"
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, (code,))
            rows = cursor.fetchall()
        return rows
    except pymysql.MySQLError as e:
        logger.error(f"Error fetching rehab data for {code}: {e}", exc_info=True)
        return []
# 读取不复权的股票价格数据
def fetch_kline_none_data(db, table_name, code):
    """Return unadjusted open/close rows for `code`, oldest bar first ([] on error)."""
    sql = f"SELECT code, time_key, open, close FROM {table_name} WHERE code = %s ORDER BY time_key ASC"
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, (code,))
            rows = cursor.fetchall()
        return rows
    except pymysql.MySQLError as e:
        logger.error(f"Error fetching kline none data for {code}: {e}", exc_info=True)
        return []
# 插入前后复权价格到数据库
def insert_hfq_data(db, hfq_data, hfq_table):
    """Bulk-upsert computed hfq/qfq price rows into `hfq_table`.

    hfq_data: list of tuples matching the column order of the INSERT below.
    On MySQL error the partial batch is rolled back and the error is logged.
    """
    insert_query = f"""
    INSERT INTO {hfq_table} (code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
    hfq_open = VALUES(hfq_open),
    hfq_close = VALUES(hfq_close),
    qfq_open = VALUES(qfq_open),
    qfq_close = VALUES(qfq_close),
    none_open = VALUES(none_open),
    none_close = VALUES(none_close)
    """
    try:
        with db.cursor() as cursor:
            cursor.executemany(insert_query, hfq_data)
        db.commit()
    except pymysql.MySQLError as e:
        # BUG FIX: discard the partial batch so a retry starts from a clean state.
        db.rollback()
        logger.error(f"Error inserting hfq data: {e}", exc_info=True)
# 计算后复权价格和前复权价格
def calculate_hfq_qfq_price(market, debug=False):
    """Compute backward- (hfq) and forward-adjusted (qfq) prices for `market`.

    For every stock in the market table, the unadjusted open/close series is
    combined with Futu rehab factors (price * factorA + factorB, compounded)
    to produce hfq and qfq prices, which are upserted into the market's
    adjust-kline table.

    market: 'sp500' or 'hs300' (key into tables_mapping).
    debug:  restrict the run to a small fixed code set.
    """
    db = connect_to_db()
    if db is None:
        return
    # Resolve source (unadjusted kline) and destination (adjusted) tables.
    table_names = tables_mapping[market]
    none_his_kline_table = table_names['none_his_kline']
    adjust_kline_table = table_names['adjust_his_kline']
    codes = fetch_codes_from_market_table(market, debug)
    isSP500 = True if market == 'sp500' else False
    for row in codes:
        code = row['code']
        name = row['code_name']
        # sp500 rehab rows are keyed with a 'US.' market prefix.
        rehab_code = code
        if isSP500:
            rehab_code = 'US.' + code
        logger.info(f"Processing {code} ({name})...")
        # Without rehab factors the unadjusted prices pass through unchanged.
        rehab_res = fetch_rehab_data(db, rehab_code)
        if not rehab_res:
            logger.warning(f"No rehab data found for {code}, use non close")
            rehab_res = list()
        # Ascending copy (oldest first) used for the forward (qfq) pass;
        # fetch_rehab_data returns rows newest first.
        rehab_res_asc = list(reversed(rehab_res))
        kline_none = fetch_kline_none_data(db, none_his_kline_table, code)
        if not kline_none:
            logger.warning(f"No kline none data found for {code}")
            continue
        hfq_data = []
        # Walk each unadjusted bar and compute both adjusted price pairs.
        for kline_row in kline_none:
            none_open = kline_row['open']
            none_close = kline_row['close']
            time_key = kline_row['time_key']
            # ex_div_date is a DATE, so compare against the bar's date part.
            time_key_date = time_key.date()
            # Backward adjustment (hfq): compound every factor whose ex-div
            # date is on/before this bar, iterating newest first.
            hfq_open = none_open
            hfq_close = none_close
            tmp_close = none_close
            tmp_open = none_open
            for rehab_row in rehab_res:
                # Yahoo's unadjusted data already reflects splits, so split
                # rows (backward factorA != 1) from Futu are skipped for sp500.
                if isSP500 and rehab_row['backward_adj_factorA'] != 1 :
                    continue
                if rehab_row['ex_div_date'] <= time_key_date:
                    hfq_close = (tmp_close * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    hfq_open = (tmp_open * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    tmp_close = hfq_close
                    tmp_open = hfq_open
            # Forward adjustment (qfq): compound every factor whose ex-div
            # date is after this bar, iterating oldest first.
            qfq_close = none_close
            qfq_open = none_open
            tmp_close = none_close
            tmp_open = none_open
            for rehab_row in rehab_res_asc:
                # Same split/merge skip rule as above for sp500.
                if isSP500 and rehab_row['backward_adj_factorA'] != 1 :
                    continue
                # Futu's US qfq calculation ignores forward_adj_factorB:
                # https://openapi.futunn.com/futu-api-doc/qa/quote.html
                factorB = 0 if isSP500 else rehab_row['forward_adj_factorB']
                if rehab_row['ex_div_date'] > time_key_date:
                    qfq_close = (tmp_close * rehab_row['forward_adj_factorA']) + factorB
                    qfq_open = (tmp_open * rehab_row['forward_adj_factorA']) + factorB
                    tmp_close = qfq_close
                    tmp_open = qfq_open
            # Collect the computed row for the bulk upsert below.
            hfq_data.append((code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close))
        insert_hfq_data(db, hfq_data, adjust_kline_table)
        logger.info(f"Inserted HFQ/QFQ data for {code} ({name})")
        time.sleep(1)  # throttle between stocks
    db.close()
if __name__ == "__main__":
    # CLI: --market sp500|hs300 (default hs300); --debug restricts to a few codes.
    parser = argparse.ArgumentParser(description='Calculate HFQ and QFQ Prices for Market')
    parser.add_argument('--market', type=str, default='hs300', help='Market to process (sp500 or hs300)')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
    args = parser.parse_args()
    # Run the adjustment computation for the selected market.
    calculate_hfq_qfq_price(args.market, args.debug)

271
src/static/stat_growth.py Normal file
View File

@ -0,0 +1,271 @@
"""
Script Name:
Description: 获取沪深300成分股的最新股价, 并计算年内涨幅, 924以来的涨幅, 市盈率, 股息率等。
需要调用futu的获取快照接口。
https://openapi.futunn.com/futu-api-doc/quote/get-market-snapshot.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import csv
import os
import config
import time
from datetime import datetime
from futu import OpenQuoteContext, RET_OK # Futu API client
# 配置日志
config.setup_logging()
# 1. 获取 hs300 表数据
def get_hs300_data():
    """Return all (code, code_name) rows from the hs300 table as dicts."""
    conn = pymysql.connect(**config.db_config)
    try:
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        cursor.execute("SELECT code, code_name FROM hs300")
        hs300_data = cursor.fetchall()
        cursor.close()
        return hs300_data
    finally:
        # BUG FIX: close the connection even when the query raises.
        conn.close()
# 2. 批量获取市场快照
def get_market_snapshots(codes):
    """Fetch Futu market snapshots for `codes`, batched 350 per request.

    Returns a list of per-stock dicts; failed batches are logged and skipped.
    The quote connection is always released.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    snapshot_results = []
    batch_size = 350  # per-request code limit for the snapshot API
    try:
        for i in range(0, len(codes), batch_size):
            code_batch = codes[i:i+batch_size]
            ret, data = quote_ctx.get_market_snapshot(code_batch)
            if ret == RET_OK:
                snapshot_results.extend(data.to_dict('records'))
            else:
                logging.error(f"获取市场快照失败: {data}")
    finally:
        # BUG FIX: close the connection even when a batch request raises.
        quote_ctx.close()
    return snapshot_results
# 3. Insert or update rows in the futu_market_snapshot table.
def insert_or_update_snapshot(snapshot_data):
    """Upsert Futu snapshot records into futu_market_snapshot.

    :param snapshot_data: list of dicts as returned by get_market_snapshots.
    Errors roll back the whole batch and are logged (not re-raised).
    """
    conn = pymysql.connect(**config.db_config)
    cursor = conn.cursor()
    # ON DUPLICATE KEY UPDATE keeps one current row per code; note that
    # update_time / listing_date / equity_valid are NOT refreshed on conflict.
    query = """
        INSERT INTO futu_market_snapshot (
            code, name, update_time, last_price, open_price, high_price, low_price, prev_close_price,
            volume, turnover, turnover_rate, suspension, listing_date, equity_valid, issued_shares,
            total_market_val, net_asset, net_profit, earning_per_share, outstanding_shares, net_asset_per_share,
            circular_market_val, ey_ratio, pe_ratio, pb_ratio, pe_ttm_ratio, dividend_ttm, dividend_ratio_ttm,
            dividend_lfy, dividend_lfy_ratio
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            last_price = VALUES(last_price), open_price = VALUES(open_price), high_price = VALUES(high_price),
            low_price = VALUES(low_price), prev_close_price = VALUES(prev_close_price), volume = VALUES(volume),
            turnover = VALUES(turnover), turnover_rate = VALUES(turnover_rate), suspension = VALUES(suspension),
            issued_shares = VALUES(issued_shares), total_market_val = VALUES(total_market_val), net_asset = VALUES(net_asset),
            net_profit = VALUES(net_profit), earning_per_share = VALUES(earning_per_share), outstanding_shares = VALUES(outstanding_shares),
            net_asset_per_share = VALUES(net_asset_per_share), circular_market_val = VALUES(circular_market_val),
            ey_ratio = VALUES(ey_ratio), pe_ratio = VALUES(pe_ratio), pb_ratio = VALUES(pb_ratio),
            pe_ttm_ratio = VALUES(pe_ttm_ratio), dividend_ttm = VALUES(dividend_ttm), dividend_ratio_ttm = VALUES(dividend_ratio_ttm),
            dividend_lfy = VALUES(dividend_lfy), dividend_lfy_ratio = VALUES(dividend_lfy_ratio)
    """
    try:
        # One execute per record; all records commit together at the end.
        for record in snapshot_data:
            cursor.execute(query, (
                record['code'], record['name'], record['update_time'], record['last_price'], record['open_price'],
                record['high_price'], record['low_price'], record['prev_close_price'], record['volume'], record['turnover'],
                record['turnover_rate'], record['suspension'], record['listing_date'], record['equity_valid'], record['issued_shares'],
                record['total_market_val'], record['net_asset'], record['net_profit'], record['earning_per_share'], record['outstanding_shares'],
                record['net_asset_per_share'], record['circular_market_val'], record['ey_ratio'], record['pe_ratio'], record['pb_ratio'],
                record['pe_ttm_ratio'], record['dividend_ttm'], record['dividend_ratio_ttm'], record['dividend_lfy'], record['dividend_lfy_ratio']
            ))
        conn.commit()
    except Exception as e:
        logging.error(f"插入或更新快照数据时出错: {e}")
        conn.rollback()
    finally:
        cursor.close()
        conn.close()
def process_snapshot_data():
    """Fetch snapshots for every HS300 code and upsert them into MySQL."""
    code_list = [row['code'] for row in get_hs300_data()]
    # Pull the snapshots in batches, then persist them in one pass.
    insert_or_update_snapshot(get_market_snapshots(code_list))
    logging.info(f"Successfully get market snapshots and write to db.")
def write_to_csv(file_path, fieldnames, data):
    """
    Writes data to a CSV file, overwriting any existing file.

    :param file_path: Path to the CSV file.
    :param fieldnames: A list of field names (headers) for the CSV file.
    :param data: A list of dictionaries where each dictionary represents a row.
    :raises Exception: re-raises any error encountered while writing.
    """
    try:
        # BUG FIX: the old code computed `file_exists` but never used it, and
        # its comments claimed append mode while 'w+' truncated the file.
        # 'w' states the real behavior: truncate, write header, write rows.
        with open(file_path, mode='w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        logging.info(f"Successfully wrote data to {file_path}")
    except Exception as e:
        logging.error(f"Error while writing to CSV file {file_path}: {str(e)}")
        raise
def calculate_yield():
    """Compute per-stock yields for HS300 constituents and export them to CSV.

    For each constituent the latest Futu snapshot is combined with historical
    unadjusted and forward-adjusted (qfq) klines to derive the year-to-date
    yield, yields since 2024-09-24 and 2024-10-08, and price/PE ratios versus
    the post-2021 extremes.

    NOTE(review): every cursor.fetchone() below may return None when a table
    has no row for a code; the subsequent subscripting would raise and abort
    the whole run via the outer except -- confirm data completeness upstream.
    """
    conn = pymysql.connect(**config.db_config)
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        hs300_data = get_hs300_data()
        results = []
        for item in hs300_data:
            code = item['code']
            name = item['code_name']
            # 1. Latest snapshot; alias fields so it looks like a kline row.
            cursor.execute("SELECT * FROM futu_market_snapshot WHERE code=%s ORDER BY update_time DESC LIMIT 1", (code,))
            row1 = cursor.fetchone()
            row1['close'] = row1['last_price']
            row1['time_key'] = row1['update_time']
            # 2. Last close before 2024-01-01 (start-of-year reference).
            cursor.execute("SELECT * FROM hs300_his_kline_none WHERE code=%s AND time_key<'2024-01-01' ORDER BY time_key DESC LIMIT 1", (code,))
            row2 = cursor.fetchone()
            # 3. Last close before 2024-09-24.
            cursor.execute("SELECT * FROM hs300_his_kline_none WHERE code=%s AND time_key<'2024-09-24' ORDER BY time_key DESC LIMIT 1", (code,))
            row3 = cursor.fetchone()
            # 3.1 Highest forward-adjusted close since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY close DESC LIMIT 1", (code,))
            row4 = cursor.fetchone()
            # 3.2 Lowest forward-adjusted close since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY close ASC LIMIT 1", (code,))
            row5 = cursor.fetchone()
            # 3.4 Highest PE since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY pe_ratio DESC LIMIT 1", (code,))
            row6 = cursor.fetchone()
            # 3.5 Lowest PE since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY pe_ratio ASC LIMIT 1", (code,))
            row7 = cursor.fetchone()
            # 3.6 Last close up to 2024-10-08.
            cursor.execute("SELECT * FROM hs300_his_kline_none WHERE code=%s AND time_key<'2024-10-09' ORDER BY time_key DESC LIMIT 1", (code,))
            row8 = cursor.fetchone()
            # 4. Rehab (dividend/split adjustment) factors, oldest first.
            cursor.execute("SELECT * FROM futu_rehab WHERE code=%s ORDER BY ex_div_date ASC", (code,))
            rehab_res = cursor.fetchall()
            # 5. Forward-adjust each unadjusted close by applying every rehab
            #    factor whose ex-div date falls after the row's date.
            for row in [row1, row2, row3, row8]:
                qfq_close = row['close']
                time_key_date = row['time_key'].date()  # datetime -> date for comparison
                for rehab in rehab_res:
                    if rehab['ex_div_date'] > time_key_date:
                        qfq_close = (qfq_close * rehab['forward_adj_factorA']) + rehab['forward_adj_factorB']
                row['qfq_close'] = qfq_close
            # 6. Yields relative to year start / 2024-09-24 / 2024-10-08.
            #    NOTE(review): the `if rowX` guards are dead -- a None row would
            #    already have raised in the loop above.
            year_yield = row1['qfq_close'] / row2['qfq_close'] - 1 if row1 and row2 else None
            yield_0924 = row1['qfq_close'] / row3['qfq_close'] - 1 if row3 and row1 else None
            yield_1008 = row1['qfq_close'] / row8['qfq_close'] - 1 if row8 and row1 else None
            # 6.1 Current price/PE as a fraction of the post-2021 extremes.
            max_price_pct = row1['qfq_close'] / row4['close'] if row1 and row4 and row4['close'] !=0 else None
            max_price_pe_pct = row1['pe_ttm_ratio'] / row4['pe_ratio'] if row1 and row4 and row4['pe_ratio'] !=0 else None
            min_price_pct = row1['qfq_close'] / row5['close'] if row1 and row5 and row5['close'] !=0 else None
            min_price_pe_pct = row1['pe_ttm_ratio'] / row5['pe_ratio'] if row1 and row5 and row5['pe_ratio'] !=0 else None
            max_pe_pct = row1['pe_ttm_ratio'] / row6['pe_ratio'] if row1 and row6 and row6['pe_ratio'] !=0 else None
            min_pe_pct = row1['pe_ttm_ratio'] / row7['pe_ratio'] if row1 and row7 and row7['pe_ratio'] !=0 else None
            # 7. Collect one result row per constituent.
            result = {
                'code': code,
                'name': name,
                'year_begin_date': row2['time_key'].date(),
                'year_begin_close': round(row2['qfq_close'], 4),
                '0924_date': row3['time_key'].date(),
                '0924_close': round(row3['qfq_close'],4),
                '1008_date': row8['time_key'].date(),
                '1008_close': round(row8['qfq_close'], 4),
                'max_price_date': row4['time_key'].date(),
                'max_price': round(row4['close'], 4),
                'max_price_pe': row4['pe_ratio'],
                'max_pe_date': row6['time_key'].date(),
                'max_pe': row6['pe_ratio'],
                'min_price_date': row5['time_key'].date(),
                'min_price': round(row5['close'], 4),
                'min_price_pe': row5['pe_ratio'],
                'min_pe_date': row7['time_key'].date(),
                'min_pe': row7['pe_ratio'],
                'latest_date': row1['time_key'].date(),
                'latest_close': round(row1['qfq_close'], 4),
                'year_yield': round(year_yield, 4),
                'yield_0924': round(yield_0924, 4),
                'yield_1008': round(yield_1008, 4),
                'total_market_val': row1.get('total_market_val', None),
                'pe_ttm_ratio': row1.get('pe_ttm_ratio', None),
                'dividend_ratio_ttm': row1.get('dividend_ratio_ttm', None),
                'dividend_lfy_ratio': row1.get('dividend_lfy_ratio', None),
                'max_price_pct': max_price_pct,
                'max_price_pe_pct': max_price_pe_pct,
                'min_price_pct': min_price_pct,
                'min_price_pe_pct': min_price_pe_pct,
                'max_pe_pct': max_pe_pct,
                'min_pe_pct': min_pe_pct,
                #'price_pe': round(max_price_pct - max_price_pe_pct,4)
            }
            results.append(result)
            logging.info(f"{result}")
            time.sleep(0.1)
        # Write the CSV, named with today's date as yyyymmdd.
        current_date = datetime.now()
        date_string = current_date.strftime('%Y%m%d')
        fieldnames = ['code', 'name', 'year_begin_date', 'year_begin_close', '0924_date', '0924_close', '1008_date', '1008_close',
                'max_price_date', 'max_price', 'max_price_pe', 'max_pe_date', 'max_pe', 'min_price_date', 'min_price', 'min_price_pe', 'min_pe_date', 'min_pe',
                'latest_date', 'latest_close', 'year_yield', 'yield_0924', 'yield_1008', 'total_market_val', 'pe_ttm_ratio', 'dividend_ratio_ttm', 'dividend_lfy_ratio',
                'max_price_pct', 'max_price_pe_pct', 'min_price_pct', 'min_price_pe_pct', 'max_pe_pct', 'min_pe_pct'
            ]
        write_to_csv(f'../result/stat_growth{date_string}.csv', fieldnames, results)
    except Exception as e:
        logging.error(f"计算收益率时出错: {e}")
    finally:
        cursor.close()
        conn.close()
# Entry point: refresh the snapshot table first, then compute and export yields.
if __name__ == "__main__":
    process_snapshot_data()
    calculate_yield()

View File

@ -0,0 +1,350 @@
"""
Script Name:
Description: 获取沪深300成分股的最新股价, 并计算年内涨幅, 924以来的涨幅, 市盈率, 股息率等。
调用em历史数据接口。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import csv
import os
import time
from datetime import datetime
from futu import OpenQuoteContext, RET_OK # Futu API client
from futu import *
import argparse
import src.crawling.stock_hist_em as his_em
import src.logger.logger as logger
import src.config.config as config
# 配置日志
logger.setup_logging()
current_date = datetime.now().strftime("%Y%m%d")
current_year = datetime.now().strftime("%Y")
res_dir = config.global_stock_data_dir
# Refresh the stock code / market-id mapping and return it.
def flush_code_map():
    """Fetch the latest code map from Eastmoney, echo it to stdout, return it."""
    mapping = his_em.code_id_map_em()
    print(mapping)
    return mapping
# Fetch historical klines, retrying on failure.
def fetch_with_retry(code: str, s_date, e_date, adjust: str = '', max_retries: int = 3) -> pd.DataFrame :
    """Fetch daily kline history for *code* from Eastmoney with retries.

    :param code: bare stock code (no market prefix).
    :param s_date: start date string, e.g. "20210101".
    :param e_date: end date string.
    :param adjust: adjustment mode passed through ('' none, 'qfq' used by callers).
    :param max_retries: attempts before giving up.
    :return: DataFrame of rows, or an empty DataFrame after all retries fail.
    """
    retries = 0
    while retries < max_retries:
        try:
            # Call stock_zh_a_hist to get the historical data.
            df = his_em.stock_zh_a_hist(
                symbol=code,
                period="daily",
                start_date=s_date,
                end_date=e_date,
                adjust=adjust,
            )
            # Empty result: log and retry.
            if df.empty:
                logging.info(f'{code} empty data. retry...')
                retries += 1
                time.sleep(3)  # back off before retrying
            else:
                return df
        except Exception as e:
            # BUG FIX: the exception used to be swallowed silently, hiding the
            # reason every retry failed. Log it before backing off.
            logging.warning(f'{code} fetch attempt failed: {e}. retry...')
            retries += 1
            time.sleep(3)  # back off before retrying
    return pd.DataFrame()
# Fetch current-price snapshots for all markets, with retries.
def fetch_snap_all(max_retries: int = 3) -> pd.DataFrame:
    """Fetch spot snapshots for A-share, HK and US markets.

    :param max_retries: attempts per market before giving up on it.
    :return: concatenated DataFrame of whatever markets succeeded (may be empty).
    """
    # Eastmoney filter strings per market.
    market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
            "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
            "us": "m:105,m:106,m:107"}
    result = pd.DataFrame()
    for market_id, fs in market_fs.items():
        retries = 0
        while retries < max_retries:
            try:
                df = his_em.stock_zh_a_spot_em(fs)
                # Empty result: log and retry.
                if df.empty:
                    logging.warning(f'{market_id} empty data. retry...')
                    retries += 1
                    time.sleep(3)  # back off before retrying
                else:
                    print(f'get {market_id} stock snapshot. stock count: {len(df)}')
                    result = pd.concat([result, df], ignore_index=True)
                    break
            except Exception as e:
                # BUG FIX: the exception used to be swallowed silently.
                logging.warning(f'{market_id} fetch attempt failed: {e}. retry...')
                retries += 1
                time.sleep(3)  # back off before retrying
        if retries >= max_retries:
            logging.warning(f'{market_id} fetching error.')
    return result
# Load constituent codes of the configured indices from the database.
def load_index_codes():
    """Return [{'code': '<index_code>-<code>', 'code_name': ...}] for the
    selected CN, HK and US indices (duplicates across indices kept, prefixed
    by their index code so they stay distinguishable)."""
    conn = pymysql.connect(**config.db_config)
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    # CSI 300 only
    #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000300' ")
    # CSI A500 only
    #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000510' ")
    # Union of CSI 300 and CSI A500, de-duplicated
    #cursor.execute("SELECT DISTINCT CONCAT('index-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ")
    # CSI 300 plus CSI A500, duplicates kept (prefixed with index code)
    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ")
    # Wider CN index set (CSI 300/A500/A50, STAR chips, STAR & ChiNext 50), duplicates kept
    #cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510', '930050', '000685', '931643') ")
    hs300_data = cursor.fetchall()
    # HK: HSCEI and Hang Seng Tech constituents
    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hk where index_code IN ('HSCEI', 'HSTECH') ")
    hk_data = cursor.fetchall()
    # US: China-concept stocks
    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_us where index_code IN ('CN_US') ")
    us_data = cursor.fetchall()
    cursor.close()
    conn.close()
    return hs300_data + hk_data + us_data
# Load stocks from the Futu watchlist group "全部".
def load_futu_all_codes():
    """Return [{'code', 'code_name'}] for all STOCK-type entries in the
    Futu watchlist group "全部"; empty list on failure."""
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    stock_data = []
    try:
        ret, data = quote_ctx.get_user_security("全部")
        if ret == RET_OK:
            if data.shape[0] > 0:  # watchlist is not empty
                stock_data = [{'code': row['code'], 'code_name': row['name']} for _, row in data.iterrows() if row['stock_type'] == 'STOCK']
                #stock_data = [{'code': row['code'], 'code_name': row['name']} for _, row in data.iterrows()]
        else:
            # BUG FIX: logging.error('error:', data) passed data as a %-format
            # argument with no placeholder, so the message never rendered.
            logging.error('error: %s', data)
    finally:
        quote_ctx.close()  # always release the Futu connection
    return stock_data
# Look up a row by its trade date.
def get_specific_date_row(data, date):
    """Return the first row whose '日期' field equals *date*, or None."""
    return next((row for row in data if row['日期'] == date), None)
# Fetch one stock's history and compute its yield statistics.
def calculate_stock_statistics(market, code, code_name):
    """Build one result row of growth/amplitude statistics for a stock.

    :param market: market/index prefix (used only for labeling the row).
    :param code: bare stock code passed to the Eastmoney history API.
    :param code_name: display name.
    :return: list of row values matching write_to_csv's header (without the
             three snapshot columns appended by the caller), or None on failure.
    """
    try:
        # Previous calendar year, used to locate the start-of-year close.
        last_year = datetime.now().year - 1
        last_year_str = str(last_year)
        # Forward-adjusted daily history since 2021-01-01.
        data = fetch_with_retry(code, "20210101", current_date, 'qfq')
        if data.empty:
            logging.warning(f'{code}, {code_name} has no data. skipping...')
            return None
        # Most recent row (by date).
        current_row = data.loc[data['日期'].idxmax()]
        # Fallback row: oldest available row substitutes for any missing
        # reference date (year start, 0923, 0930, 1008).
        defaut_row = data.loc[data['日期'].idxmin()]
        # Start-of-year price = last trading day of the previous year.
        year_data = data[data['日期'].str.startswith(last_year_str)]
        if year_data.empty:
            logging.warning(f"{code}, {code_name} 未找到上一年的数据 ({last_year_str}), 以 {defaut_row['日期']} 的数据来代替")
            year_begin_row = defaut_row
        else:
            year_begin_row = year_data.loc[year_data['日期'].idxmax()]
        # Close on 2024-09-23 (fallback to oldest row when absent).
        try:
            row_0923 = data[data['日期'] == '2024-09-23'].iloc[0]
        except IndexError:
            logging.warning(f"{code}, {code_name} 未找到0923的数据, 以 {defaut_row['日期']} 的数据来代替")
            row_0923 = defaut_row
        # Close on 2024-09-30.
        try:
            row_0930 = data[data['日期'] == '2024-09-30'].iloc[0]
        except IndexError:
            logging.warning(f"{code}, {code_name} 未找到0930的数据, 以 {defaut_row['日期']} 的数据来代替")
            row_0930 = defaut_row
        # Open/close on 2024-10-08.
        try:
            row_1008 = data[data['日期'] == '2024-10-08'].iloc[0]
        except IndexError:
            logging.warning(f"{code}, {code_name} 未找到1008的数据, 以 {defaut_row['日期']} 的数据来代替")
            row_1008 = defaut_row
        # Highest close since 2021.
        max_close_row = data.loc[data['收盘'].idxmax()]
        # Lowest close since 2021.
        min_close_row = data.loc[data['收盘'].idxmin()]
        # Year-to-date highest/lowest closes.
        year_data = data[data['日期'].str.startswith(current_year)]
        if year_data.empty:
            logging.warning(f"{code}, {code_name} 未找到年内的数据, 以 {defaut_row['日期']} 的数据来代替")
            year_min_row = defaut_row
            year_max_row = defaut_row
        else:
            year_min_row = year_data.loc[year_data['收盘'].idxmin()]
            year_max_row = year_data.loc[year_data['收盘'].idxmax()]
        # Derived statistics (all simple ratios minus 1).
        try:
            year_increase = (current_row['收盘'] / year_begin_row['收盘'] - 1)
            growth_0923 = (current_row['收盘'] / row_0923['收盘'] - 1)
            growth_0930 = (current_row['收盘'] / row_0930['收盘'] - 1)
            growth_1008 = (current_row['收盘'] / row_1008['收盘'] - 1)
            growth_1008_open = (current_row['收盘'] / row_1008['开盘'] - 1)
            year_amplitude = (year_max_row['收盘'] / year_min_row['收盘'] - 1)
            max_amplitude = (max_close_row['收盘'] / min_close_row['收盘'] - 1)
            stock_recovery = (current_row['收盘'] / max_close_row['收盘'] - 1)
        except ZeroDivisionError:
            logging.error(f"股票 {code} 计算时遇到除零错误")
            return None
        # Assemble the output row (order must match the CSV header).
        # NOTE(review): the `is not None else 'N/A'` guards below are dead --
        # the growth_* values cannot be None at this point.
        result = [
            market,
            code,
            code_name,
            current_row['日期'], current_row['收盘'],
            year_begin_row['日期'], year_begin_row['收盘'],
            row_0923['日期'], row_0923['收盘'] ,
            row_0930['日期'], row_0930['收盘'] ,
            row_1008['日期'], row_1008['开盘'] ,row_1008['收盘'] ,
            max_close_row['日期'], max_close_row['收盘'],
            min_close_row['日期'], min_close_row['收盘'],
            year_max_row['日期'], year_max_row['收盘'],
            year_min_row['日期'], year_min_row['收盘'],
            year_increase,
            growth_0923 if growth_0923 is not None else 'N/A',
            growth_0930 if growth_0930 is not None else 'N/A',
            growth_1008 if growth_1008 is not None else 'N/A',
            growth_1008_open if growth_1008_open is not None else 'N/A',
            year_amplitude,
            max_amplitude,
            stock_recovery
        ]
        return result
    except Exception as e:
        logging.error(f"处理股票 {code} 时出错: {e}")
        return None
# Dump accumulated statistics to a CSV file.
def write_to_csv(results, filename):
    """Write the fixed header row followed by all *results* rows to *filename*."""
    try:
        with open(filename, mode='w', newline='', encoding='utf-8') as out:
            csv_writer = csv.writer(out)
            # Header must stay in sync with calculate_stock_statistics output.
            csv_writer.writerow([
                "股市", "股票代码", "股票名称", "当前日期", "当前收盘", "年初日期", "年初收盘",
                "0923日期", "0923收盘", "0930日期", "0930收盘", "1008日期", "1008开盘", "1008收盘",
                "最高日期", "最高收盘", "最低日期", "最低收盘", "年内最高日期", "年内最高收盘", "年内最低日期", "年内最低收盘", "年内涨幅",
                "相比0923收盘价涨幅", "相比0930收盘价涨幅", "相比1008收盘价涨幅", "相比1008开盘价涨幅",
                "年内振幅", "最大振幅", "股价自最高点恢复", "市盈率TTM", "市净率", "总市值"
            ])
            csv_writer.writerows(results)
    except Exception as e:
        logging.error(f"写入CSV文件时出错: {e}")
# Main driver: build the code list, snapshot the market, compute per-stock stats.
def main(list, debug):
    """Run statistics for the chosen stock list.

    :param list: 'futu' (watchlist), 'index' (DB indices) or 'all' (both).
                 NOTE: the name shadows the builtin; kept for caller compatibility.
    :param debug: process only the first stock when True.
    """
    futu_codes = []
    index_codes = []
    if list == 'futu':
        futu_codes = load_futu_all_codes()
    elif list == 'all':
        futu_codes = load_futu_all_codes()
        index_codes = load_index_codes()
    else:
        index_codes = load_index_codes()
    codes = futu_codes + index_codes
    all_results = []
    # Fetch the market snapshot once and persist it for reference.
    snap_data = fetch_snap_all()
    if snap_data.empty:
        logging.error(f"fetching snapshot data error!")
        return
    file_name = f'{res_dir}/snapshot_em_{current_date}.csv'
    snap_data.to_csv(file_name, index=False, encoding='utf-8')
    logging.info(f"市场快照数据已经写入 CSV 文件 {file_name}\n\n")
    for item in codes:
        code = item['code']
        code_name = item['code_name']
        # Split the market/index prefix from the bare code at the dot.
        try:
            market, clean_code = code.split(".")
        except ValueError:
            logging.error(f"wrong format code: {code}")
            # BUG FIX: without this `continue` the loop fell through and reused
            # the previous iteration's market/clean_code (NameError on the first).
            continue
        logging.info(f"正在处理股票 {market}.{clean_code}, {code_name}...")
        result = calculate_stock_statistics(market, clean_code, code_name)
        if result:
            match = snap_data.loc[snap_data['代码'] == clean_code]
            if not match.empty:  # matching snapshot row found
                result.append(match['市盈率TTM'].iloc[0])
                result.append(match['市净率'].iloc[0])
                result.append(match['总市值'].iloc[0])
            else:
                logging.warning(f'{market}.{clean_code} has no snapshot data.')
                # BUG FIX: pad the three snapshot columns so every row matches
                # the 33-column CSV header instead of silently misaligning.
                result.extend(['N/A', 'N/A', 'N/A'])
            all_results.append(result)
        if debug:
            break
    if all_results:
        file_name = f'{res_dir}/stock_statistics_{list}_{current_date}'
        if debug:
            file_name = f'{file_name}_debug'
        file_name = f'{file_name}.csv'
        write_to_csv(all_results, f'{file_name}')
        logging.info(f"统计结果已写入 CSV 文件 {file_name}")
    else:
        logging.warning("没有可写入的统计数据")
if __name__ == "__main__":
    # Parse command-line options.
    parser = argparse.ArgumentParser(description='计算指定股票的区间收益率')
    parser.add_argument('--list', type=str, default='futu', help='Stocklist to process (futu , index, all)')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
    args = parser.parse_args()
    # Run the main driver; flush_code_map() can be re-enabled to refresh the code map first.
    #flush_code_map()
    main(args.list, args.debug)

View File

@ -0,0 +1,125 @@
"""
Script Name:
Description: 根据yahoo提供的不复权数据和分红及拆股数据来计算前复权和后复权数据。
注意:
结果对不上!
按照yahoo规则不复权数据已经处理了拆股所以只要把分红加上去就行但处理结果与它返回的前复权数据仍然对不对上
有些比如AAPL差不多可以对得上但对于KDP等差异甚大找不到原因。。
所以,这个程序暂时无法使用。。。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import pandas as pd
import logging
import os
import time
import config
# 设置日志
config.setup_logging()
logger = logging.getLogger()
# Database connection helper.
def connect_to_db():
    """Open and return a new pymysql connection using config.db_config."""
    return pymysql.connect(**config.db_config)
# Read every row of the sp500 table: code plus display name.
def fetch_sp500_codes(connection):
    """Return a DataFrame with columns (code, name) for all S&P 500 constituents."""
    return pd.read_sql("SELECT code, code_name as name FROM sp500 ", connection)
# Read a stock's unadjusted kline rows, newest first.
def fetch_sp500_his_kline_none(connection, code):
    """Return all sp500_his_kline_none rows for *code*, ordered by time_key DESC.

    SECURITY FIX: the query used to interpolate *code* with an f-string;
    a code containing a quote would break (or inject into) the SQL. Use a
    parameterized query instead.
    """
    query = "SELECT * FROM sp500_his_kline_none WHERE code = %s ORDER BY time_key DESC"
    return pd.read_sql(query, connection, params=[code])
# Upsert computed adjusted-kline rows into sp500_ajust_kline_202410.
def insert_adjusted_kline_data(connection, data):
    """Bulk insert/update (code, name, time_key, hfq/qfq/none open+close) tuples.

    Errors are logged and swallowed so the caller can continue with the next code.
    """
    try:
        with connection.cursor() as cursor:
            insert_query = """
            INSERT INTO sp500_ajust_kline_202410 (code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                hfq_open = VALUES(hfq_open),
                hfq_close = VALUES(hfq_close),
                qfq_open = VALUES(qfq_open),
                qfq_close = VALUES(qfq_close),
                none_open = VALUES(none_open),
                none_close = VALUES(none_close)
            """
            cursor.executemany(insert_query, data)
            connection.commit()
    except Exception as e:
        logger.error(f"Error inserting data: {e}")
# Compute qfq/hfq prices and insert them into sp500_ajust_kline_202410.
def process_and_insert_adjusted_kline(connection, code, name, result_none):
    """Derive forward (qfq) and backward (hfq) adjusted open/close from
    dividends and upsert the rows.

    *result_none* is expected newest-first (time_key DESC): walking in that
    order, qfq subtracts the dividends accumulated from newer rows, while hfq
    adds the remaining (older) dividends.

    NOTE(review): per the module docstring, these results do not reliably
    match Yahoo's own adjusted series -- treat this output as experimental.
    """
    dividends_qfq = 0
    dividends_hfq = 0
    dividends_total = result_none['dividends'].sum()
    adjusted_data = []
    for index, row in result_none.iterrows():
        # Adjusted open/close relative to dividends accumulated so far.
        qfq_close = row['close'] - dividends_qfq
        qfq_open = row['open'] - dividends_qfq
        hfq_close = row['close'] + dividends_hfq
        hfq_open = row['open'] + dividends_hfq
        adjusted_data.append((
            row['code'], row['name'], row['time_key'],
            hfq_open, hfq_close, qfq_open, qfq_close,
            row['open'], row['close']
        ))
        dividends_qfq += row['dividends']
        dividends_hfq = dividends_total - dividends_qfq
    # Persist into sp500_ajust_kline_202410.
    insert_adjusted_kline_data(connection, adjusted_data)
    logger.info(f"Successfully processed and inserted data for code {code}")
# Main entry: recompute adjusted klines for every S&P 500 constituent.
def main():
    """Iterate sp500 codes, compute qfq/hfq series from dividends, persist them."""
    connection = None  # BUG FIX: predeclare so `finally` cannot raise NameError
    try:
        connection = connect_to_db()
        # All constituents (code, name).
        sp500_codes = fetch_sp500_codes(connection)
        for index, row in sp500_codes.iterrows():
            code = row['code']
            name = row['name']
            logger.info(f"Processing data for code: {code}, name: {name}")
            # Unadjusted klines, newest first.
            result_none = fetch_sp500_his_kline_none(connection, code)
            if result_none.empty:
                logger.warning(f"No data found for code: {code}")
                continue
            # Compute and store the adjusted series for this code.
            process_and_insert_adjusted_kline(connection, code, name, result_none)
    except Exception as e:
        logger.error(f"Error occurred: {e}")
    finally:
        if connection:
            connection.close()
# Script entry point.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,280 @@
"""
Script Name:
Description: 统计过去十年来hs300 和 sp500成分股的投资胜率年化回报率等数据。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import sys
import time
import numpy as np
from datetime import datetime
import argparse
import config
# 配置日志
config.setup_logging()
# 设置默认值
default_min_stat_years = 5
allowed_min_stat_years = [3, 5] # 允许的年份统计范围
default_debug = False
default_market_key = "hs300"
allowed_market_keys = ['hs300', 'sp500']
# Command-line options for the statistics run.
def parse_arguments():
    """Parse CLI flags, substituting module defaults for anything omitted.

    Returns (min_stat_years, debug, market_key).
    """
    parser = argparse.ArgumentParser(description="Run stock yield statistics.")
    # Which market's tables to read.
    parser.add_argument('--market', type=str, choices=allowed_market_keys,
            help=f'Set market key for statistics (allowed: {allowed_market_keys}). Default is {default_market_key}.')
    # Minimum holding period (years) for every measured window.
    parser.add_argument('--min_stat_years', type=int, choices=allowed_min_stat_years,
            help=f'Set minimum years for statistics (allowed: {allowed_min_stat_years}). Default is {default_min_stat_years}.')
    # Debug mode limits the run to a single record.
    parser.add_argument('--debug', action='store_true', help='Enable debug mode (default: False).')
    parsed = parser.parse_args()
    # Fall back to module defaults for anything not supplied on the CLI.
    years = parsed.min_stat_years or default_min_stat_years
    dbg = parsed.debug or default_debug
    market = parsed.market or default_market_key
    return years, dbg, market
# CLI flags are parsed at import time; these act as module-level configuration.
min_stat_years, debug, market_key = parse_arguments()
# Map each market key to its constituent table, history table and result table.
table_mapping = {
    "hs300": {
        "codes": "hs300",
        "his_data": "hs300_ajust_kline_202410",
        "stat_res": f"hs300_{min_stat_years}years_yield_stats_2410"
    },
    "sp500": {
        "codes": "sp500",
        "his_data": "sp500_ajust_kline_202410",
        "stat_res": f"sp500_{min_stat_years}years_yield_stats_2410"
    }
}
# Single shared MySQL connection for the whole run (closed in main's finally).
connection = pymysql.connect(**config.db_config)
# Load constituent codes for the chosen market.
def get_codes(table_mapping, index_name):
    """Return (code, code_name) tuples from the market's constituent table.

    :param table_mapping: market -> table-name mapping.
    :param index_name: key into *table_mapping* ('hs300' or 'sp500').
    :return: tuple of rows; empty list on database error.
    """
    try:
        with connection.cursor() as cursor:
            if debug:
                # Debug mode: limit the run to a single constituent.
                sql = f"SELECT code, code_name FROM {table_mapping[index_name]['codes']} LIMIT 1"
            else:
                # Otherwise read every constituent.
                sql = f"SELECT code, code_name FROM {table_mapping[index_name]['codes']} "
            cursor.execute(sql)
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while reading {table_mapping[index_name]['codes']} : {e}", exc_info=True)
        # BUG FIX: previously fell through and returned None, which made
        # callers crash when iterating; return an empty list instead.
        return []
# Load one code's historical kline rows.
def get_historical_data(table_mapping, index_name, code):
    """Return all adjusted-kline rows for *code*, ordered by time_key ascending.

    :return: list of dict rows; empty list on database error.
    """
    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            sql = f"SELECT * FROM {table_mapping[index_name]['his_data']} WHERE code = %s ORDER BY time_key"
            cursor.execute(sql, (code,))
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while reading {table_mapping[index_name]['his_data']}: {e}", exc_info=True)
        # BUG FIX: previously returned None implicitly on error; return an
        # empty list so callers' emptiness checks behave consistently.
        return []
# Persist one aggregated statistics row.
def insert_yield_stats(connection, table_mapping, index_name, code, name, diff_year, max_entry, min_entry, avg_yield, median_yield, win_rate, annual_max_entry, annual_min_entry, annual_avg_yield, annual_median_yield, max_deficit_entry, annual_yield_variance):
    """Upsert one yield-statistics row keyed by (code, year_diff).

    The *_entry arguments are dicts produced by calculate_yield_rate; only
    their yield-rate, time-key and deficit fields are read here. Database
    errors are logged and swallowed so one bad row does not stop the batch.
    """
    try:
        with connection.cursor() as cursor:
            sql = f"""
            INSERT INTO {table_mapping[index_name]['stat_res']}
            (code, name, year_diff, max_yield_rate, max_yield_rate_start, max_yield_rate_end,
            min_yield_rate, min_yield_rate_start, min_yield_rate_end, avg_yield_rate,
            median_yield_rate, win_rate, annual_max_yield_rate, annual_max_yield_rate_start,
            annual_max_yield_rate_end, annual_min_yield_rate, annual_min_yield_rate_start,
            annual_min_yield_rate_end, annual_avg_yield_rate, annual_median_yield_rate,
            max_deficit_days, max_deficit_start, max_deficit_end, annual_yield_variance)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                max_yield_rate = VALUES(max_yield_rate),
                max_yield_rate_start = VALUES(max_yield_rate_start),
                max_yield_rate_end = VALUES(max_yield_rate_end),
                min_yield_rate = VALUES(min_yield_rate),
                min_yield_rate_start = VALUES(min_yield_rate_start),
                min_yield_rate_end = VALUES(min_yield_rate_end),
                avg_yield_rate = VALUES(avg_yield_rate),
                median_yield_rate = VALUES(median_yield_rate),
                win_rate = VALUES(win_rate),
                annual_max_yield_rate = VALUES(annual_max_yield_rate),
                annual_max_yield_rate_start = VALUES(annual_max_yield_rate_start),
                annual_max_yield_rate_end = VALUES(annual_max_yield_rate_end),
                annual_min_yield_rate = VALUES(annual_min_yield_rate),
                annual_min_yield_rate_start = VALUES(annual_min_yield_rate_start),
                annual_min_yield_rate_end = VALUES(annual_min_yield_rate_end),
                annual_avg_yield_rate = VALUES(annual_avg_yield_rate),
                annual_median_yield_rate = VALUES(annual_median_yield_rate),
                max_deficit_days = VALUES(max_deficit_days),
                max_deficit_start = VALUES(max_deficit_start),
                max_deficit_end = VALUES(max_deficit_end),
                annual_yield_variance = VALUES(annual_yield_variance)
            """
            # Cast numpy scalars to native types for the MySQL driver.
            cursor.execute(sql, (
                code, name, int(diff_year),
                float(max_entry['yield_rate']), max_entry['start_time_key'], max_entry['end_time_key'],
                float(min_entry['yield_rate']), min_entry['start_time_key'], min_entry['end_time_key'],
                float(avg_yield), float(median_yield), win_rate,
                float(annual_max_entry['annual_yield_rate']), annual_max_entry['start_time_key'], annual_max_entry['end_time_key'],
                float(annual_min_entry['annual_yield_rate']), annual_min_entry['start_time_key'], annual_min_entry['end_time_key'],
                float(annual_avg_yield), float(annual_median_yield),
                max_deficit_entry['max_deficit_days'], max_deficit_entry['max_deficit_start'], max_deficit_entry['max_deficit_end'],
                annual_yield_variance
            ))
            connection.commit()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while inserting yield stats for code {code}: {e}", exc_info=True)
# Enumerate holding windows and compute their yields plus longest deficit.
def calculate_yield_rate(data):
    """Compute holding-period yields for every qualifying (start, end) row pair.

    Pairs closer than `min_stat_years` (module-level config) are skipped.
    Uses hfq (backward-adjusted) closes to avoid the zero/negative prices that
    forward adjustment can produce. Returns {year_diff: [entry, ...]} plus a
    catch-all bucket under key 10000 aggregating every entry. Each entry also
    records the initial "underwater" stretch: days until the close first
    exceeds the entry close (the whole window if it never does).
    O(n^2) pairs with an inner scan -- slow for long histories.
    """
    results = {}
    all_entries = []
    num_rows = len(data)
    for i in range(num_rows):
        for j in range(i + 1, num_rows):
            try:
                start_time_key = data[i]['time_key']
                end_time_key = data[j]['time_key']
                time_diff = int((end_time_key - start_time_key).days / 365.0)
                if time_diff < min_stat_years:
                    continue
                # hfq data avoids the negative/zero closes qfq can produce.
                close_start = data[i]['hfq_close']
                close_end = data[j]['hfq_close']
                yield_rate = (close_end / close_start) - 1
                # Simple (non-compounded) annualization over calendar days.
                annual_yield_rate = yield_rate * 365 / (end_time_key - start_time_key).days
                # Longest initial deficit between data[i] and data[j]:
                # days until the close first rises above the entry close.
                max_deficit_days = 0
                max_deficit_start = start_time_key
                max_deficit_end = end_time_key
                for k in range(i + 1, j):
                    if data[k]['hfq_close'] > close_start:
                        deficit_days = (data[k]['time_key'] - start_time_key).days
                        max_deficit_days = deficit_days
                        max_deficit_end = data[k]['time_key']
                        break
                # No recovery inside the window: the whole span counts as deficit.
                if max_deficit_days == 0:
                    max_deficit_days = (end_time_key - start_time_key).days
                    max_deficit_end = end_time_key
                entry = {
                    'diff_year': time_diff,
                    'start_time_key': start_time_key,
                    'end_time_key': end_time_key,
                    'yield_rate': yield_rate,
                    'annual_yield_rate': annual_yield_rate,
                    'max_deficit_days': max_deficit_days,
                    'max_deficit_start': max_deficit_start,
                    'max_deficit_end': max_deficit_end
                }
                all_entries.append(entry)
                if time_diff not in results:
                    results[time_diff] = []
                results[time_diff].append(entry)
            except ZeroDivisionError:
                logging.warning(f"Zero division error for code {data[i]['code']}")
            except Exception as e:
                logging.error(f"Error occurred while calculating yield rate: {e}", exc_info=True)
    # Sentinel bucket 10000 aggregates every window across all year spans.
    results[10000] = all_entries  # aggregate bucket
    return results
# Aggregate per-(code, year_diff) statistics and persist them.
def compute_statistics(connection, table_mapping, index_name, code, name, results):
    """For each year-diff bucket in *results*, derive max/min/avg/median yields,
    win rate, annualized aggregates, variance and the worst initial deficit,
    then upsert one row per bucket via insert_yield_stats."""
    for diff_year, entries in results.items():
        yield_rates = [entry['yield_rate'] for entry in entries]
        annual_yield_rates = [entry['annual_yield_rate'] for entry in entries]
        if yield_rates:
            max_yield = max(yield_rates)
            min_yield = min(yield_rates)
            avg_yield = np.mean(yield_rates)
            median_yield = np.median(yield_rates)
            # Entries that produced the extreme yields (first match wins).
            max_entry = next(entry for entry in entries if entry['yield_rate'] == max_yield)
            min_entry = next(entry for entry in entries if entry['yield_rate'] == min_yield)
            # Annualized-yield aggregates.
            annual_max_yield = max(annual_yield_rates)
            annual_min_yield = min(annual_yield_rates)
            annual_avg_yield = np.mean(annual_yield_rates)
            annual_median_yield = np.median(annual_yield_rates)
            annual_max_entry = next(entry for entry in entries if entry['annual_yield_rate'] == annual_max_yield)
            annual_min_entry = next(entry for entry in entries if entry['annual_yield_rate'] == annual_min_yield)
            # Fraction of windows with positive yield.
            win_rate = len([r for r in yield_rates if r > 0]) / len(yield_rates)
            # Variance of the annualized yields.
            annual_yield_variance = np.var(annual_yield_rates)
            # Window with the longest initial deficit.
            max_deficit_entry = max(entries, key=lambda x: x['max_deficit_days'])
            # Upsert the aggregated row.
            insert_yield_stats(connection, table_mapping, index_name, code, name, diff_year,
                    max_entry, min_entry, avg_yield, median_yield, win_rate,
                    annual_max_entry, annual_min_entry, annual_avg_yield, annual_median_yield,
                    max_deficit_entry, annual_yield_variance)
# Main routine: compute statistics for every constituent of the chosen market.
def main(index_name):
    """Compute and store yield statistics for all codes of *index_name*."""
    try:
        codes = get_codes(table_mapping, index_name)
        for code_row in codes:
            code, name = code_row[0], code_row[1]
            logging.info(f"开始处理 {code} ({name}) 的数据")
            data = get_historical_data(table_mapping, index_name, code)
            if not data:
                logging.warning(f"未找到 {code} 的历史数据")
                continue
            results = calculate_yield_rate(data)
            compute_statistics(connection, table_mapping, index_name, code, name, results)
            logging.info(f"完成 {code} 的处理")
            time.sleep(1)  # throttle between codes
    except Exception as e:
        logging.error(f"处理过程中出现错误: {e}", exc_info=True)
    finally:
        # The module-level connection is closed here; main runs once per process.
        connection.close()
# Entry point: the market is chosen via --market (parsed at import time).
if __name__ == "__main__":
    main(market_key)

View File

@ -0,0 +1,145 @@
import pandas as pd
import pymysql
import backtrader as bt
import config
# Load raw HS300 klines from MySQL in a Backtrader-friendly shape.
def fetch_data_from_mysql():
    """Return a DataFrame of unadjusted klines since 2023-01-01.

    Adds an all-zero 'openinterest' column to match Backtrader's expected
    feed layout: ['datetime', 'open', 'high', 'low', 'close', 'volume',
    'openinterest'].
    """
    connection = pymysql.connect(**config.db_config)
    # ['datetime', 'open', 'high', 'low', 'close', 'volume', 'openinterest']]
    #query = "SELECT code, time_key, qfq_close as close FROM hs300_ajust_kline_202410 where time_key>='2023-01-01 00:00:00' "
    query = "SELECT code, time_key, open, high, low, close, volume FROM hs300_his_kline_none where time_key>='2023-01-01 00:00:00' "
    data = pd.read_sql(query, connection)
    data['time_key'] = pd.to_datetime(data['time_key'], errors='coerce')  # ensure datetime dtype
    data['openinterest'] = 0  # Backtrader requires an openinterest column; DB has none
    connection.close()
    return data
# Format the data into a Backtrader-compatible layout.
# NOTE: runs at import time — loads all rows once and splits them per stock code.
data = fetch_data_from_mysql()
data_by_code = {code: df for code, df in data.groupby('code')}
class BestPortfolioStrategy(bt.Strategy):
    """Monthly-rebalanced momentum strategy.

    At the start of each month it ranks every data feed by its trailing
    20-bar return, holds the top ``max_stocks`` names, and sells anything
    that dropped out of the selection.
    """
    params = dict(
        max_stocks=30,          # number of names held after each rebalance
        rebalance_monthly=True,
        commission=0.0003,
        slippage=0.005,
        maperiod=15,
        printlog=False,
    )

    def __init__(self):
        self.stocks = []            # names currently held
        self.rebalance_date = None  # next scheduled rebalance date (None = rebalance now)

    def next(self):
        # Skip bars until the scheduled rebalance date is reached.
        if self.rebalance_date and self.data.datetime.date(0) < self.rebalance_date.date():
            return
        print(f"rebalance date: {self.rebalance_date}")
        # Schedule the next rebalance for the 1st of the following month.
        self.rebalance_date = self.data.datetime.date(0).replace(day=1) + pd.DateOffset(months=1)
        # Momentum score: trailing 20-bar return for every feed.
        returns = {}
        for data in self.datas:
            returns[data._name] = (data.close[0] / data.close[-20]) - 1
        # Keep the `max_stocks` highest-scoring names.
        sorted_stocks = sorted(returns, key=returns.get, reverse=True)[:self.params.max_stocks]
        print(sorted_stocks)
        # Rebalance: sell names that dropped out, buy the new entrants.
        self.rebalance(sorted_stocks)

    def rebalance(self, selected_stocks):
        """Close positions not in *selected_stocks*, open the new ones (fixed 100 shares)."""
        for stock in self.stocks:
            if stock not in selected_stocks:
                self.close(self.getdatabyname(stock))
        for stock in selected_stocks:
            if stock not in self.stocks:
                self.buy(self.getdatabyname(stock), size=100)
        self.stocks = selected_stocks

    def log(self, txt, dt=None, doprint=False):
        """Print *txt* tagged with the current bar's date when logging is enabled."""
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print(f'{dt.isoformat()},{txt}')

    def notify_order(self, order):
        # Orders still pending — nothing to report yet.
        if order.status in [order.Submitted, order.Accepted]:
            return
        # Finished orders (filled, cancelled or rejected on margin).
        if order.status in [order.Completed, order.Canceled, order.Margin]:
            if order.isbuy():
                # Fixed: the original format string was missing the ", "
                # separator between ref and Price ("ref:%.0fPrice:").
                self.log(
                    'BUY EXECUTED, ref:%.0f, Price: %.2f, Cost: %.2f, Comm %.2f, Size: %.2f, Stock: %s' %
                    (order.ref,              # order id
                     order.executed.price,   # fill price
                     order.executed.value,   # fill value
                     order.executed.comm,    # commission paid
                     order.executed.size,    # fill size
                     order.data._name))      # stock name
            else:  # Sell
                self.log('SELL EXECUTED, ref:%.0f, Price: %.2f, Cost: %.2f, Comm %.2f, Size: %.2f, Stock: %s' %
                         (order.ref,
                          order.executed.price,
                          order.executed.value,
                          order.executed.comm,
                          order.executed.size,
                          order.data._name))
cerebro = bt.Cerebro()
cerebro.broker.setcash(1_000_000)                # starting capital: 1,000,000
cerebro.broker.setcommission(commission=0.0003)  # commission: 3 bps per trade
cerebro.broker.set_slippage_perc(0.005)          # slippage: 0.5%
# Load one PandasData feed per stock code.
for code, df in data_by_code.items():
    # Drop rows whose timestamp failed to parse.
    df = df.dropna(subset=['time_key'])
    # Wrap the DataFrame in Backtrader's feed format.
    data_feed = bt.feeds.PandasData(
        dataname=df,
        datetime='time_key',
        openinterest=None  # None: the frame has no openinterest column to map
    )
    #bt_data = bt.feeds.PandasData(dataname=df)
    cerebro.adddata(data_feed, name=code)
cerebro.addanalyzer(bt.analyzers.TimeReturn, _name='pnl')              # return time series
cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='_AnnualReturn')  # annualized return
cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='_SharpeRatio')    # Sharpe ratio
cerebro.addanalyzer(bt.analyzers.DrawDown, _name='_DrawDown')          # drawdown
# Register the strategy.
cerebro.addstrategy(BestPortfolioStrategy, printlog=True)
# Run the backtest ONCE.  The original called cerebro.run() twice in a row,
# which re-executed the entire backtest and doubled the runtime.
results = cerebro.run()
strat = results[0]
# Daily return series from the TimeReturn analyzer.
daily_return = pd.Series(strat.analyzers.pnl.get_analysis())
# Print evaluation metrics.
print("--------------- AnnualReturn -----------------")
print(strat.analyzers._AnnualReturn.get_analysis())
print("--------------- SharpeRatio -----------------")
print(strat.analyzers._SharpeRatio.get_analysis())
print("--------------- DrawDown -----------------")
print(strat.analyzers._DrawDown.get_analysis())

11
src/static/test_log.py Normal file
View File

@ -0,0 +1,11 @@
import logging
import src.config.config as config
import src.config.log_config as log_conf
# Configure logging handlers/format once, at import time.
log_conf.setup_logging()
def test():
    """Emit a single info record to verify the logging setup works."""
    # The message has no interpolation, so the redundant f-prefix was removed.
    logging.info('test logging')
# Script entry point: run the smoke test when executed directly.
if __name__ == "__main__":
    test()

83
src/static/test_quant.py Normal file
View File

@ -0,0 +1,83 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as sco
import yfinance as yf
# Download historical price data for the asset universe.
assets = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']  # ticker symbols
# NOTE(review): newer yfinance versions default to auto_adjust=True, which
# removes the 'Adj Close' column — confirm the installed version still has it.
data = yf.download(assets, start="2020-01-01", end="2023-01-01")['Adj Close']
print(data)
# Daily simple returns; the first (NaN) row is dropped.
returns = data.pct_change().dropna()
print(returns)
# Annualize mean returns and covariance assuming 252 trading days.
annual_returns = returns.mean() * 252
cov_matrix = returns.cov() * 252
print(annual_returns)
print(cov_matrix)
# Expected annual return and risk (volatility) of a weighted portfolio.
def portfolio_performance(weights, mean_returns, cov_matrix):
    """Return (expected_return, volatility) for the given weight vector."""
    expected = np.sum(weights * mean_returns)
    variance = np.dot(weights.T, np.dot(cov_matrix, weights))
    return expected, np.sqrt(variance)
# Objective function for the optimizer: portfolio volatility only.
def minimize_volatility(weights, mean_returns, cov_matrix):
    """Volatility of the portfolio — the quantity SLSQP minimizes."""
    _, volatility = portfolio_performance(weights, mean_returns, cov_matrix)
    return volatility
# Equality constraint: the weights must sum to exactly 1 (fully invested).
def constraint_sum(weights):
    """Return 0 when *weights* form a fully-invested portfolio."""
    total = np.sum(weights)
    return total - 1
# Initial weights and bounds (each asset weight constrained to [0, 1]).
num_assets = len(assets)
bounds = tuple((0, 1) for asset in range(num_assets))
initial_weights = num_assets * [1. / num_assets]  # start from the equal-weight portfolio
# Equality constraint: weights sum to 1 (fully invested).
constraints = ({'type': 'eq', 'fun': constraint_sum})
# Minimize portfolio variance subject to the constraints.
opt_result = sco.minimize(minimize_volatility, initial_weights, args=(annual_returns, cov_matrix),
                          method='SLSQP', bounds=bounds, constraints=constraints)
# Optimal weight vector found by the solver.
optimal_weights = opt_result.x
# Return/risk of the optimal portfolio.
optimal_return, optimal_volatility = portfolio_performance(optimal_weights, annual_returns, cov_matrix)
# Report the optimal allocation.
print("最优资产组合权重:")
for i, asset in enumerate(assets):
    print(f"{asset}: {optimal_weights[i]:.2%}")
print(f"\n最优组合的年化预期收益: {optimal_return:.2%}")
print(f"最优组合的年化风险(标准差): {optimal_volatility:.2%}")
# Visualize the efficient frontier by Monte-Carlo sampling random portfolios.
def plot_efficient_frontier(mean_returns, cov_matrix, num_portfolios=10000):
    """Scatter-plot *num_portfolios* random portfolios colored by Sharpe ratio.

    mean_returns   : annualized expected returns per asset
    cov_matrix     : annualized covariance matrix
    num_portfolios : number of random weight vectors to sample

    NOTE(review): the red star marks the module-level globals
    `optimal_volatility` / `optimal_return` computed earlier in this
    script — confirm they exist before calling.
    """
    # Derive the asset count from the input instead of silently relying on
    # the module-level `num_assets` global (the original implicitly did).
    n_assets = len(mean_returns)
    results = np.zeros((3, num_portfolios))
    for i in range(num_portfolios):
        weights = np.random.random(n_assets)
        weights /= np.sum(weights)  # normalize so the weights sum to 1
        portfolio_return, portfolio_volatility = portfolio_performance(weights, mean_returns, cov_matrix)
        results[0, i] = portfolio_return
        results[1, i] = portfolio_volatility
        results[2, i] = results[0, i] / results[1, i]  # Sharpe ratio (risk-free rate assumed 0)
    plt.figure(figsize=(10, 6))
    plt.scatter(results[1, :], results[0, :], c=results[2, :], cmap='viridis')
    plt.colorbar(label='Sharpe Ratio')
    plt.scatter(optimal_volatility, optimal_return, c='red', marker='*', s=200)  # optimal portfolio
    plt.title('Efficient Frontier')
    plt.xlabel('Volatility (Risk)')
    plt.ylabel('Return')
    plt.show()
# Draw the efficient frontier for the annualized inputs computed above.
plot_efficient_frontier(annual_returns, cov_matrix)