Update .gitignore and add files.

This commit is contained in:
2024-10-05 16:38:23 +08:00
parent a48dd47ebe
commit 50bbcd7ca5
21 changed files with 9531 additions and 7 deletions

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# 忽略 log 目录
log/
# 忽略 Python 编译文件
*.pyc
# 忽略环境配置文件
.env

View File

@ -0,0 +1,105 @@
import yfinance as yf
import pymysql
import logging
import time
import sys
from datetime import datetime
# Configure the shared log format: time, level, file:line, message.
formatter = logging.Formatter('%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] - %(message)s')
# Log file lives under ./log/ (directory is gitignored).
log_filename = f'./log/get_sp500_his.log'
file_handler = logging.FileHandler(log_filename)
file_handler.setFormatter(formatter)
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(formatter)
# Root logger writes to both the file and stdout at INFO.
logging.basicConfig(level=logging.INFO, handlers=[file_handler, console_handler])
logger = logging.getLogger()
# Open a connection to the stock MySQL database.
def connect_to_db():
    """Return a new pymysql connection to stockdb with dict-shaped rows.

    NOTE(review): credentials are hard-coded here; the sibling scripts
    read them from config.db_config instead.
    """
    conn_kwargs = {
        "host": "172.18.0.2",
        "user": "root",
        "password": "mysqlpw",
        "database": "stockdb",
        "charset": "utf8mb4",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**conn_kwargs)
# Read stock codes and names from the sp500 table.
def fetch_sp500_codes():
    """Return all (code_inner, code_name) dict rows from the sp500 table.

    FIX: the connection is now closed in a finally block, so it is no
    longer leaked when the query raises.  Historical one-off reruns used
    narrowed ``WHERE code_inner IN (...)`` lists here (stocks younger than
    10 years that need period="max", or failed codes to re-fetch).
    """
    db = connect_to_db()
    try:
        with db.cursor() as cursor:
            cursor.execute("SELECT code_inner, code_name FROM sp500 ")
            codes = cursor.fetchall()
        return codes
    finally:
        db.close()
# Insert rows into the sp500_his_kline_none table.
def insert_stock_data_to_db(data, code, name):
    """Upsert a yfinance history DataFrame into sp500_his_kline_none.

    data: DataFrame indexed by timestamp with Open/High/Low/Close/Volume/
    Dividends/'Stock Splits' columns.  MySQL errors are logged and
    swallowed so the caller's loop can continue.
    FIX: the connection is now closed in a finally block (it leaked on
    any exception before), and rows are batched with executemany.
    """
    db = None  # defined before try so finally never sees an unbound name
    try:
        db = connect_to_db()
        insert_query = """
        INSERT INTO sp500_his_kline_none (time_key, open, high, low, close, volume, dividends, stock_splits, code, name)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        open = VALUES(open),
        high = VALUES(high),
        low = VALUES(low),
        close = VALUES(close),
        volume = VALUES(volume),
        dividends = VALUES(dividends),
        stock_splits = VALUES(stock_splits)
        """
        # Build all parameter tuples first, then send them in one batch.
        rows = [
            (index.strftime('%Y-%m-%d %H:%M:%S'), row['Open'], row['High'], row['Low'],
             row['Close'], row['Volume'], row['Dividends'], row['Stock Splits'], code, name)
            for index, row in data.iterrows()
        ]
        with db.cursor() as cursor:
            cursor.executemany(insert_query, rows)
        db.commit()
    except pymysql.MySQLError as e:
        logger.error(f"Error occurred while inserting data: {e}")
    finally:
        if db is not None:
            db.close()
# Fetch historical data for every sp500 stock and store it.
def fetch_and_store_stock_data():
    """Download ~10 years of daily bars for every sp500 code via yfinance
    and upsert them into MySQL.  Per-stock failures are logged and the
    loop continues with the next code."""
    codes = fetch_sp500_codes()
    for row in codes:
        code_inner = row['code_inner']
        code_name = row['code_name']
        logger.info(f"Fetching data for {code_name} ({code_inner})...")
        try:
            stock = yf.Ticker(code_inner)
            # Default: ten years of history, unadjusted prices.
            hist_data = stock.history(period="10y", auto_adjust=False)
            # Companies listed less than 10 years need period="max" instead of "10y".
            #hist_data = stock.history(period="max", auto_adjust=True)
            if not hist_data.empty:
                logger.info(f"Inserting data for {code_name} ({code_inner})...")
                insert_stock_data_to_db(hist_data, code_inner, code_name)
            else:
                logger.warning(f"No data found for {code_name} ({code_inner})")
            # Sleep 3s after each request to avoid hammering the API.
            time.sleep(3)
        except Exception as e:
            logger.error(f"Error fetching data for {code_name} ({code_inner}): {e}")
if __name__ == "__main__":
    fetch_and_store_stock_data()

View File

@ -0,0 +1,156 @@
import pymysql
import logging
import sys
import time
import config
import argparse
# Per-market table names: raw-kline source, hfq target, and the
# adjustment-factor table.
# NOTE(review): fetch_rehab_data below hardcodes futu_rehab, so the
# 'rehab_table' entries are currently informational only.
tables_mapping = {
    'sp500': {
        'none_his_kline': 'sp500_his_kline_none',
        'hfq_his_kline': 'sp500_hfq_his_202410',
        'rehab_table': 'futu_rehab'
    },
    'hs300': {
        'none_his_kline': 'hs300_his_kline_none',
        'hfq_his_kline': 'hs300_hfq_his_202410',
        'rehab_table': 'futu_rehab'
    }
}
# Shared logging setup (file + console).
config.setup_logging("./log/stat_hfq_kline.log")
logger = logging.getLogger()
# Open a MySQL connection using the shared settings.
def connect_to_db():
    """Return a pymysql connection (dict rows) built from config.db_config,
    or None when the connect attempt fails (the error is logged)."""
    params = dict(config.db_config)
    params["cursorclass"] = pymysql.cursors.DictCursor
    try:
        return pymysql.connect(**params)
    except pymysql.MySQLError as e:
        logger.error(f"Error connecting to the database: {e}", exc_info=True)
        return None
# Read code and code_name from the given market table.
def fetch_codes_from_market_table(market, debug=False):
    """Return (code, code_name) dict rows for *market* ('sp500'/'hs300').

    debug=True restricts the query to a single row for smoke tests.
    FIX: the previous hard-coded ``limit 1`` (a debug leftover) made
    every run process only one stock; the sibling stat_adjust script
    already uses this debug-flag pattern.  The connection is closed in a
    finally block.  Returns [] when no connection is available.
    """
    db = connect_to_db()
    if db is None:
        logger.error("Failed to connect to database.")
        return []
    try:
        with db.cursor() as cursor:
            query = f"SELECT code, code_name FROM {market}"
            if debug:
                query += " limit 1"
            cursor.execute(query)
            codes = cursor.fetchall()
        return codes
    finally:
        db.close()
# Load adjustment factors for one futu code, newest first.
def fetch_rehab_data(db, code):
    """Return rehab-factor dict rows for *code* ordered by ex_div_date
    DESC, or [] when the query fails (the error is logged)."""
    query = (
        "SELECT ex_div_date, backward_adj_factorA, backward_adj_factorB "
        "FROM futu_rehab WHERE code = %s ORDER BY ex_div_date DESC"
    )
    try:
        with db.cursor() as cur:
            cur.execute(query, (code,))
            return cur.fetchall()
    except pymysql.MySQLError as e:
        logger.error(f"Error fetching rehab data for {code}: {e}", exc_info=True)
        return []
# Load the raw (unadjusted) close series for one stock.
def fetch_kline_none_data(db, table_name, code):
    """Return (code, time_key, close) dict rows from *table_name* ordered
    by time_key ascending, or [] on query failure."""
    query = ("SELECT code, time_key, close FROM {} "
             "WHERE code = %s ORDER BY time_key ASC").format(table_name)
    try:
        with db.cursor() as cur:
            cur.execute(query, (code,))
            return cur.fetchall()
    except pymysql.MySQLError as e:
        logger.error(f"Error fetching kline none data for {code}: {e}", exc_info=True)
        return []
# Upsert backward-adjusted closes into the hfq table.
def insert_hfq_data(db, hfq_data, hfq_table):
    """Batch-upsert (code, name, time_key, open, close) tuples into
    *hfq_table*; MySQL errors are logged, not raised."""
    insert_query = f"""
            INSERT INTO {hfq_table} (code, name, time_key, open, close)
            VALUES (%s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE close = VALUES(close)
            """
    try:
        with db.cursor() as cur:
            cur.executemany(insert_query, hfq_data)
        db.commit()
    except pymysql.MySQLError as e:
        logger.error(f"Error inserting hfq data: {e}", exc_info=True)
# Compute backward-adjusted (hfq) close prices for a whole market.
def calculate_hfq_price(market):
    """For every stock in *market*, compound the futu backward-adjustment
    factors into the raw closes and upsert them into the hfq table.

    market: 'sp500' or 'hs300' (key into tables_mapping).
    """
    db = connect_to_db()
    if db is None:
        return
    # Resolve the per-market table names.
    table_names = tables_mapping[market]
    none_his_kline_table = table_names['none_his_kline']
    hfq_his_kline_table = table_names['hfq_his_kline']
    # Stock universe for this market.
    codes = fetch_codes_from_market_table(market)
    for row in codes:
        code = row['code']
        name = row['code_name']
        # sp500 codes are stored bare; futu_rehab keys them as 'US.<code>'.
        rehab_code = code
        if market == 'sp500':
            rehab_code = 'US.' + code
        logger.info(f"Processing {code} ({name})...")
        # Adjustment factors, newest first (DESC).
        rehab_res = fetch_rehab_data(db, rehab_code)
        if not rehab_res:
            logger.warning(f"No rehab data found for {code}")
            continue
        # Raw (unadjusted) price series, oldest first.
        kline_none = fetch_kline_none_data(db, none_his_kline_table, code)
        if not kline_none:
            logger.warning(f"No kline none data found for {code}")
            continue
        hfq_data = []
        # Walk each raw bar and compound every applicable factor.
        for kline_row in kline_none:
            close_price = kline_row['close']
            time_key = kline_row['time_key']
            # Factor ex-div dates are compared against the bar's date.
            time_key_date = kline_row['time_key'].date()
            # FIX: start from the unadjusted close.  Previously hfq_close was
            # only assigned inside the factor loop, so a bar predating every
            # ex-div date reused the previous bar's value (or raised
            # NameError on the first bar).  The sibling stat_adjust script
            # already initializes it this way.
            hfq_close = close_price
            for rehab_row in rehab_res:
                if rehab_row['ex_div_date'] <= time_key_date:
                    hfq_close = (close_price * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    close_price = hfq_close
            # Collect the adjusted close (open is not computed here: 0.0).
            hfq_data.append((code, name, time_key, 0.0, hfq_close))
        # Upsert all bars for this stock.
        insert_hfq_data(db, hfq_data, hfq_his_kline_table)
        logger.info(f"Inserted HFQ data for {code} ({name})")
        time.sleep(1)
    db.close()
if __name__ == "__main__":
    # Parse command-line options and kick off the calculation.
    arg_parser = argparse.ArgumentParser(description='Calculate HFQ Prices for Market')
    arg_parser.add_argument('--market', type=str, default='hs300', help='Market to process (sp500 or hs300)')
    cli_args = arg_parser.parse_args()
    calculate_hfq_price(cli_args.market)

17
stockapp/config.py Normal file
View File

@ -0,0 +1,17 @@
import logging
# MySQL connection settings shared by the stockapp scripts.
# NOTE(review): plaintext credentials in source control; consider loading
# these from environment variables (.env is already gitignored).
db_config = {
    'host': '172.18.0.2',
    'user': 'root',
    'password': 'mysqlpw',
    'database': 'stockdb'
}
# Configure root logging to a file plus the console.
def setup_logging(log_filename):
    """Configure the root logger at INFO with the shared stockapp format,
    writing to *log_filename* and to the console."""
    log_format = '%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] - %(message)s'
    handlers = [logging.FileHandler(log_filename), logging.StreamHandler()]
    logging.basicConfig(level=logging.INFO, format=log_format, handlers=handlers)

85
stockapp/get_futu_rehb.py Normal file
View File

@ -0,0 +1,85 @@
import logging
import pymysql
import time
from futu import *
import pandas as pd
import config
# Shared logging setup (file + console).
config.setup_logging("./log/futu_rehab.log")
# Open a MySQL connection from the shared settings (default tuple cursor).
def get_mysql_connection():
    """Return a new pymysql connection built from config.db_config."""
    return pymysql.connect(**config.db_config)
# Fetch the stock-code list from the given table.
def get_stock_codes(table_name):
    """Return all (code, code_name) dict rows from *table_name*.

    FIX: the connection is now closed in a finally block, so it is no
    longer leaked when the query raises.
    """
    connection = get_mysql_connection()
    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            cursor.execute(f"SELECT code, code_name FROM {table_name} ")
            result = cursor.fetchall()
        return result
    finally:
        connection.close()
# Insert or update adjustment-factor rows for one stock.
def insert_or_update_rehab_data(connection, rehab_data, code, name):
    """Upsert every row of a futu rehab DataFrame into futu_rehab.

    rehab_data: DataFrame with ex_div_date plus the four forward/backward
    adjustment-factor columns.  MySQL errors are logged and swallowed.
    """
    try:
        with connection.cursor() as cursor:
            sql = """
            INSERT INTO futu_rehab (code, name, ex_div_date, forward_adj_factorA, forward_adj_factorB, backward_adj_factorA, backward_adj_factorB)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            forward_adj_factorA = VALUES(forward_adj_factorA),
            forward_adj_factorB = VALUES(forward_adj_factorB),
            backward_adj_factorA = VALUES(backward_adj_factorA),
            backward_adj_factorB = VALUES(backward_adj_factorB)
            """
            for row in rehab_data.itertuples(index=False):
                cursor.execute(sql, (code, name, row.ex_div_date, row.forward_adj_factorA, row.forward_adj_factorB, row.backward_adj_factorA, row.backward_adj_factorB))
        connection.commit()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while inserting or updating rehab data: {e}", exc_info=True)
# Fetch adjustment-factor (rehab) data from the Futu API.
def get_rehab_data(code):
    """Return the futu rehab DataFrame for *code*, or None on failure.

    FIX: the quote context is now closed in a finally block so the OpenD
    connection slot is released even if the API call raises.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        ret, data = quote_ctx.get_rehab(code)
    finally:
        quote_ctx.close()
    if ret == RET_OK:
        return data
    else:
        logging.error(f"Failed to get rehab data for {code}: {data}")
        return None
# Main driver: fetch and store rehab data for every stock in a table.
def process_stock_data(table_name, prefix=''):
    """Fetch futu rehab factors for every stock in *table_name* and upsert
    them into MySQL.

    prefix: optional futu market prefix (sp500 stocks need 'US.').
    FIX: the connection is now closed in a finally block so it is not
    leaked when a per-stock step raises.
    """
    stocks = get_stock_codes(table_name)
    connection = get_mysql_connection()
    try:
        for stock in stocks:
            code = stock['code']
            name = stock['code_name']
            # sp500 codes are stored bare; futu expects e.g. 'US.AAPL'.
            full_code = f"{prefix}{code}" if prefix else code
            logging.info(f"Processing {full_code} ({name})")
            # Pull the adjustment factors from the futu API.
            rehab_data = get_rehab_data(full_code)
            if rehab_data is not None:
                insert_or_update_rehab_data(connection, rehab_data, full_code, name)
                logging.info(f"Inserted/Updated rehab data for {full_code} ({name})")
            # Throttle futu API calls.
            time.sleep(3)
    finally:
        connection.close()
if __name__ == "__main__":
    # hs300 codes need no prefix.
    process_stock_data("hs300")
    # sp500 codes need the 'US.' futu prefix.
    process_stock_data("sp500", prefix='US.')

View File

@ -0,0 +1,106 @@
import pymysql
import time
import logging
from futu import *
from datetime import datetime, timedelta
import config
config.setup_logging("./log/get_hs300_his_kline.log")
# Module-level MySQL connection (default tuple cursor — rows by position).
connection = pymysql.connect(**config.db_config)
# Adjustment type: none (raw prices).
# selected_autype = AuType.NONE
# selected_table = "hs300_his_kline_none"
# Adjustment type: backward-adjusted (HFQ) — currently active.
selected_autype = AuType.HFQ
selected_table = "hs300_his_kline_hfq"
# Adjustment type: forward-adjusted (QFQ, the futu default).
# selected_autype = AuType.QFQ
# selected_table = "hs300_qfq_his"
# Range end: today.
end_date = datetime.now().strftime('%Y-%m-%d')
# Range start: ten years back plus one day.
start_date = (datetime.now() - timedelta(days=365*10-1)).strftime('%Y-%m-%d')
# Upsert one page of futu K-line rows into the selected table.
def insert_data(connection, data):
    """Upsert a futu K-line DataFrame into *selected_table*.

    ON DUPLICATE KEY UPDATE lets reruns refresh existing bars.  MySQL
    errors are logged/printed and swallowed so the paging loop continues.
    FIX: the SQL text is loop-invariant, so it is built once instead of
    re-formatting the f-string per row, and rows go in one executemany
    batch instead of per-row round trips.
    """
    try:
        sql = f"""
        INSERT INTO {selected_table} (code, name, time_key, open, close, high, low, pe_ratio, turnover_rate, volume, turnover, change_rate, last_close)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        name = VALUES(name),
        open = VALUES(open),
        close = VALUES(close),
        high = VALUES(high),
        low = VALUES(low),
        pe_ratio = VALUES(pe_ratio),
        turnover_rate = VALUES(turnover_rate),
        volume = VALUES(volume),
        turnover = VALUES(turnover),
        change_rate = VALUES(change_rate),
        last_close = VALUES(last_close)
        """
        rows = [
            (row['code'], row['name'], row['time_key'], row['open'], row['close'],
             row['high'], row['low'], row['pe_ratio'], row['turnover_rate'],
             row['volume'], row['turnover'], row['change_rate'], row['last_close'])
            for _, row in data.iterrows()
        ]
        with connection.cursor() as cursor:
            cursor.executemany(sql, rows)
        connection.commit()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while inserting data: {e}")
        print(f"Error occurred while inserting data: {e}")
# Read every stock code from the hs300 table.
def get_hs300_codes():
    """Return all hs300 codes; the module-level connection uses the
    default cursor, so each row is a 1-tuple ``(code,)``."""
    with connection.cursor() as cursor:
        cursor.execute("SELECT code FROM hs300 ")
        return cursor.fetchall()
# Open the futu quote context against the local OpenD gateway.
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
try:
    hs300_codes = get_hs300_codes()
    for code_row in hs300_codes:
        code = code_row[0]  # tuple row; index 0 is the code column
        # First page of historical K-lines (paged, up to 500 bars per call).
        ret, data, page_req_key = quote_ctx.request_history_kline(code, autype=selected_autype, start=start_date, end=end_date, max_count=500)
        if ret == RET_OK:
            logging.info(f"成功获取 {code} 的第一页数据,共 {len(data)}")
            print(f"成功获取 {code} 的第一页数据,共 {len(data)}")
            # Store the first page.
            insert_data(connection, data)
        else:
            logging.error(f"获取 {code} 的数据失败: {data}")
            print(f"获取 {code} 的数据失败: {data}")
        # Follow the pagination key until it is exhausted.
        while page_req_key is not None:
            time.sleep(1)  # throttle: 1s between page requests
            ret, data, page_req_key = quote_ctx.request_history_kline(code, autype=selected_autype, start=start_date, end=end_date, max_count=500, page_req_key=page_req_key)
            if ret == RET_OK:
                logging.info(f"成功获取 {code} 的分页数据,共 {len(data)}")
                print(f"成功获取 {code} 的分页数据,共 {len(data)}")
                # Store this page.
                insert_data(connection, data)
            else:
                logging.error(f"分页数据获取失败: {data}")
                print(f"分页数据获取失败: {data}")
        # Throttle: 2s between stocks.
        time.sleep(2)
finally:
    quote_ctx.close()  # close the futu connection
    connection.close()  # close the MySQL connection

View File

@ -46,7 +46,7 @@ def insert_or_update_data(connection, data, market, plat):
# MySQL 插入或更新语句
sql = """
INSERT INTO plat_list (code, plate_name, plate_id, market, plat, up_date)
INSERT INTO futu_plat_list (code, plate_name, plate_id, market, plat, up_date)
VALUES (%s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
plate_name = VALUES(plate_name),

View File

@ -0,0 +1,110 @@
import yfinance as yf
import pymysql
import logging
import time
import sys
from datetime import datetime
import config  # shared DB/log settings from config.py
# Stocks listed for less than 10 years: these cannot use period="10y"
# and fall back to period="max" instead.
special_stock_codes = ('ABNB', 'CARR', 'CEG', 'GEHC', 'GEV', 'HUBB', 'KVUE', 'OTIS', 'PLTR', 'SOLV', 'VLTO')
# When True, yfinance auto-adjusts prices (adjusted kline).
kline_adjust = False
# Target table depends on whether prices are adjusted.
table_name = 'sp500_qfq_his_202410' if kline_adjust else 'sp500_his_kline_none'
# Logging via the shared helper.
config.setup_logging("./log/get_sp500_his_kline.log")
logger = logging.getLogger()
# Open a MySQL connection from the shared settings.
def connect_to_db():
    """Return a pymysql connection with dict-shaped rows built from
    config.db_config, or None when connecting fails (error is logged)."""
    conn_params = dict(config.db_config)
    # Dict cursor so callers index rows by column name.
    conn_params["cursorclass"] = pymysql.cursors.DictCursor
    try:
        return pymysql.connect(**conn_params)
    except pymysql.MySQLError as e:
        logger.error(f"Error connecting to the database: {e}", exc_info=True)
        return None
# Load sp500 codes and names from MySQL.
def fetch_sp500_codes():
    """Return all (code_inner, code_name) dict rows from the sp500 table,
    or [] when no DB connection could be made."""
    db = connect_to_db()
    if db is None:
        logger.error("Failed to connect to database.")
        return []
    try:
        with db.cursor() as cur:
            cur.execute("SELECT code_inner, code_name FROM sp500 ")
            return cur.fetchall()
    finally:
        db.close()
# Insert yfinance rows into the configured target table.
def insert_stock_data_to_db(data, code, name):
    """Upsert a yfinance history DataFrame into *table_name*.

    data: DataFrame indexed by timestamp with Open/High/Low/Close/Volume/
    Dividends/'Stock Splits' columns.  MySQL errors are logged and
    swallowed; the connection is always closed.
    FIX: ``db`` is initialised before the try so the finally clause can
    never hit an unbound name, and rows are batched with executemany.
    """
    db = None
    try:
        db = connect_to_db()
        if db is None:
            return
        insert_query = f"""
        INSERT INTO {table_name} (time_key, open, high, low, close, volume, dividends, stock_splits, code, name)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        open = VALUES(open),
        high = VALUES(high),
        low = VALUES(low),
        close = VALUES(close),
        volume = VALUES(volume),
        dividends = VALUES(dividends),
        stock_splits = VALUES(stock_splits)
        """
        # Build all parameter tuples first, then batch in one round trip.
        rows = [
            (index.strftime('%Y-%m-%d %H:%M:%S'), row['Open'], row['High'], row['Low'],
             row['Close'], row['Volume'], row['Dividends'], row['Stock Splits'], code, name)
            for index, row in data.iterrows()
        ]
        with db.cursor() as cursor:
            cursor.executemany(insert_query, rows)
        db.commit()
    except pymysql.MySQLError as e:
        logger.error(f"Error occurred while inserting data: {e}", exc_info=True)
    finally:
        if db:
            db.close()
# Fetch and store historical data for every sp500 stock.
def fetch_and_store_stock_data():
    """Download daily history for each sp500 code via yfinance and upsert
    it into *table_name*; per-stock failures are logged and skipped."""
    codes = fetch_sp500_codes()
    for row in codes:
        code_inner = row['code_inner']
        code_name = row['code_name']
        logger.info(f"Fetching data for {code_name} ({code_inner})...")
        # Stocks younger than ten years must use "max"; the rest use "10y".
        period = "max" if code_inner in special_stock_codes else "10y"
        try:
            stock = yf.Ticker(code_inner)
            # kline_adjust decides auto_adjust (adjusted vs raw prices).
            hist_data = stock.history(period=period, auto_adjust=kline_adjust)
            if not hist_data.empty:
                logger.info(f"Inserting data for {code_name} ({code_inner}) into {table_name}...")
                insert_stock_data_to_db(hist_data, code_inner, code_name)
            else:
                logger.warning(f"No data found for {code_name} ({code_inner})")
            # Sleep 3s between requests to stay polite to the API.
            time.sleep(3)
        except Exception as e:
            logger.error(f"Error fetching data for {code_name} ({code_inner}): {e}", exc_info=True)
if __name__ == "__main__":
    fetch_and_store_stock_data()

File diff suppressed because it is too large Load Diff

2516
stockapp/sample/AAPL.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
ret, data, page_req_key = quote_ctx.request_history_kline('HK.09992', start='2021-10-03', end='2021-11-08', max_count=50) # 每页5个请求第一页
ret, data, page_req_key = quote_ctx.request_history_kline('HK.00700', autype=AuType.NONE, start='2021-10-03', end='2021-11-08', max_count=50) # 每页5个请求第一页
if ret == RET_OK:
print(data)
print(data['code'][0]) # 取第一条的股票代码

View File

@ -0,0 +1,12 @@
from futu import *
# Demo: read the user's watchlist from the local OpenD gateway.
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# "港股" is the HK-market watchlist group name (runtime string).
ret, data = quote_ctx.get_user_security("港股")
if ret == RET_OK:
    print(data)
    if data.shape[0] > 0:  # watchlist is not empty
        print(data['code'][0])  # first stock code
        print(data['code'].values.tolist())  # codes as a plain list
else:
    print('error:', data)
quote_ctx.close()  # close the connection to free the OpenD slot

View File

@ -0,0 +1,11 @@
from futu import *
# Demo: fetch adjustment-factor (rehab) data for one US stock.
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
ret, data = quote_ctx.get_rehab("US.AAPL")
if ret == RET_OK:
    print(data)
    print(data['ex_div_date'][0])  # first ex-dividend date
    print(data['ex_div_date'].values.tolist())  # dates as a plain list
else:
    print('error:', data)
quote_ctx.close()  # close the connection to free the OpenD slot

View File

@ -0,0 +1,15 @@
import yfinance as yf
import pandas as pd
# Demo: fetch the AAPL ticker.
stock = yf.Ticker("AAPL")
# Last ten years of daily bars.  NOTE(review): auto_adjust=False returns
# UNadjusted prices — the original comment claimed forward-adjusted.
hist_data = stock.history(period="10y", auto_adjust=False)
# Show the first rows.
print(hist_data.head())
# Persist to CSV.
hist_data.to_csv("AAPL_10year_data.csv")

View File

@ -0,0 +1,17 @@
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
# Range end: today.
end_date = datetime.today().strftime('%Y-%m-%d')
# Range start: ten years ago.
start_date = (datetime.today() - timedelta(days=365*10)).strftime('%Y-%m-%d')
# Download AAPL daily data for the range.
data = yf.download('AAPL', start=start_date, end=end_date)
# Save to CSV.
data.to_csv('AAPL.csv')
print(f"Downloaded AAPL stock data from {start_date} to {end_date} and saved to AAPL.csv")

126
stockapp/shell/stat.sql Normal file
View File

@ -0,0 +1,126 @@
-- Among all rows with year_diff = 10000, bucket win_rate into ranges and
-- report each bucket's row count and its share of the total.
WITH filtered_data AS (
    SELECT win_rate
    FROM hs300_5years_yield_stats_2410
    WHERE year_diff = 10000
)
SELECT
    CASE
        WHEN win_rate >= 0.99995 THEN '100%'
        WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%100%'
        WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
        WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
        ELSE '20% 以下'
    END AS win_rate_range,
    ROUND(COUNT(*) / (SELECT COUNT(*) FROM filtered_data) * 100, 2) AS percentage,
    COUNT(*) AS count
FROM filtered_data
GROUP BY win_rate_range
ORDER BY
    CASE win_rate_range
        WHEN '100%' THEN 1
        WHEN '90%100%' THEN 2
        WHEN '50% ~ 90%' THEN 3
        WHEN '20% ~ 50%' THEN 4
        ELSE 5
    END;
;
-- annual_median_yield_rate is the median annualized return.  For rows with
-- year_diff = 10000: sort by the median descending, split into 10 equal
-- buckets (deciles), and output each bucket's median range and row count.
WITH ranked_data AS (
    SELECT
        annual_median_yield_rate,
        NTILE(10) OVER (ORDER BY annual_median_yield_rate DESC) AS tile_rank
    FROM sp500_5years_yield_stats_2410
    WHERE year_diff = 10000
)
SELECT
    MAX(annual_median_yield_rate) AS max_yield_rate,
    MIN(annual_median_yield_rate) AS min_yield_rate,
    COUNT(*) AS record_count
FROM ranked_data
GROUP BY tile_rank
ORDER BY tile_rank;
-- max_deficit_days is the longest buy-and-stay-underwater stretch.  For rows
-- with year_diff = 10000, reuse the win-rate buckets above and report the
-- max / min / average of max_deficit_days per bucket.
WITH filtered_data AS (
    SELECT win_rate, max_deficit_days
    FROM sp500_5years_yield_stats_2410
    WHERE year_diff = 10000
)
SELECT
    CASE
        WHEN win_rate >= 0.99995 THEN '100%'
        WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%100%'
        WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
        WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
        ELSE '20% 以下'
    END AS win_rate_range,
    MAX(max_deficit_days) AS max_deficit,
    MIN(max_deficit_days) AS min_deficit,
    ROUND(AVG(max_deficit_days), 2) AS avg_deficit
FROM filtered_data
GROUP BY win_rate_range
ORDER BY
    CASE win_rate_range
        WHEN '100%' THEN 1
        WHEN '90%100%' THEN 2
        WHEN '50% ~ 90%' THEN 3
        WHEN '20% ~ 50%' THEN 4
        ELSE 5
    END;
-- From rows with year_diff = 10000 and win_rate >= 0.99995, extract code,
-- name and the yield/deficit summary columns.
SELECT
    code,
    name,
    max_yield_rate,
    median_yield_rate,
    annual_max_yield_rate,
    annual_median_yield_rate,
    max_deficit_days
FROM hs300_5years_yield_stats_2410
WHERE year_diff = 10000
  AND win_rate >= 0.99995;
-- For each code, find the year_diff (excluding 10000) that maximizes
-- annual_median_yield_rate — the per-code "best" year_diff — then group by
-- that year_diff and report the row count and share of the total.
WITH best_year_diff_per_code AS (
    SELECT code,
           year_diff,
           annual_median_yield_rate,
           RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
    FROM sp500_5years_yield_stats_2410
    WHERE year_diff != 10000
)
SELECT
    year_diff,
    ROUND(COUNT(*) / (SELECT COUNT(*) FROM best_year_diff_per_code WHERE rank_by_yield = 1) * 100, 2) AS percentage,
    COUNT(*) AS record_count
FROM best_year_diff_per_code
WHERE rank_by_yield = 1
GROUP BY year_diff
ORDER BY year_diff;
-- Same per-code best-year_diff computation as above, but output the raw
-- (code, best year_diff, best median yield) rows instead of the summary.
WITH best_year_diff_per_code AS (
    SELECT
        code,
        year_diff,
        annual_median_yield_rate,
        RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
    FROM sp500_5years_yield_stats_2410
    WHERE year_diff != 10000
)
SELECT
    code,
    year_diff AS best_year_diff,
    annual_median_yield_rate AS max_annual_median_yield_rate
FROM best_year_diff_per_code
WHERE rank_by_yield = 1
ORDER BY code;

View File

@ -0,0 +1,189 @@
import pymysql
import logging
import sys
import time
import config
import argparse
# Per-market table names: raw-kline source, adjusted-kline target, and the
# adjustment-factor table.
# NOTE(review): fetch_rehab_data below hardcodes futu_rehab, so the
# 'rehab_table' entries are currently informational only.
tables_mapping = {
    'sp500': {
        'none_his_kline': 'sp500_his_kline_none',
        'adjust_his_kline': 'sp500_ajust_kline_202410',
        'rehab_table': 'futu_rehab'
    },
    'hs300': {
        'none_his_kline': 'hs300_his_kline_none',
        'adjust_his_kline': 'hs300_ajust_kline_202410',
        'rehab_table': 'futu_rehab'
    }
}
# Shared logging setup (file + console).
config.setup_logging("./log/stat_adjust_kline.log")
logger = logging.getLogger()
# Open a MySQL connection using the shared settings.
def connect_to_db():
    """Return a pymysql connection (dict rows) from config.db_config, or
    None when connecting fails (the error is logged)."""
    options = dict(config.db_config, cursorclass=pymysql.cursors.DictCursor)
    try:
        return pymysql.connect(**options)
    except pymysql.MySQLError as e:
        logger.error(f"Error connecting to the database: {e}", exc_info=True)
        return None
# Read code and code_name from the given market table.
def fetch_codes_from_market_table(market, debug=False):
    """Return (code, code_name) dict rows for *market* ('sp500'/'hs300').

    With debug=True only 2 rows are fetched, for quick smoke tests.
    Returns [] when no DB connection is available.
    """
    db = connect_to_db()
    if db is None:
        logger.error("Failed to connect to database.")
        return []
    try:
        sql = f"SELECT code, code_name FROM {market}"
        if debug:
            sql += " LIMIT 2"
        with db.cursor() as cur:
            cur.execute(sql)
            return cur.fetchall()
    finally:
        db.close()
# Load all adjustment factors for one futu code, newest first.
def fetch_rehab_data(db, code):
    """Return forward/backward factor dict rows ordered by ex_div_date
    DESC, or [] when the query fails (the error is logged)."""
    query = (
        "SELECT ex_div_date, forward_adj_factorA, forward_adj_factorB, "
        "backward_adj_factorA, backward_adj_factorB "
        "FROM futu_rehab WHERE code = %s ORDER BY ex_div_date DESC"
    )
    try:
        with db.cursor() as cur:
            cur.execute(query, (code,))
            return cur.fetchall()
    except pymysql.MySQLError as e:
        logger.error(f"Error fetching rehab data for {code}: {e}", exc_info=True)
        return []
# Load the raw (unadjusted) open/close series for one stock.
def fetch_kline_none_data(db, table_name, code):
    """Return (code, time_key, open, close) dict rows from *table_name*
    ordered by time_key ascending, or [] on query failure."""
    query = ("SELECT code, time_key, open, close FROM {} "
             "WHERE code = %s ORDER BY time_key ASC").format(table_name)
    try:
        with db.cursor() as cur:
            cur.execute(query, (code,))
            return cur.fetchall()
    except pymysql.MySQLError as e:
        logger.error(f"Error fetching kline none data for {code}: {e}", exc_info=True)
        return []
# Upsert adjusted + raw prices into the adjust-kline table.
def insert_hfq_data(db, hfq_data, hfq_table):
    """Batch-upsert rows of (code, name, time_key, hfq_open, hfq_close,
    qfq_open, qfq_close, none_open, none_close) into *hfq_table*; MySQL
    errors are logged, not raised."""
    insert_query = f"""
            INSERT INTO {hfq_table} (code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            hfq_open = VALUES(hfq_open),
            hfq_close = VALUES(hfq_close),
            qfq_open = VALUES(qfq_open),
            qfq_close = VALUES(qfq_close),
            none_open = VALUES(none_open),
            none_close = VALUES(none_close)
            """
    try:
        with db.cursor() as cur:
            cur.executemany(insert_query, hfq_data)
        db.commit()
    except pymysql.MySQLError as e:
        logger.error(f"Error inserting hfq data: {e}", exc_info=True)
# Compute backward- (hfq) and forward- (qfq) adjusted prices.
def calculate_hfq_qfq_price(market, debug=False):
    """For every stock in *market*, combine raw opens/closes with the futu
    adjustment factors and upsert hfq/qfq/raw prices into the adjust table.

    market: 'sp500' or 'hs300' (key into tables_mapping).
    debug:  forwarded to fetch_codes_from_market_table to limit rows.
    """
    db = connect_to_db()
    if db is None:
        return
    # Resolve the per-market table names.
    table_names = tables_mapping[market]
    none_his_kline_table = table_names['none_his_kline']
    adjust_kline_table = table_names['adjust_his_kline']
    # Stock universe for this market.
    codes = fetch_codes_from_market_table(market, debug)
    for row in codes:
        code = row['code']
        name = row['code_name']
        # sp500 codes are stored bare; futu_rehab keys them as 'US.<code>'.
        rehab_code = code
        if market == 'sp500':
            rehab_code = 'US.' + code
        logger.info(f"Processing {code} ({name})...")
        # Adjustment factors, newest first (DESC).
        rehab_res = fetch_rehab_data(db, rehab_code)
        if not rehab_res:
            logger.warning(f"No rehab data found for {code}")
            continue
        # Ascending copy, used for the forward (qfq) pass.
        rehab_res_asc = list(reversed(rehab_res))
        # Raw (unadjusted) price series, oldest first.
        kline_none = fetch_kline_none_data(db, none_his_kline_table, code)
        if not kline_none:
            logger.warning(f"No kline none data found for {code}")
            continue
        hfq_data = []
        # Walk each raw bar and compound the applicable factors.
        for kline_row in kline_none:
            none_open = kline_row['open']
            none_close = kline_row['close']
            time_key = kline_row['time_key']
            # Factor ex-div dates are compared against the bar's date.
            time_key_date = time_key.date()
            # Backward (hfq) pass: start from the raw prices and compound
            # every factor whose ex-div date is on/before this bar,
            # iterating newest -> oldest.
            hfq_open = none_open
            hfq_close = none_close
            tmp_close = none_close
            tmp_open = none_open
            for rehab_row in rehab_res:
                if rehab_row['ex_div_date'] <= time_key_date:
                    hfq_close = (tmp_close * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    hfq_open = (tmp_open * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    tmp_close = hfq_close
                    tmp_open = hfq_open
            # Forward (qfq) pass: compound factors whose ex-div date is
            # after this bar, iterating oldest -> newest.
            qfq_close = none_close
            qfq_open = none_open
            tmp_close = none_close
            tmp_open = none_open
            # For sp500 the forward factorB is forced to 0 — presumably a
            # workaround for the US factor data; confirm with the data owner.
            zeroFactorB = True if market == 'sp500' else False
            for rehab_row in rehab_res_asc:
                factorB = 0 if zeroFactorB else rehab_row['forward_adj_factorB']
                if rehab_row['ex_div_date'] > time_key_date:
                    qfq_close = (tmp_close * rehab_row['forward_adj_factorA']) + factorB
                    qfq_open = (tmp_open * rehab_row['forward_adj_factorA']) + factorB
                    tmp_close = qfq_close
                    tmp_open = qfq_open
            # Collect adjusted + raw prices for this bar.
            hfq_data.append((code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close))
        # Upsert all bars for this stock.
        insert_hfq_data(db, hfq_data, adjust_kline_table)
        logger.info(f"Inserted HFQ/QFQ data for {code} ({name})")
        time.sleep(1)
    db.close()
if __name__ == "__main__":
    # Parse CLI options and run the adjustment calculation.
    arg_parser = argparse.ArgumentParser(description='Calculate HFQ and QFQ Prices for Market')
    arg_parser.add_argument('--market', type=str, default='hs300', help='Market to process (sp500 or hs300)')
    arg_parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
    cli_args = arg_parser.parse_args()
    calculate_hfq_qfq_price(cli_args.market, cli_args.debug)

View File

@ -10,11 +10,17 @@ import argparse
default_min_stat_years = 5
allowed_min_stat_years = [3, 5] # 允许的年份统计范围
default_debug = False
default_market_key = "hs300"
allowed_market_keys = ['hs300', 'sp500']
# 配置命令行参数
def parse_arguments():
parser = argparse.ArgumentParser(description="Run stock yield statistics.")
# 添加 min_stat_years 参数
parser.add_argument('--market', type=str, choices=allowed_market_keys,
help=f'Set market key for statistics (allowed: {allowed_market_keys}). Default is {default_market_key}.')
# 添加 min_stat_years 参数
parser.add_argument('--min_stat_years', type=int, choices=allowed_min_stat_years,
help=f'Set minimum years for statistics (allowed: {allowed_min_stat_years}). Default is {default_min_stat_years}.')
@ -27,18 +33,19 @@ def parse_arguments():
# 如果没有提供 --min_stat_years使用默认值
min_stat_years = args.min_stat_years if args.min_stat_years else default_min_stat_years
debug = args.debug if args.debug else default_debug
market_key = args.market if args.market else default_market_key
return min_stat_years, debug
return min_stat_years, debug, market_key
# 获取用户输入的参数
min_stat_years, debug = parse_arguments()
min_stat_years, debug, market_key = parse_arguments()
# 配置日志格式
formatter = logging.Formatter('%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] - %(message)s')
# 动态生成日志文件名,基于 min_stat_years 的值
log_filename = f'./log/stat_yield_{min_stat_years}years_rate.log'
log_filename = f'./log/stat_yield_{market_key}_{min_stat_years}years_rate.log'
file_handler = logging.FileHandler(log_filename)
file_handler.setFormatter(formatter)
@ -65,7 +72,7 @@ table_mapping = {
},
"sp500": {
"codes": "sp500",
"his_data": "sp500_qfq_his",
"his_data": "sp500_qfq_his_202410",
"stat_res": f"sp500_{min_stat_years}years_yield_stats_2410"
}
}
@ -272,4 +279,4 @@ def main(index_name):
connection.close()
if __name__ == "__main__":
main("hs300")
main(market_key)