-- 更新 hs_index 表中的code UPDATE hs_index SET code = CONCAT('SZ.' , code_inner) WHERE exchange_eng LIKE 'Shenzhen%' UPDATE hs_index SET code = CONCAT('SH.' , code_inner) WHERE exchange_eng LIKE 'Shanghai%' -- 沪深300独有代码 SELECT * FROM hs_index hi WHERE code_inner not in (SELECT code_inner from hs_index hi2 WHERE index_code='000510') -- 上证A510独有代码 SELECT * FROM hs_index hi WHERE code_inner not in (SELECT code_inner from hs_index hi2 WHERE index_code='000300') -- 共同的代码 SELECT * FROM hs_index hi WHERE index_code='000510' and code_inner in (SELECT code_inner from hs_index hi2 WHERE index_code='000300') -- 校验使用复权因子计算出的前复权价格,与直接从接口读取的前复权价格的差异 select a.code, a.time_key, a.close, b.qfq_close, a.close - b.qfq_close as diff from stockdb.sp500_qfq_his_202410 a, stockdb.sp500_ajust_kline_202410 b WHERE a.code = b.code and a.time_key = b.time_key -- 校验使用复权因子计算出的前复权价格,与直接从接口读取的前复权价格的差异 select a.code, a.time_key, a.close, b.qfq_close, a.close - b.qfq_close as diff from stockdb.hs300_qfq_his a, stockdb.hs300_ajust_kline_202410 b WHERE a.code = b.code and a.time_key = b.time_key -- 统计 year_diff = 10000 的所有记录中, win_rate 在不同区间的分布情况,给出每个区间的 行数,以及占整体的比例 WITH filtered_data AS ( SELECT win_rate FROM hs300_5years_yield_stats_2410 WHERE year_diff = 10000 ) SELECT CASE WHEN win_rate >= 0.99995 THEN '100%' WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%~100%' WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%' WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%' ELSE '20% 以下' END AS win_rate_range, ROUND(COUNT(*) / (SELECT COUNT(*) FROM filtered_data) * 100, 2) AS percentage, COUNT(*) AS count FROM filtered_data GROUP BY win_rate_range ORDER BY CASE win_rate_range WHEN '100%' THEN 1 WHEN '90%~100%' THEN 2 WHEN '50% ~ 90%' THEN 3 WHEN '20% ~ 50%' THEN 4 ELSE 5 END; ; -- annual_median_yield_rate 代表了投资的年化回报率的中位数,我们想要统计在 year_diff = 10000 的所有记录中,中位数的分布情况。 -- 首先把所有记录按照中位数降序排列,然后把所有记录的条数分成10等份,我们要输出的是 每一个等份下面,中位数的区间,以及等份的记录条数。 WITH ranked_data AS ( SELECT annual_median_yield_rate, NTILE(10) OVER (ORDER BY annual_median_yield_rate DESC) AS tile_rank FROM sp500_5years_yield_stats_2410 WHERE year_diff = 10000 ) SELECT MAX(annual_median_yield_rate) AS max_yield_rate, MIN(annual_median_yield_rate) AS min_yield_rate, COUNT(*) AS record_count FROM ranked_data GROUP BY tile_rank ORDER BY tile_rank; -- max_deficit_days 是买入即亏损的最大时长。我们统计在 year_diff = 10000 的所有记录中,按照之前定义的胜率分布下,每一个胜率分布区间内,max_deficit_days 的最大值,最小值,和平均值。 WITH filtered_data AS ( SELECT win_rate, max_deficit_days FROM sp500_5years_yield_stats_2410 WHERE year_diff = 10000 ) SELECT CASE WHEN win_rate >= 0.99995 THEN '100%' WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%~100%' WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%' WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%' ELSE '20% 以下' END AS win_rate_range, MAX(max_deficit_days) AS max_deficit, MIN(max_deficit_days) AS min_deficit, ROUND(AVG(max_deficit_days), 2) AS avg_deficit FROM filtered_data GROUP BY win_rate_range ORDER BY CASE win_rate_range WHEN '100%' THEN 1 WHEN '90%~100%' THEN 2 WHEN '50% ~ 90%' THEN 3 WHEN '20% ~ 50%' THEN 4 ELSE 5 END; -- 提取 year_diff = 10000 的所有记录中,win_rate >= 0.99995 的所有记录的 code, name, max_yield_rate, median_yield_rate, annual_max_yield_rate, annual_median_yield_rate, max_deficit_days SELECT code, name, max_yield_rate, median_yield_rate, annual_max_yield_rate, annual_median_yield_rate, max_deficit_days FROM hs300_5years_yield_stats_2410 WHERE year_diff = 10000 AND win_rate >= 0.99995; -- 对每个不同的 code,统计不同 year_diff (year_diff != 10000)下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;然后对 year_diff 进行分组统计对应的行数,以及占总行数的比例。 WITH best_year_diff_per_code AS ( SELECT code, year_diff, annual_median_yield_rate, RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield FROM sp500_5years_yield_stats_2410 WHERE year_diff != 10000 ) SELECT year_diff, ROUND(COUNT(*) / (SELECT COUNT(*) FROM best_year_diff_per_code WHERE rank_by_yield = 1) * 100, 2) AS percentage, COUNT(*) AS record_count FROM best_year_diff_per_code WHERE rank_by_yield = 1 GROUP BY year_diff ORDER BY year_diff; -- 对每个不同的 code,统计不同 year_diff (year_diff != 10000)下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;输出这个结果。 WITH best_year_diff_per_code AS ( SELECT code, year_diff, annual_median_yield_rate, RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield FROM sp500_5years_yield_stats_2410 WHERE year_diff != 10000 ) SELECT code, year_diff AS best_year_diff, annual_median_yield_rate AS max_annual_median_yield_rate FROM best_year_diff_per_code WHERE rank_by_yield = 1 ORDER BY code;