Files
stock/shell/stat.sql
2025-03-17 11:08:13 +08:00

147 lines
5.8 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- 更新 hs_index 表中的code
UPDATE hs_index SET code = CONCAT('SZ.' , code_inner) WHERE exchange_eng LIKE 'Shenzhen%'
UPDATE hs_index SET code = CONCAT('SH.' , code_inner) WHERE exchange_eng LIKE 'Shanghai%'
-- 沪深300独有代码
SELECT * FROM hs_index hi WHERE code_inner not in (SELECT code_inner from hs_index hi2 WHERE index_code='000510')
-- 上证A510独有代码
SELECT * FROM hs_index hi WHERE code_inner not in (SELECT code_inner from hs_index hi2 WHERE index_code='000300')
-- 共同的代码
SELECT * FROM hs_index hi WHERE index_code='000510' and code_inner in (SELECT code_inner from hs_index hi2 WHERE index_code='000300')
-- 校验使用复权因子计算出的前复权价格,与直接从接口读取的前复权价格的差异
select a.code, a.time_key, a.close, b.qfq_close, a.close - b.qfq_close as diff
from stockdb.sp500_qfq_his_202410 a, stockdb.sp500_ajust_kline_202410 b
WHERE a.code = b.code and a.time_key = b.time_key
-- 校验使用复权因子计算出的前复权价格,与直接从接口读取的前复权价格的差异
select a.code, a.time_key, a.close, b.qfq_close, a.close - b.qfq_close as diff
from stockdb.hs300_qfq_his a, stockdb.hs300_ajust_kline_202410 b
WHERE a.code = b.code and a.time_key = b.time_key
-- 统计 year_diff = 10000 的所有记录中, win_rate 在不同区间的分布情况,给出每个区间的 行数,以及占整体的比例
WITH filtered_data AS (
SELECT win_rate
FROM hs300_5years_yield_stats_2410
WHERE year_diff = 10000
)
SELECT
CASE
WHEN win_rate >= 0.99995 THEN '100%'
WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%100%'
WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
ELSE '20% 以下'
END AS win_rate_range,
ROUND(COUNT(*) / (SELECT COUNT(*) FROM filtered_data) * 100, 2) AS percentage,
COUNT(*) AS count
FROM filtered_data
GROUP BY win_rate_range
ORDER BY
CASE win_rate_range
WHEN '100%' THEN 1
WHEN '90%100%' THEN 2
WHEN '50% ~ 90%' THEN 3
WHEN '20% ~ 50%' THEN 4
ELSE 5
END;
;
-- annual_median_yield_rate 代表了投资的年化回报率的中位数,我们想要统计在 year_diff = 10000 的所有记录中,中位数的分布情况。
-- 首先把所有记录按照中位数降序排列然后把所有记录的条数分成10等份我们要输出的是 每一个等份下面,中位数的区间,以及等份的记录条数。
WITH ranked_data AS (
SELECT
annual_median_yield_rate,
NTILE(10) OVER (ORDER BY annual_median_yield_rate DESC) AS tile_rank
FROM sp500_5years_yield_stats_2410
WHERE year_diff = 10000
)
SELECT
MAX(annual_median_yield_rate) AS max_yield_rate,
MIN(annual_median_yield_rate) AS min_yield_rate,
COUNT(*) AS record_count
FROM ranked_data
GROUP BY tile_rank
ORDER BY tile_rank;
-- max_deficit_days 是买入即亏损的最大时长。我们统计在 year_diff = 10000 的所有记录中按照之前定义的胜率分布下每一个胜率分布区间内max_deficit_days 的最大值,最小值,和平均值。
WITH filtered_data AS (
SELECT win_rate, max_deficit_days
FROM sp500_5years_yield_stats_2410
WHERE year_diff = 10000
)
SELECT
CASE
WHEN win_rate >= 0.99995 THEN '100%'
WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%100%'
WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
ELSE '20% 以下'
END AS win_rate_range,
MAX(max_deficit_days) AS max_deficit,
MIN(max_deficit_days) AS min_deficit,
ROUND(AVG(max_deficit_days), 2) AS avg_deficit
FROM filtered_data
GROUP BY win_rate_range
ORDER BY
CASE win_rate_range
WHEN '100%' THEN 1
WHEN '90%100%' THEN 2
WHEN '50% ~ 90%' THEN 3
WHEN '20% ~ 50%' THEN 4
ELSE 5
END;
-- 提取 year_diff = 10000 的所有记录中win_rate >= 0.99995 的所有记录的 code, name, max_yield_rate, median_yield_rate, annual_max_yield_rate, annual_median_yield_rate, max_deficit_days
SELECT
code,
name,
max_yield_rate,
median_yield_rate,
annual_max_yield_rate,
annual_median_yield_rate,
max_deficit_days
FROM hs300_5years_yield_stats_2410
WHERE year_diff = 10000
AND win_rate >= 0.99995;
-- 对每个不同的 code统计不同 year_diff year_diff != 10000下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;然后对 year_diff 进行分组统计对应的行数,以及占总行数的比例。
WITH best_year_diff_per_code AS (
SELECT code,
year_diff,
annual_median_yield_rate,
RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
FROM sp500_5years_yield_stats_2410
WHERE year_diff != 10000
)
SELECT
year_diff,
ROUND(COUNT(*) / (SELECT COUNT(*) FROM best_year_diff_per_code WHERE rank_by_yield = 1) * 100, 2) AS percentage,
COUNT(*) AS record_count
FROM best_year_diff_per_code
WHERE rank_by_yield = 1
GROUP BY year_diff
ORDER BY year_diff;
-- 对每个不同的 code统计不同 year_diff year_diff != 10000下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;输出这个结果。
WITH best_year_diff_per_code AS (
SELECT
code,
year_diff,
annual_median_yield_rate,
RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
FROM sp500_5years_yield_stats_2410
WHERE year_diff != 10000
)
SELECT
code,
year_diff AS best_year_diff,
annual_median_yield_rate AS max_annual_median_yield_rate
FROM best_year_diff_per_code
WHERE rank_by_yield = 1
ORDER BY code;