Update .gitignore and add files.

This commit is contained in:
2024-10-05 16:38:23 +08:00
parent a48dd47ebe
commit 50bbcd7ca5
21 changed files with 9531 additions and 7 deletions

126
stockapp/shell/stat.sql Normal file
View File

@ -0,0 +1,126 @@
-- 统计 year_diff = 10000 的所有记录中, win_rate 在不同区间的分布情况,给出每个区间的 行数,以及占整体的比例
WITH filtered_data AS (
SELECT win_rate
FROM hs300_5years_yield_stats_2410
WHERE year_diff = 10000
)
SELECT
CASE
WHEN win_rate >= 0.99995 THEN '100%'
WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%100%'
WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
ELSE '20% 以下'
END AS win_rate_range,
ROUND(COUNT(*) / (SELECT COUNT(*) FROM filtered_data) * 100, 2) AS percentage,
COUNT(*) AS count
FROM filtered_data
GROUP BY win_rate_range
ORDER BY
CASE win_rate_range
WHEN '100%' THEN 1
WHEN '90%100%' THEN 2
WHEN '50% ~ 90%' THEN 3
WHEN '20% ~ 50%' THEN 4
ELSE 5
END;
;
-- annual_median_yield_rate 代表了投资的年化回报率的中位数,我们想要统计在 year_diff = 10000 的所有记录中,中位数的分布情况。
-- 首先把所有记录按照中位数降序排列然后把所有记录的条数分成10等份我们要输出的是 每一个等份下面,中位数的区间,以及等份的记录条数。
WITH ranked_data AS (
SELECT
annual_median_yield_rate,
NTILE(10) OVER (ORDER BY annual_median_yield_rate DESC) AS tile_rank
FROM sp500_5years_yield_stats_2410
WHERE year_diff = 10000
)
SELECT
MAX(annual_median_yield_rate) AS max_yield_rate,
MIN(annual_median_yield_rate) AS min_yield_rate,
COUNT(*) AS record_count
FROM ranked_data
GROUP BY tile_rank
ORDER BY tile_rank;
-- max_deficit_days 是买入即亏损的最大时长。我们统计在 year_diff = 10000 的所有记录中按照之前定义的胜率分布下每一个胜率分布区间内max_deficit_days 的最大值,最小值,和平均值。
WITH filtered_data AS (
SELECT win_rate, max_deficit_days
FROM sp500_5years_yield_stats_2410
WHERE year_diff = 10000
)
SELECT
CASE
WHEN win_rate >= 0.99995 THEN '100%'
WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%100%'
WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
ELSE '20% 以下'
END AS win_rate_range,
MAX(max_deficit_days) AS max_deficit,
MIN(max_deficit_days) AS min_deficit,
ROUND(AVG(max_deficit_days), 2) AS avg_deficit
FROM filtered_data
GROUP BY win_rate_range
ORDER BY
CASE win_rate_range
WHEN '100%' THEN 1
WHEN '90%100%' THEN 2
WHEN '50% ~ 90%' THEN 3
WHEN '20% ~ 50%' THEN 4
ELSE 5
END;
-- 提取 year_diff = 10000 的所有记录中win_rate >= 0.99995 的所有记录的 code, name, max_yield_rate, median_yield_rate, annual_max_yield_rate, annual_median_yield_rate, max_deficit_days
SELECT
code,
name,
max_yield_rate,
median_yield_rate,
annual_max_yield_rate,
annual_median_yield_rate,
max_deficit_days
FROM hs300_5years_yield_stats_2410
WHERE year_diff = 10000
AND win_rate >= 0.99995;
-- 对每个不同的 code统计不同 year_diff year_diff != 10000下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;然后对 year_diff 进行分组统计对应的行数,以及占总行数的比例。
WITH best_year_diff_per_code AS (
SELECT code,
year_diff,
annual_median_yield_rate,
RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
FROM sp500_5years_yield_stats_2410
WHERE year_diff != 10000
)
SELECT
year_diff,
ROUND(COUNT(*) / (SELECT COUNT(*) FROM best_year_diff_per_code WHERE rank_by_yield = 1) * 100, 2) AS percentage,
COUNT(*) AS record_count
FROM best_year_diff_per_code
WHERE rank_by_yield = 1
GROUP BY year_diff
ORDER BY year_diff;
-- 对每个不同的 code统计不同 year_diff year_diff != 10000下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;输出这个结果。
WITH best_year_diff_per_code AS (
SELECT
code,
year_diff,
annual_median_yield_rate,
RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
FROM sp500_5years_yield_stats_2410
WHERE year_diff != 10000
)
SELECT
code,
year_diff AS best_year_diff,
annual_median_yield_rate AS max_annual_median_yield_rate
FROM best_year_diff_per_code
WHERE rank_by_yield = 1
ORDER BY code;