modify scripts
This commit is contained in:
129
shell/mysql-ddl.txt
Normal file
129
shell/mysql-ddl.txt
Normal file
@ -0,0 +1,129 @@
|
||||
CREATE DATABASE `stockdb` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci */ /*!80016 DEFAULT ENCRYPTION='N' */;
|
||||
|
||||
-- stockdb.hs300 definition
|
||||
|
||||
CREATE TABLE `hs300` (
|
||||
`up_date` varchar(100) NOT NULL DEFAULT '',
|
||||
`index_code` varchar(100) NOT NULL DEFAULT '000300',
|
||||
`index_name` varchar(100) NOT NULL DEFAULT '',
|
||||
`index_name_eng` varchar(100) NOT NULL DEFAULT '',
|
||||
`code_inner` varchar(100) NOT NULL DEFAULT '',
|
||||
`code_name` varchar(100) NOT NULL DEFAULT '',
|
||||
`code_name_eng` varchar(100) NOT NULL DEFAULT '',
|
||||
`exchange` varchar(100) NOT NULL DEFAULT '',
|
||||
`exchange_eng` varchar(100) NOT NULL DEFAULT '',
|
||||
`code` varchar(100) NOT NULL DEFAULT '',
|
||||
PRIMARY KEY (`up_date`,`code_inner`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
|
||||
-- stockdb.hs300_3years_yield_stats_2410 definition
|
||||
|
||||
CREATE TABLE `hs300_3years_yield_stats_2410` (
|
||||
`code` varchar(100) NOT NULL,
|
||||
`name` varchar(100) NOT NULL,
|
||||
`year_diff` int NOT NULL,
|
||||
`max_yield_rate` float NOT NULL,
|
||||
`max_yield_rate_start` datetime NOT NULL,
|
||||
`max_yield_rate_end` datetime NOT NULL,
|
||||
`min_yield_rate` float NOT NULL,
|
||||
`min_yield_rate_start` datetime NOT NULL,
|
||||
`min_yield_rate_end` datetime NOT NULL,
|
||||
`avg_yield_rate` float NOT NULL,
|
||||
`median_yield_rate` float NOT NULL,
|
||||
`win_rate` float NOT NULL,
|
||||
`annual_max_yield_rate` float NOT NULL,
|
||||
`annual_max_yield_rate_start` datetime NOT NULL,
|
||||
`annual_max_yield_rate_end` datetime NOT NULL,
|
||||
`annual_min_yield_rate` float NOT NULL,
|
||||
`annual_min_yield_rate_start` datetime NOT NULL,
|
||||
`annual_min_yield_rate_end` datetime NOT NULL,
|
||||
`annual_avg_yield_rate` float NOT NULL,
|
||||
`annual_median_yield_rate` float NOT NULL,
|
||||
`max_deficit_days` int NOT NULL,
|
||||
`max_deficit_start` datetime NOT NULL,
|
||||
`max_deficit_end` datetime NOT NULL,
|
||||
`annual_yield_variance` float NOT NULL,
|
||||
PRIMARY KEY (`code`,`year_diff`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
|
||||
|
||||
-- stockdb.hs300_5years_yield_stats_2410 definition
|
||||
|
||||
CREATE TABLE `hs300_5years_yield_stats_2410` (
|
||||
`code` varchar(100) NOT NULL,
|
||||
`name` varchar(100) NOT NULL,
|
||||
`year_diff` int NOT NULL,
|
||||
`max_yield_rate` float NOT NULL,
|
||||
`max_yield_rate_start` datetime NOT NULL,
|
||||
`max_yield_rate_end` datetime NOT NULL,
|
||||
`min_yield_rate` float NOT NULL,
|
||||
`min_yield_rate_start` datetime NOT NULL,
|
||||
`min_yield_rate_end` datetime NOT NULL,
|
||||
`avg_yield_rate` float NOT NULL,
|
||||
`median_yield_rate` float NOT NULL,
|
||||
`win_rate` float NOT NULL,
|
||||
`annual_max_yield_rate` float NOT NULL,
|
||||
`annual_max_yield_rate_start` datetime NOT NULL,
|
||||
`annual_max_yield_rate_end` datetime NOT NULL,
|
||||
`annual_min_yield_rate` float NOT NULL,
|
||||
`annual_min_yield_rate_start` datetime NOT NULL,
|
||||
`annual_min_yield_rate_end` datetime NOT NULL,
|
||||
`annual_avg_yield_rate` float NOT NULL,
|
||||
`annual_median_yield_rate` float NOT NULL,
|
||||
`max_deficit_days` int NOT NULL,
|
||||
`max_deficit_start` datetime NOT NULL,
|
||||
`max_deficit_end` datetime NOT NULL,
|
||||
`annual_yield_variance` float NOT NULL,
|
||||
PRIMARY KEY (`code`,`year_diff`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
|
||||
-- stockdb.hs300_qfq_his definition
|
||||
|
||||
CREATE TABLE `hs300_qfq_his` (
|
||||
`code` varchar(100) NOT NULL DEFAULT '',
|
||||
`name` varchar(100) NOT NULL DEFAULT '',
|
||||
`time_key` datetime NOT NULL,
|
||||
`open` float NOT NULL,
|
||||
`close` float NOT NULL,
|
||||
`high` float NOT NULL,
|
||||
`low` float NOT NULL,
|
||||
`pe_ratio` float DEFAULT NULL,
|
||||
`turnover_rate` float DEFAULT NULL,
|
||||
`volume` bigint DEFAULT NULL,
|
||||
`turnover` double DEFAULT NULL,
|
||||
`change_rate` float DEFAULT NULL,
|
||||
`last_close` float DEFAULT NULL,
|
||||
PRIMARY KEY (`code`,`time_key`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
|
||||
-- stockdb.plat_list definition
|
||||
|
||||
CREATE TABLE `plat_list` (
|
||||
`up_date` date NOT NULL,
|
||||
`market` varchar(50) NOT NULL,
|
||||
`plat` varchar(50) NOT NULL DEFAULT 'INDUSTRY',
|
||||
`code` varchar(50) NOT NULL,
|
||||
`plate_id` varchar(50) NOT NULL,
|
||||
`plate_name` varchar(255) NOT NULL,
|
||||
PRIMARY KEY (`up_date`,`plat`,`code`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
|
||||
-- stockdb.sp500 definition
|
||||
|
||||
CREATE TABLE `sp500` (
|
||||
`sp_no` int NOT NULL DEFAULT '0',
|
||||
`code_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '',
|
||||
`code_inner` varchar(100) NOT NULL DEFAULT '',
|
||||
`sector` varchar(100) NOT NULL DEFAULT '',
|
||||
`code` varchar(100) NOT NULL DEFAULT '',
|
||||
`up_date` varchar(100) NOT NULL DEFAULT '2024-10-02',
|
||||
PRIMARY KEY (`up_date`,`code_inner`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
|
||||
|
||||
147
shell/stat.sql
Normal file
147
shell/stat.sql
Normal file
@ -0,0 +1,147 @@
|
||||
-- 更新 hs_index 表中的code
|
||||
UPDATE hs_index SET code = CONCAT('SZ.' , code_inner) WHERE exchange_eng LIKE 'Shenzhen%'
|
||||
UPDATE hs_index SET code = CONCAT('SH.' , code_inner) WHERE exchange_eng LIKE 'Shanghai%'
|
||||
|
||||
-- 沪深300独有代码
|
||||
SELECT * FROM hs_index hi WHERE code_inner not in (SELECT code_inner from hs_index hi2 WHERE index_code='000510')
|
||||
-- 上证A510独有代码
|
||||
SELECT * FROM hs_index hi WHERE code_inner not in (SELECT code_inner from hs_index hi2 WHERE index_code='000300')
|
||||
-- 共同的代码
|
||||
SELECT * FROM hs_index hi WHERE index_code='000510' and code_inner in (SELECT code_inner from hs_index hi2 WHERE index_code='000300')
|
||||
|
||||
-- 校验使用复权因子计算出的前复权价格,与直接从接口读取的前复权价格的差异
|
||||
select a.code, a.time_key, a.close, b.qfq_close, a.close - b.qfq_close as diff
|
||||
from stockdb.sp500_qfq_his_202410 a, stockdb.sp500_ajust_kline_202410 b
|
||||
WHERE a.code = b.code and a.time_key = b.time_key
|
||||
|
||||
-- 校验使用复权因子计算出的前复权价格,与直接从接口读取的前复权价格的差异
|
||||
select a.code, a.time_key, a.close, b.qfq_close, a.close - b.qfq_close as diff
|
||||
from stockdb.hs300_qfq_his a, stockdb.hs300_ajust_kline_202410 b
|
||||
WHERE a.code = b.code and a.time_key = b.time_key
|
||||
|
||||
|
||||
-- 统计 year_diff = 10000 的所有记录中, win_rate 在不同区间的分布情况,给出每个区间的 行数,以及占整体的比例
|
||||
WITH filtered_data AS (
|
||||
SELECT win_rate
|
||||
FROM hs300_5years_yield_stats_2410
|
||||
WHERE year_diff = 10000
|
||||
)
|
||||
SELECT
|
||||
CASE
|
||||
WHEN win_rate >= 0.99995 THEN '100%'
|
||||
WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%~100%'
|
||||
WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
|
||||
WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
|
||||
ELSE '20% 以下'
|
||||
END AS win_rate_range,
|
||||
ROUND(COUNT(*) / (SELECT COUNT(*) FROM filtered_data) * 100, 2) AS percentage,
|
||||
COUNT(*) AS count
|
||||
FROM filtered_data
|
||||
GROUP BY win_rate_range
|
||||
ORDER BY
|
||||
CASE win_rate_range
|
||||
WHEN '100%' THEN 1
|
||||
WHEN '90%~100%' THEN 2
|
||||
WHEN '50% ~ 90%' THEN 3
|
||||
WHEN '20% ~ 50%' THEN 4
|
||||
ELSE 5
|
||||
END;
|
||||
;
|
||||
|
||||
-- annual_median_yield_rate 代表了投资的年化回报率的中位数,我们想要统计在 year_diff = 10000 的所有记录中,中位数的分布情况。
|
||||
-- 首先把所有记录按照中位数降序排列,然后把所有记录的条数分成10等份,我们要输出的是 每一个等份下面,中位数的区间,以及等份的记录条数。
|
||||
WITH ranked_data AS (
|
||||
SELECT
|
||||
annual_median_yield_rate,
|
||||
NTILE(10) OVER (ORDER BY annual_median_yield_rate DESC) AS tile_rank
|
||||
FROM sp500_5years_yield_stats_2410
|
||||
WHERE year_diff = 10000
|
||||
)
|
||||
SELECT
|
||||
MAX(annual_median_yield_rate) AS max_yield_rate,
|
||||
MIN(annual_median_yield_rate) AS min_yield_rate,
|
||||
COUNT(*) AS record_count
|
||||
FROM ranked_data
|
||||
GROUP BY tile_rank
|
||||
ORDER BY tile_rank;
|
||||
|
||||
-- max_deficit_days 是买入即亏损的最大时长。我们统计在 year_diff = 10000 的所有记录中,按照之前定义的胜率分布下,每一个胜率分布区间内,max_deficit_days 的最大值,最小值,和平均值。
|
||||
WITH filtered_data AS (
|
||||
SELECT win_rate, max_deficit_days
|
||||
FROM sp500_5years_yield_stats_2410
|
||||
WHERE year_diff = 10000
|
||||
)
|
||||
SELECT
|
||||
CASE
|
||||
WHEN win_rate >= 0.99995 THEN '100%'
|
||||
WHEN win_rate >= 0.9 AND win_rate < 0.99995 THEN '90%~100%'
|
||||
WHEN win_rate >= 0.5 AND win_rate < 0.9 THEN '50% ~ 90%'
|
||||
WHEN win_rate >= 0.2 AND win_rate < 0.5 THEN '20% ~ 50%'
|
||||
ELSE '20% 以下'
|
||||
END AS win_rate_range,
|
||||
MAX(max_deficit_days) AS max_deficit,
|
||||
MIN(max_deficit_days) AS min_deficit,
|
||||
ROUND(AVG(max_deficit_days), 2) AS avg_deficit
|
||||
FROM filtered_data
|
||||
GROUP BY win_rate_range
|
||||
ORDER BY
|
||||
CASE win_rate_range
|
||||
WHEN '100%' THEN 1
|
||||
WHEN '90%~100%' THEN 2
|
||||
WHEN '50% ~ 90%' THEN 3
|
||||
WHEN '20% ~ 50%' THEN 4
|
||||
ELSE 5
|
||||
END;
|
||||
|
||||
|
||||
-- 提取 year_diff = 10000 的所有记录中,win_rate >= 0.99995 的所有记录的 code, name, max_yield_rate, median_yield_rate, annual_max_yield_rate, annual_median_yield_rate, max_deficit_days
|
||||
SELECT
|
||||
code,
|
||||
name,
|
||||
max_yield_rate,
|
||||
median_yield_rate,
|
||||
annual_max_yield_rate,
|
||||
annual_median_yield_rate,
|
||||
max_deficit_days
|
||||
FROM hs300_5years_yield_stats_2410
|
||||
WHERE year_diff = 10000
|
||||
AND win_rate >= 0.99995;
|
||||
|
||||
|
||||
-- 对每个不同的 code,统计不同 year_diff (year_diff != 10000)下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;然后对 year_diff 进行分组统计对应的行数,以及占总行数的比例。
|
||||
WITH best_year_diff_per_code AS (
|
||||
SELECT code,
|
||||
year_diff,
|
||||
annual_median_yield_rate,
|
||||
RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
|
||||
FROM sp500_5years_yield_stats_2410
|
||||
WHERE year_diff != 10000
|
||||
)
|
||||
SELECT
|
||||
year_diff,
|
||||
ROUND(COUNT(*) / (SELECT COUNT(*) FROM best_year_diff_per_code WHERE rank_by_yield = 1) * 100, 2) AS percentage,
|
||||
COUNT(*) AS record_count
|
||||
FROM best_year_diff_per_code
|
||||
WHERE rank_by_yield = 1
|
||||
GROUP BY year_diff
|
||||
ORDER BY year_diff;
|
||||
|
||||
|
||||
|
||||
-- 对每个不同的 code,统计不同 year_diff (year_diff != 10000)下的最大 annual_median_yield_rate 对应的 year_diff ,我们就得到了 code , 最优 year_diff 的结果;输出这个结果。
|
||||
WITH best_year_diff_per_code AS (
|
||||
SELECT
|
||||
code,
|
||||
year_diff,
|
||||
annual_median_yield_rate,
|
||||
RANK() OVER (PARTITION BY code ORDER BY annual_median_yield_rate DESC) AS rank_by_yield
|
||||
FROM sp500_5years_yield_stats_2410
|
||||
WHERE year_diff != 10000
|
||||
)
|
||||
SELECT
|
||||
code,
|
||||
year_diff AS best_year_diff,
|
||||
annual_median_yield_rate AS max_annual_median_yield_rate
|
||||
FROM best_year_diff_per_code
|
||||
WHERE rank_by_yield = 1
|
||||
ORDER BY code;
|
||||
Reference in New Issue
Block a user