modify scripts

This commit is contained in:
oscarz
2025-03-17 11:08:13 +08:00
parent e6327fbe73
commit f43cd53159
177 changed files with 5 additions and 178173 deletions

116
src/bak_stat_growth_rate.py Normal file
View File

@ -0,0 +1,116 @@
"""
Script Name:
Description: 统计hs300的成分股在区间内的涨幅。取前复权值
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import pandas as pd
import time
from datetime import datetime
import logging
import config
# Configure shared logging once at module import time (side effect: installs
# file + console handlers via config.setup_logging).
config.setup_logging()
logger = logging.getLogger()  # root logger; everything below logs through it
# Database connection helper.
def connect_to_db():
    """Open and return a fresh pymysql connection built from config.db_config."""
    return pymysql.connect(**config.db_config)
# Look up the close price for one exact time_key.
def get_close_for_date(df, date):
    """Return the 'close' value of the row whose time_key equals *date*.

    Logs a warning and returns None when no row matches.
    """
    matches = df.loc[df['time_key'] == date]
    if matches.empty:
        logger.warning(f"No data found for date: {date}")
        return None
    return matches.iloc[0]['close']
# Earliest/latest close in the window (c1, c3 for the period growth rate).
def get_first_last_close(df):
    """Return (earliest close, latest close) after ordering rows by time_key."""
    ordered = df.sort_values(by='time_key')
    closes = ordered['close']
    return closes.iloc[0], closes.iloc[-1]
# Extremes of the close series (used for the volatility ratio).
def get_max_min_close(df):
    """Return (maximum close, minimum close) over the whole frame."""
    closes = df['close']
    return closes.max(), closes.min()
# Entry point: per-stock growth statistics for hs300 constituents.
def main():
    """Compute growth metrics for hs300 qfq history since 2021 and save a CSV.

    For every constituent code the script derives:
    - year_growth_rate: earliest close -> latest close,
    - growth_since_2024_09_23: close on 2024-09-23 -> latest close,
    - year_volatility: max close / min close - 1.
    Results are printed and written to ./result/stat_grouth_rate_since2021.csv.
    """
    # BUG FIX: predefine so the `finally` block can never hit an unbound name
    # when connect_to_db() itself raises.
    connection = None
    try:
        connection = connect_to_db()
        query = """
        SELECT code, name, time_key, close
        FROM hs300_qfq_his
        WHERE time_key >= '2021-01-01 00:00:00'
        """
        df = pd.read_sql(query, connection)
        # Reference date for the "since 2024-09-23" growth leg.
        target_date = '2024-09-23 00:00:00'
        df['time_key'] = pd.to_datetime(df['time_key'])
        results = []
        for code, group in df.groupby('code'):
            logger.info(f"Processing code: {code}")
            # c1 = earliest close, c3 = latest close in the window.
            c1, c3 = get_first_last_close(group)
            # c2 = close on the reference date (None when that day is absent).
            c2 = get_close_for_date(group, target_date)
            if c1 is None or c2 is None or c3 is None:
                logger.warning(f"Skipping code {code} due to missing close values.")
                continue
            year_growth_rate = (c3 / c1 - 1) if c1 else None
            growth_since_2024_09_23 = (c3 / c2 - 1) if c2 else None
            # c4 = max close, c5 = min close for the volatility ratio.
            c4, c5 = get_max_min_close(group)
            year_volatility = (c4 / c5 - 1) if c4 and c5 else None
            results.append({
                'code': code,
                'name': group['name'].iloc[0],
                'year_growth_rate': year_growth_rate,
                'growth_since_2024_09_23': growth_since_2024_09_23,
                'year_volatility': year_volatility
            })
            # NOTE(review): pure local computation per group — confirm this
            # 1-second throttle is actually needed.
            time.sleep(1)
        result_df = pd.DataFrame(results)
        print(result_df)
        result_df.to_csv('./result/stat_grouth_rate_since2021.csv', index=False)
    except Exception as e:
        logger.error(f"Error occurred: {e}")
    finally:
        if connection:
            connection.close()
if __name__ == "__main__":
    main()

60
src/config.py Normal file
View File

@ -0,0 +1,60 @@
import logging
import os
import inspect
from datetime import datetime
from pathlib import Path
# MySQL connection settings; consumed elsewhere via pymysql.connect(**db_config).
db_config = {
'host': 'testdb',
'user': 'root',
'password': 'mysqlpw',
'database': 'stockdb'
}
# Relative prefix for log output. NOTE(review): not referenced in this file —
# confirm external callers before removing.
log_dir_prefix = '../log'
# Shared data directory — presumably a container/host mount; TODO confirm.
global_share_data_dir = '/root/sharedata'
# Host-mounted stock data directory — TODO confirm mount point.
global_stock_data_dir = '/root/hostdir/stock_data'
# Resolve (and create if needed) the project-level log directory.
def get_log_directory():
    """Return the project's ``log`` directory path, creating it when missing.

    Walks upward from this file until a directory named 'src' (or the
    filesystem root) is reached; the project root is taken to be one level
    above 'src', and ``<root>/log`` is returned.
    """
    location = Path(__file__).resolve().parent
    # Climb until we sit at the 'src' directory (or can climb no further).
    while location.name != 'src' and location != location.parent:
        location = location.parent
    log_dir = location.parent / 'log'  # project root is the parent of 'src'
    log_dir.mkdir(parents=True, exist_ok=True)
    return log_dir
def get_caller_filename():
    """Return the extension-less filename of setup_logging's caller.

    Stack frame 2 is used: [0] is this function, [1] is setup_logging,
    [2] is whoever called setup_logging.
    """
    frame_info = inspect.stack()[2]
    base = os.path.basename(frame_info.filename)
    return os.path.splitext(base)[0]
# Install the shared logging configuration (file + console).
def setup_logging(log_filename=None):
    """Configure root logging with a file handler and a console handler.

    When *log_filename* is omitted, it is derived from the calling script's
    name plus today's date and placed in the shared log directory.
    """
    if log_filename is None:
        caller_filename = get_caller_filename()
        common_log_dir = get_log_directory()
        current_date = datetime.now().strftime('%Y%m%d')
        # Date goes right before the extension.
        log_filename = f'{common_log_dir}/{caller_filename}_{current_date}.log'
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] (%(funcName)s) - %(message)s',
        handlers=[
            logging.FileHandler(log_filename),
            logging.StreamHandler(),
        ],
    )

0
src/crawling/__init__.py Normal file
View File

360
src/crawling/fund_etf_em.py Normal file
View File

@ -0,0 +1,360 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2023/1/4 12:18
Desc: 东方财富-ETF 行情
https://quote.eastmoney.com/sh513500.html
"""
from functools import lru_cache
import pandas as pd
import requests
def fund_etf_spot_em() -> pd.DataFrame:
    """Fetch Eastmoney real-time ETF quotes.

    https://quote.eastmoney.com/center/gridlist.html#fund_etf
    :return: one row per listed ETF with price/volume columns
    :rtype: pandas.DataFrame
    """
    url = "http://88.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "2000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "wbp2u": "|0|0|0|web",
        "fid": "f3",
        "fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
        "fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152",
        "_": "1672806290972",
    }
    payload = requests.get(url, params=params).json()
    quotes = pd.DataFrame(payload["data"]["diff"])
    # Translate Eastmoney's fNN field codes into readable column names.
    field_names = {
        "f12": "代码",
        "f14": "名称",
        "f2": "最新价",
        "f3": "涨跌幅",
        "f4": "涨跌额",
        "f5": "成交量",
        "f6": "成交额",
        "f17": "开盘价",
        "f15": "最高价",
        "f16": "最低价",
        "f18": "昨收",
        "f8": "换手率",
        "f21": "流通市值",
        "f20": "总市值",
    }
    quotes.rename(columns=field_names, inplace=True)
    ordered_cols = [
        "代码",
        "名称",
        "最新价",
        "涨跌幅",
        "涨跌额",
        "成交量",
        "成交额",
        "开盘价",
        "最高价",
        "最低价",
        "昨收",
        "换手率",
        "流通市值",
        "总市值",
    ]
    quotes = quotes[ordered_cols]
    # Every column after code/name is numeric; coerce bad values to NaN.
    for col in ordered_cols[2:]:
        quotes[col] = pd.to_numeric(quotes[col], errors="coerce")
    return quotes
@lru_cache()
def _fund_etf_code_id_map_em() -> dict:
    """Return the cached ETF code -> market-id mapping (f12 -> f13).

    https://quote.eastmoney.com/center/gridlist.html#fund_etf
    :return: mapping used to build Eastmoney `secid` strings
    :rtype: dict
    """
    url = "http://88.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "5000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "wbp2u": "|0|0|0|web",
        "fid": "f3",
        "fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
        "fields": "f12,f13",
        "_": "1672806290972",
    }
    payload = requests.get(url, params=params).json()
    listing = pd.DataFrame(payload["data"]["diff"])
    return dict(zip(listing["f12"], listing["f13"]))
def fund_etf_hist_em(
    symbol: str = "159707",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """Fetch Eastmoney daily/weekly/monthly ETF bars.

    https://quote.eastmoney.com/sz159707.html
    :param symbol: ETF code
    :param period: one of {'daily', 'weekly', 'monthly'}
    :param start_date: start date, YYYYMMDD
    :param end_date: end date, YYYYMMDD
    :param adjust: '' (raw), 'qfq' (forward-adjusted), 'hfq' (backward-adjusted)
    :return: one row per bar; empty frame when the API returns no klines
    :rtype: pandas.DataFrame
    """
    market_map = _fund_etf_code_id_map_em()
    fq_codes = {"qfq": "1", "hfq": "2", "": "0"}
    freq_codes = {"daily": "101", "weekly": "102", "monthly": "103"}
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": freq_codes[period],
        "fqt": fq_codes[adjust],
        "secid": f"{market_map[symbol]}.{symbol}",
        "beg": start_date,
        "end": end_date,
        "_": "1623766962675",
    }
    payload = requests.get(url, params=params).json()
    if not (payload["data"] and payload["data"]["klines"]):
        return pd.DataFrame()
    bars = pd.DataFrame([row.split(",") for row in payload["data"]["klines"]])
    bars.columns = [
        "日期",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    # Parse the date column (validates it), then keep a plain RangeIndex.
    bars.index = pd.to_datetime(bars["日期"])
    bars.reset_index(inplace=True, drop=True)
    # All columns after 日期 are numeric.
    for col in bars.columns[1:]:
        bars[col] = pd.to_numeric(bars[col])
    return bars
def fund_etf_hist_min_em(
    symbol: str = "159707",
    start_date: str = "1979-09-01 09:32:00",
    end_date: str = "2222-01-01 09:32:00",
    period: str = "5",
    adjust: str = "",
) -> pd.DataFrame:
    """Fetch Eastmoney intraday ETF bars.

    https://quote.eastmoney.com/sz159707.html
    :param symbol: ETF code
    :param start_date: inclusive start, 'YYYY-MM-DD HH:MM:SS'
    :param end_date: inclusive end, 'YYYY-MM-DD HH:MM:SS'
    :param period: bar size in minutes, one of {'1', '5', '15', '30', '60'}
    :param adjust: '' (raw), 'qfq', or 'hfq'; ignored for 1-minute bars
    :return: intraday bars within [start_date, end_date]
    :rtype: pandas.DataFrame
    """
    market_map = _fund_etf_code_id_map_em()
    fq_codes = {
        "": "0",
        "qfq": "1",
        "hfq": "2",
    }
    if period == "1":
        # 1-minute data comes from the trends endpoint.
        url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "ndays": "5",
            "iscr": "0",
            "secid": f"{market_map[symbol]}.{symbol}",
            "_": "1623766962675",
        }
        payload = requests.get(url, params=params).json()
        bars = pd.DataFrame([row.split(",") for row in payload["data"]["trends"]])
        bars.columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "最新价",
        ]
        # Slice by timestamp, then drop the datetime index again.
        bars.index = pd.to_datetime(bars["时间"])
        bars = bars[start_date:end_date]
        bars.reset_index(drop=True, inplace=True)
        for col in ("开盘", "收盘", "最高", "最低", "成交量", "成交额", "最新价"):
            bars[col] = pd.to_numeric(bars[col])
        bars["时间"] = pd.to_datetime(bars["时间"]).astype(str)
        return bars
    # Multi-minute bars come from the kline endpoint and honour `adjust`.
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": period,
        "fqt": fq_codes[adjust],
        "secid": f"{market_map[symbol]}.{symbol}",
        "beg": "0",
        "end": "20500000",
        "_": "1630930917857",
    }
    payload = requests.get(url, params=params).json()
    bars = pd.DataFrame([row.split(",") for row in payload["data"]["klines"]])
    bars.columns = [
        "时间",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    bars.index = pd.to_datetime(bars["时间"])
    bars = bars[start_date:end_date]
    bars.reset_index(drop=True, inplace=True)
    for col in (
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ):
        bars[col] = pd.to_numeric(bars[col])
    bars["时间"] = pd.to_datetime(bars["时间"]).astype(str)
    bars = bars[
        [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "涨跌幅",
            "涨跌额",
            "成交量",
            "成交额",
            "振幅",
            "换手率",
        ]
    ]
    return bars
if __name__ == "__main__":
    # Smoke-test every public function in this module against live data.
    print(fund_etf_spot_em())
    for fq in ("hfq", "qfq", ""):
        print(
            fund_etf_hist_em(
                symbol="513500",
                period="daily",
                start_date="20000101",
                end_date="20230201",
                adjust=fq,
            )
        )
    print(
        fund_etf_hist_min_em(
            symbol="513500",
            period="5",
            adjust="hfq",
            start_date="2023-01-01 09:32:00",
            end_date="2023-01-04 14:40:00",
        )
    )

139
src/crawling/stock_cpbd.py Normal file
View File

@ -0,0 +1,139 @@
# -*- coding:utf-8 -*-
# !/usr/bin/env python
import pandas as pd
import requests
import instock.core.tablestructure as tbs
__author__ = 'myh '
__date__ = '2023/5/7 '
def stock_cpbd_em(symbol: str = "688041") -> pd.DataFrame:
    """Fetch the Eastmoney per-stock "operations required" (操盘必读) overview.

    https://emweb.securities.eastmoney.com/PC_HSF10/OperationsRequired/Index?type=web&code=SH688041#
    :param symbol: stock code without market prefix
    :type symbol: str
    :return: one-row frame merging the page's sub-sections, or None when the
        endpoint returns no core indicators
    :rtype: pandas.DataFrame
    """
    url = "https://emweb.securities.eastmoney.com/PC_HSF10/OperationsRequired/PageAjax"
    # Market prefix: codes starting with '6' trade in Shanghai, others Shenzhen.
    if symbol.startswith("6"):
        symbol = f"SH{symbol}"
    else:
        symbol = f"SZ{symbol}"
    params = {"code": symbol}
    r = requests.get(url, params=params)
    data_json = r.json()
    zxzb = data_json["zxzb"]  # core indicators
    if len(zxzb) < 1:
        return None
    data_dict = zxzb[0]
    zxzbOther = data_json["zxzbOther"]  # additional derived indicators
    if len(zxzbOther) > 0:
        data_dict = {**data_dict, **zxzbOther[0]}
    # Concatenate all board names into one string (None when no board found).
    names = [s['BOARD_NAME'] for s in data_json["ssbk"] if s.get('BOARD_NAME') is not None]
    data_dict["BOARD_NAME"] = "".join(str(v) for v in names) if names else None
    gdrs = data_json["gdrs"]  # shareholder analysis
    if len(gdrs) > 0:
        data_dict = {**data_dict, **gdrs[0]}
    # The sections below each carry a TRADE_DATE key; rename it before merging
    # so the dates do not overwrite one another.
    lhbd = data_json["lhbd"]  # dragon-tiger list
    if len(lhbd) > 0:
        lhbd = lhbd[0]
        lhbd["LHBD_DATE"] = lhbd.pop("TRADE_DATE")
        data_dict = {**data_dict, **lhbd}
    dzjy = data_json["dzjy"]  # block trades
    if len(dzjy) > 0:
        dzjy = dzjy[0]
        dzjy["DZJY_DATE"] = dzjy.pop("TRADE_DATE")
        data_dict = {**data_dict, **dzjy}
    rzrq = data_json["rzrq"]  # margin trading
    if len(rzrq) > 0:
        rzrq = rzrq[0]
        rzrq["RZRQ_DATE"] = rzrq.pop("TRADE_DATE")
        data_dict = {**data_dict, **rzrq}
    # BUG FIX: the original evaluated `tbs.CN_STOCK_CPBD` as a no-op statement
    # and fell off the end, always returning None despite the declared return
    # type. Return the merged record as a one-row DataFrame instead.
    return pd.DataFrame([data_dict])
def stock_zjlx_em(symbol: str = "688041") -> pd.DataFrame:
    """Fetch the Eastmoney per-stock daily fund-flow (资金流向) history.

    https://data.eastmoney.com/zjlx/688041.html
    :param symbol: stock code without market prefix
    :type symbol: str
    :return: one row per day of fund-flow data, or None when the endpoint
        returns no rows
    :rtype: pandas.DataFrame
    """
    url = "https://push2his.eastmoney.com/api/qt/stock/fflow/daykline/get"
    # Market prefix for secid: '1.' = Shanghai, '0.' = Shenzhen.
    if symbol.startswith("6"):
        symbol = f"1.{symbol}"
    else:
        symbol = f"0.{symbol}"
    params = {
        "lmt": "0",
        "klt": "1",
        "fields1": "f1,f2,f3,f7",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65",
        "ut": "b2884a393a59ad64002292a3e90d46a5",
        "secid": symbol
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    # BUG FIX: the rows are nested under "data" (same payload shape as the
    # other kline endpoints in this package); data_json["klines"] raised.
    data = data_json.get("data") or {}
    klines = data.get("klines") or []
    if len(klines) < 1:
        return None
    # BUG FIX: the original left the column names as bare string expressions
    # and returned None; actually build and return the frame. The last two
    # requested fields (f64, f65) have no name here and are dropped.
    temp_df = pd.DataFrame([item.split(",") for item in klines])
    temp_df.columns = [
        "日期",
        "主力净流入额",
        "小单净流入额",
        "中单净流入额",
        "大单净流入额",
        "超大单净流入额",
        "主力净流入占比",
        "小单净流入占比",
        "中单净流入占比",
        "大单净流入占比",
        "超大单净流入占比",
        "收盘价",
        "涨跌幅",
        "-",
        "-",
    ]
    temp_df = temp_df[[
        "日期",
        "主力净流入额",
        "小单净流入额",
        "中单净流入额",
        "大单净流入额",
        "超大单净流入额",
        "主力净流入占比",
        "小单净流入占比",
        "中单净流入占比",
        "大单净流入占比",
        "超大单净流入占比",
        "收盘价",
        "涨跌幅",
    ]]
    for col in temp_df.columns[1:]:
        temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")
    return temp_df
if __name__ == "__main__":
    # Manual smoke test against live data.
    overview = stock_cpbd_em(symbol="688041")
    print(overview)

View File

@ -0,0 +1,535 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/5/16 15:31
Desc: 东方财富网-数据中心-大宗交易-市场统计
http://data.eastmoney.com/dzjy/dzjy_sctj.aspx
"""
import pandas as pd
import requests
def stock_dzjy_sctj() -> pd.DataFrame:
    """Fetch Eastmoney block-trade market statistics (data center).

    http://data.eastmoney.com/dzjy/dzjy_sctj.aspx
    :return: per-day market statistics across all available pages
    :rtype: pandas.DataFrame
    """
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        'sortColumns': 'TRADE_DATE',
        'sortTypes': '-1',
        'pageSize': '500',
        'pageNumber': '1',
        'reportName': 'PRT_BLOCKTRADE_MARKET_STA',
        'columns': 'TRADE_DATE,SZ_INDEX,SZ_CHANGE_RATE,BLOCKTRADE_DEAL_AMT,PREMIUM_DEAL_AMT,PREMIUM_RATIO,DISCOUNT_DEAL_AMT,DISCOUNT_RATIO',
        'source': 'WEB',
        'client': 'WEB',
    }
    # First request only to learn the page count; pages are re-fetched below.
    page_count = int(requests.get(url, params=params).json()['result']["pages"])
    frames = []
    for page in range(1, page_count + 1):
        params.update({'pageNumber': page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json['result']["data"]))
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # Materialize a 1-based row counter as the 序号 column.
    big_df.reset_index(inplace=True)
    big_df['index'] = big_df['index'] + 1
    big_df.columns = [
        "序号",
        "交易日期",
        "上证指数",
        "上证指数涨跌幅",
        "大宗交易成交总额",
        "溢价成交总额",
        "溢价成交总额占比",
        "折价成交总额",
        "折价成交总额占比",
    ]
    big_df["交易日期"] = pd.to_datetime(big_df["交易日期"]).dt.date
    for col in (
        "上证指数",
        "上证指数涨跌幅",
        "大宗交易成交总额",
        "溢价成交总额",
        "溢价成交总额占比",
        "折价成交总额",
        "折价成交总额占比",
    ):
        big_df[col] = pd.to_numeric(big_df[col])
    return big_df
def stock_dzjy_mrmx(symbol: str = '基金', start_date: str = '20220104', end_date: str = '20220104') -> pd.DataFrame:
    """Fetch Eastmoney block-trade daily detail rows.

    http://data.eastmoney.com/dzjy/dzjy_mrmxa.aspx
    :param symbol: security class, one of {'A股', 'B股', '基金', '债券'}
    :param start_date: start date, YYYYMMDD
    :param end_date: end date, YYYYMMDD
    :return: daily detail; empty frame when the API returns no rows
    :rtype: pandas.DataFrame
    """
    symbol_map = {
        'A股': '1',
        'B股': '2',
        '基金': '3',
        '债券': '4',
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        'sortColumns': 'SECURITY_CODE',
        'sortTypes': '1',
        'pageSize': '5000',
        'pageNumber': '1',
        'reportName': 'RPT_DATA_BLOCKTRADE',
        'columns': 'TRADE_DATE,SECURITY_CODE,SECUCODE,SECURITY_NAME_ABBR,CHANGE_RATE,CLOSE_PRICE,DEAL_PRICE,PREMIUM_RATIO,DEAL_VOLUME,DEAL_AMT,TURNOVER_RATE,BUYER_NAME,SELLER_NAME,CHANGE_RATE_1DAYS,CHANGE_RATE_5DAYS,CHANGE_RATE_10DAYS,CHANGE_RATE_20DAYS,BUYER_CODE,SELLER_CODE',
        'source': 'WEB',
        'client': 'WEB',
        'filter': f"""(SECURITY_TYPE_WEB={symbol_map[symbol]})(TRADE_DATE>='{'-'.join([start_date[:4], start_date[4:6], start_date[6:]])}')(TRADE_DATE<='{'-'.join([end_date[:4], end_date[4:6], end_date[6:]])}')"""
    }
    payload = requests.get(url, params=params).json()
    rows = payload['result']["data"]
    if not rows:
        return pd.DataFrame()
    detail = pd.DataFrame(rows)
    # Materialize a 1-based row counter as the 序号 column.
    detail.reset_index(inplace=True)
    detail['index'] = detail.index + 1
    if symbol in {'A股'}:
        detail.columns = [
            "序号",
            "交易日期",
            "证券代码",
            "-",
            "证券简称",
            "涨跌幅",
            "收盘价",
            "成交价",
            "折溢率",
            "成交量",
            "成交额",
            "成交额/流通市值",
            "买方营业部",
            "卖方营业部",
            "_",
            "_",
            "_",
            "_",
            "_",
            "_",
        ]
        detail["交易日期"] = pd.to_datetime(detail["交易日期"]).dt.date
        detail = detail[[
            "序号",
            "交易日期",
            "证券代码",
            "证券简称",
            "涨跌幅",
            "收盘价",
            "成交价",
            "折溢率",
            "成交量",
            "成交额",
            "成交额/流通市值",
            "买方营业部",
            "卖方营业部",
        ]]
        for col in ('涨跌幅', '收盘价', '成交价', '折溢率', '成交量', '成交额', '成交额/流通市值'):
            detail[col] = pd.to_numeric(detail[col])
    if symbol in {'B股', '基金', '债券'}:
        # Non-A-share classes expose fewer meaningful columns.
        detail.columns = [
            "序号",
            "交易日期",
            "证券代码",
            "-",
            "证券简称",
            "-",
            "-",
            "成交价",
            "-",
            "成交量",
            "成交额",
            "-",
            "买方营业部",
            "卖方营业部",
            "_",
            "_",
            "_",
            "_",
            "_",
            "_",
        ]
        detail["交易日期"] = pd.to_datetime(detail["交易日期"]).dt.date
        detail = detail[[
            "序号",
            "交易日期",
            "证券代码",
            "证券简称",
            "成交价",
            "成交量",
            "成交额",
            "买方营业部",
            "卖方营业部",
        ]]
        for col in ('成交价', '成交量', '成交额'):
            detail[col] = pd.to_numeric(detail[col])
    return detail
def stock_dzjy_mrtj(start_date: str = '20220105', end_date: str = '20220105') -> pd.DataFrame:
    """Fetch Eastmoney block-trade daily per-security statistics.

    http://data.eastmoney.com/dzjy/dzjy_mrtj.aspx
    :param start_date: start date, YYYYMMDD
    :param end_date: end date, YYYYMMDD
    :return: per-security daily statistics
    :rtype: pandas.DataFrame
    """
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        'sortColumns': 'TURNOVERRATE',
        'sortTypes': '-1',
        'pageSize': '5000',
        'pageNumber': '1',
        'reportName': 'RPT_BLOCKTRADE_STA',
        'columns': 'TRADE_DATE,SECURITY_CODE,SECUCODE,SECURITY_NAME_ABBR,CHANGE_RATE,CLOSE_PRICE,AVERAGE_PRICE,PREMIUM_RATIO,DEAL_NUM,VOLUME,DEAL_AMT,TURNOVERRATE,D1_CLOSE_ADJCHRATE,D5_CLOSE_ADJCHRATE,D10_CLOSE_ADJCHRATE,D20_CLOSE_ADJCHRATE',
        'source': 'WEB',
        'client': 'WEB',
        'filter': f"(TRADE_DATE>='{'-'.join([start_date[:4], start_date[4:6], start_date[6:]])}')(TRADE_DATE<='{'-'.join([end_date[:4], end_date[4:6], end_date[6:]])}')"
    }
    payload = requests.get(url, params=params).json()
    stats = pd.DataFrame(payload['result']["data"])
    # Materialize a 1-based row counter as the 序号 column.
    stats.reset_index(inplace=True)
    stats['index'] = stats.index + 1
    stats.columns = [
        "序号",
        "交易日期",
        "证券代码",
        "-",
        "证券简称",
        "涨跌幅",
        "收盘价",
        "成交价",
        "折溢率",
        "成交笔数",
        "成交总量",
        "成交总额",
        "成交总额/流通市值",
        "_",
        "_",
        "_",
        "_",
    ]
    stats["交易日期"] = pd.to_datetime(stats["交易日期"]).dt.date
    stats = stats[[
        "序号",
        "交易日期",
        "证券代码",
        "证券简称",
        "收盘价",
        "涨跌幅",
        "成交价",
        "折溢率",
        "成交笔数",
        "成交总量",
        "成交总额",
        "成交总额/流通市值",
    ]]
    for col in (
        '涨跌幅',
        '收盘价',
        '成交价',
        '折溢率',
        '成交笔数',
        '成交总量',
        '成交总额',
        '成交总额/流通市值',
    ):
        stats[col] = pd.to_numeric(stats[col])
    return stats
def stock_dzjy_hygtj(symbol: str = '近三月') -> pd.DataFrame:
    """Fetch Eastmoney block-trade active A-share statistics.

    http://data.eastmoney.com/dzjy/dzjy_hygtj.aspx
    :param symbol: lookback window, one of {'近一月', '近三月', '近六月', '近一年'}
    :return: active A-share statistics across all pages
    :rtype: pandas.DataFrame
    """
    period_map = {
        '近一月': '1',
        '近三月': '3',
        '近六月': '6',
        '近一年': '12',
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        'sortColumns': 'DEAL_NUM,SECURITY_CODE',
        'sortTypes': '-1,-1',
        'pageSize': '5000',
        'pageNumber': '1',
        'reportName': 'RPT_BLOCKTRADE_ACSTA',
        'columns': 'SECURITY_CODE,SECUCODE,SECURITY_NAME_ABBR,CLOSE_PRICE,CHANGE_RATE,TRADE_DATE,DEAL_AMT,PREMIUM_RATIO,SUM_TURNOVERRATE,DEAL_NUM,PREMIUM_TIMES,DISCOUNT_TIMES,D1_AVG_ADJCHRATE,D5_AVG_ADJCHRATE,D10_AVG_ADJCHRATE,D20_AVG_ADJCHRATE,DATE_TYPE_CODE',
        'source': 'WEB',
        'client': 'WEB',
        'filter': f'(DATE_TYPE_CODE={period_map[symbol]})',
    }
    # First request only to learn the page count; pages are re-fetched below.
    page_count = int(requests.get(url, params=params).json()['result']["pages"])
    frames = []
    for page in range(1, page_count + 1):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json['result']["data"]))
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # Materialize a 1-based row counter as the 序号 column.
    big_df.reset_index(inplace=True)
    big_df['index'] = big_df.index + 1
    big_df.columns = [
        "序号",
        "证券代码",
        "_",
        "证券简称",
        "最新价",
        "涨跌幅",
        "最近上榜日",
        "总成交额",
        "折溢率",
        "成交总额/流通市值",
        "上榜次数-总计",
        "上榜次数-溢价",
        "上榜次数-折价",
        "上榜日后平均涨跌幅-1日",
        "上榜日后平均涨跌幅-5日",
        "上榜日后平均涨跌幅-10日",
        "上榜日后平均涨跌幅-20日",
        "_",
    ]
    big_df = big_df[[
        "序号",
        "证券代码",
        "证券简称",
        "最新价",
        "涨跌幅",
        "最近上榜日",
        "上榜次数-总计",
        "上榜次数-溢价",
        "上榜次数-折价",
        "总成交额",
        "折溢率",
        "成交总额/流通市值",
        "上榜日后平均涨跌幅-1日",
        "上榜日后平均涨跌幅-5日",
        "上榜日后平均涨跌幅-10日",
        "上榜日后平均涨跌幅-20日",
    ]]
    big_df["最近上榜日"] = pd.to_datetime(big_df["最近上榜日"]).dt.date
    for col in (
        "最新价",
        "涨跌幅",
        "上榜次数-总计",
        "上榜次数-溢价",
        "上榜次数-折价",
        "总成交额",
        "折溢率",
        "成交总额/流通市值",
        "上榜日后平均涨跌幅-1日",
        "上榜日后平均涨跌幅-5日",
        "上榜日后平均涨跌幅-10日",
        "上榜日后平均涨跌幅-20日",
    ):
        big_df[col] = pd.to_numeric(big_df[col])
    return big_df
def stock_dzjy_hyyybtj(symbol: str = '近3日') -> pd.DataFrame:
    """Fetch Eastmoney block-trade active-brokerage-branch statistics.

    https://data.eastmoney.com/dzjy/dzjy_hyyybtj.html
    :param symbol: lookback window, one of {'当前交易日', '近3日', '近5日', '近10日', '近30日'}
    :return: branch statistics across all pages
    :rtype: pandas.DataFrame
    """
    period_map = {
        '当前交易日': '1',
        '近3日': '3',
        '近5日': '5',
        '近10日': '10',
        '近30日': '30',
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        'sortColumns': 'BUYER_NUM,TOTAL_BUYAMT',
        'sortTypes': '-1,-1',
        'pageSize': '5000',
        'pageNumber': '1',
        'reportName': 'RPT_BLOCKTRADE_OPERATEDEPTSTATISTICS',
        'columns': 'OPERATEDEPT_CODE,OPERATEDEPT_NAME,ONLIST_DATE,STOCK_DETAILS,BUYER_NUM,SELLER_NUM,TOTAL_BUYAMT,TOTAL_SELLAMT,TOTAL_NETAMT,N_DATE',
        'source': 'WEB',
        'client': 'WEB',
        'filter': f'(N_DATE=-{period_map[symbol]})',
    }
    # First request only to learn the page count; pages are re-fetched below.
    page_count = int(requests.get(url, params=params).json()['result']["pages"])
    frames = []
    for page in range(1, page_count + 1):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json['result']["data"]))
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # Materialize a 1-based row counter as the 序号 column.
    big_df.reset_index(inplace=True)
    big_df['index'] = big_df.index + 1
    big_df.columns = [
        "序号",
        "_",
        "营业部名称",
        "最近上榜日",
        "买入的股票",
        "次数总计-买入",
        "次数总计-卖出",
        "成交金额统计-买入",
        "成交金额统计-卖出",
        "成交金额统计-净买入额",
        "_",
    ]
    big_df = big_df[[
        "序号",
        "最近上榜日",
        "营业部名称",
        "次数总计-买入",
        "次数总计-卖出",
        "成交金额统计-买入",
        "成交金额统计-卖出",
        "成交金额统计-净买入额",
        "买入的股票",
    ]]
    big_df["最近上榜日"] = pd.to_datetime(big_df["最近上榜日"]).dt.date
    for col in (
        "次数总计-买入",
        "次数总计-卖出",
        "成交金额统计-买入",
        "成交金额统计-卖出",
        "成交金额统计-净买入额",
    ):
        big_df[col] = pd.to_numeric(big_df[col])
    return big_df
def stock_dzjy_yybph(symbol: str = '近三月') -> pd.DataFrame:
    """Fetch the Eastmoney block-trade brokerage-branch ranking.

    http://data.eastmoney.com/dzjy/dzjy_yybph.aspx
    :param symbol: lookback window, one of {'近一月', '近三月', '近六月', '近一年'}
    :return: branch ranking across all pages
    :rtype: pandas.DataFrame
    """
    period_map = {
        '近一月': '30',
        '近三月': '90',
        '近六月': '120',
        '近一年': '360',
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        'sortColumns': 'D5_BUYER_NUM,D1_AVERAGE_INCREASE',
        'sortTypes': '-1,-1',
        'pageSize': '5000',
        'pageNumber': '1',
        'reportName': 'RPT_BLOCKTRADE_OPERATEDEPT_RANK',
        'columns': 'OPERATEDEPT_CODE,OPERATEDEPT_NAME,D1_BUYER_NUM,D1_AVERAGE_INCREASE,D1_RISE_PROBABILITY,D5_BUYER_NUM,D5_AVERAGE_INCREASE,D5_RISE_PROBABILITY,D10_BUYER_NUM,D10_AVERAGE_INCREASE,D10_RISE_PROBABILITY,D20_BUYER_NUM,D20_AVERAGE_INCREASE,D20_RISE_PROBABILITY,N_DATE,RELATED_ORG_CODE',
        'source': 'WEB',
        'client': 'WEB',
        'filter': f'(N_DATE=-{period_map[symbol]})',
    }
    # First request only to learn the page count; pages are re-fetched below.
    page_count = int(requests.get(url, params=params).json()['result']["pages"])
    frames = []
    for page in range(1, page_count + 1):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json['result']["data"]))
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # Materialize a 1-based row counter as the 序号 column.
    big_df.reset_index(inplace=True)
    big_df['index'] = big_df.index + 1
    # 4 horizons x 3 metrics, in the exact order the API returns them.
    metric_cols = [
        f'上榜后{d}天-{m}'
        for d in ('1', '5', '10', '20')
        for m in ('买入次数', '平均涨幅', '上涨概率')
    ]
    big_df.columns = ["序号", "_", "营业部名称", *metric_cols, "_", "_"]
    big_df = big_df[["序号", "营业部名称", *metric_cols]]
    for col in metric_cols:
        big_df[col] = pd.to_numeric(big_df[col])
    return big_df
if __name__ == "__main__":
    # Smoke-test every public function in this module against live data.
    print(stock_dzjy_sctj())
    print(stock_dzjy_mrmx(symbol='债券', start_date='20201204', end_date='20201204'))
    print(stock_dzjy_mrtj(start_date='20201204', end_date='20201204'))
    print(stock_dzjy_hygtj(symbol='近三月'))
    print(stock_dzjy_hyyybtj(symbol='近3日'))
    print(stock_dzjy_yybph(symbol='近三月'))

View File

@ -0,0 +1,126 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2023/4/7 15:22
Desc: 东方财富网-数据中心-年报季报-分红送配
https://data.eastmoney.com/yjfp/
"""
import pandas as pd
import requests
from tqdm import tqdm
__author__ = 'myh '
__date__ = '2023/6/27 '
def stock_fhps_em(date: str = "20210630") -> pd.DataFrame:
    """Fetch Eastmoney dividend/bonus-share (分红送配) data for one report period.

    https://data.eastmoney.com/yjfp/
    :param date: report date, YYYYMMDD
    :type date: str
    :return: dividend and share-bonus details across all pages
    :rtype: pandas.DataFrame
    """
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "PLAN_NOTICE_DATE",
        "sortTypes": "-1",
        "pageSize": "500",
        "pageNumber": "1",
        "reportName": "RPT_SHAREBONUS_DET",
        "columns": "ALL",
        "quoteColumns": "",
        "js": '{"data":(x),"pages":(tp)}',
        "source": "WEB",
        "client": "WEB",
        "filter": f"""(REPORT_DATE='{"-".join([date[:4], date[4:6], date[6:]])}')""",
    }
    # First request only to learn the page count; pages are re-fetched below.
    total_pages = int(requests.get(url, params=params).json()["result"]["pages"])
    frames = []
    for page in tqdm(range(1, total_pages + 1), leave=False):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json["result"]["data"]))
    big_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    big_df.columns = [
        "_",
        "名称",
        "_",
        "_",
        "代码",
        "送转股份-送转总比例",
        "送转股份-送转比例",
        "送转股份-转股比例",
        "现金分红-现金分红比例",
        "预案公告日",
        "股权登记日",
        "除权除息日",
        "_",
        "方案进度",
        "_",
        "最新公告日期",
        "_",
        "_",
        "_",
        "每股收益",
        "每股净资产",
        "每股公积金",
        "每股未分配利润",
        "净利润同比增长",
        "总股本",
        "_",
        "现金分红-股息率",
        "-",
        "-",
        "-",
    ]
    big_df = big_df[
        [
            "代码",
            "名称",
            "送转股份-送转总比例",
            "送转股份-送转比例",
            "送转股份-转股比例",
            "现金分红-现金分红比例",
            "现金分红-股息率",
            "每股收益",
            "每股净资产",
            "每股公积金",
            "每股未分配利润",
            "净利润同比增长",
            "总股本",
            "预案公告日",
            "股权登记日",
            "除权除息日",
            "方案进度",
            "最新公告日期",
        ]
    ]
    for col in (
        "送转股份-送转总比例",
        "送转股份-送转比例",
        "送转股份-转股比例",
        "现金分红-现金分红比例",
        "现金分红-股息率",
        "每股收益",
        "每股净资产",
        "每股公积金",
        "每股未分配利润",
        "净利润同比增长",
        "总股本",
    ):
        big_df[col] = pd.to_numeric(big_df[col])
    for col in ("预案公告日", "股权登记日", "除权除息日", "最新公告日期"):
        big_df[col] = pd.to_datetime(big_df[col], errors="coerce").dt.date
    return big_df
if __name__ == "__main__":
    # Manual smoke test against live data.
    print(stock_fhps_em(date="20221231"))

View File

@ -0,0 +1,419 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2023/5/16 15:30
Desc: 东方财富网-数据中心-资金流向
https://data.eastmoney.com/zjlx/detail.html
"""
import json
import time
from functools import lru_cache
import pandas as pd
import requests
__author__ = 'myh '
__date__ = '2023/6/12 '
def stock_individual_fund_flow_rank(indicator: str = "5日") -> pd.DataFrame:
"""
东方财富网-数据中心-资金流向-排名
https://data.eastmoney.com/zjlx/detail.html
:param indicator: choice of {"今日", "3日", "5日", "10日"}
:type indicator: str
:return: 指定 indicator 资金流向排行
:rtype: pandas.DataFrame
"""
indicator_map = {
"今日": [
"f62",
"f12,f14,f2,f3,f62,f184,f66,f69,f72,f75,f78,f81,f84,f87,f204,f205,f124",
],
"3日": [
"f267",
"f12,f14,f2,f127,f267,f268,f269,f270,f271,f272,f273,f274,f275,f276,f257,f258,f124",
],
"5日": [
"f164",
"f12,f14,f2,f109,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f257,f258,f124",
],
"10日": [
"f174",
"f12,f14,f2,f160,f174,f175,f176,f177,f178,f179,f180,f181,f182,f183,f260,f261,f124",
],
}
url = "http://push2.eastmoney.com/api/qt/clist/get"
params = {
"fid": indicator_map[indicator][0],
"po": "1",
"pz": "10000",
"pn": "1",
"np": "1",
"fltt": "2",
"invt": "2",
"ut": "b2884a393a59ad64002292a3e90d46a5",
"fs": "m:0+t:6+f:!2,m:0+t:13+f:!2,m:0+t:80+f:!2,m:1+t:2+f:!2,m:1+t:23+f:!2,m:0+t:7+f:!2,m:1+t:3+f:!2",
"fields": indicator_map[indicator][1],
}
r = requests.get(url, params=params)
data_json = r.json()
temp_df = pd.DataFrame(data_json["data"]["diff"])
if indicator == "今日":
temp_df.columns = [
"最新价",
"今日涨跌幅",
"代码",
"名称",
"今日主力净流入-净额",
"今日超大单净流入-净额",
"今日超大单净流入-净占比",
"今日大单净流入-净额",
"今日大单净流入-净占比",
"今日中单净流入-净额",
"今日中单净流入-净占比",
"今日小单净流入-净额",
"今日小单净流入-净占比",
"_",
"今日主力净流入-净占比",
"_",
"_",
"_",
]
temp_df = temp_df[
[
"代码",
"名称",
"最新价",
"今日涨跌幅",
"今日主力净流入-净额",
"今日主力净流入-净占比",
"今日超大单净流入-净额",
"今日超大单净流入-净占比",
"今日大单净流入-净额",
"今日大单净流入-净占比",
"今日中单净流入-净额",
"今日中单净流入-净占比",
"今日小单净流入-净额",
"今日小单净流入-净占比",
]
]
elif indicator == "3日":
temp_df.columns = [
"最新价",
"代码",
"名称",
"_",
"3日涨跌幅",
"_",
"_",
"_",
"3日主力净流入-净额",
"3日主力净流入-净占比",
"3日超大单净流入-净额",
"3日超大单净流入-净占比",
"3日大单净流入-净额",
"3日大单净流入-净占比",
"3日中单净流入-净额",
"3日中单净流入-净占比",
"3日小单净流入-净额",
"3日小单净流入-净占比",
]
temp_df = temp_df[
[
"代码",
"名称",
"最新价",
"3日涨跌幅",
"3日主力净流入-净额",
"3日主力净流入-净占比",
"3日超大单净流入-净额",
"3日超大单净流入-净占比",
"3日大单净流入-净额",
"3日大单净流入-净占比",
"3日中单净流入-净额",
"3日中单净流入-净占比",
"3日小单净流入-净额",
"3日小单净流入-净占比",
]
]
elif indicator == "5日":
temp_df.columns = [
"最新价",
"代码",
"名称",
"5日涨跌幅",
"_",
"5日主力净流入-净额",
"5日主力净流入-净占比",
"5日超大单净流入-净额",
"5日超大单净流入-净占比",
"5日大单净流入-净额",
"5日大单净流入-净占比",
"5日中单净流入-净额",
"5日中单净流入-净占比",
"5日小单净流入-净额",
"5日小单净流入-净占比",
"_",
"_",
"_",
]
temp_df = temp_df[
[
"代码",
"名称",
"最新价",
"5日涨跌幅",
"5日主力净流入-净额",
"5日主力净流入-净占比",
"5日超大单净流入-净额",
"5日超大单净流入-净占比",
"5日大单净流入-净额",
"5日大单净流入-净占比",
"5日中单净流入-净额",
"5日中单净流入-净占比",
"5日小单净流入-净额",
"5日小单净流入-净占比",
]
]
elif indicator == "10日":
temp_df.columns = [
"最新价",
"代码",
"名称",
"_",
"10日涨跌幅",
"10日主力净流入-净额",
"10日主力净流入-净占比",
"10日超大单净流入-净额",
"10日超大单净流入-净占比",
"10日大单净流入-净额",
"10日大单净流入-净占比",
"10日中单净流入-净额",
"10日中单净流入-净占比",
"10日小单净流入-净额",
"10日小单净流入-净占比",
"_",
"_",
"_",
]
temp_df = temp_df[
[
"代码",
"名称",
"最新价",
"10日涨跌幅",
"10日主力净流入-净额",
"10日主力净流入-净占比",
"10日超大单净流入-净额",
"10日超大单净流入-净占比",
"10日大单净流入-净额",
"10日大单净流入-净占比",
"10日中单净流入-净额",
"10日中单净流入-净占比",
"10日小单净流入-净额",
"10日小单净流入-净占比",
]
]
return temp_df
def stock_sector_fund_flow_rank(
indicator: str = "10日", sector_type: str = "行业资金流"
) -> pd.DataFrame:
"""
东方财富网-数据中心-资金流向-板块资金流-排名
https://data.eastmoney.com/bkzj/hy.html
:param indicator: choice of {"今日", "5日", "10日"}
:type indicator: str
:param sector_type: choice of {"行业资金流", "概念资金流", "地域资金流"}
:type sector_type: str
:return: 指定参数的资金流排名数据
:rtype: pandas.DataFrame
"""
sector_type_map = {"行业资金流": "2", "概念资金流": "3", "地域资金流": "1"}
indicator_map = {
"今日": [
"f62",
"1",
"f12,f14,f2,f3,f62,f184,f66,f69,f72,f75,f78,f81,f84,f87,f204,f205,f124",
],
"5日": [
"f164",
"5",
"f12,f14,f2,f109,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f257,f258,f124",
],
"10日": [
"f174",
"10",
"f12,f14,f2,f160,f174,f175,f176,f177,f178,f179,f180,f181,f182,f183,f260,f261,f124",
],
}
url = "http://push2.eastmoney.com/api/qt/clist/get"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
}
params = {
"pn": "1",
"pz": "5000",
"po": "1",
"np": "1",
"ut": "b2884a393a59ad64002292a3e90d46a5",
"fltt": "2",
"invt": "2",
"fid0": indicator_map[indicator][0],
"fs": f"m:90 t:{sector_type_map[sector_type]}",
"stat": indicator_map[indicator][1],
"fields": indicator_map[indicator][2],
"rt": "52975239",
"cb": "jQuery18308357908311220152_1589256588824",
"_": int(time.time() * 1000),
}
r = requests.get(url, params=params, headers=headers)
text_data = r.text
json_data = json.loads(text_data[text_data.find("{") : -2])
temp_df = pd.DataFrame(json_data["data"]["diff"])
if indicator == "今日":
temp_df.columns = [
"-",
"今日涨跌幅",
"_",
"名称",
"今日主力净流入-净额",
"今日超大单净流入-净额",
"今日超大单净流入-净占比",
"今日大单净流入-净额",
"今日大单净流入-净占比",
"今日中单净流入-净额",
"今日中单净流入-净占比",
"今日小单净流入-净额",
"今日小单净流入-净占比",
"-",
"今日主力净流入-净占比",
"今日主力净流入最大股",
"今日主力净流入最大股代码",
"是否净流入",
]
temp_df = temp_df[
[
"名称",
"今日涨跌幅",
"今日主力净流入-净额",
"今日主力净流入-净占比",
"今日超大单净流入-净额",
"今日超大单净流入-净占比",
"今日大单净流入-净额",
"今日大单净流入-净占比",
"今日中单净流入-净额",
"今日中单净流入-净占比",
"今日小单净流入-净额",
"今日小单净流入-净占比",
"今日主力净流入最大股",
]
]
elif indicator == "5日":
temp_df.columns = [
"-",
"_",
"名称",
"5日涨跌幅",
"_",
"5日主力净流入-净额",
"5日主力净流入-净占比",
"5日超大单净流入-净额",
"5日超大单净流入-净占比",
"5日大单净流入-净额",
"5日大单净流入-净占比",
"5日中单净流入-净额",
"5日中单净流入-净占比",
"5日小单净流入-净额",
"5日小单净流入-净占比",
"5日主力净流入最大股",
"_",
"_",
]
temp_df = temp_df[
[
"名称",
"5日涨跌幅",
"5日主力净流入-净额",
"5日主力净流入-净占比",
"5日超大单净流入-净额",
"5日超大单净流入-净占比",
"5日大单净流入-净额",
"5日大单净流入-净占比",
"5日中单净流入-净额",
"5日中单净流入-净占比",
"5日小单净流入-净额",
"5日小单净流入-净占比",
"5日主力净流入最大股",
]
]
elif indicator == "10日":
temp_df.columns = [
"-",
"_",
"名称",
"_",
"10日涨跌幅",
"10日主力净流入-净额",
"10日主力净流入-净占比",
"10日超大单净流入-净额",
"10日超大单净流入-净占比",
"10日大单净流入-净额",
"10日大单净流入-净占比",
"10日中单净流入-净额",
"10日中单净流入-净占比",
"10日小单净流入-净额",
"10日小单净流入-净占比",
"10日主力净流入最大股",
"_",
"_",
]
temp_df = temp_df[
[
"名称",
"10日涨跌幅",
"10日主力净流入-净额",
"10日主力净流入-净占比",
"10日超大单净流入-净额",
"10日超大单净流入-净占比",
"10日大单净流入-净额",
"10日大单净流入-净占比",
"10日中单净流入-净额",
"10日中单净流入-净占比",
"10日小单净流入-净额",
"10日小单净流入-净占比",
"10日主力净流入最大股",
]
]
return temp_df
if __name__ == "__main__":
stock_individual_fund_flow_rank_df = stock_individual_fund_flow_rank(indicator="今日")
print(stock_individual_fund_flow_rank_df)
stock_individual_fund_flow_rank_df = stock_individual_fund_flow_rank(indicator="3日")
print(stock_individual_fund_flow_rank_df)
stock_individual_fund_flow_rank_df = stock_individual_fund_flow_rank(indicator="5日")
print(stock_individual_fund_flow_rank_df)
stock_individual_fund_flow_rank_df = stock_individual_fund_flow_rank(
indicator="10日"
)
print(stock_individual_fund_flow_rank_df)
stock_sector_fund_flow_rank_df = stock_sector_fund_flow_rank(
indicator="5日", sector_type="概念资金流"
)
print(stock_sector_fund_flow_rank_df)
stock_sector_fund_flow_rank_df = stock_sector_fund_flow_rank(
indicator="今日", sector_type="行业资金流"
)
print(stock_sector_fund_flow_rank_df)

View File

@ -0,0 +1,819 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/6/19 15:26
Desc: 东方财富网-行情首页-沪深京 A 股
"""
import requests
import pandas as pd
import time
from functools import lru_cache
def fetch_with_retries_em(url, params, max_retries=3, delay=2):
"""带重试机制的 GET 请求"""
for attempt in range(max_retries):
try:
response = requests.get(url, params=params, timeout=5)
response.raise_for_status()
return response.json()
except requests.RequestException as e:
print(f"请求失败,第 {attempt + 1} 次重试: {e}")
time.sleep(delay)
return None
def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', pz=100) -> pd.DataFrame:
    """
    Eastmoney realtime spot quotes for Shanghai/Shenzhen/Beijing A shares,
    fetched page by page via fetch_with_retries_em.
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board
    :param fs: market-filter string passed to the clist API
    :param pz: page size (rows per request)
    :return: realtime quotes; empty DataFrame when nothing is returned
    :rtype: pandas.DataFrame
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    pn = 1  # current page number
    pn_max = 10000  # provisional page bound until page 1 reports "total"
    all_data = []
    while pn <= pn_max:
        params = {
            "pn": str(pn),
            "pz": str(pz),
            "po": "1",
            "np": "1",
            "ut": "bd1d9ddb04089700cf9c27f6f7426281",
            "fltt": "2",
            "invt": "2",
            "fid": "f3",
            "fs": fs,
            "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
            "_": "1623833739532",
        }
        data_json = fetch_with_retries_em(url, params)
        if not data_json or "data" not in data_json or "diff" not in data_json["data"]:
            break
        diff_data = data_json["data"]["diff"]
        if not diff_data:
            break
        all_data.extend(diff_data)
        # Derive the real page count from "total" once, on the first page
        # (ceiling division: (total + pz - 1) // pz).
        if pn == 1:
            pn_max = (data_json["data"].get("total", 0) + pz - 1) // pz
            print(f'total pages: {pn_max}, total data lines: {data_json["data"].get("total", 0)}, curr lines: {len(diff_data)}, page size: {pz}')
        pn += 1
        time.sleep(0.5)  # throttle to avoid hammering the API
    if not all_data:
        return pd.DataFrame()
    temp_df = pd.DataFrame(all_data)
    # fNN field id -> human-readable Chinese column name.
    column_map = {
        "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率",
        "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低",
        "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅",
        "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份",
        "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润",
        "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产",
        "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期"
    }
    temp_df.rename(columns=column_map, inplace=True)
    numeric_columns = [
        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌", "60日涨跌幅",
        "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产", "每股公积金", "每股未分配利润",
        "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "总股本", "已流通股份",
        "总市值", "流通市值"
    ]
    # Coerce numeric columns; non-numeric placeholders (e.g. "-") become NaN.
    for col in numeric_columns:
        temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")
    temp_df["报告期"] = pd.to_datetime(temp_df["报告期"], format='%Y%m%d', errors="coerce")
    temp_df["上市时间"] = pd.to_datetime(temp_df["上市时间"], format='%Y%m%d', errors="coerce")
    return temp_df
def stock_zh_a_spot_em_old(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame:
    """
    Eastmoney realtime spot quotes for Shanghai/Shenzhen/Beijing A shares
    (legacy single-request version: asks for 50000 rows in one page).
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board
    :param fs: market-filter string passed to the clist API
    :return: realtime quotes; empty DataFrame when nothing is returned
    :rtype: pandas.DataFrame
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": fs,
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    if not data_json["data"]["diff"]:
        return pd.DataFrame()
    temp_df = pd.DataFrame(data_json["data"]["diff"])
    # Positional rename: this list must match the order of the "fields" param.
    temp_df.columns = [
        "最新价",
        "涨跌幅",
        "涨跌额",
        "成交量",
        "成交额",
        "振幅",
        "换手率",
        "市盈率动",
        "量比",
        "5分钟涨跌",
        "代码",
        "名称",
        "最高",
        "最低",
        "今开",
        "昨收",
        "总市值",
        "流通市值",
        "涨速",
        "市净率",
        "60日涨跌幅",
        "年初至今涨跌幅",
        "上市时间",
        "加权净资产收益率",
        "总股本",
        "已流通股份",
        "营业收入",
        "营业收入同比增长",
        "归属净利润",
        "归属净利润同比增长",
        "每股未分配利润",
        "毛利率",
        "资产负债率",
        "每股公积金",
        "所处行业",
        "每股收益",
        "每股净资产",
        "市盈率静",
        "市盈率TTM",
        "报告期"
    ]
    # Reorder into the presentation order expected by callers.
    temp_df = temp_df[
        [
            "代码",
            "名称",
            "最新价",
            "涨跌幅",
            "涨跌额",
            "成交量",
            "成交额",
            "振幅",
            "换手率",
            "量比",
            "今开",
            "最高",
            "最低",
            "昨收",
            "涨速",
            "5分钟涨跌",
            "60日涨跌幅",
            "年初至今涨跌幅",
            "市盈率动",
            "市盈率TTM",
            "市盈率静",
            "市净率",
            "每股收益",
            "每股净资产",
            "每股公积金",
            "每股未分配利润",
            "加权净资产收益率",
            "毛利率",
            "资产负债率",
            "营业收入",
            "营业收入同比增长",
            "归属净利润",
            "归属净利润同比增长",
            "报告期",
            "总股本",
            "已流通股份",
            "总市值",
            "流通市值",
            "所处行业",
            "上市时间"
        ]
    ]
    # Coerce numeric columns; placeholder values (e.g. "-") become NaN.
    temp_df["最新价"] = pd.to_numeric(temp_df["最新价"], errors="coerce")
    temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"], errors="coerce")
    temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"], errors="coerce")
    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
    temp_df["振幅"] = pd.to_numeric(temp_df["振幅"], errors="coerce")
    temp_df["量比"] = pd.to_numeric(temp_df["量比"], errors="coerce")
    temp_df["换手率"] = pd.to_numeric(temp_df["换手率"], errors="coerce")
    temp_df["最高"] = pd.to_numeric(temp_df["最高"], errors="coerce")
    temp_df["最低"] = pd.to_numeric(temp_df["最低"], errors="coerce")
    temp_df["今开"] = pd.to_numeric(temp_df["今开"], errors="coerce")
    temp_df["昨收"] = pd.to_numeric(temp_df["昨收"], errors="coerce")
    temp_df["涨速"] = pd.to_numeric(temp_df["涨速"], errors="coerce")
    temp_df["5分钟涨跌"] = pd.to_numeric(temp_df["5分钟涨跌"], errors="coerce")
    temp_df["60日涨跌幅"] = pd.to_numeric(temp_df["60日涨跌幅"], errors="coerce")
    temp_df["年初至今涨跌幅"] = pd.to_numeric(temp_df["年初至今涨跌幅"], errors="coerce")
    temp_df["市盈率动"] = pd.to_numeric(temp_df["市盈率动"], errors="coerce")
    temp_df["市盈率TTM"] = pd.to_numeric(temp_df["市盈率TTM"], errors="coerce")
    temp_df["市盈率静"] = pd.to_numeric(temp_df["市盈率静"], errors="coerce")
    temp_df["市净率"] = pd.to_numeric(temp_df["市净率"], errors="coerce")
    temp_df["每股收益"] = pd.to_numeric(temp_df["每股收益"], errors="coerce")
    temp_df["每股净资产"] = pd.to_numeric(temp_df["每股净资产"], errors="coerce")
    temp_df["每股公积金"] = pd.to_numeric(temp_df["每股公积金"], errors="coerce")
    temp_df["每股未分配利润"] = pd.to_numeric(temp_df["每股未分配利润"], errors="coerce")
    temp_df["加权净资产收益率"] = pd.to_numeric(temp_df["加权净资产收益率"], errors="coerce")
    temp_df["毛利率"] = pd.to_numeric(temp_df["毛利率"], errors="coerce")
    temp_df["资产负债率"] = pd.to_numeric(temp_df["资产负债率"], errors="coerce")
    temp_df["营业收入"] = pd.to_numeric(temp_df["营业收入"], errors="coerce")
    temp_df["营业收入同比增长"] = pd.to_numeric(temp_df["营业收入同比增长"], errors="coerce")
    temp_df["归属净利润"] = pd.to_numeric(temp_df["归属净利润"], errors="coerce")
    temp_df["归属净利润同比增长"] = pd.to_numeric(temp_df["归属净利润同比增长"], errors="coerce")
    temp_df["报告期"] = pd.to_datetime(temp_df["报告期"], format='%Y%m%d', errors="coerce")
    temp_df["总股本"] = pd.to_numeric(temp_df["总股本"], errors="coerce")
    temp_df["已流通股份"] = pd.to_numeric(temp_df["已流通股份"], errors="coerce")
    temp_df["总市值"] = pd.to_numeric(temp_df["总市值"], errors="coerce")
    temp_df["流通市值"] = pd.to_numeric(temp_df["流通市值"], errors="coerce")
    temp_df["上市时间"] = pd.to_datetime(temp_df["上市时间"], format='%Y%m%d', errors="coerce")
    return temp_df
#Original version; the implementation is clumsy — a simplified version below supersedes it.
#@lru_cache()
def code_id_map_em_older() -> dict:
    """
    Eastmoney stock code -> market id mapping (legacy implementation).
    http://quote.eastmoney.com/center/gridlist.html#hs_a_board
    :return: mapping of stock code to Eastmoney market id; empty dict when
        any market query returns no data
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"

    def _fetch_codes(fs: str, fields: str = "f12"):
        # Fetch one 50000-row page for the given market filter; None on empty.
        params = {
            "pn": "1",
            "pz": "50000",
            "po": "1",
            "np": "1",
            "ut": "bd1d9ddb04089700cf9c27f6f7426281",
            "fltt": "2",
            "invt": "2",
            "fid": "f3",
            "fs": fs,
            "fields": fields,
            "_": "1623833739532",
        }
        r = requests.get(url, params=params)
        data_json = r.json()
        if not data_json["data"]["diff"]:
            return None
        return pd.DataFrame(data_json["data"]["diff"])

    code_id_dict = {}
    # (market filter, Eastmoney market id) pairs that share one fixed id each.
    simple_markets = [
        ("m:1 t:2,m:1 t:23", 1),  # Shanghai
        ("m:0 t:6,m:0 t:80", 0),  # Shenzhen
        ("m:0 t:81 s:2048", 0),  # Beijing
        ("m:128 t:3", 116),  # HK main board
        ("m:128 t:4", 116),  # HK GEM
    ]
    for fs, market_id in simple_markets:
        temp_df = _fetch_codes(fs)
        if temp_df is None:
            # Preserve legacy behavior: any empty market aborts with {}.
            return dict()
        code_id_dict.update(dict.fromkeys(temp_df["f12"], market_id))
    # US listings carry their market id in f13 (105/106/107): group by it.
    temp_df = _fetch_codes("m:105,m:106,m:107", fields="f12,f13")
    if temp_df is None:
        return dict()
    for market_id, group in temp_df.groupby('f13'):
        # BUG FIX: the old code did dict(zip(group["f12"], str(id))), which
        # zipped the codes against the *characters* of str(id) — only the
        # first few codes got mapped, and to single digit strings at that.
        code_id_dict.update(dict.fromkeys(group["f12"], market_id))
    print(code_id_dict)
    return code_id_dict
@lru_cache()
def code_id_map_em() -> dict:
    """
    Eastmoney stock code -> market id mapping, fetched page by page.
    Covers China A shares, Hong Kong and US listings; the market id for each
    code comes from field ``f13`` of the clist API response.
    :return: mapping of stock code (``f12``) to market id (``f13``)
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    pz = 200  # fixed page size
    params = {
        "pn": "1",
        "pz": str(pz),
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "",
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {
        "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
        "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
        "us": "m:105,m:106,m:107"
    }
    code_id_dict = {}
    for market_id, fs in market_fs.items():
        params["fs"] = fs
        pn = 1  # every market starts from page 1
        # BUG FIX: reset the page bound per market; previously a stale pn_max
        # from the prior market was reused when page 1 lacked a "total" field.
        pn_max = 10000  # provisional bound until page 1 reports "total"
        total = 0
        fetched_cnt = 0
        while pn <= pn_max:
            params["pn"] = str(pn)
            data_json = fetch_with_retries_em(url, params)
            if not data_json or "data" not in data_json or "diff" not in data_json["data"]:
                print(f"市场 {market_id} 数据获取失败或为空,跳过。")
                break
            temp_df = pd.DataFrame(data_json["data"]["diff"])
            # Derive the real page count from "total" once, on the first page.
            if pn == 1 and "total" in data_json["data"]:
                total = int(data_json["data"]["total"])
                pn_max = (total // pz) + 1
                print(f"市场 {market_id} 总数据量: {total}, 需要页数: {pn_max}, 当前获取数量: {len(temp_df)}, 每页最大拉取行数: {pz}")
            # Group by f13: every code within a group shares one market id.
            grouped = temp_df.groupby('f13')
            for id, group in grouped:
                code_id_dict.update(dict.fromkeys(group["f12"], id))
                fetched_cnt += len(group)
            pn += 1
        print(f'获取 {market_id} 已获取总股票数: {fetched_cnt}, 总股票数: {total}')
    return code_id_dict
@lru_cache()
def code_id_map_em2() -> dict:
    """
    Eastmoney stock code -> market id mapping (one 50000-row page per market).
    http://quote.eastmoney.com/center/gridlist.html#hs_a_board
    :return: mapping of stock code (``f12``) to market id (``f13``); empty
        dict when any market query returns no data
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "m:1 t:2,m:1 t:23",
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
                 "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
                 "us": "m:105,m:106,m:107"}
    code_id_dict = dict()
    for market_id, fs in market_fs.items():
        params['fs'] = fs
        r = requests.get(url, params=params)
        data_json = r.json()
        if not data_json["data"]["diff"]:
            return dict()
        temp_df = pd.DataFrame(data_json["data"]["diff"])
        # Group by f13: every code within a group shares one market id.
        # (Dead writes temp_df["market_id"] / temp_df[f"{market_id}_{id}"]
        # from the old version removed — nothing read them.)
        for id, group in temp_df.groupby('f13'):
            code_id_dict.update(dict.fromkeys(group["f12"], id))
            print(f'get {market_id} stock list. f13: {id}, stock count: {len(group)}')
    return code_id_dict
def stock_zh_a_hist(
    symbol: str = "000001",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney daily/weekly/monthly K-line history for one symbol.
    https://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: stock code
    :type symbol: str
    :param period: choice of {'daily', 'weekly', 'monthly'}
    :type period: str
    :param start_date: start date, ``YYYYMMDD``
    :type start_date: str
    :param end_date: end date, ``YYYYMMDD``
    :type end_date: str
    :param adjust: choice of {"qfq": "前复权", "hfq": "后复权", "": "不复权"}
    :type adjust: str
    :return: K-line history; empty DataFrame when no data is returned
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    adjust_dict = {"qfq": "1", "hfq": "2", "": "0"}
    period_dict = {"daily": "101", "weekly": "102", "monthly": "103"}
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": period_dict[period],
        "fqt": adjust_dict[adjust],
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "beg": start_date,
        "end": end_date,
        "_": "1623766962675",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    if not (data_json["data"] and data_json["data"]["klines"]):
        return pd.DataFrame()
    # Each kline is one comma-separated string; split into columns.
    temp_df = pd.DataFrame(
        [item.split(",") for item in data_json["data"]["klines"]]
    )
    temp_df.columns = [
        "日期",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    # NOTE: the old code built a datetime index here and then immediately
    # reset it with drop=True — pure wasted work. "日期" stays a string,
    # matching the original returned frame exactly.
    for col in ("开盘", "收盘", "最高", "最低", "成交量",
                "成交额", "振幅", "涨跌幅", "涨跌额", "换手率"):
        temp_df[col] = pd.to_numeric(temp_df[col])
    return temp_df
def stock_zh_a_hist_min_em(
    symbol: str = "000001",
    start_date: str = "1979-09-01 09:32:00",
    end_date: str = "2222-01-01 09:32:00",
    period: str = "5",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney intraday (minute-level) history for one symbol.
    https://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: stock code
    :type symbol: str
    :param start_date: start datetime, ``YYYY-MM-DD HH:MM:SS``
    :type start_date: str
    :param end_date: end datetime, ``YYYY-MM-DD HH:MM:SS``
    :type end_date: str
    :param period: choice of {'1', '5', '15', '30', '60'} (minutes)
    :type period: str
    :param adjust: choice of {'', 'qfq', 'hfq'}; ignored for period '1'
    :type adjust: str
    :return: minute-level history
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    adjust_map = {
        "": "0",
        "qfq": "1",
        "hfq": "2",
    }
    # 1-minute data comes from the "trends2" endpoint (last 5 days only,
    # no adjustment); other periods use the kline endpoint.
    if period == "1":
        url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "ndays": "5",
            "iscr": "0",
            "secid": f"{code_id_dict[symbol]}.{symbol}",
            "_": "1623766962675",
        }
        r = requests.get(url, params=params)
        data_json = r.json()
        temp_df = pd.DataFrame(
            [item.split(",") for item in data_json["data"]["trends"]]
        )
        temp_df.columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "最新价",
        ]
        # Use a datetime index so label slicing selects the requested window.
        temp_df.index = pd.to_datetime(temp_df["时间"])
        temp_df = temp_df[start_date:end_date]
        temp_df.reset_index(drop=True, inplace=True)
        temp_df["开盘"] = pd.to_numeric(temp_df["开盘"])
        temp_df["收盘"] = pd.to_numeric(temp_df["收盘"])
        temp_df["最高"] = pd.to_numeric(temp_df["最高"])
        temp_df["最低"] = pd.to_numeric(temp_df["最低"])
        temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
        temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
        temp_df["最新价"] = pd.to_numeric(temp_df["最新价"])
        temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
        return temp_df
    else:
        url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "klt": period,
            "fqt": adjust_map[adjust],
            "secid": f"{code_id_dict[symbol]}.{symbol}",
            "beg": "0",
            "end": "20500000",
            "_": "1630930917857",
        }
        r = requests.get(url, params=params)
        data_json = r.json()
        temp_df = pd.DataFrame(
            [item.split(",") for item in data_json["data"]["klines"]]
        )
        temp_df.columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "振幅",
            "涨跌幅",
            "涨跌额",
            "换手率",
        ]
        # Use a datetime index so label slicing selects the requested window.
        temp_df.index = pd.to_datetime(temp_df["时间"])
        temp_df = temp_df[start_date:end_date]
        temp_df.reset_index(drop=True, inplace=True)
        temp_df["开盘"] = pd.to_numeric(temp_df["开盘"])
        temp_df["收盘"] = pd.to_numeric(temp_df["收盘"])
        temp_df["最高"] = pd.to_numeric(temp_df["最高"])
        temp_df["最低"] = pd.to_numeric(temp_df["最低"])
        temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
        temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
        temp_df["振幅"] = pd.to_numeric(temp_df["振幅"])
        temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"])
        temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"])
        temp_df["换手率"] = pd.to_numeric(temp_df["换手率"])
        temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
        temp_df = temp_df[
            [
                "时间",
                "开盘",
                "收盘",
                "最高",
                "最低",
                "涨跌幅",
                "涨跌额",
                "成交量",
                "成交额",
                "振幅",
                "换手率",
            ]
        ]
        return temp_df
def stock_zh_a_hist_pre_min_em(
    symbol: str = "000001",
    start_time: str = "09:00:00",
    end_time: str = "15:50:00",
) -> pd.DataFrame:
    """
    Eastmoney intraday history for one symbol, including pre-market data.
    http://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: stock code
    :type symbol: str
    :param start_time: start time of day, ``HH:MM:SS``
    :type start_time: str
    :param end_time: end time of day, ``HH:MM:SS``
    :type end_time: str
    :return: intraday history including pre-market data
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    url = "https://push2.eastmoney.com/api/qt/stock/trends2/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
        "ut": "fa5fd1943c7b386f172d6893dbfba10b",
        "ndays": "1",
        "iscr": "1",  # include pre-market (call-auction) data
        "iscca": "0",
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "_": "1623766962675",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    temp_df = pd.DataFrame(
        [item.split(",") for item in data_json["data"]["trends"]]
    )
    temp_df.columns = [
        "时间",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "最新价",
    ]
    temp_df.index = pd.to_datetime(temp_df["时间"])
    # Combine the trading date of the first row with the requested times to
    # slice the session window via the datetime index.
    date_format = temp_df.index[0].date().isoformat()
    temp_df = temp_df[
        date_format + " " + start_time : date_format + " " + end_time
    ]
    temp_df.reset_index(drop=True, inplace=True)
    temp_df["开盘"] = pd.to_numeric(temp_df["开盘"])
    temp_df["收盘"] = pd.to_numeric(temp_df["收盘"])
    temp_df["最高"] = pd.to_numeric(temp_df["最高"])
    temp_df["最低"] = pd.to_numeric(temp_df["最低"])
    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
    temp_df["最新价"] = pd.to_numeric(temp_df["最新价"])
    temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
    return temp_df
if __name__ == "__main__":
stock_zh_a_hist_df = stock_zh_a_hist(
symbol="000858",
period="daily",
start_date="20220516",
end_date="20220722",
adjust="",
)
print(stock_zh_a_hist_df)
exit(0)
stock_zh_a_spot_em_df = stock_zh_a_spot_em()
print(stock_zh_a_spot_em_df)
code_id_map_em_df = code_id_map_em()
print(code_id_map_em_df)
stock_zh_a_hist_df = stock_zh_a_hist(
symbol="430090",
period="daily",
start_date="20220516",
end_date="20220722",
adjust="hfq",
)
print(stock_zh_a_hist_df)
stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em(symbol="833454", period="1")
print(stock_zh_a_hist_min_em_df)
stock_zh_a_hist_pre_min_em_df = stock_zh_a_hist_pre_min_em(symbol="833454")
print(stock_zh_a_hist_pre_min_em_df)
stock_zh_a_spot_em_df = stock_zh_a_spot_em()
print(stock_zh_a_spot_em_df)
stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em(
symbol="000001", period='1'
)
print(stock_zh_a_hist_min_em_df)
stock_zh_a_hist_df = stock_zh_a_hist(
symbol="833454",
period="daily",
start_date="20170301",
end_date="20211115",
adjust="hfq",
)
print(stock_zh_a_hist_df)

View File

@ -0,0 +1,888 @@
# -*- coding:utf-8 -*-
# !/usr/bin/env python
"""
Date: 2022/3/15 17:32
Desc: 东方财富网-数据中心-龙虎榜单
https://data.eastmoney.com/stock/tradedetail.html
"""
import pandas as pd
import requests
from tqdm import tqdm
def stock_lhb_detail_em(
    start_date: str = "20230403", end_date: str = "20230417"
) -> pd.DataFrame:
    """
    Eastmoney data center - dragon-tiger list (龙虎榜) details.
    https://data.eastmoney.com/stock/tradedetail.html
    :param start_date: start date, ``YYYYMMDD``
    :type start_date: str
    :param end_date: end date, ``YYYYMMDD``
    :type end_date: str
    :return: dragon-tiger list details for the date range
    :rtype: pandas.DataFrame
    """
    # API expects ISO dates: "20230403" -> "2023-04-03".
    start_date = "-".join([start_date[:4], start_date[4:6], start_date[6:]])
    end_date = "-".join([end_date[:4], end_date[4:6], end_date[6:]])
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "SECURITY_CODE,TRADE_DATE",
        "sortTypes": "1,-1",
        "pageSize": "5000",
        "pageNumber": "1",
        "reportName": "RPT_DAILYBILLBOARD_DETAILSNEW",
        "columns": "SECURITY_CODE,SECUCODE,SECURITY_NAME_ABBR,TRADE_DATE,EXPLAIN,CLOSE_PRICE,CHANGE_RATE,BILLBOARD_NET_AMT,BILLBOARD_BUY_AMT,BILLBOARD_SELL_AMT,BILLBOARD_DEAL_AMT,ACCUM_AMOUNT,DEAL_NET_RATIO,DEAL_AMOUNT_RATIO,TURNOVERRATE,FREE_MARKET_CAP,EXPLANATION,D1_CLOSE_ADJCHRATE,D2_CLOSE_ADJCHRATE,D5_CLOSE_ADJCHRATE,D10_CLOSE_ADJCHRATE,SECURITY_TYPE_CODE",
        "source": "WEB",
        "client": "WEB",
        "filter": f"(TRADE_DATE<='{end_date}')(TRADE_DATE>='{start_date}')",
    }
    # First request only determines how many pages exist.
    r = requests.get(url, params=params)
    data_json = r.json()
    total_page_num = data_json["result"]["pages"]
    big_df = pd.DataFrame()
    for page in range(1, total_page_num + 1):
        params.update(
            {
                "pageNumber": page,
            }
        )
        r = requests.get(url, params=params)
        data_json = r.json()
        temp_df = pd.DataFrame(data_json["result"]["data"])
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    # Turn the row index into a 1-based "序号" column.
    big_df.reset_index(inplace=True)
    big_df["index"] = big_df.index + 1
    big_df.rename(
        columns={
            "index": "序号",
            "SECURITY_CODE": "代码",
            "SECUCODE": "-",
            "SECURITY_NAME_ABBR": "名称",
            "TRADE_DATE": "上榜日",
            "EXPLAIN": "解读",
            "CLOSE_PRICE": "收盘价",
            "CHANGE_RATE": "涨跌幅",
            "BILLBOARD_NET_AMT": "龙虎榜净买额",
            "BILLBOARD_BUY_AMT": "龙虎榜买入额",
            "BILLBOARD_SELL_AMT": "龙虎榜卖出额",
            "BILLBOARD_DEAL_AMT": "龙虎榜成交额",
            "ACCUM_AMOUNT": "市场总成交额",
            "DEAL_NET_RATIO": "净买额占总成交比",
            "DEAL_AMOUNT_RATIO": "成交额占总成交比",
            "TURNOVERRATE": "换手率",
            "FREE_MARKET_CAP": "流通市值",
            "EXPLANATION": "上榜原因",
            "D1_CLOSE_ADJCHRATE": "上榜后1日",
            "D2_CLOSE_ADJCHRATE": "上榜后2日",
            "D5_CLOSE_ADJCHRATE": "上榜后5日",
            "D10_CLOSE_ADJCHRATE": "上榜后10日",
        },
        inplace=True,
    )
    big_df = big_df[
        [
            "序号",
            "代码",
            "名称",
            "上榜日",
            "解读",
            "收盘价",
            "涨跌幅",
            "龙虎榜净买额",
            "龙虎榜买入额",
            "龙虎榜卖出额",
            "龙虎榜成交额",
            "市场总成交额",
            "净买额占总成交比",
            "成交额占总成交比",
            "换手率",
            "流通市值",
            "上榜原因",
            "上榜后1日",
            "上榜后2日",
            "上榜后5日",
            "上榜后10日",
        ]
    ]
    big_df["上榜日"] = pd.to_datetime(big_df["上榜日"]).dt.date
    # Coerce numeric columns; placeholder values become NaN.
    big_df["收盘价"] = pd.to_numeric(big_df["收盘价"], errors="coerce")
    big_df["涨跌幅"] = pd.to_numeric(big_df["涨跌幅"], errors="coerce")
    big_df["龙虎榜净买额"] = pd.to_numeric(big_df["龙虎榜净买额"], errors="coerce")
    big_df["龙虎榜买入额"] = pd.to_numeric(big_df["龙虎榜买入额"], errors="coerce")
    big_df["龙虎榜卖出额"] = pd.to_numeric(big_df["龙虎榜卖出额"], errors="coerce")
    big_df["龙虎榜成交额"] = pd.to_numeric(big_df["龙虎榜成交额"], errors="coerce")
    big_df["市场总成交额"] = pd.to_numeric(big_df["市场总成交额"], errors="coerce")
    big_df["净买额占总成交比"] = pd.to_numeric(big_df["净买额占总成交比"], errors="coerce")
    big_df["成交额占总成交比"] = pd.to_numeric(big_df["成交额占总成交比"], errors="coerce")
    big_df["换手率"] = pd.to_numeric(big_df["换手率"], errors="coerce")
    big_df["流通市值"] = pd.to_numeric(big_df["流通市值"], errors="coerce")
    big_df["上榜后1日"] = pd.to_numeric(big_df["上榜后1日"], errors="coerce")
    big_df["上榜后2日"] = pd.to_numeric(big_df["上榜后2日"], errors="coerce")
    big_df["上榜后5日"] = pd.to_numeric(big_df["上榜后5日"], errors="coerce")
    big_df["上榜后10日"] = pd.to_numeric(big_df["上榜后10日"], errors="coerce")
    return big_df
def stock_lhb_stock_statistic_em(symbol: str = "近一月") -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-个股上榜统计
    https://data.eastmoney.com/stock/tradedetail.html
    :param symbol: choice of {"近一月", "近三月", "近六月", "近一年"}
    :type symbol: str
    :return: 个股上榜统计; 数值列统一转换为 numeric 类型
    :rtype: pandas.DataFrame
    """
    symbol_map = {
        "近一月": "01",
        "近三月": "02",
        "近六月": "03",
        "近一年": "04",
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "BILLBOARD_TIMES,LATEST_TDATE,SECURITY_CODE",
        "sortTypes": "-1,-1,1",
        "pageSize": "500",
        "pageNumber": "1",
        "reportName": "RPT_BILLBOARD_TRADEALL",
        "columns": "ALL",
        "source": "WEB",
        "client": "WEB",
        "filter": f'(STATISTICS_CYCLE="{symbol_map[symbol]}")',
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    temp_df = pd.DataFrame(data_json["result"]["data"])
    temp_df.reset_index(inplace=True)
    temp_df["index"] = temp_df.index + 1
    # Positional rename: relies on the API returning fields in a fixed order.
    temp_df.columns = [
        "序号",
        "-",
        "代码",
        "最近上榜日",
        "名称",
        "近1个月涨跌幅",
        "近3个月涨跌幅",
        "近6个月涨跌幅",
        "近1年涨跌幅",
        "涨跌幅",
        "收盘价",
        "-",
        "龙虎榜总成交额",
        "龙虎榜净买额",
        "-",
        "-",
        "机构买入净额",
        "上榜次数",
        "龙虎榜买入额",
        "龙虎榜卖出额",
        "机构买入总额",
        "机构卖出总额",
        "买方机构次数",
        "卖方机构次数",
        "-",
    ]
    temp_df = temp_df[
        [
            "序号",
            "代码",
            "名称",
            "最近上榜日",
            "收盘价",
            "涨跌幅",
            "上榜次数",
            "龙虎榜净买额",
            "龙虎榜买入额",
            "龙虎榜卖出额",
            "龙虎榜总成交额",
            "买方机构次数",
            "卖方机构次数",
            "机构买入净额",
            "机构买入总额",
            "机构卖出总额",
            "近1个月涨跌幅",
            "近3个月涨跌幅",
            "近6个月涨跌幅",
            "近1年涨跌幅",
        ]
    ]
    temp_df["最近上榜日"] = pd.to_datetime(temp_df["最近上榜日"]).dt.date
    # Fix: coerce numeric columns like every sibling interface in this file;
    # the original returned them as raw object/string columns.
    numeric_cols = [
        "收盘价",
        "涨跌幅",
        "上榜次数",
        "龙虎榜净买额",
        "龙虎榜买入额",
        "龙虎榜卖出额",
        "龙虎榜总成交额",
        "买方机构次数",
        "卖方机构次数",
        "机构买入净额",
        "机构买入总额",
        "机构卖出总额",
        "近1个月涨跌幅",
        "近3个月涨跌幅",
        "近6个月涨跌幅",
        "近1年涨跌幅",
    ]
    for col in numeric_cols:
        temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")
    return temp_df
def stock_lhb_jgmmtj_em(
    start_date: str = "20220906", end_date: str = "20220906"
) -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-机构买卖每日统计
    https://data.eastmoney.com/stock/jgmmtj.html
    :param start_date: 开始日期
    :type start_date: str
    :param end_date: 结束日期
    :type end_date: str
    :return: 机构买卖每日统计
    :rtype: pandas.DataFrame
    """
    # Reformat YYYYMMDD -> YYYY-MM-DD as the API filter expects.
    start_date = "-".join([start_date[:4], start_date[4:6], start_date[6:]])
    end_date = "-".join([end_date[:4], end_date[4:6], end_date[6:]])
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "NET_BUY_AMT,TRADE_DATE,SECURITY_CODE",
        "sortTypes": "-1,-1,1",
        "pageSize": "5000",
        "pageNumber": "1",
        "reportName": "RPT_ORGANIZATION_TRADE_DETAILS",
        "columns": "ALL",
        "source": "WEB",
        "client": "WEB",
        "filter": f"(TRADE_DATE>='{start_date}')(TRADE_DATE<='{end_date}')",
    }
    payload = requests.get(url, params=params).json()
    df = pd.DataFrame(payload["result"]["data"])
    df.reset_index(inplace=True)
    df["index"] = df.index + 1
    # Positional rename: relies on the API returning fields in a fixed order.
    df.columns = [
        "序号",
        "-",
        "名称",
        "代码",
        "上榜日期",
        "收盘价",
        "涨跌幅",
        "买方机构数",
        "卖方机构数",
        "机构买入总额",
        "机构卖出总额",
        "机构买入净额",
        "市场总成交额",
        "机构净买额占总成交额比",
        "换手率",
        "流通市值",
        "上榜原因",
        "-",
        "-",
        "-",
        "-",
        "-",
        "-",
        "-",
        "-",
        "-",
    ]
    df = df[
        [
            "序号",
            "代码",
            "名称",
            "收盘价",
            "涨跌幅",
            "买方机构数",
            "卖方机构数",
            "机构买入总额",
            "机构卖出总额",
            "机构买入净额",
            "市场总成交额",
            "机构净买额占总成交额比",
            "换手率",
            "流通市值",
            "上榜原因",
            "上榜日期",
        ]
    ]
    df["上榜日期"] = pd.to_datetime(df["上榜日期"]).dt.date
    # Coerce every numeric column in one pass.
    numeric_cols = [
        "收盘价",
        "涨跌幅",
        "买方机构数",
        "卖方机构数",
        "机构买入总额",
        "机构卖出总额",
        "机构买入净额",
        "市场总成交额",
        "机构净买额占总成交额比",
        "换手率",
        "流通市值",
    ]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df
def stock_lhb_jgstatistic_em(symbol: str = "近一月") -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-机构席位追踪
    https://data.eastmoney.com/stock/jgstatistic.html
    :param symbol: choice of {"近一月", "近三月", "近六月", "近一年"}
    :type symbol: str
    :return: 机构席位追踪
    :rtype: pandas.DataFrame
    """
    cycle_map = {
        "近一月": "01",
        "近三月": "02",
        "近六月": "03",
        "近一年": "04",
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "ONLIST_TIMES,SECURITY_CODE",
        "sortTypes": "-1,1",
        "pageSize": "5000",
        "pageNumber": "1",
        "reportName": "RPT_ORGANIZATION_SEATNEW",
        "columns": "ALL",
        "source": "WEB",
        "client": "WEB",
        "filter": f'(STATISTICSCYCLE="{cycle_map[symbol]}")',
    }
    first_page = requests.get(url, params=params).json()
    total_page = first_page["result"]["pages"]
    # Fetch every page and stitch the results together.
    frames = []
    for page in tqdm(range(1, total_page + 1), leave=False):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json["result"]["data"]))
    big_df = pd.concat([pd.DataFrame()] + frames, ignore_index=True)
    big_df.reset_index(inplace=True)
    big_df["index"] = big_df.index + 1
    big_df.rename(
        columns={
            "index": "序号",
            "SECURITY_CODE": "代码",
            "SECURITY_NAME_ABBR": "名称",
            "CLOSE_PRICE": "收盘价",
            "CHANGE_RATE": "涨跌幅",
            "AMOUNT": "龙虎榜成交金额",
            "ONLIST_TIMES": "上榜次数",
            "BUY_AMT": "机构买入额",
            "BUY_TIMES": "机构买入次数",
            "SELL_AMT": "机构卖出额",
            "SELL_TIMES": "机构卖出次数",
            "NET_BUY_AMT": "机构净买额",
            "M1_CLOSE_ADJCHRATE": "近1个月涨跌幅",
            "M3_CLOSE_ADJCHRATE": "近3个月涨跌幅",
            "M6_CLOSE_ADJCHRATE": "近6个月涨跌幅",
            "Y1_CLOSE_ADJCHRATE": "近1年涨跌幅",
        },
        inplace=True,
    )
    keep_cols = [
        "序号",
        "代码",
        "名称",
        "收盘价",
        "涨跌幅",
        "龙虎榜成交金额",
        "上榜次数",
        "机构买入额",
        "机构买入次数",
        "机构卖出额",
        "机构卖出次数",
        "机构净买额",
        "近1个月涨跌幅",
        "近3个月涨跌幅",
        "近6个月涨跌幅",
        "近1年涨跌幅",
    ]
    big_df = big_df[keep_cols]
    # Everything past 名称 is numeric; coerce in one loop.
    for col in keep_cols[3:]:
        big_df[col] = pd.to_numeric(big_df[col], errors="coerce")
    return big_df
def stock_lhb_hyyyb_em(
    start_date: str = "20220324", end_date: str = "20220324"
) -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-每日活跃营业部
    https://data.eastmoney.com/stock/jgmmtj.html
    :param start_date: 开始日期
    :type start_date: str
    :param end_date: 结束日期
    :type end_date: str
    :return: 每日活跃营业部
    :rtype: pandas.DataFrame
    """
    # Reformat YYYYMMDD -> YYYY-MM-DD as the API filter expects.
    start_date = "-".join([start_date[:4], start_date[4:6], start_date[6:]])
    end_date = "-".join([end_date[:4], end_date[4:6], end_date[6:]])
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "TOTAL_NETAMT,ONLIST_DATE,OPERATEDEPT_CODE",
        "sortTypes": "-1,-1,1",
        "pageSize": "5000",
        "pageNumber": "1",
        "reportName": "RPT_OPERATEDEPT_ACTIVE",
        "columns": "ALL",
        "source": "WEB",
        "client": "WEB",
        "filter": f"(ONLIST_DATE>='{start_date}')(ONLIST_DATE<='{end_date}')",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    total_page = data_json["result"]["pages"]
    big_df = pd.DataFrame()
    for page in tqdm(range(1, total_page + 1), leave=False):
        params.update({"pageNumber": page})
        r = requests.get(url, params=params)
        data_json = r.json()
        temp_df = pd.DataFrame(data_json["result"]["data"])
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df.reset_index(inplace=True)
    big_df["index"] = big_df.index + 1
    # Positional rename: relies on the API returning fields in a fixed order.
    big_df.columns = [
        "序号",
        "营业部名称",
        "上榜日",
        "买入个股数",
        "卖出个股数",
        "买入总金额",
        "卖出总金额",
        "总买卖净额",
        "-",
        "-",
        "买入股票",
        "-",
        "-",
    ]
    big_df = big_df[
        [
            "序号",
            "营业部名称",
            "上榜日",
            "买入个股数",
            "卖出个股数",
            "买入总金额",
            "卖出总金额",
            "总买卖净额",
            "买入股票",
        ]
    ]
    big_df["上榜日"] = pd.to_datetime(big_df["上榜日"]).dt.date
    # Fix: coerce malformed values to NaN instead of raising, consistent with
    # the other interfaces in this file (the original calls omitted
    # errors="coerce" and would raise ValueError on bad input).
    for col in ["买入个股数", "卖出个股数", "买入总金额", "卖出总金额", "总买卖净额"]:
        big_df[col] = pd.to_numeric(big_df[col], errors="coerce")
    return big_df
def stock_lhb_yybph_em(symbol: str = "近一月") -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-营业部排行
    https://data.eastmoney.com/stock/yybph.html
    :param symbol: choice of {"近一月", "近三月", "近六月", "近一年"}
    :type symbol: str
    :return: 营业部排行
    :rtype: pandas.DataFrame
    """
    cycle_map = {
        "近一月": "01",
        "近三月": "02",
        "近六月": "03",
        "近一年": "04",
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "TOTAL_BUYER_SALESTIMES_1DAY,OPERATEDEPT_CODE",
        "sortTypes": "-1,1",
        "pageSize": "5000",
        "pageNumber": "1",
        "reportName": "RPT_RATEDEPT_RETURNT_RANKING",
        "columns": "ALL",
        "source": "WEB",
        "client": "WEB",
        "filter": f'(STATISTICSCYCLE="{cycle_map[symbol]}")',
    }
    first_page = requests.get(url, params=params).json()
    total_page = first_page["result"]["pages"]
    frames = []
    for page in tqdm(range(1, total_page + 1), leave=False):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json["result"]["data"]))
    big_df = pd.concat([pd.DataFrame()] + frames, ignore_index=True)
    big_df.reset_index(inplace=True)
    big_df["index"] = big_df.index + 1
    big_df.rename(
        columns={
            "index": "序号",
            "OPERATEDEPT_NAME": "营业部名称",
            "TOTAL_BUYER_SALESTIMES_1DAY": "上榜后1天-买入次数",
            "AVERAGE_INCREASE_1DAY": "上榜后1天-平均涨幅",
            "RISE_PROBABILITY_1DAY": "上榜后1天-上涨概率",
            "TOTAL_BUYER_SALESTIMES_2DAY": "上榜后2天-买入次数",
            "AVERAGE_INCREASE_2DAY": "上榜后2天-平均涨幅",
            "RISE_PROBABILITY_2DAY": "上榜后2天-上涨概率",
            "TOTAL_BUYER_SALESTIMES_3DAY": "上榜后3天-买入次数",
            "AVERAGE_INCREASE_3DAY": "上榜后3天-平均涨幅",
            "RISE_PROBABILITY_3DAY": "上榜后3天-上涨概率",
            "TOTAL_BUYER_SALESTIMES_5DAY": "上榜后5天-买入次数",
            "AVERAGE_INCREASE_5DAY": "上榜后5天-平均涨幅",
            "RISE_PROBABILITY_5DAY": "上榜后5天-上涨概率",
            "TOTAL_BUYER_SALESTIMES_10DAY": "上榜后10天-买入次数",
            "AVERAGE_INCREASE_10DAY": "上榜后10天-平均涨幅",
            "RISE_PROBABILITY_10DAY": "上榜后10天-上涨概率",
        },
        inplace=True,
    )
    # The per-horizon column names follow a regular pattern, so generate them
    # instead of spelling out all fifteen by hand.
    metric_cols = [
        f"上榜后{day}天-{metric}"
        for day in ("1", "2", "3", "5", "10")
        for metric in ("买入次数", "平均涨幅", "上涨概率")
    ]
    big_df = big_df[["序号", "营业部名称"] + metric_cols]
    for col in metric_cols:
        big_df[col] = pd.to_numeric(big_df[col], errors="coerce")
    return big_df
def stock_lhb_traderstatistic_em(symbol: str = "近一月") -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-营业部统计
    https://data.eastmoney.com/stock/traderstatistic.html
    :param symbol: choice of {"近一月", "近三月", "近六月", "近一年"}
    :type symbol: str
    :return: 营业部统计
    :rtype: pandas.DataFrame
    """
    cycle_map = {
        "近一月": "01",
        "近三月": "02",
        "近六月": "03",
        "近一年": "04",
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "sortColumns": "AMOUNT,OPERATEDEPT_CODE",
        "sortTypes": "-1,1",
        "pageSize": "5000",
        "pageNumber": "1",
        "reportName": "RPT_OPERATEDEPT_LIST_STATISTICS",
        "columns": "ALL",
        "source": "WEB",
        "client": "WEB",
        "filter": f'(STATISTICSCYCLE="{cycle_map[symbol]}")',
    }
    first_page = requests.get(url, params=params).json()
    total_page = first_page["result"]["pages"]
    frames = []
    for page in tqdm(range(1, total_page + 1), leave=False):
        params.update({"pageNumber": page})
        page_json = requests.get(url, params=params).json()
        frames.append(pd.DataFrame(page_json["result"]["data"]))
    big_df = pd.concat([pd.DataFrame()] + frames, ignore_index=True)
    big_df.reset_index(inplace=True)
    big_df["index"] = big_df.index + 1
    big_df.rename(
        columns={
            "index": "序号",
            "OPERATEDEPT_NAME": "营业部名称",
            "AMOUNT": "龙虎榜成交金额",
            "SALES_ONLIST_TIMES": "上榜次数",
            "ACT_BUY": "买入额",
            "TOTAL_BUYER_SALESTIMES": "买入次数",
            "ACT_SELL": "卖出额",
            "TOTAL_SELLER_SALESTIMES": "卖出次数",
        },
        inplace=True,
    )
    keep_cols = [
        "序号",
        "营业部名称",
        "龙虎榜成交金额",
        "上榜次数",
        "买入额",
        "买入次数",
        "卖出额",
        "卖出次数",
    ]
    big_df = big_df[keep_cols]
    # Everything past 营业部名称 is numeric.
    for col in keep_cols[2:]:
        big_df[col] = pd.to_numeric(big_df[col], errors="coerce")
    return big_df
def stock_lhb_stock_detail_date_em(symbol: str = "600077") -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-个股龙虎榜详情-日期
    https://data.eastmoney.com/stock/tradedetail.html
    :param symbol: 股票代码
    :type symbol: str
    :return: 个股龙虎榜详情-日期
    :rtype: pandas.DataFrame
    """
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "reportName": "RPT_LHB_BOARDDATE",
        "columns": "SECURITY_CODE,TRADE_DATE,TR_DATE",
        "filter": f'(SECURITY_CODE="{symbol}")',
        "pageNumber": "1",
        "pageSize": "1000",
        "sortTypes": "-1",
        "sortColumns": "TRADE_DATE",
        "source": "WEB",
        "client": "WEB",
    }
    payload = requests.get(url, params=params).json()
    date_df = pd.DataFrame(payload["result"]["data"])
    date_df.reset_index(inplace=True)
    date_df["index"] = date_df.index + 1
    # Positional rename; the trailing TR_DATE column is dropped below.
    date_df.columns = ["序号", "股票代码", "交易日", "-"]
    date_df = date_df[["序号", "股票代码", "交易日"]]
    date_df["交易日"] = pd.to_datetime(date_df["交易日"]).dt.date
    return date_df
def stock_lhb_stock_detail_em(
    symbol: str = "000788", date: str = "20220315", flag: str = "卖出"
) -> pd.DataFrame:
    """
    东方财富网-数据中心-龙虎榜单-个股龙虎榜详情
    https://data.eastmoney.com/stock/lhb/600077.html
    :param symbol: 股票代码
    :type symbol: str
    :param date: 查询日期; 需要通过 ak.stock_lhb_stock_detail_date_em(symbol="600077") 接口获取相应股票的有龙虎榜详情数据的日期
    :type date: str
    :param flag: choice of {"买入", "卖出"}
    :type flag: str
    :return: 个股龙虎榜详情
    :rtype: pandas.DataFrame
    """
    flag_map = {
        "买入": "BUY",
        "卖出": "SELL",
    }
    report_map = {
        "买入": "RPT_BILLBOARD_DAILYDETAILSBUY",
        "卖出": "RPT_BILLBOARD_DAILYDETAILSSELL",
    }
    url = "https://datacenter-web.eastmoney.com/api/data/v1/get"
    params = {
        "reportName": report_map[flag],
        "columns": "ALL",
        "filter": f"""(TRADE_DATE='{'-'.join([date[:4], date[4:6], date[6:]])}')(SECURITY_CODE="{symbol}")""",
        "pageNumber": "1",
        "pageSize": "500",
        "sortTypes": "-1",
        "sortColumns": flag_map[flag],
        "source": "WEB",
        "client": "WEB",
        "_": "1647338693644",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    temp_df = pd.DataFrame(data_json["result"]["data"])
    temp_df.reset_index(inplace=True)
    temp_df["index"] = temp_df.index + 1
    # The buy and sell reports share the same column layout, so a single
    # positional mapping serves both flags (the original duplicated this whole
    # section in an if/else with byte-identical branches).
    temp_df.columns = [
        "序号",
        "-",
        "-",
        "-",
        "-",
        "交易营业部名称",
        "类型",
        "-",
        "-",
        "-",
        "-",
        "买入金额",
        "卖出金额",
        "净额",
        "-",
        "-",
        "-",
        "-",
        "买入金额-占总成交比例",
        "卖出金额-占总成交比例",
        "-",
    ]
    temp_df = temp_df[
        [
            "序号",
            "交易营业部名称",
            "买入金额",
            "买入金额-占总成交比例",
            "卖出金额",
            "卖出金额-占总成交比例",
            "净额",
            "类型",
        ]
    ]
    temp_df["买入金额"] = pd.to_numeric(temp_df["买入金额"])
    temp_df["买入金额-占总成交比例"] = pd.to_numeric(temp_df["买入金额-占总成交比例"])
    temp_df["卖出金额"] = pd.to_numeric(temp_df["卖出金额"])
    temp_df["卖出金额-占总成交比例"] = pd.to_numeric(temp_df["卖出金额-占总成交比例"])
    temp_df.sort_values("类型", inplace=True)
    temp_df.reset_index(inplace=True, drop=True)
    # Renumber 序号 after sorting so it stays sequential.
    temp_df["序号"] = range(1, len(temp_df) + 1)
    return temp_df
if __name__ == "__main__":
    # Smoke tests: exercise each EastMoney 龙虎榜 interface above and print
    # the resulting DataFrames (requires network access).
    stock_lhb_detail_em_df = stock_lhb_detail_em(
        start_date="20230403", end_date="20230417"
    )
    print(stock_lhb_detail_em_df)
    stock_lhb_stock_statistic_em_df = stock_lhb_stock_statistic_em(symbol="近一月")
    print(stock_lhb_stock_statistic_em_df)
    stock_lhb_stock_statistic_em_df = stock_lhb_stock_statistic_em(symbol="近三月")
    print(stock_lhb_stock_statistic_em_df)
    stock_lhb_stock_statistic_em_df = stock_lhb_stock_statistic_em(symbol="近六月")
    print(stock_lhb_stock_statistic_em_df)
    stock_lhb_stock_statistic_em_df = stock_lhb_stock_statistic_em(symbol="近一年")
    print(stock_lhb_stock_statistic_em_df)
    stock_lhb_jgmmtj_em_df = stock_lhb_jgmmtj_em(
        start_date="20220904", end_date="20220906"
    )
    print(stock_lhb_jgmmtj_em_df)
    stock_lhb_jgstatistic_em_df = stock_lhb_jgstatistic_em(symbol="近一月")
    print(stock_lhb_jgstatistic_em_df)
    stock_lhb_hyyyb_em_df = stock_lhb_hyyyb_em(
        start_date="20220324", end_date="20220324"
    )
    print(stock_lhb_hyyyb_em_df)
    stock_lhb_yybph_em_df = stock_lhb_yybph_em(symbol="近一月")
    print(stock_lhb_yybph_em_df)
    stock_lhb_traderstatistic_em_df = stock_lhb_traderstatistic_em(symbol="近一月")
    print(stock_lhb_traderstatistic_em_df)
    stock_lhb_stock_detail_date_em_df = stock_lhb_stock_detail_date_em(symbol="002901")
    print(stock_lhb_stock_detail_date_em_df)
    stock_lhb_stock_detail_em_df = stock_lhb_stock_detail_em(
        symbol="002901", date="20221012", flag="买入"
    )
    print(stock_lhb_stock_detail_em_df)
    stock_lhb_stock_detail_em_df = stock_lhb_stock_detail_em(
        symbol="600016", date="20220324", flag="买入"
    )
    print(stock_lhb_stock_detail_em_df)

View File

@ -0,0 +1,216 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/11/19 12:00
Desc: 新浪财经-龙虎榜
https://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml
"""
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
def stock_lhb_detail_daily_sina(
    trade_date: str = "20200730", symbol: str = "当日无价格涨跌幅限制的A股出现异常波动停牌的股票"
) -> pd.DataFrame:
    """
    龙虎榜-每日详情
    http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml
    :param trade_date: 交易日, e.g., trade_date="20200729"
    :type trade_date: str
    :param symbol: 指定标题; 传入 "返回当前交易日所有可查询的指标" 时返回可查询指标名称列表
    :type symbol: str
    :return: 龙虎榜-每日详情 (或指标名称列表, 见 symbol 参数)
    :rtype: pandas.DataFrame
    """
    # Reformat YYYYMMDD -> YYYY-MM-DD for the query string.
    trade_date = "-".join([trade_date[:4], trade_date[4:6], trade_date[6:]])
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml"
    params = {"tradedate": trade_date}
    r = requests.get(url, params=params)
    soup = BeautifulSoup(r.text, "lxml")
    table_name_list = [
        item.get_text().strip()
        for item in soup.find_all(
            "span", attrs={"style": "font-weight:bold;font-size:14px;"}
        )
        if item.get_text().strip() != ""
    ]
    if symbol == "返回当前交易日所有可查询的指标":
        return table_name_list
    position_num = table_name_list.index(symbol)
    # Parse all tables from the page once; the original re-parsed the entire
    # HTML document with pd.read_html for every table it needed.
    tables = pd.read_html(r.text, flavor='bs4', header=1)
    if len(table_name_list) == position_num + 1:
        # The last named indicator spans three consecutive tables.
        temp_df = pd.concat(
            [tables[position_num], tables[position_num + 1], tables[position_num + 2]],
            ignore_index=True,
        )
    else:
        temp_df = tables[position_num]
    temp_df["股票代码"] = temp_df["股票代码"].astype(str).str.zfill(6)
    del temp_df["查看详情"]
    temp_df.columns = ["序号", "股票代码", "股票名称", "收盘价", "对应值", "成交量", "成交额"]
    return temp_df
def _find_last_page(url: str = None, recent_day: str = "60"):
    """
    辅助函数: 获取龙虎榜分页列表的最后一页页码.
    :param url: 目标列表页 URL; 为 None 时默认使用个股上榜统计页
    :type url: str
    :param recent_day: choice of {"5", "10", "30", "60"}
    :type recent_day: str
    :return: 最后一页的页码
    :rtype: int
    """
    # Fix: the original unconditionally overwrote `url` with the ggtj page,
    # so callers paginating yytj/jgzz got a page count from the wrong list.
    if url is None:
        url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml"
    params = {
        "last": recent_day,
        "p": "1",
    }
    r = requests.get(url, params=params)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        previous_page = int(soup.find_all(attrs={"class": "page"})[-2].text)
    except (IndexError, ValueError):
        # No pagination widget on the page -> single page of results.
        previous_page = 1
    if previous_page != 1:
        # Keep jumping to the reported last page until it stops changing.
        while True:
            params = {
                "last": recent_day,
                "p": previous_page,
            }
            r = requests.get(url, params=params)
            soup = BeautifulSoup(r.text, "lxml")
            last_page = int(soup.find_all(attrs={"class": "page"})[-2].text)
            if last_page != previous_page:
                previous_page = last_page
                continue
            else:
                break
    return previous_page
def stock_lhb_ggtj_sina(recent_day: str = "30") -> pd.DataFrame:
    """
    龙虎榜-个股上榜统计
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml
    :param recent_day: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;}
    :type recent_day: str
    :return: 龙虎榜-每日详情
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml"
    page_count = _find_last_page(url, recent_day)
    frames = []
    for page in tqdm(range(1, page_count + 1), leave=False):
        resp = requests.get(url, params={"last": recent_day, "p": page})
        frames.append(pd.read_html(resp.text)[0])
    big_df = pd.concat([pd.DataFrame()] + frames, ignore_index=True)
    # Restore leading zeros lost when codes are parsed as integers.
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    big_df.columns = ["股票代码", "股票名称", "上榜次数", "累积购买额", "累积卖出额", "净额", "买入席位数", "卖出席位数"]
    return big_df
def stock_lhb_yytj_sina(recent_day: str = "5") -> pd.DataFrame:
    """
    龙虎榜-营业部上榜统计
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml
    :param recent_day: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;}
    :type recent_day: str
    :return: 龙虎榜-营业部上榜统计
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml"
    last_page_num = _find_last_page(url, recent_day)
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            # Fix: the original hard-coded "5" here, silently ignoring the
            # caller-supplied recent_day for the actual data fetch.
            "last": recent_day,
            "p": page,
        }
        r = requests.get(url, params=params)
        temp_df = pd.read_html(r.text)[0].iloc[0:, :]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df.columns = ["营业部名称", "上榜次数", "累积购买额", "买入席位数", "累积卖出额", "卖出席位数", "买入前三股票"]
    big_df['上榜次数'] = pd.to_numeric(big_df['上榜次数'], errors="coerce")
    big_df['买入席位数'] = pd.to_numeric(big_df['买入席位数'], errors="coerce")
    big_df['卖出席位数'] = pd.to_numeric(big_df['卖出席位数'], errors="coerce")
    return big_df
def stock_lhb_jgzz_sina(recent_day: str = "5") -> pd.DataFrame:
    """
    龙虎榜-机构席位追踪
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml
    :param recent_day: choice of {"5": 最近 5 天; "10": 最近 10 天; "30": 最近 30 天; "60": 最近 60 天;}
    :type recent_day: str
    :return: 龙虎榜-机构席位追踪
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml"
    page_count = _find_last_page(url, recent_day)
    frames = []
    for page in tqdm(range(1, page_count + 1), leave=False):
        resp = requests.get(url, params={"last": recent_day, "p": page})
        frames.append(pd.read_html(resp.text)[0])
    big_df = pd.concat([pd.DataFrame()] + frames, ignore_index=True)
    # Restore leading zeros lost when codes are parsed as integers.
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    # Price columns are dropped before the positional rename below.
    del big_df["当前价"]
    del big_df["涨跌幅"]
    big_df.columns = ["股票代码", "股票名称", "累积买入额", "买入次数", "累积卖出额", "卖出次数", "净额"]
    big_df['买入次数'] = pd.to_numeric(big_df['买入次数'], errors="coerce")
    big_df['卖出次数'] = pd.to_numeric(big_df['卖出次数'], errors="coerce")
    return big_df
def stock_lhb_jgmx_sina() -> pd.DataFrame:
    """
    龙虎榜-机构席位成交明细
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml
    :return: 龙虎榜-机构席位成交明细
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml"
    params = {
        "p": "1",
    }
    r = requests.get(url, params=params)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        last_page_num = int(soup.find_all(attrs={"class": "page"})[-2].text)
    # Fix: narrow the original bare `except:` (which even swallowed
    # KeyboardInterrupt) to the failures pagination parsing can produce.
    except (IndexError, ValueError):
        # No pagination widget -> single page of results.
        last_page_num = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "p": page,
        }
        r = requests.get(url, params=params)
        temp_df = pd.read_html(r.text)[0].iloc[0:, :]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    # Restore leading zeros lost when codes are parsed as integers.
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    return big_df
if __name__ == "__main__":
    # Smoke tests: exercise each Sina 龙虎榜 interface above and print the
    # results (requires network access).
    indicator_name_list = stock_lhb_detail_daily_sina(
        trade_date="20221118", symbol="返回当前交易日所有可查询的指标"
    )
    print(indicator_name_list)
    stock_lhb_detail_daily_sina_df = stock_lhb_detail_daily_sina(
        trade_date="20221118", symbol="换手率达20%的证券"
    )
    print(stock_lhb_detail_daily_sina_df)
    stock_lhb_ggtj_sina_df = stock_lhb_ggtj_sina(recent_day="60")
    print(stock_lhb_ggtj_sina_df)
    stock_lhb_yytj_sina_df = stock_lhb_yytj_sina(recent_day="60")
    print(stock_lhb_yytj_sina_df)
    stock_lhb_jgzz_sina_df = stock_lhb_jgzz_sina(recent_day="30")
    print(stock_lhb_jgzz_sina_df)
    stock_lhb_jgmx_sina_df = stock_lhb_jgmx_sina()
    print(stock_lhb_jgmx_sina_df)

View File

@ -0,0 +1,79 @@
# -*- coding:utf-8 -*-
# !/usr/bin/env python
import pandas as pd
import requests
import instock.core.tablestructure as tbs
__author__ = 'myh '
__date__ = '2023/5/9 '
def stock_selection() -> pd.DataFrame:
    """
    东方财富网-个股-选股器
    https://data.eastmoney.com/xuangu/
    :return: 选股器
    :rtype: pandas.DataFrame
    """
    cols = tbs.TABLE_CN_STOCK_SELECTION['columns']
    # Comma-join the API field names declared in the table structure.
    sty = ",".join(cols[k]['map'] for k in cols)
    url = "https://data.eastmoney.com/dataapi/xuangu/list"
    params = {
        "sty": sty,
        "filter": "(MARKET+in+(\"上交所主板\",\"深交所主板\",\"深交所创业板\"))(NEW_PRICE>0)",
        "p": 1,
        "ps": 10000,
        "source": "SELECT_SECURITIES",
        "client": "WEB"
    }
    data = requests.get(url, params=params).json()["result"]["data"]
    if not data:
        return pd.DataFrame()
    result = pd.DataFrame(data)
    # Flatten list-valued columns into comma-separated strings.
    for list_col in ('CONCEPT', 'STYLE'):
        mask = ~result[list_col].isna()
        result.loc[mask, list_col] = result.loc[mask, list_col].apply(lambda x: ', '.join(x))
    # Convert each column according to its declared field type.
    for k in cols:
        field_type = tbs.get_field_type_name(cols[k]["type"])
        if field_type == 'numeric':
            result[cols[k]["map"]] = pd.to_numeric(result[cols[k]["map"]], errors="coerce")
        elif field_type == 'datetime':
            result[cols[k]["map"]] = pd.to_datetime(result[cols[k]["map"]], errors="coerce").dt.date
    return result
def stock_selection_params():
    """
    东方财富网-个股-选股器-选股指标
    https://data.eastmoney.com/xuangu/
    :return: 选股器-选股指标原始数据 (接口返回的 "zxzb" 字段)
    :rtype: dict 或 list, 取决于接口返回结构
    """
    url = "https://datacenter-web.eastmoney.com/wstock/selection/api/data/get"
    params = {
        "type": "RPTA_PCNEW_WHOLE",
        "sty": "ALL",
        "p": 1,
        "ps": 50000,
        "source": "SELECT_SECURITIES",
        "client": "WEB"
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    zxzb = data_json["zxzb"]  # 指标 (indicator payload)
    # Fix: return the payload instead of only printing it; the original
    # printed and implicitly returned None, contradicting its docstring.
    print(zxzb)
    return zxzb
if __name__ == "__main__":
    # Smoke test (requires network access).
    stock_selection_df = stock_selection()
    # Fix: the original printed the function object (`stock_selection`)
    # instead of the fetched DataFrame.
    print(stock_selection_df)

View File

@ -0,0 +1,331 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/10/1 19:27
Desc: 新浪财经-交易日历
https://finance.sina.com.cn/realstock/company/klc_td_sh.txt
此处可以用来更新 calendar.json 文件,注意末尾没有 ","
"""
import datetime
import pandas as pd
import requests
from py_mini_racer import py_mini_racer
hk_js_decode = """
function d(t) {
var e, i, n, r, a, o, s, l = (arguments,
864e5), u = 7657, c = [], h = [], d = ~(3 << 30), f = 1 << 30,
p = [0, 3, 5, 6, 9, 10, 12, 15, 17, 18, 20, 23, 24, 27, 29, 30], m = Math, g = function () {
var l, u;
for (l = 0; 64 > l; l++)
h[l] = m.pow(2, l),
26 > l && (c[l] = v(l + 65),
c[l + 26] = v(l + 97),
10 > l && (c[l + 52] = v(l + 48)));
for (c.push("+", "/"),
c = c.join(""),
i = t.split(""),
n = i.length,
l = 0; n > l; l++)
i[l] = c.indexOf(i[l]);
return r = {},
e = o = 0,
a = {},
u = w([12, 6]),
s = 63 ^ u[1],
{
_1479: T,
_136: _,
_200: S,
_139: k,
_197: _mi_run
}["_" + u[0]] || function () {
return []
}
}, v = String.fromCharCode, b = function (t) {
return t === {}._
}, N = function () {
var t, e;
for (t = y(),
e = 1; ;) {
if (!y())
return e * (2 * t - 1);
e++
}
}, y = function () {
var t;
return e >= n ? 0 : (t = i[e] & 1 << o,
o++,
o >= 6 && (o -= 6,
e++),
!!t)
}, w = function (t, r, a) {
var s, l, u, c, d;
for (l = [],
u = 0,
r || (r = []),
a || (a = []),
s = 0; s < t.length; s++)
if (c = t[s],
u = 0,
c) {
if (e >= n)
return l;
if (t[s] <= 0)
u = 0;
else if (t[s] <= 30) {
for (; d = 6 - o,
d = c > d ? d : c,
u |= (i[e] >> o & (1 << d) - 1) << t[s] - c,
o += d,
o >= 6 && (o -= 6,
e++),
c -= d,
!(0 >= c);)
;
r[s] && u >= h[t[s] - 1] && (u -= h[t[s]])
} else
u = w([30, t[s] - 30], [0, r[s]]),
a[s] || (u = u[0] + u[1] * h[30]);
l[s] = u
} else
l[s] = 0;
return l
}, x = function (t) {
var e, i, n;
for (t > 1 && (e = 0),
e = 0; t > e; e++)
r.d++,
n = r.d % 7,
(3 == n || 4 == n) && (r.d += 5 - n);
return i = new Date,
i.setTime((u + r.d) * l),
i
}, S = function () {
var t, i, a, o, l;
if (s >= 1)
return [];
for (r.d = w([18], [1])[0] - 1,
a = w([3, 3, 30, 6]),
r.p = a[0],
r.ld = a[1],
r.cd = a[2],
r.c = a[3],
r.m = m.pow(10, r.p),
r.pc = r.cd / r.m,
i = [],
t = 0; o = {
d: 1
},
y() && (a = w([3])[0],
0 == a ? o.d = w([6])[0] : 1 == a ? (r.d = w([18])[0],
o.d = 0) : o.d = a),
l = {
day: x(o.d)
},
y() && (r.ld += N()),
a = w([3 * r.ld], [1]),
r.cd += a[0],
l.close = r.cd / r.m,
i.push(l),
!(e >= n) && (e != n - 1 || 63 & (r.c ^ t + 1)); t++)
;
return i[0].prevclose = r.pc,
i
}, _ = function () {
var t, i, a, o, l, u, c, h, d, f, p;
if (s > 2)
return [];
for (c = [],
d = {
v: "volume",
p: "price",
a: "avg_price"
},
r.d = w([18], [1])[0] - 1,
h = {
day: x(1)
},
a = w(1 > s ? [3, 3, 4, 1, 1, 1, 5] : [4, 4, 4, 1, 1, 1, 3]),
t = 0; 7 > t; t++)
r[["la", "lp", "lv", "tv", "rv", "zv", "pp"][t]] = a[t];
for (r.m = m.pow(10, r.pp),
s >= 1 ? (a = w([3, 3]),
r.c = a[0],
a = a[1]) : (a = 5,
r.c = 2),
r.pc = w([6 * a])[0],
h.pc = r.pc / r.m,
r.cp = r.pc,
r.da = 0,
r.sa = r.sv = 0,
t = 0; !(e >= n) && (e != n - 1 || 7 & (r.c ^ t)); t++) {
for (l = {},
o = {},
f = r.tv ? y() : 1,
i = 0; 3 > i; i++)
if (p = ["v", "p", "a"][i],
(f ? y() : 0) && (a = N(),
r["l" + p] += a),
u = "v" == p && r.rv ? y() : 1,
a = w([3 * r["l" + p] + ("v" == p ? 7 * u : 0)], [!!i])[0] * (u ? 1 : 100),
o[p] = a,
"v" == p) {
if (!(l[d[p]] = a) && (s > 1 || 241 > t) && (r.zv ? !y() : 1)) {
o.p = 0;
break
}
} else
"a" == p && (r.da = (1 > s ? 0 : r.da) + o.a);
r.sv += o.v,
l[d.p] = (r.cp += o.p) / r.m,
r.sa += o.v * r.cp,
l[d.a] = b(o.a) ? t ? c[t - 1][d.a] : l[d.p] : r.sv ? ((m.floor((r.sa * (2e3 / r.m) + r.sv) / r.sv) >> 1) + r.da) / 1e3 : l[d.p] + r.da / 1e3,
c.push(l)
}
return c[0].date = h.day,
c[0].prevclose = h.pc,
c
}, T = function () {
var t, e, i, n, a, o, l;
if (s >= 1)
return [];
for (r.lv = 0,
r.ld = 0,
r.cd = 0,
r.cv = [0, 0],
r.p = w([6])[0],
r.d = w([18], [1])[0] - 1,
r.m = m.pow(10, r.p),
a = w([3, 3]),
r.md = a[0],
r.mv = a[1],
t = []; a = w([6]),
a.length;) {
if (i = {
c: a[0]
},
n = {},
i.d = 1,
32 & i.c)
for (; ;) {
if (a = w([6])[0],
63 == (16 | a)) {
l = 16 & a ? "x" : "u",
a = w([3, 3]),
i[l + "_d"] = a[0] + r.md,
i[l + "_v"] = a[1] + r.mv;
break
}
if (32 & a) {
o = 8 & a ? "d" : "v",
l = 16 & a ? "x" : "u",
i[l + "_" + o] = (7 & a) + r["m" + o];
break
}
if (o = 15 & a,
0 == o ? i.d = w([6])[0] : 1 == o ? (r.d = o = w([18])[0],
i.d = 0) : i.d = o,
!(16 & a))
break
}
n.date = x(i.d);
for (o in {
v: 0,
d: 0
})
b(i["x_" + o]) || (r["l" + o] = i["x_" + o]),
b(i["u_" + o]) && (i["u_" + o] = r["l" + o]);
for (i.l_l = [i.u_d, i.u_d, i.u_d, i.u_d, i.u_v],
l = p[15 & i.c],
1 & i.u_v && (l = 31 - l),
16 & i.c && (i.l_l[4] += 2),
e = 0; 5 > e; e++)
l & 1 << 4 - e && i.l_l[e]++,
i.l_l[e] *= 3;
i.d_v = w(i.l_l, [1, 0, 0, 1, 1], [0, 0, 0, 0, 1]),
o = r.cd + i.d_v[0],
n.open = o / r.m,
n.high = (o + i.d_v[1]) / r.m,
n.low = (o - i.d_v[2]) / r.m,
n.close = (o + i.d_v[3]) / r.m,
a = i.d_v[4],
"number" == typeof a && (a = [a, a >= 0 ? 0 : -1]),
r.cd = o + i.d_v[3],
l = r.cv[0] + a[0],
r.cv = [l & d, r.cv[1] + a[1] + !!((r.cv[0] & d) + (a[0] & d) & f)],
n.volume = (r.cv[0] & f - 1) + r.cv[1] * f,
t.push(n)
}
return t
}, k = function () {
var t, e, i, n;
if (s > 1)
return [];
for (r.l = 0,
n = -1,
r.d = w([18])[0] - 1,
i = w([18])[0]; r.d < i;)
e = x(1),
0 >= n ? (y() && (r.l += N()),
n = w([3 * r.l], [0])[0] + 1,
t || (t = [e],
n--)) : t.push(e),
n--;
return t
};
return _mi_run = function () {
var t, i, a, o;
if (s >= 1)
return [];
for (r.f = w([6])[0],
r.c = w([6])[0],
a = [],
r.dv = [],
r.dl = [],
t = 0; t < r.f; t++)
r.dv[t] = 0,
r.dl[t] = 0;
for (t = 0; !(e >= n) && (e != n - 1 || 7 & (r.c ^ t)); t++) {
for (o = [],
i = 0; i < r.f; i++)
y() && (r.dl[i] += N()),
r.dv[i] += w([3 * r.dl[i]], [1])[0],
o[i] = r.dv[i];
a.push(o)
}
return a
}
,
g()()
}
"""
def tool_trade_date_hist_sina() -> pd.DataFrame:
    """
    Trading calendar - historical data from Sina.
    https://finance.sina.com.cn/realstock/company/klc_td_sh.txt
    :return: trading calendar with a single ``trade_date`` column
    :rtype: pandas.DataFrame
    """
    url = "https://finance.sina.com.cn/realstock/company/klc_td_sh.txt"
    resp = requests.get(url)
    # The payload is a JS assignment; keep only the encoded blob between '=' and ';'.
    payload = resp.text.split("=")[1].split(";")[0].replace('"', "")
    ctx = py_mini_racer.MiniRacer()
    ctx.eval(hk_js_decode)
    decoded = ctx.call("d", payload)  # run the bundled JS decoder
    frame = pd.DataFrame(decoded)
    frame.columns = ["trade_date"]
    dates = pd.to_datetime(frame["trade_date"]).dt.date.to_list()
    # 1992-05-04 was a trading day but is missing from the upstream calendar.
    dates.append(datetime.date(1992, 5, 4))
    dates.sort()
    return pd.DataFrame(dates, columns=["trade_date"])
if __name__ == "__main__":
    # Manual check: fetch and print the full trading calendar.
    calendar_df = tool_trade_date_hist_sina()
    print(calendar_df)

View File

@ -0,0 +1,607 @@
000001
000002
000063
000100
000157
000166
000301
000333
000338
000408
000425
000538
000568
000596
000617
000625
000651
000661
000708
000725
000733
000768
000776
000786
000792
000800
000807
000858
000876
000895
000938
000963
000977
000983
000999
001289
001965
001979
002001
002007
002027
002049
002050
002074
002129
002142
002179
002180
002230
002236
002241
002252
002271
002304
002311
002352
002371
002410
002415
002459
002460
002466
002475
002493
002555
002594
002601
002603
002648
002709
002714
002736
002812
002821
002841
002916
002920
002938
003816
300014
300015
300033
300059
300122
300124
300142
300223
300274
300308
300316
300347
300408
300413
300418
300433
300442
300450
300454
300496
300498
300628
300661
300750
300751
300759
300760
300782
300832
300896
300919
300957
300979
300999
301269
600000
600009
600010
600011
600015
600016
600018
600019
600023
600025
600026
600027
600028
600029
600030
600031
600036
600039
600048
600050
600061
600085
600089
600104
600111
600115
600132
600150
600161
600176
600183
600188
600196
600219
600233
600276
600309
600332
600346
600362
600372
600406
600415
600426
600436
600438
600460
600489
600515
600519
600547
600570
600584
600585
600588
600600
600660
600674
600690
600732
600741
600745
600760
600795
600803
600809
600837
600845
600875
600886
600887
600893
600900
600905
600918
600919
600926
600938
600941
600958
600989
600999
601006
601009
601012
601021
601059
601066
601088
601100
601111
601117
601138
601166
601169
601186
601211
601225
601229
601236
601238
601288
601318
601319
601328
601336
601360
601377
601390
601398
601600
601601
601607
601618
601628
601633
601658
601668
601669
601688
601689
601698
601699
601728
601766
601788
601799
601800
601808
601816
601818
601838
601857
601865
601868
601872
601877
601878
601881
601888
601898
601899
601901
601916
601919
601939
601985
601988
601989
601995
601998
603019
603195
603259
603260
603288
603296
603369
603392
603501
603659
603799
603806
603833
603899
603986
603993
605117
605499
688008
688009
688012
688036
688041
688082
688111
688126
688187
688223
688256
688271
688303
688363
688396
688599
688981
000009
000034
000035
000039
000066
000069
000400
000423
000519
000547
000591
000623
000629
000630
000683
000690
000738
000818
000830
000831
000878
000887
000932
000933
000960
000967
000975
000988
000998
001914
002008
002025
002028
002044
002064
002065
002078
002080
002081
002085
002091
002120
002123
002131
002138
002145
002151
002152
002156
002176
002185
002192
002195
002202
002212
002223
002240
002245
002266
002268
002273
002281
002292
002294
002312
002340
002353
002368
002372
002384
002389
002396
002405
002407
002409
002414
002422
002432
002436
002439
002444
002456
002463
002465
002472
002497
002508
002511
002517
002532
002541
002544
002558
002572
002602
002607
002624
002625
002738
002739
002756
002791
002831
300001
300002
300003
300012
300017
300024
300037
300054
300058
300068
300070
300073
300088
300118
300133
300136
300144
300182
300207
300212
300251
300253
300285
300296
300315
300339
300346
300383
300390
300394
300395
300438
300457
300459
300474
300502
300529
300558
300567
300568
300573
300595
300601
300604
300627
300676
300699
300724
300763
300769
300866
301236
301558
600004
600007
600008
600038
600066
600096
600118
600129
600131
600141
600143
600153
600157
600160
600166
600167
600170
600177
600256
600258
600316
600323
600325
600352
600392
600398
600399
600418
600482
600486
600497
600498
600499
600516
600521
600529
600535
600549
600563
600637
600655
600667
600699
600704
600754
600755
600763
600765
600771
600816
600820
600839
600859
600862
600867
600879
600884
600885
600895
600959
600988
600998
601058
601155
601168
601216
601233
601615
601636
601677
601727
601866
601880
601966
603000
603077
603129
603156
603236
603290
603444
603456
603486
603568
603588
603596
603605
603606
603613
603650
603688
603737
603816
603882
603885
603939
605358
688002
688005
688063
688072
688099
688122
688169
688188
688390
688536
688598
688617
688777
688001
688019
688037
688047
688048
688052
688106
688107
688110
688120
688123
688141
688146
688153
688172
688200
688213
688220
688234
688249
688279
688347
688352
688361
688362
688385
688409
688432
688484
688498
688521
688525
688582
688596
688608
688702
688728
688798
688349
688472
688506

View File

@ -0,0 +1,11 @@
689009
688981
688517
000001
000002
000001
688353
688093
688303
000063
000100

101
src/get_futu_rehb.py Normal file
View File

@ -0,0 +1,101 @@
"""
Script Name:
Description: 从富途获取复权因子数据。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/get-rehab.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import logging
import pymysql
import time
from futu import *
import pandas as pd
import config
# 设置日志
config.setup_logging()
# 连接 MySQL 数据库
def get_mysql_connection():
    """Open and return a new MySQL connection using the shared config."""
    conn = pymysql.connect(**config.db_config)
    return conn
# 获取股票代码列表
def get_stock_codes(table_name):
    """Return all (code, code_name) rows from *table_name* as dict rows.

    BUG FIX: the connection is now closed in a ``finally`` block, so it no
    longer leaks when the query raises.
    """
    connection = get_mysql_connection()
    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            cursor.execute(f"SELECT code, code_name FROM {table_name} ")
            result = cursor.fetchall()
    finally:
        connection.close()
    return result
# 插入或更新复权信息
def insert_or_update_rehab_data(connection, rehab_data, code, name):
    """Upsert Futu rehab (adjustment-factor) rows for one stock into futu_rehab."""
    try:
        with connection.cursor() as cursor:
            sql = """
            INSERT INTO futu_rehab (code, name, ex_div_date, forward_adj_factorA, forward_adj_factorB, backward_adj_factorA, backward_adj_factorB)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                forward_adj_factorA = VALUES(forward_adj_factorA),
                forward_adj_factorB = VALUES(forward_adj_factorB),
                backward_adj_factorA = VALUES(backward_adj_factorA),
                backward_adj_factorB = VALUES(backward_adj_factorB)
            """
            for rec in rehab_data.itertuples(index=False):
                params = (code, name, rec.ex_div_date, rec.forward_adj_factorA, rec.forward_adj_factorB, rec.backward_adj_factorA, rec.backward_adj_factorB)
                cursor.execute(sql, params)
            connection.commit()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while inserting or updating rehab data: {e}", exc_info=True)
# 从 Futu API 获取复权信息
def get_rehab_data(code):
    """Fetch rehab (adjustment-factor) data for *code* from the Futu API.

    Returns the DataFrame on success, or None (after logging) on failure.
    """
    ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    ret, data = ctx.get_rehab(code)
    ctx.close()
    if ret != RET_OK:
        logging.error(f"Failed to get rehab data for {code}: {data}")
        return None
    return data
# 主函数
def process_stock_data(table_name, prefix=''):
    """Pull rehab data for every stock in *table_name* and upsert it into MySQL.

    ``prefix`` is prepended to each code (e.g. 'US.' for sp500 symbols).
    """
    stocks = get_stock_codes(table_name)
    connection = get_mysql_connection()
    for stock in stocks:
        code, name = stock['code'], stock['code_name']
        full_code = f"{prefix}{code}" if prefix else code
        logging.info(f"Processing {full_code} ({name})")
        rehab_data = get_rehab_data(full_code)
        if rehab_data is not None:
            insert_or_update_rehab_data(connection, rehab_data, full_code, name)
            logging.info(f"Inserted/Updated rehab data for {full_code} ({name})")
        # Throttle API calls.
        time.sleep(3)
    connection.close()
if __name__ == "__main__":
    # hs300 codes are used as-is (no market prefix).
    process_stock_data("hs300")
    # sp500 codes need the 'US.' market prefix for the Futu API.
    process_stock_data("sp500", prefix='US.')

242
src/get_his_kline_em.py Normal file
View File

@ -0,0 +1,242 @@
import time
import logging
import pandas as pd
import os
import sys
import config
import crawling.stock_hist_em as his_em
file_selected_codes = './cursor/his_kline_em_codes.txt'  # codes to fetch, one per line
file_done_codes = './cursor/his_kline_em_done_codes.txt'  # codes already fetched, one per line
dir_his_kline_em = '../data/his_kline_em'  # output root for the per-stock CSV files
config.setup_logging()
# 刷新代码列表,并返回
def flush_code_map():
    """Refresh the code -> market-id map from Eastmoney, print and return it."""
    mapping = his_em.code_id_map_em()
    print(mapping)
    return mapping
# 获取历史K线如果失败就重试
def fetch_with_retry(code: str, adjust: str = '', max_retries: int = 20) -> pd.DataFrame:
    """Fetch daily history K-lines for *code*, retrying on failure.

    :param code: stock symbol understood by stock_zh_a_hist
    :param adjust: '' (raw), 'qfq' (forward-adjusted) or 'hfq' (backward-adjusted)
    :param max_retries: number of attempts before giving up
    :return: the history DataFrame, or an empty DataFrame if all attempts fail
    """
    for attempt in range(1, max_retries + 1):
        try:
            df = his_em.stock_zh_a_hist(
                symbol=code,
                period="daily",
                start_date="19000101",
                end_date="20241020",
                adjust=adjust,
            )
            if not df.empty:
                return df
            # Empty result: treat as a transient failure and retry.
            logging.warning(f'empty result, retrying. code: ({code}), attempt: {attempt}/{max_retries}')
        except Exception as e:
            # BUG FIX: the original swallowed exceptions silently; log them so
            # persistent failures are diagnosable.
            logging.warning(f'fetch failed, retrying. code: ({code}), attempt: {attempt}/{max_retries}, err: {e}')
        time.sleep(3)  # back off before the next attempt
    return pd.DataFrame()
# 检查子目录是否存在,不存在则创建
def create_directory_if_not_exists(dir_name):
    """Create *dir_name* (and missing parents); no-op if it already exists.

    BUG FIX: uses ``exist_ok=True`` so a concurrent creator cannot race us
    between the existence check and makedirs.
    """
    if not os.path.exists(dir_name):
        os.makedirs(dir_name, exist_ok=True)
        logging.info(f"Created directory: {dir_name}")
# 读取 code.txt 文件,并获取每个股票代码
def read_stock_codes(filename: str) -> list:
    """Read stock codes from *filename*, one per line; blank lines are skipped.

    :param filename: path to the code list file
    :return: list of code strings; empty list when the file does not exist
    """
    try:
        with open(filename, 'r') as f:
            return [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # BUG FIX: the original f-string had no placeholder and logged a
        # literal "(unknown)" instead of the actual path.
        logging.error(f"文件 {filename} 未找到。")
        return []
# 从文件获取指定的代码并拉取历史K线
def fetch_parts_by_codes():
    """Fetch history K-lines for the codes listed in ``file_selected_codes``.

    Codes recorded in ``file_done_codes`` are skipped; each fully successful
    code is appended there so interrupted runs can be resumed.
    """
    codes = read_stock_codes(file_selected_codes)
    if not codes:
        logging.error("没有找到有效的股票代码,程序终止。")
        return
    # Codes finished in previous runs are skipped below.
    done_codes = []
    if os.path.exists(file_done_codes):
        with open(file_done_codes, 'r', encoding='utf-8') as f:
            done_codes = [line.strip() for line in f]
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for key in codes:
        val = code_id_map_em_df.get(key)
        if key in done_codes:
            logging.info(f'Skipping already code. code: ({key})')
            continue
        if val is None:
            # BUG FIX: the original interpolated adjust_str here before it was
            # assigned, raising NameError for any unknown code.
            logging.error(f'cannot find stock code. code: ({key})')
            continue
        succ = True
        start_time = time.time()
        for adjust in adjust_values:
            adjust_str = adjust if adjust != '' else 'none'
            stock_zh_a_hist_df = fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.info(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
                succ = False
            else:
                # Dump the DataFrame to a per-stock, per-adjustment CSV.
                curr_dir = f'{dir_his_kline_em}/{val}_{adjust_str}'
                create_directory_if_not_exists(curr_dir)
                curr_file = f'{curr_dir}/{key}_{adjust_str}_his_data.csv'
                stock_zh_a_hist_df.to_csv(curr_file, index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), file: ({curr_file}) lines: ({lines})')
            time.sleep(5)
        end_time = time.time()
        elapsed_time = int(end_time - start_time)  # seconds spent on this code
        if succ:
            # Record completion so a re-run skips this code.
            with open(file_done_codes, 'a', encoding='utf-8') as done_list:
                done_list.write(f"{key}\n")
            logging.info(f"Downloaded and recorded: ({key}) total lines: {lines} time cost: {elapsed_time} s")
        time.sleep(10)
# 获取全量代码的历史K线
def fetch_all_by_codes():
    """Fetch history K-lines for every code in the Eastmoney code map.

    Codes recorded in ``file_done_codes`` are skipped; each fully successful
    code is appended there so interrupted runs can be resumed.
    """
    # Codes finished in previous runs are skipped below.
    done_codes = []
    if os.path.exists(file_done_codes):
        with open(file_done_codes, 'r', encoding='utf-8') as f:
            done_codes = [line.strip() for line in f]  # strip() drops newlines/whitespace
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for key, val in code_id_map_em_df.items():
        if key in done_codes:
            logging.info(f'Skipping already code. code: ({key})')
            continue
        succ = True
        start_time = time.time()  # wall-clock start for this code
        for adjust in adjust_values:
            adjust_str = adjust if adjust != '' else 'none'
            stock_zh_a_hist_df =fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.error(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
                succ = False
            else:
                # Dump the DataFrame to a per-stock, per-adjustment CSV.
                curr_dir = f'{dir_his_kline_em}/{val}_{adjust_str}'
                create_directory_if_not_exists(curr_dir)
                curr_file = f'{curr_dir}/{key}_{adjust_str}_his_data.csv'
                stock_zh_a_hist_df.to_csv(curr_file, index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), file: ({curr_file}) lines: ({lines})')
            time.sleep(5)
        end_time = time.time()
        elapsed_time = int(end_time - start_time)  # seconds spent on this code
        if succ:
            # Record completion so a re-run skips this code.
            with open(file_done_codes, 'a', encoding='utf-8') as done_list:
                done_list.write(f"{key}\n")
            logging.info(f"Downloaded and recorded: ({key}) total lines: {lines} time cost: {elapsed_time} s")
        time.sleep(10)
# 从文件获取指定的代码并拉取历史K线废弃
def fetch_parts():
    """Fetch K-lines for the codes in ``file_selected_codes`` (deprecated).

    Superseded by fetch_parts_by_codes, which adds resume support.
    """
    # Read the list of stock codes to fetch.
    codes = read_stock_codes(file_selected_codes)
    # Stop if there is nothing to fetch.
    if not codes:
        logging.error("没有找到有效的股票代码,程序终止。")
        return
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for adjust in adjust_values:
        adjust_str = adjust if adjust != '' else 'none'
        for key in codes:
            val = code_id_map_em_df.get(key)
            if val is None:
                logging.error(f'cannot find stock code. code: ({key}), adjust: ({adjust_str})')
                continue
            stock_zh_a_hist_df =fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.info(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
            else:
                # Dump the DataFrame to a per-stock, per-adjustment CSV.
                stock_zh_a_hist_df.to_csv(f'../data/{val}/{key}_{adjust_str}_his_data.csv', index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), lines: ({lines})')
            time.sleep(5)
        time.sleep(10)
# 获取全量代码的历史K线废弃
def fetch_all():
    """Fetch K-lines for every code in the Eastmoney map (deprecated).

    Superseded by fetch_all_by_codes, which adds resume support.
    """
    adjust_values = ['', 'qfq', 'hfq']
    code_id_map_em_df = his_em.code_id_map_em()
    for adjust in adjust_values:
        adjust_str = adjust if adjust != '' else 'none'
        for key, val in code_id_map_em_df.items():
            stock_zh_a_hist_df =fetch_with_retry(key, adjust)
            if stock_zh_a_hist_df.empty:
                logging.error(f'fetch his data error. code: ({key}), adjust: ({adjust_str})')
            else:
                # Dump the DataFrame to a per-stock, per-adjustment CSV.
                stock_zh_a_hist_df.to_csv(f'../data/{val}/{key}_{adjust_str}_his_data.csv', index=False, encoding='utf-8')
                lines = stock_zh_a_hist_df.shape[0]
                logging.info(f'fetch his data and write to file. code: ({key}), adjust: ({adjust_str}), lines: ({lines})')
            time.sleep(5)
        time.sleep(10)
# 主函数
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python script.py <cmd>")
        print("cmd: all, parts")
        sys.exit(1)
    # Dispatch table mapping each command name to its fetcher.
    handlers = {
        "all": fetch_all_by_codes,      # fetch every known code
        "parts": fetch_parts_by_codes,  # fetch only the listed codes
        "all_other": fetch_all,         # legacy full fetch
        "parts_other": fetch_parts,     # legacy partial fetch
    }
    cmd = sys.argv[1]
    handler = handlers.get(cmd)
    if handler is None:
        print(f"Unknown command: {cmd}")
    else:
        handler()

160
src/get_hs300_his_kline.py Normal file
View File

@ -0,0 +1,160 @@
"""
Script Name:
Description: 从富途获取历史K线。通过不同变量可以获取不复权、前复权、后复权等数据。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/request-history-kline.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import time
import logging
from futu import *
from datetime import datetime, timedelta
import config
config.setup_logging()
# Connect to MySQL using the shared configuration.
connection = pymysql.connect(**config.db_config)
# Adjustment type: none (raw prices)
# selected_autype = AuType.NONE
# selected_table = "hs300_his_kline_none"
# Adjustment type: backward-adjusted (hfq) -- currently active
selected_autype = AuType.HFQ
selected_table = "hs300_his_kline_hfq"
# Adjustment type: forward-adjusted (qfq), the library default
# selected_autype = AuType.QFQ
# selected_table = "hs300_qfq_his"
# Default end of the fetch window: today.
end_date = datetime.now().strftime('%Y-%m-%d')
# Default start: ten years before today, plus one day.
start_date = (datetime.now() - timedelta(days=365*10-1)).strftime('%Y-%m-%d')
# 定义插入数据的函数
def insert_data(connection, data, s_table=selected_table):
try:
with connection.cursor() as cursor:
for index, row in data.iterrows():
sql = f"""
INSERT INTO {s_table} (code, name, time_key, open, close, high, low, pe_ratio, turnover_rate, volume, turnover, change_rate, last_close)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
open = VALUES(open),
close = VALUES(close),
high = VALUES(high),
low = VALUES(low),
pe_ratio = VALUES(pe_ratio),
turnover_rate = VALUES(turnover_rate),
volume = VALUES(volume),
turnover = VALUES(turnover),
change_rate = VALUES(change_rate),
last_close = VALUES(last_close)
"""
cursor.execute(sql, (
row['code'], row['name'], row['time_key'], row['open'], row['close'],
row['high'], row['low'], row['pe_ratio'], row['turnover_rate'],
row['volume'], row['turnover'], row['change_rate'], row['last_close']
))
connection.commit()
except pymysql.MySQLError as e:
logging.error(f"Error occurred while inserting data: {e}")
print(f"Error occurred while inserting data: {e}")
# 获取 hs300 表中的所有股票代码
def get_hs300_codes():
with connection.cursor() as cursor:
cursor.execute("SELECT code FROM hs300 ")
return cursor.fetchall()
def stat_growth(s_autype = selected_autype, s_table = selected_table, s_start = start_date, s_end = end_date):
    """Fetch paged history K-lines for every hs300 code and upsert into MySQL.

    :param s_autype: Futu adjustment type (NONE/QFQ/HFQ)
    :param s_table: target table for insert_data
    :param s_start: window start date, 'YYYY-MM-DD'
    :param s_end: window end date, 'YYYY-MM-DD'

    NOTE(review): closes the module-level MySQL ``connection`` in its finally
    block, so it can only be called once per process.
    """
    # Open the Futu quote connection.
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    try:
        hs300_codes = get_hs300_codes()
        for code_row in hs300_codes:
            code = code_row[0]  # extract the code from the DB row tuple
            # First page of history K-lines (up to 500 rows per request).
            ret, data, page_req_key = quote_ctx.request_history_kline(code, autype=s_autype, start=s_start, end=s_end, max_count=500)
            if ret == RET_OK:
                logging.info(f"成功获取 {code} 的第一页数据,共 {len(data)} 行")
                print(f"成功获取 {code} 的第一页数据,共 {len(data)} 行")
                # Store the first page.
                insert_data(connection, data, s_table)
            else:
                logging.error(f"获取 {code} 的数据失败: {data}")
                print(f"获取 {code} 的数据失败: {data}")
            # Keep paging until the API signals no more pages.
            while page_req_key is not None:
                time.sleep(1)  # brief pause between page requests
                ret, data, page_req_key = quote_ctx.request_history_kline(code, autype=s_autype, start=s_start, end=s_end, max_count=500, page_req_key=page_req_key)
                if ret == RET_OK:
                    logging.info(f"成功获取 {code} 的分页数据,共 {len(data)} 行")
                    print(f"成功获取 {code} 的分页数据,共 {len(data)} 行")
                    # Store this page.
                    insert_data(connection, data, s_table)
                else:
                    logging.error(f"分页数据获取失败: {data}")
                    print(f"分页数据获取失败: {data}")
            # Pause between stocks to avoid rate limiting.
            time.sleep(2)
    finally:
        quote_ctx.close()  # close the Futu connection
        connection.close()  # close the MySQL connection
def print_help():
    """Print the command-line usage for this script."""
    usage_lines = (
        "Usage: python script.py type end_date start_date",
        "type: qfq|hfq|none, default for none",
        'start_date: yyyy-mm-dd, default for end_date - 10 years ',
        'end_date: yyyy-mm-dd, default for current date ',
    )
    for line in usage_lines:
        print(line)
def main():
    """Parse CLI args (type, end_date, start_date) and run the fetch."""
    autype = selected_autype
    table = selected_table
    start = start_date
    end = end_date
    argc = len(sys.argv)
    if argc > 1:
        choice = sys.argv[1]
        if choice == 'none':
            autype = AuType.NONE
            table = "hs300_his_kline_none"
        elif choice == 'qfq':
            autype = AuType.QFQ
            table = "hs300_qfq_his"
        elif choice == 'hfq':
            autype = AuType.HFQ
            table = "hs300_his_kline_hfq"
        else:
            print_help()
            exit(1)
    if argc > 2:
        end = sys.argv[2]
    if argc > 3:
        start = sys.argv[3]
    print(f'fetching his kline... type: {autype}, table: {table}, start: {start}, end: {end}\n\n')
    stat_growth(autype, table, start, end)
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,78 @@
"""
Script Name:
Description: 从富途获取历史K线。通过不同变量可以获取不复权、前复权、后复权等数据。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/get-market-snapshot.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import pandas as pd
from futu import *
import logging
import config
# 设置日志
config.setup_logging()
logger = logging.getLogger()
# 数据库连接函数
def connect_to_db():
    """Open and return a MySQL connection from the shared configuration."""
    conn = pymysql.connect(**config.db_config)
    return conn
# 从sp300表中获取所有股票代码
def fetch_sp300_codes(connection):
query = "SELECT code FROM hs300"
return pd.read_sql(query, connection)
# 获取市场快照并保存到 CSV 文件
def get_market_snapshot_and_save_to_csv(stock_codes, output_file):
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
ret, data = quote_ctx.get_market_snapshot(stock_codes)
if ret == RET_OK:
logger.info(f"Successfully fetched market snapshot for {len(stock_codes)} codes.")
# 将数据写入CSV文件
data.to_csv(output_file, index=False)
logger.info(f"Snapshot data saved to {output_file}")
else:
logger.error(f"Error fetching market snapshot: {data}")
quote_ctx.close()
# 主函数
def main():
try:
# 连接数据库
connection = connect_to_db()
# 从 sp300 表中获取所有的股票代码
sp300_codes_df = fetch_sp300_codes(connection)
# 提取股票代码列表
stock_codes = sp300_codes_df['code'].tolist()
if not stock_codes:
logger.warning("No stock codes found in sp300 table.")
return
# 获取市场快照并保存到 CSV 文件
output_file = "./result/market_snapshot.csv"
get_market_snapshot_and_save_to_csv(stock_codes, output_file)
except Exception as e:
logger.error(f"An error occurred: {e}")
finally:
if connection:
connection.close()
if __name__ == "__main__":
main()

97
src/get_plat_list.py Normal file
View File

@ -0,0 +1,97 @@
"""
Script Name:
Description: 从富途获取板块列表。
参考地址: https://openapi.futunn.com/futu-api-doc/quote/get-plate-list.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import time
from futu import *
import pymysql
from datetime import datetime
import logging
import config
# Market name -> Futu Market enum.
market_mapping = {
    'US': Market.US,
    'HK': Market.HK,
    'SZ': Market.SZ,
    'SH': Market.SH
}
# Configure logging from the shared config.
config.setup_logging()
# Plate-set name -> Futu Plate enum.
plat_mapping = {
    'INDUSTRY': Plate.INDUSTRY,
    'ALL': Plate.ALL,
    'CONCEPT': Plate.CONCEPT
}
# Open the MySQL connection used by the whole script.
connection = pymysql.connect(**config.db_config)
# 定义插入或更新函数
def insert_or_update_data(connection, data, market, plat):
try:
with connection.cursor() as cursor:
for index, row in data.iterrows():
code = row['code']
plate_name = row['plate_name']
plate_id = row['plate_id']
up_date = datetime.now().strftime('%Y-%m-%d') # 当前日期
# MySQL 插入或更新语句
sql = """
INSERT INTO futu_plat_list (code, plate_name, plate_id, market, plat, up_date)
VALUES (%s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
plate_name = VALUES(plate_name),
plate_id = VALUES(plate_id),
market = VALUES(market),
up_date = VALUES(up_date)
"""
cursor.execute(sql, (code, plate_name, plate_id, market, plat, up_date))
# 提交事务
connection.commit()
except pymysql.MySQLError as e:
# 捕获 MySQL 错误并打印日志
print(f"Error occurred while inserting/updating data for market {market}, plat {plat}: {e}")
# 可根据需要记录到文件或其他日志工具
# Open the Futu quote connection.
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
try:
    # Iterate every (market, plate-set) combination.
    for market in market_mapping:
        for plat_name, plat_enum in plat_mapping.items():
            # Pull the plate list for this combination.
            ret, data = quote_ctx.get_plate_list(market_mapping[market], plat_enum)
            if ret == RET_OK:
                row_count = len(data)  # number of plate rows returned
                print(f"成功获取 {market} 市场的 {plat_name} 板块数据,共 {row_count} 行")
                # Upsert the rows into MySQL.
                insert_or_update_data(connection, data, market, plat_name)
            else:
                print(f"获取 {market} 市场的 {plat_name} 板块数据失败: {data}")
            # Sleep between requests to stay under rate limits.
            time.sleep(10)
finally:
    quote_ctx.close()  # close the Futu quote connection
    connection.close()  # close the MySQL connection

135
src/get_sp500_his_kline.py Normal file
View File

@ -0,0 +1,135 @@
"""
Script Name:
Description: 从yahoo获取美股股票的历史K线, 通过 auto_adjust 参数来控制是否获取前复权数据。默认为true, 如果设置为false, 那么结果中会自动带 adj Close.
参考地址: https://github.com/ranaroussi/yfinance
https://aroussi.com/post/python-yahoo-finance
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import yfinance as yf
import pymysql
import logging
import time
import sys
import os
from datetime import datetime
import config # 引入 config.py 中的配置
# Stocks listed less than 10 years ago: fetched with period="max" instead of "10y".
special_stock_codes = ('ABNB', 'CARR', 'CEG', 'GEHC', 'GEV', 'HUBB', 'KVUE', 'OTIS', 'PLTR', 'SOLV', 'VLTO')
# When True, yfinance returns forward-adjusted prices (auto_adjust=True).
kline_adjust = True
# Target table depends on whether prices are adjusted.
table_name = 'sp500_qfq_his_202410' if kline_adjust else 'sp500_his_kline_none'
# Use the shared logging configuration from config.py.
config.setup_logging()
logger = logging.getLogger()
# MySQL数据库连接
def connect_to_db():
try:
#return pymysql.connect(**config.db_config)
return pymysql.connect(
**config.db_config,
cursorclass=pymysql.cursors.DictCursor # 确保使用字典形式的游标
)
except pymysql.MySQLError as e:
logger.error(f"Error connecting to the database: {e}", exc_info=True)
return None
# 从MySQL读取sp500表中的股票代码和名称
def fetch_sp500_codes():
db = connect_to_db()
if db is None:
logger.error("Failed to connect to database.")
return []
try:
with db.cursor() as cursor:
cursor.execute("SELECT code_inner, code_name FROM sp500")
codes = cursor.fetchall()
return codes
finally:
db.close()
# 插入数据到指定表名
def insert_stock_data_to_db(data, code, name):
try:
db = connect_to_db()
if db is None:
return
with db.cursor() as cursor:
insert_query = f"""
INSERT INTO {table_name} (time_key, open, high, low, close, adj_close, volume, dividends, stock_splits, code, name)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
open = VALUES(open),
high = VALUES(high),
low = VALUES(low),
close = VALUES(close),
adj_close = VALUES(adj_close),
volume = VALUES(volume),
dividends = VALUES(dividends),
stock_splits = VALUES(stock_splits)
"""
# auto_adjust=True: Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
# auto_adjust=False: Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
for index, row in data.iterrows():
time_key = index.strftime('%Y-%m-%d %H:%M:%S')
# 判断 row['Adj Close'] 是否存在,若不存在则使用 0
adj_close = row['Adj Close'] if 'Adj Close' in row else 0
values = (time_key, row['Open'], row['High'], row['Low'], row['Close'], adj_close, row['Volume'], row['Dividends'], row['Stock Splits'], code, name)
cursor.execute(insert_query, values)
db.commit()
except pymysql.MySQLError as e:
logger.error(f"Error occurred while inserting data: {e}", exc_info=True)
finally:
if db:
db.close()
# 拉取股票的历史数据
def fetch_and_store_stock_data():
codes = fetch_sp500_codes()
for row in codes:
code_inner = row['code_inner']
code_name = row['code_name']
logger.info(f"Fetching data for {code_name} ({code_inner})...")
# 判断使用的时间段,特殊股票使用 max其他使用 10y
period = "max" if code_inner in special_stock_codes else "10y"
try:
stock = yf.Ticker(code_inner)
# 拉取股票历史数据,使用 kline_adjust 决定 auto_adjust 是否为 True
hist_data = stock.history(period=period, auto_adjust=kline_adjust)
if not hist_data.empty:
logger.info(f"Inserting data for {code_name} ({code_inner}) into {table_name}...")
insert_stock_data_to_db(hist_data, code_inner, code_name)
else:
logger.warning(f"No data found for {code_name} ({code_inner})")
# 每次请求完后休眠3秒
time.sleep(3)
except Exception as e:
logger.error(f"Error fetching data for {code_name} ({code_inner}): {e}", exc_info=True)
if __name__ == "__main__":
fetch_and_store_stock_data()

View File

@ -0,0 +1,9 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# Snapshot for one SH and one HK symbol.
ret, data = quote_ctx.get_market_snapshot(['SH.600000', 'HK.00700'])
if ret == RET_OK:
    print(data)
else:
    print('error:', data)
quote_ctx.close()  # close the connection so the quota is not exhausted

View File

@ -0,0 +1,62 @@
import pymysql
import pandas as pd
from futu import *
import logging
from config import db_config # 引用config.py中的数据库配置
# 设置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s')
logger = logging.getLogger()
# 数据库连接函数
def connect_to_db():
    """Open and return a MySQL connection from the shared db_config."""
    conn = pymysql.connect(**db_config)
    return conn
# 从sp300表中获取所有股票代码
def fetch_sp300_codes(connection):
query = "SELECT code FROM hs300"
return pd.read_sql(query, connection)
# 获取市场快照并保存到 CSV 文件
def get_market_snapshot_and_save_to_csv(stock_codes, output_file):
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
ret, data = quote_ctx.get_market_snapshot(stock_codes)
if ret == RET_OK:
logger.info(f"Successfully fetched market snapshot for {len(stock_codes)} codes.")
# 将数据写入CSV文件
data.to_csv(output_file, index=False)
logger.info(f"Snapshot data saved to {output_file}")
else:
logger.error(f"Error fetching market snapshot: {data}")
quote_ctx.close()
# 主函数
def main():
try:
# 连接数据库
connection = connect_to_db()
# 从 sp300 表中获取所有的股票代码
sp300_codes_df = fetch_sp300_codes(connection)
# 提取股票代码列表
stock_codes = sp300_codes_df['code'].tolist()
if not stock_codes:
logger.warning("No stock codes found in sp300 table.")
return
# 获取市场快照并保存到 CSV 文件
output_file = "market_snapshot.csv"
get_market_snapshot_and_save_to_csv(stock_codes, output_file)
except Exception as e:
logger.error(f"An error occurred: {e}")
finally:
if connection:
connection.close()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,18 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# First page: up to 50 rows per request.
ret, data, page_req_key = quote_ctx.request_history_kline('HK.00700', autype=AuType.NONE, start='2021-10-03', end='2021-11-08', max_count=50)
if ret == RET_OK:
    print(data)
    print(data['code'][0])  # stock code of the first row
    print(data['close'].values.tolist())  # first-page close prices as a list
else:
    print('error:', data)
while page_req_key != None:  # keep requesting until no more pages
    print('*************************************')
    ret, data, page_req_key = quote_ctx.request_history_kline('HK.00700', start='2024-04-11', end='2024-06-18', max_count=50, page_req_key=page_req_key)  # next page
    if ret == RET_OK:
        print(data)
    else:
        print('error:', data)
print('All pages are finished!')
quote_ctx.close()  # close the connection so the quota is not exhausted

View File

@ -0,0 +1,12 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# Fetch the user's watchlist ("全部" = the all-groups view; runtime string, unchanged).
ret, data = quote_ctx.get_user_security("全部")
if ret == RET_OK:
    print(data)
    if data.shape[0] > 0:  # watchlist not empty
        print(data['code'][0])  # first stock code
        print(data['code'].values.tolist())  # all codes as a list
else:
    print('error:', data)
quote_ctx.close()  # close the connection so the quota is not exhausted

View File

@ -0,0 +1,11 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# Fetch the HK concept-plate list.
ret, data = quote_ctx.get_plate_list(Market.HK, Plate.CONCEPT)
if ret == RET_OK:
    print(data)
    print(data['plate_name'][0])  # name of the first plate
    print(data['plate_name'].values.tolist())  # all plate names as a list
else:
    print('error:', data)
quote_ctx.close()  # close the connection so the quota is not exhausted

View File

@ -0,0 +1,11 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# Fetch the constituents of one plate.
ret, data = quote_ctx.get_plate_stock('SH.LIST3000005')
if ret == RET_OK:
    print(data)
    #print(data['stock_name'][0])  # name of the first stock
    #print(data['stock_name'].values.tolist())  # all stock names as a list
else:
    print('error:', data)
quote_ctx.close()  # close the connection so the quota is not exhausted

11
src/sample/get_rehab.py Normal file
View File

@ -0,0 +1,11 @@
from futu import *
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
# Fetch the adjustment-factor (rehab) records for one US symbol.
ret, data = quote_ctx.get_rehab("US.AAPL")
if ret == RET_OK:
    print(data)
    print(data['ex_div_date'][0])  # first ex-dividend date
    print(data['ex_div_date'].values.tolist())  # all ex-dividend dates as a list
else:
    print('error:', data)
quote_ctx.close()  # close the connection so the quota is not exhausted

View File

@ -0,0 +1,28 @@
import yfinance as yf
import pandas as pd
code = 'KDP'
# Get the ticker handle for the symbol.
stock = yf.Ticker(code)
# Last 10 years of daily bars, forward-adjusted.
hist_data = stock.history(period="10y", auto_adjust=True)
# BUG FIX: pct_change is a method; the original printed the bound method
# object instead of the yearly change values.
print(hist_data['Close'].resample('Y').last().pct_change())
# Show the first rows.
print(hist_data.head())
# Save the adjusted series to CSV.
hist_data.to_csv(f"{code}_10year_data_adjust.csv")
# Last 10 years of daily bars, unadjusted (includes 'Adj Close').
hist_data = stock.history(period="10y", auto_adjust=False)
# Show the first rows.
print(hist_data.head())
# Save the unadjusted series to CSV.
hist_data.to_csv(f"{code}_10year_data.csv")

View File

@ -0,0 +1,17 @@
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
# Today's date, formatted for yfinance.
end_date = datetime.today().strftime('%Y-%m-%d')
# Ten years before today.
start_date = (datetime.today() - timedelta(days=365*10)).strftime('%Y-%m-%d')
# Download daily AAPL bars for the window.
data = yf.download('AAPL', start=start_date, end=end_date)
# Persist the result to CSV.
data.to_csv('AAPL.csv')
print(f"Downloaded AAPL stock data from {start_date} to {end_date} and saved to AAPL.csv")

218
src/stat_adjust_kline.py Normal file
View File

@ -0,0 +1,218 @@
"""
Script Name: stat_adjust_kline.py
Description: 根据从数据源获取的历史K线计算股票的前复权和后复权值.
注意:
本程序只适合沪深300的前复权和后复权计算使用的是富途提供的不复权数据和复权因子。
处理sp500有问题yahoo 提供的不复权收据 + 富途提供的复权因子计算前复权与yahoo提供的前复权数据差别很大。
尚未找到原因所以目前只能用hs300
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import sys
import time
import config
import argparse
# Table-name routing per market: each market maps to its raw (unadjusted)
# K-line table, the output table for adjusted prices, and the table holding
# the Futu adjustment (rehab) factors.
tables_mapping = {
    'sp500': {
        'none_his_kline': 'sp500_his_kline_none',
        'adjust_his_kline': 'sp500_ajust_kline_202410',
        'rehab_table': 'futu_rehab'
    },
    'hs300': {
        'none_his_kline': 'hs300_his_kline_none',
        'adjust_his_kline': 'hs300_ajust_kline_202410',
        'rehab_table': 'futu_rehab'
    }
}
# Logging configuration comes from the project-wide config module.
config.setup_logging()
logger = logging.getLogger()
# MySQL数据库连接
def connect_to_db():
    """Open a MySQL connection using the shared project config.

    Cursors yield rows as dicts. Returns None when the connection attempt
    fails (the error is logged).
    """
    try:
        conn = pymysql.connect(**config.db_config,
                               cursorclass=pymysql.cursors.DictCursor)
    except pymysql.MySQLError as exc:
        logger.error(f"Error connecting to the database: {exc}", exc_info=True)
        return None
    return conn
# 从指定市场表中读取code和code_name字段
def fetch_codes_from_market_table(market, debug=False):
    """Return [{'code', 'code_name'}, ...] rows from the market's code table.

    In debug mode the query is narrowed to a fixed pair of tickers so a run
    finishes quickly. Returns [] when no DB connection can be made.
    """
    conn = connect_to_db()
    if conn is None:
        logger.error("Failed to connect to database.")
        return []
    try:
        sql = f"SELECT code, code_name FROM {market} "
        if debug:
            # Restrict to two known codes while debugging.
            sql += " where code in ('GE', 'WTW')"
        with conn.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        conn.close()
# 读取复权因子数据
def fetch_rehab_data(db, code):
    """Fetch adjustment factors for one code, newest ex-div date first.

    Returns [] when the query fails (the error is logged).
    """
    sql = f"SELECT ex_div_date, forward_adj_factorA, forward_adj_factorB, backward_adj_factorA, backward_adj_factorB FROM futu_rehab WHERE code = %s ORDER BY ex_div_date DESC"
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, (code,))
            rows = cursor.fetchall()
    except pymysql.MySQLError as exc:
        logger.error(f"Error fetching rehab data for {code}: {exc}", exc_info=True)
        return []
    return rows
# 读取不复权的股票价格数据
def fetch_kline_none_data(db, table_name, code):
    """Fetch unadjusted open/close rows for one code, oldest first.

    Returns [] when the query fails (the error is logged).
    """
    try:
        with db.cursor() as cursor:
            cursor.execute(f"SELECT code, time_key, open, close FROM {table_name} WHERE code = %s ORDER BY time_key ASC", (code,))
            rows = cursor.fetchall()
    except pymysql.MySQLError as exc:
        logger.error(f"Error fetching kline none data for {code}: {exc}", exc_info=True)
        return []
    return rows
# 插入前后复权价格到数据库
def insert_hfq_data(db, hfq_data, hfq_table):
    """Bulk-upsert adjusted price rows into `hfq_table`.

    Each row is (code, name, time_key, hfq_open, hfq_close, qfq_open,
    qfq_close, none_open, none_close); duplicate keys are updated in place.
    """
    try:
        with db.cursor() as cursor:
            insert_query = f"""
            INSERT INTO {hfq_table} (code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                hfq_open = VALUES(hfq_open),
                hfq_close = VALUES(hfq_close),
                qfq_open = VALUES(qfq_open),
                qfq_close = VALUES(qfq_close),
                none_open = VALUES(none_open),
                none_close = VALUES(none_close)
            """
            cursor.executemany(insert_query, hfq_data)
        db.commit()
    except pymysql.MySQLError as e:
        logger.error(f"Error inserting hfq data: {e}", exc_info=True)
        # BUG FIX: roll back the failed batch so the shared connection is not
        # left inside a dangling, half-applied transaction.
        db.rollback()
# 计算后复权价格和前复权价格
def calculate_hfq_qfq_price(market, debug=False):
    """Compute backward- (hfq) and forward- (qfq) adjusted open/close prices
    for every code of `market` from raw K-lines plus Futu rehab factors, and
    upsert them into the market's adjusted-kline table.

    :param market: key into tables_mapping ('sp500' or 'hs300').
    :param debug: forwarded to fetch_codes_from_market_table to limit codes.
    """
    db = connect_to_db()
    if db is None:
        return
    # Resolve the table names for this market.
    table_names = tables_mapping[market]
    none_his_kline_table = table_names['none_his_kline']
    adjust_kline_table = table_names['adjust_his_kline']
    # Fetch the stock codes.
    codes = fetch_codes_from_market_table(market, debug)
    isSP500 = True if market == 'sp500' else False
    for row in codes:
        code = row['code']
        name = row['code_name']
        # For the sp500 market the rehab table keys codes with a 'US.' prefix.
        rehab_code = code
        if isSP500:
            rehab_code = 'US.' + code
        logger.info(f"Processing {code} ({name})...")
        # Fetch the adjustment factors; without any, raw prices are used as-is.
        rehab_res = fetch_rehab_data(db, rehab_code)
        if not rehab_res:
            logger.warning(f"No rehab data found for {code}, use non close")
            rehab_res = list()
        # rehab_res is newest-first; the qfq pass needs oldest-first.
        rehab_res_asc = list(reversed(rehab_res))
        # Fetch the unadjusted price rows (oldest first).
        kline_none = fetch_kline_none_data(db, none_his_kline_table, code)
        if not kline_none:
            logger.warning(f"No kline none data found for {code}")
            continue
        hfq_data = []
        # For each raw bar, apply the factor chain to get hfq and qfq prices.
        for kline_row in kline_none:
            none_open = kline_row['open']
            none_close = kline_row['close']
            time_key = kline_row['time_key']
            # Factors are keyed by date, so compare on the date part only.
            time_key_date = time_key.date()
            # --- backward adjustment (hfq): apply factors on/before this bar,
            # iterating newest-first and compounding through tmp_*.
            hfq_open = none_open
            hfq_close = none_close
            tmp_close = none_close
            tmp_open = none_open
            for rehab_row in rehab_res:
                # Yahoo's raw data already reflects splits, so split entries
                # (factorA != 1) in the Futu table must be skipped for sp500.
                if isSP500 and rehab_row['backward_adj_factorA'] != 1 :
                    continue
                if rehab_row['ex_div_date'] <= time_key_date:
                    hfq_close = (tmp_close * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    hfq_open = (tmp_open * rehab_row['backward_adj_factorA']) + rehab_row['backward_adj_factorB']
                    tmp_close = hfq_close
                    tmp_open = hfq_open
            # --- forward adjustment (qfq): apply factors after this bar,
            # iterating oldest-first.
            qfq_close = none_close
            qfq_open = none_open
            tmp_close = none_close
            tmp_open = none_open
            for rehab_row in rehab_res_asc:
                # Same split/merge skip for sp500 as above.
                if isSP500 and rehab_row['backward_adj_factorA'] != 1 :
                    continue
                # Futu's US qfq formula ignores forward_adj_factorB:
                # https://openapi.futunn.com/futu-api-doc/qa/quote.html
                factorB = 0 if isSP500 else rehab_row['forward_adj_factorB']
                if rehab_row['ex_div_date'] > time_key_date:
                    qfq_close = (tmp_close * rehab_row['forward_adj_factorA']) + factorB
                    qfq_open = (tmp_open * rehab_row['forward_adj_factorA']) + factorB
                    tmp_close = qfq_close
                    tmp_open = qfq_open
            # Collect the adjusted row for the bulk upsert.
            hfq_data.append((code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close))
        # Upsert all adjusted rows for this code.
        insert_hfq_data(db, hfq_data, adjust_kline_table)
        logger.info(f"Inserted HFQ/QFQ data for {code} ({name})")
        time.sleep(1)
    db.close()
if __name__ == "__main__":
    # CLI entry point: pick the market, optionally limit work for debugging.
    arg_parser = argparse.ArgumentParser(description='Calculate HFQ and QFQ Prices for Market')
    arg_parser.add_argument('--market', type=str, default='hs300', help='Market to process (sp500 or hs300)')
    arg_parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
    cli_args = arg_parser.parse_args()
    calculate_hfq_qfq_price(cli_args.market, cli_args.debug)

271
src/stat_growth.py Normal file
View File

@ -0,0 +1,271 @@
"""
Script Name:
Description: 获取沪深300成分股的最新股价, 并计算年内涨幅, 924以来的涨幅, 市盈率, 股息率等。
需要调用futu的获取快照接口。
https://openapi.futunn.com/futu-api-doc/quote/get-market-snapshot.html
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import csv
import os
import config
import time
from datetime import datetime
from futu import OpenQuoteContext, RET_OK # Futu API client
# Initialize project-wide logging (handlers/format defined in config).
config.setup_logging()
# 1. 获取 hs300 表数据
def get_hs300_data():
    """Return all HS300 constituents as [{'code': ..., 'code_name': ...}, ...]."""
    conn = pymysql.connect(**config.db_config)
    try:
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute("SELECT code, code_name FROM hs300")
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
# 2. 批量获取市场快照
def get_market_snapshots(codes):
    """Pull Futu market snapshots for `codes` in batches of 350.

    Returns a flat list of snapshot record dicts; failed batches are logged
    and skipped.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)  # local FutuOpenD gateway
    records = []
    batch_size = 350
    for start in range(0, len(codes), batch_size):
        ret, data = quote_ctx.get_market_snapshot(codes[start:start + batch_size])
        if ret != RET_OK:
            logging.error(f"获取市场快照失败: {data}")
            continue
        records.extend(data.to_dict('records'))
    quote_ctx.close()
    return records
# 3. 插入或更新 futu_market_snapshot 表
def insert_or_update_snapshot(snapshot_data):
conn = pymysql.connect(**config.db_config)
cursor = conn.cursor()
query = """
INSERT INTO futu_market_snapshot (
code, name, update_time, last_price, open_price, high_price, low_price, prev_close_price,
volume, turnover, turnover_rate, suspension, listing_date, equity_valid, issued_shares,
total_market_val, net_asset, net_profit, earning_per_share, outstanding_shares, net_asset_per_share,
circular_market_val, ey_ratio, pe_ratio, pb_ratio, pe_ttm_ratio, dividend_ttm, dividend_ratio_ttm,
dividend_lfy, dividend_lfy_ratio
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
last_price = VALUES(last_price), open_price = VALUES(open_price), high_price = VALUES(high_price),
low_price = VALUES(low_price), prev_close_price = VALUES(prev_close_price), volume = VALUES(volume),
turnover = VALUES(turnover), turnover_rate = VALUES(turnover_rate), suspension = VALUES(suspension),
issued_shares = VALUES(issued_shares), total_market_val = VALUES(total_market_val), net_asset = VALUES(net_asset),
net_profit = VALUES(net_profit), earning_per_share = VALUES(earning_per_share), outstanding_shares = VALUES(outstanding_shares),
net_asset_per_share = VALUES(net_asset_per_share), circular_market_val = VALUES(circular_market_val),
ey_ratio = VALUES(ey_ratio), pe_ratio = VALUES(pe_ratio), pb_ratio = VALUES(pb_ratio),
pe_ttm_ratio = VALUES(pe_ttm_ratio), dividend_ttm = VALUES(dividend_ttm), dividend_ratio_ttm = VALUES(dividend_ratio_ttm),
dividend_lfy = VALUES(dividend_lfy), dividend_lfy_ratio = VALUES(dividend_lfy_ratio)
"""
try:
for record in snapshot_data:
cursor.execute(query, (
record['code'], record['name'], record['update_time'], record['last_price'], record['open_price'],
record['high_price'], record['low_price'], record['prev_close_price'], record['volume'], record['turnover'],
record['turnover_rate'], record['suspension'], record['listing_date'], record['equity_valid'], record['issued_shares'],
record['total_market_val'], record['net_asset'], record['net_profit'], record['earning_per_share'], record['outstanding_shares'],
record['net_asset_per_share'], record['circular_market_val'], record['ey_ratio'], record['pe_ratio'], record['pb_ratio'],
record['pe_ttm_ratio'], record['dividend_ttm'], record['dividend_ratio_ttm'], record['dividend_lfy'], record['dividend_lfy_ratio']
))
conn.commit()
except Exception as e:
logging.error(f"插入或更新快照数据时出错: {e}")
conn.rollback()
finally:
cursor.close()
conn.close()
def process_snapshot_data():
    """Fetch snapshots for every HS300 constituent and persist them."""
    constituents = get_hs300_data()
    code_list = [row['code'] for row in constituents]
    # Pull snapshots in batches, then upsert into the database.
    snapshots = get_market_snapshots(code_list)
    insert_or_update_snapshot(snapshots)
    logging.info(f"Successfully get market snapshots and write to db.")
def write_to_csv(file_path, fieldnames, data):
    """
    Writes data to a CSV file, overwriting any existing file.

    :param file_path: Path to the CSV file.
    :param fieldnames: A list of field names (headers) for the CSV file.
    :param data: A list of dictionaries where each dictionary represents a row.
    :raises Exception: re-raises any I/O or csv error after logging it.
    """
    try:
        # FIX: the old code computed an unused `file_exists` flag and carried a
        # comment claiming "append mode" while opening with 'w+'. The function
        # always writes headers and never reads, so plain 'w' is correct.
        # newline='' lets the csv module control line endings (per csv docs).
        with open(file_path, mode='w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        logging.info(f"Successfully wrote data to {file_path}")
    except Exception as e:
        logging.error(f"Error while writing to CSV file {file_path}: {str(e)}")
        raise
def calculate_yield():
    """Compute per-stock returns (YTD, since 2024-09-23 and 2024-10-08) and
    valuation percentiles for every HS300 constituent, then write one dated
    CSV under ../result/. All closes are forward-adjusted before comparison."""
    conn = pymysql.connect(**config.db_config)
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        hs300_data = get_hs300_data()
        results = []
        for item in hs300_data:
            code = item['code']
            name = item['code_name']
            # 1. Latest snapshot row, aliased so it looks like a K-line row.
            cursor.execute("SELECT * FROM futu_market_snapshot WHERE code=%s ORDER BY update_time DESC LIMIT 1", (code,))
            row1 = cursor.fetchone()
            row1['close'] = row1['last_price']
            row1['time_key'] = row1['update_time']
            # 2. Last trading day before 2024-01-01 (start-of-year close).
            cursor.execute("SELECT * FROM hs300_his_kline_none WHERE code=%s AND time_key<'2024-01-01' ORDER BY time_key DESC LIMIT 1", (code,))
            row2 = cursor.fetchone()
            # 3. Last trading day before 2024-09-24.
            cursor.execute("SELECT * FROM hs300_his_kline_none WHERE code=%s AND time_key<'2024-09-24' ORDER BY time_key DESC LIMIT 1", (code,))
            row3 = cursor.fetchone()
            # 3.1 Highest forward-adjusted close since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY close DESC LIMIT 1", (code,))
            row4 = cursor.fetchone()
            # 3.2 Lowest forward-adjusted close since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY close ASC LIMIT 1", (code,))
            row5 = cursor.fetchone()
            # 3.4 Highest PE since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY pe_ratio DESC LIMIT 1", (code,))
            row6 = cursor.fetchone()
            # 3.5 Lowest PE since 2021-01-01.
            cursor.execute("SELECT * FROM hs300_qfq_his WHERE code=%s AND time_key>='2021-01-01' ORDER BY pe_ratio ASC LIMIT 1", (code,))
            row7 = cursor.fetchone()
            # 3.6 Close as of 2024-10-08 (last trading day before 10-09).
            cursor.execute("SELECT * FROM hs300_his_kline_none WHERE code=%s AND time_key<'2024-10-09' ORDER BY time_key DESC LIMIT 1", (code,))
            row8 = cursor.fetchone()
            # 4. Adjustment factors, oldest first (order matters below).
            cursor.execute("SELECT * FROM futu_rehab WHERE code=%s ORDER BY ex_div_date ASC", (code,))
            rehab_res = cursor.fetchall()
            # 5. Forward-adjust the raw closes so all rows are comparable.
            for row in [row1, row2, row3, row8]:
                qfq_close = row['close']
                time_key_date = row['time_key'].date()  # datetime -> date
                for rehab in rehab_res:
                    if rehab['ex_div_date'] > time_key_date:
                        qfq_close = (qfq_close * rehab['forward_adj_factorA']) + rehab['forward_adj_factorB']
                row['qfq_close'] = qfq_close
            # 6. Returns relative to each anchor date.
            year_yield = row1['qfq_close'] / row2['qfq_close'] - 1 if row1 and row2 else None
            yield_0924 = row1['qfq_close'] / row3['qfq_close'] - 1 if row3 and row1 else None
            yield_1008 = row1['qfq_close'] / row8['qfq_close'] - 1 if row8 and row1 else None
            # 6.1 Current price/PE as a fraction of the post-2021 extremes.
            max_price_pct = row1['qfq_close'] / row4['close'] if row1 and row4 and row4['close'] !=0 else None
            max_price_pe_pct = row1['pe_ttm_ratio'] / row4['pe_ratio'] if row1 and row4 and row4['pe_ratio'] !=0 else None
            min_price_pct = row1['qfq_close'] / row5['close'] if row1 and row5 and row5['close'] !=0 else None
            min_price_pe_pct = row1['pe_ttm_ratio'] / row5['pe_ratio'] if row1 and row5 and row5['pe_ratio'] !=0 else None
            max_pe_pct = row1['pe_ttm_ratio'] / row6['pe_ratio'] if row1 and row6 and row6['pe_ratio'] !=0 else None
            min_pe_pct = row1['pe_ttm_ratio'] / row7['pe_ratio'] if row1 and row7 and row7['pe_ratio'] !=0 else None
            # 7. Collect one result row (keys must match `fieldnames` below).
            result = {
                'code': code,
                'name': name,
                'year_begin_date': row2['time_key'].date(),
                'year_begin_close': round(row2['qfq_close'], 4),
                '0924_date': row3['time_key'].date(),
                '0924_close': round(row3['qfq_close'],4),
                '1008_date': row8['time_key'].date(),
                '1008_close': round(row8['qfq_close'], 4),
                'max_price_date': row4['time_key'].date(),
                'max_price': round(row4['close'], 4),
                'max_price_pe': row4['pe_ratio'],
                'max_pe_date': row6['time_key'].date(),
                'max_pe': row6['pe_ratio'],
                'min_price_date': row5['time_key'].date(),
                'min_price': round(row5['close'], 4),
                'min_price_pe': row5['pe_ratio'],
                'min_pe_date': row7['time_key'].date(),
                'min_pe': row7['pe_ratio'],
                'latest_date': row1['time_key'].date(),
                'latest_close': round(row1['qfq_close'], 4),
                'year_yield': round(year_yield, 4),
                'yield_0924': round(yield_0924, 4),
                'yield_1008': round(yield_1008, 4),
                'total_market_val': row1.get('total_market_val', None),
                'pe_ttm_ratio': row1.get('pe_ttm_ratio', None),
                'dividend_ratio_ttm': row1.get('dividend_ratio_ttm', None),
                'dividend_lfy_ratio': row1.get('dividend_lfy_ratio', None),
                'max_price_pct': max_price_pct,
                'max_price_pe_pct': max_price_pe_pct,
                'min_price_pct': min_price_pct,
                'min_price_pe_pct': min_price_pe_pct,
                'max_pe_pct': max_pe_pct,
                'min_pe_pct': min_pe_pct,
                #'price_pe': round(max_price_pct - max_price_pe_pct,4)
            }
            results.append(result)
            logging.info(f"{result}")
            time.sleep(0.1)
        # Write the CSV, named with today's yyyymmdd stamp.
        current_date = datetime.now()
        date_string = current_date.strftime('%Y%m%d')
        fieldnames = ['code', 'name', 'year_begin_date', 'year_begin_close', '0924_date', '0924_close', '1008_date', '1008_close',
                      'max_price_date', 'max_price', 'max_price_pe', 'max_pe_date', 'max_pe', 'min_price_date', 'min_price', 'min_price_pe', 'min_pe_date', 'min_pe',
                      'latest_date', 'latest_close', 'year_yield', 'yield_0924', 'yield_1008', 'total_market_val', 'pe_ttm_ratio', 'dividend_ratio_ttm', 'dividend_lfy_ratio',
                      'max_price_pct', 'max_price_pe_pct', 'min_price_pct', 'min_price_pe_pct', 'max_pe_pct', 'min_pe_pct'
                      ]
        write_to_csv(f'../result/stat_growth{date_string}.csv', fieldnames, results)
    except Exception as e:
        logging.error(f"计算收益率时出错: {e}")
    finally:
        cursor.close()
        conn.close()
# Entry point: refresh the snapshot table first, then compute the yields CSV.
if __name__ == "__main__":
    process_snapshot_data()
    calculate_yield()

349
src/stat_growth_em.py Normal file
View File

@ -0,0 +1,349 @@
"""
Script Name:
Description: 获取沪深300成分股的最新股价, 并计算年内涨幅, 924以来的涨幅, 市盈率, 股息率等。
调用em历史数据接口。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import csv
import os
import config
import time
from datetime import datetime
from futu import OpenQuoteContext, RET_OK # Futu API client
from futu import *
import crawling.stock_hist_em as his_em
import argparse
# Initialize project-wide logging.
config.setup_logging()
# Date stamps used in output file names, and the output directory.
current_date = datetime.now().strftime("%Y%m%d")
current_year = datetime.now().strftime("%Y")
res_dir = config.global_stock_data_dir
# 刷新代码列表,并返回
def flush_code_map():
    """Refresh Eastmoney's code→market-id map, print it, and return it."""
    code_map = his_em.code_id_map_em()
    print(code_map)
    return code_map
# 获取历史K线如果失败就重试
def fetch_with_retry(code: str, s_date, e_date, adjust: str = '', max_retries: int = 3) -> pd.DataFrame:
    """Fetch daily history for `code` from Eastmoney with retries.

    :param code: bare stock code (no market prefix).
    :param s_date: start date, yyyymmdd string.
    :param e_date: end date, yyyymmdd string.
    :param adjust: '' (raw), 'qfq' (forward-adjusted) or 'hfq' (backward-adjusted).
    :param max_retries: attempts before giving up.
    :return: the history DataFrame, or an empty DataFrame on failure.
    """
    retries = 0
    while retries < max_retries:
        try:
            df = his_em.stock_zh_a_hist(
                symbol=code,
                period="daily",
                start_date=s_date,
                end_date=e_date,
                adjust=adjust,
            )
            if df.empty:
                # Empty payloads happen transiently; retry after a pause.
                logging.info(f'{code} empty data. retry...')
                retries += 1
                time.sleep(3)
            else:
                return df
        except Exception as e:
            # BUG FIX: the exception used to be swallowed silently, making
            # persistent fetch failures undiagnosable. Log it before retrying.
            logging.warning(f'{code} fetch failed: {e}. retry...')
            retries += 1
            time.sleep(3)
    return pd.DataFrame()
# 获取所有市场的当年股价快照,带重试机制。
def fetch_snap_all(max_retries: int = 3) -> pd.DataFrame:
    """Fetch current-day price snapshots for all covered markets (A-share,
    HK, US) from Eastmoney, retrying each market up to `max_retries` times.
    Markets that still fail after the retries are logged and skipped.

    :return: one concatenated DataFrame across the markets that succeeded.
    """
    # Eastmoney `fs` filter expressions identifying each market's universe.
    market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
                 "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
                 "us": "m:105,m:106,m:107"}
    result = pd.DataFrame()
    for market_id, fs in market_fs.items():
        retries = 0
        while retries < max_retries:
            try:
                df = his_em.stock_zh_a_spot_em(fs)
                # Empty payloads are treated as transient; retry after 3s.
                if df.empty:
                    logging.warning(f'{market_id} empty data. retry...')
                    retries += 1
                    time.sleep(3)  # pause before the next attempt
                else:
                    print(f'get {market_id} stock snapshot. stock count: {len(df)}')
                    result = pd.concat([result, df], ignore_index=True)
                    break
            except Exception as e:
                retries += 1
                time.sleep(3)  # pause before the next attempt
        if retries >= max_retries:
            logging.warning(f'{market_id} fetching error.')
    return result
# 从数据库中读取指定指数的成分股
def load_index_codes():
    """Return constituent codes for the configured CN/HK/US indices.

    Each code is prefixed with its index code ('000300-600519', ...), so a
    stock appearing in several indices yields one row per index.
    """
    conn = pymysql.connect(**config.db_config)
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    # CSI 300 only:
    #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000300' ")
    # CSI A500 only:
    #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000510' ")
    # Union of CSI 300 and CSI A500, de-duplicated:
    #cursor.execute("SELECT DISTINCT CONCAT('index-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ")
    # Active query: CSI 300 plus CSI A500, keeping per-index duplicates.
    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ")
    # Wider CN index set (A50, STAR chips, STAR/ChiNext 50), duplicates kept:
    #cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510', '930050', '000685', '931643') ")
    hs300_data = cursor.fetchall()
    # HK: HSCEI and Hang Seng TECH constituents.
    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hk where index_code IN ('HSCEI', 'HSTECH') ")
    hk_data = cursor.fetchall()
    # US: China-concept stocks listed in the US.
    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_us where index_code IN ('CN_US') ")
    us_data = cursor.fetchall()
    cursor.close()
    conn.close()
    return hs300_data + hk_data + us_data
# 读取富途自选股的指定分类股
def load_futu_all_codes():
    """Return the "全部" (All) Futu watchlist as [{'code', 'code_name'}, ...].

    Only entries whose stock_type is 'STOCK' are kept; ETFs etc. are dropped.
    """
    quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
    stock_data = []
    ret, data = quote_ctx.get_user_security("全部")
    if ret == RET_OK:
        if data.shape[0] > 0:  # watchlist is not empty
            stock_data = [{'code': row['code'], 'code_name': row['name']} for _, row in data.iterrows() if row['stock_type'] == 'STOCK']
            #stock_data = [{'code': row['code'], 'code_name': row['name']} for _, row in data.iterrows()]
    else:
        # BUG FIX: logging.error('error:', data) passed `data` as a %-format
        # argument with no placeholder in the message, which raises inside the
        # logging module. Use a proper lazy %s placeholder.
        logging.error('error: %s', data)
    quote_ctx.close()  # close so the connection quota is not exhausted
    return stock_data
# 获取特定的行
def get_specific_date_row(data, date):
    """Return the first row whose '日期' field equals `date`, else None."""
    return next((row for row in data if row['日期'] == date), None)
# 获取股票数据,并统计收益率
def calculate_stock_statistics(market, code, code_name):
    """Compute one stock's interval statistics from its qfq daily history.

    Returns a flat list ordered to match write_to_csv's header (market, code,
    name, anchor-date closes, extremes, relative returns), or None when data
    is missing or a computation fails.
    """
    try:
        # Previous calendar year, used to locate the start-of-year close.
        last_year = datetime.now().year - 1
        last_year_str = str(last_year)
        # Forward-adjusted daily history since 2021-01-01.
        data = fetch_with_retry(code, "20210101", current_date, 'qfq')
        if data.empty:
            logging.warning(f'{code}, {code_name} has no data. skipping...')
            return None
        # Latest trading-day row.
        current_row = data.loc[data['日期'].idxmax()]
        # Fallback row (earliest available bar) used when an anchor date is missing.
        defaut_row = data.loc[data['日期'].idxmin()]
        # Start-of-year price = last trading day of the previous year.
        year_data = data[data['日期'].str.startswith(last_year_str)]
        if year_data.empty:
            logging.warning(f"{code}, {code_name} 未找到上一年的数据 ({last_year_str}), 以 {defaut_row['日期']} 的数据来代替")
            year_begin_row = defaut_row
        else:
            year_begin_row = year_data.loc[year_data['日期'].idxmax()]
        # Close on 2024-09-23.
        try:
            row_0923 = data[data['日期'] == '2024-09-23'].iloc[0]
        except IndexError:
            logging.warning(f"{code}, {code_name} 未找到0923的数据, 以 {defaut_row['日期']} 的数据来代替")
            row_0923 = defaut_row
        # Close on 2024-09-30.
        try:
            row_0930 = data[data['日期'] == '2024-09-30'].iloc[0]
        except IndexError:
            logging.warning(f"{code}, {code_name} 未找到0930的数据, 以 {defaut_row['日期']} 的数据来代替")
            row_0930 = defaut_row
        # Open and close on 2024-10-08.
        try:
            row_1008 = data[data['日期'] == '2024-10-08'].iloc[0]
        except IndexError:
            logging.warning(f"{code}, {code_name} 未找到1008的数据, 以 {defaut_row['日期']} 的数据来代替")
            row_1008 = defaut_row
        # Highest close since 2021.
        max_close_row = data.loc[data['收盘'].idxmax()]
        # Lowest close since 2021.
        min_close_row = data.loc[data['收盘'].idxmin()]
        # Year-to-date extremes.
        year_data = data[data['日期'].str.startswith(current_year)]
        if year_data.empty:
            logging.warning(f"{code}, {code_name} 未找到年内的数据, 以 {defaut_row['日期']} 的数据来代替")
            year_min_row = defaut_row
            year_max_row = defaut_row
        else:
            year_min_row = year_data.loc[year_data['收盘'].idxmin()]
            year_max_row = year_data.loc[year_data['收盘'].idxmax()]
        # Relative returns and amplitudes.
        try:
            year_increase = (current_row['收盘'] / year_begin_row['收盘'] - 1)
            growth_0923 = (current_row['收盘'] / row_0923['收盘'] - 1)
            growth_0930 = (current_row['收盘'] / row_0930['收盘'] - 1)
            growth_1008 = (current_row['收盘'] / row_1008['收盘'] - 1)
            growth_1008_open = (current_row['收盘'] / row_1008['开盘'] - 1)
            year_amplitude = (year_max_row['收盘'] / year_min_row['收盘'] - 1)
            max_amplitude = (max_close_row['收盘'] / min_close_row['收盘'] - 1)
            stock_recovery = (current_row['收盘'] / max_close_row['收盘'] - 1)
        except ZeroDivisionError:
            logging.error(f"股票 {code} 计算时遇到除零错误")
            return None
        # Assemble the output row; order must match write_to_csv's header.
        result = [
            market,
            code,
            code_name,
            current_row['日期'], current_row['收盘'],
            year_begin_row['日期'], year_begin_row['收盘'],
            row_0923['日期'], row_0923['收盘'] ,
            row_0930['日期'], row_0930['收盘'] ,
            row_1008['日期'], row_1008['开盘'] ,row_1008['收盘'] ,
            max_close_row['日期'], max_close_row['收盘'],
            min_close_row['日期'], min_close_row['收盘'],
            year_max_row['日期'], year_max_row['收盘'],
            year_min_row['日期'], year_min_row['收盘'],
            year_increase,
            growth_0923 if growth_0923 is not None else 'N/A',
            growth_0930 if growth_0930 is not None else 'N/A',
            growth_1008 if growth_1008 is not None else 'N/A',
            growth_1008_open if growth_1008_open is not None else 'N/A',
            year_amplitude,
            max_amplitude,
            stock_recovery
        ]
        return result
    except Exception as e:
        logging.error(f"处理股票 {code} 时出错: {e}")
        return None
# 写入到文件中
def write_to_csv(results, filename):
    """Write the per-stock statistic rows to `filename` with a fixed header."""
    header = [
        "股市", "股票代码", "股票名称", "当前日期", "当前收盘", "年初日期", "年初收盘",
        "0923日期", "0923收盘", "0930日期", "0930收盘", "1008日期", "1008开盘", "1008收盘",
        "最高日期", "最高收盘", "最低日期", "最低收盘", "年内最高日期", "年内最高收盘", "年内最低日期", "年内最低收盘", "年内涨幅",
        "相比0923收盘价涨幅", "相比0930收盘价涨幅", "相比1008收盘价涨幅", "相比1008开盘价涨幅",
        "年内振幅", "最大振幅", "股价自最高点恢复", "市盈率TTM", "市净率", "总市值"
    ]
    try:
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(header)
            writer.writerows(results)
    except Exception as e:
        logging.error(f"写入CSV文件时出错: {e}")
# 主函数,执行逻辑
def main(list, debug):
    """Build the statistics CSV for the chosen stock list.

    :param list: 'futu' (watchlist), 'index' (index constituents) or 'all'.
    :param debug: stop after the first processed stock and suffix the output.
    """
    futu_codes = []
    index_codes = []
    if list == 'futu':
        futu_codes = load_futu_all_codes()
    elif list == 'all':
        futu_codes = load_futu_all_codes()
        index_codes = load_index_codes()
    else:
        index_codes = load_index_codes()
    codes = futu_codes + index_codes
    all_results = []
    # Fetch the market-wide snapshot once and archive it to CSV.
    snap_data = fetch_snap_all()
    if snap_data.empty:
        logging.error(f"fetching snapshot data error!")
        return
    file_name = f'{res_dir}/snapshot_em_{current_date}.csv'
    snap_data.to_csv(file_name, index=False, encoding='utf-8')
    logging.info(f"市场快照数据已经写入 CSV 文件 {file_name}\n\n")
    for item in codes:
        code = item['code']
        code_name = item['code_name']
        # Strip the 'market.' (or 'index_code-…') prefix from the code.
        try:
            market, clean_code = code.split(".")
        except ValueError:
            logging.error(f"wrong format code: {code}")
            # BUG FIX: without this `continue`, the loop fell through and used
            # `market`/`clean_code` left over from the previous iteration (or
            # hit NameError on the first malformed code).
            continue
        logging.info(f"正在处理股票 {market}.{clean_code}, {code_name}...")
        result = calculate_stock_statistics(market, clean_code, code_name)
        if result:
            # Enrich with valuation columns from the snapshot, when present.
            match = snap_data.loc[snap_data['代码'] == clean_code]
            if not match.empty:
                result.append(match['市盈率TTM'].iloc[0])
                result.append(match['市净率'].iloc[0])
                result.append(match['总市值'].iloc[0])
            else:
                logging.warning(f'{market}.{clean_code} has no snapshot data.')
            all_results.append(result)
        if debug:
            break
    if all_results:
        file_name = f'{res_dir}/stock_statistics_{list}_{current_date}'
        if debug:
            file_name = f'{file_name}_debug'
        file_name = f'{file_name}.csv'
        write_to_csv(all_results, f'{file_name}')
        logging.info(f"统计结果已写入 CSV 文件 {file_name}")
    else:
        logging.warning("没有可写入的统计数据")
if __name__ == "__main__":
    # Parse command-line flags.
    parser = argparse.ArgumentParser(description='计算指定股票的区间收益率')
    parser.add_argument('--list', type=str, default='futu', help='Stocklist to process (futu , index, all)')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode (limit records)')
    args = parser.parse_args()
    # Run the pipeline; enable flush_code_map() to refresh the code map first.
    #flush_code_map()
    main(args.list, args.debug)

View File

@ -0,0 +1,125 @@
"""
Script Name:
Description: 根据yahoo提供的不复权数据和分红及拆股数据来计算前复权和后复权数据。
注意:
结果对不上!
按照yahoo规则不复权数据已经处理了拆股所以只要把分红加上去就行但处理结果与它返回的前复权数据仍然对不对上
有些比如AAPL差不多可以对得上但对于KDP等差异甚大找不到原因。。
所以,这个程序暂时无法使用。。。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import pandas as pd
import logging
import os
import time
import config
# Configure project-wide logging.
config.setup_logging()
logger = logging.getLogger()
# 数据库连接函数
def connect_to_db():
    """Open a MySQL connection using the shared project DB config."""
    conn = pymysql.connect(**config.db_config)
    return conn
# 读取 sp500 表中的所有行,获取 code 和 name
def fetch_sp500_codes(connection):
    """Return a DataFrame with the code and name of every S&P 500 row."""
    sql = "SELECT code, code_name as name FROM sp500 "
    return pd.read_sql(sql, connection)
# 读取 sp500_his_kline_none 表中的数据并按 time_key 降序排列
def fetch_sp500_his_kline_none(connection, code):
    """Return the unadjusted daily rows for `code`, newest first.

    SECURITY FIX: `code` is now bound as a query parameter instead of being
    interpolated into the SQL text, eliminating an injection vector and
    quoting bugs for codes containing special characters.
    """
    query = "SELECT * FROM sp500_his_kline_none WHERE code = %s ORDER BY time_key DESC"
    return pd.read_sql(query, connection, params=(code,))
# 将计算结果插入到 sp500_ajust_kline_202410 表中
def insert_adjusted_kline_data(connection, data):
    """Bulk-upsert adjusted rows into sp500_ajust_kline_202410.

    Each row is (code, name, time_key, hfq_open, hfq_close, qfq_open,
    qfq_close, none_open, none_close); duplicate keys are updated in place.
    """
    try:
        with connection.cursor() as cursor:
            insert_query = """
            INSERT INTO sp500_ajust_kline_202410 (code, name, time_key, hfq_open, hfq_close, qfq_open, qfq_close, none_open, none_close)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                hfq_open = VALUES(hfq_open),
                hfq_close = VALUES(hfq_close),
                qfq_open = VALUES(qfq_open),
                qfq_close = VALUES(qfq_close),
                none_open = VALUES(none_open),
                none_close = VALUES(none_close)
            """
            cursor.executemany(insert_query, data)
        connection.commit()
    except Exception as e:
        logger.error(f"Error inserting data: {e}")
        # BUG FIX: roll back the failed batch so the shared connection is not
        # left inside a dangling, half-applied transaction.
        connection.rollback()
# 计算前复权和后复权的价格,并插入到 sp500_ajust_kline_202410
def process_and_insert_adjusted_kline(connection, code, name, result_none):
    """Derive qfq/hfq open/close from raw rows by subtracting/adding running
    dividend totals, then upsert the rows.

    NOTE(review): per the module docstring, this dividend-only adjustment does
    not reproduce Yahoo's own adjusted series for all tickers — treat the
    output as experimental until the discrepancy is resolved.
    """
    dividends_qfq = 0
    dividends_hfq = 0
    dividends_total = result_none['dividends'].sum()
    adjusted_data = []
    # result_none is ordered newest-first, so dividends_qfq accumulates the
    # payouts that occurred AFTER each row's date, and dividends_hfq those
    # on/before it.
    for index, row in result_none.iterrows():
        # Forward-adjusted (qfq) and backward-adjusted (hfq) prices.
        qfq_close = row['close'] - dividends_qfq
        qfq_open = row['open'] - dividends_qfq
        hfq_close = row['close'] + dividends_hfq
        hfq_open = row['open'] + dividends_hfq
        adjusted_data.append((
            row['code'], row['name'], row['time_key'],
            hfq_open, hfq_close, qfq_open, qfq_close,
            row['open'], row['close']
        ))
        dividends_qfq += row['dividends']
        dividends_hfq = dividends_total - dividends_qfq
    # Upsert into sp500_ajust_kline_202410.
    insert_adjusted_kline_data(connection, adjusted_data)
    logger.info(f"Successfully processed and inserted data for code {code}")
# 主函数
def main():
    """Recompute dividend-adjusted K-lines for every S&P 500 code and store them."""
    # BUG FIX: pre-bind `connection` so the `finally` block does not raise
    # NameError (masking the real error) when connect_to_db() itself fails.
    connection = None
    try:
        connection = connect_to_db()
        # All codes and names from the sp500 table.
        sp500_codes = fetch_sp500_codes(connection)
        for index, row in sp500_codes.iterrows():
            code = row['code']
            name = row['name']
            logger.info(f"Processing data for code: {code}, name: {name}")
            # Raw rows ordered newest-first, as the adjustment pass expects.
            result_none = fetch_sp500_his_kline_none(connection, code)
            if result_none.empty:
                logger.warning(f"No data found for code: {code}")
                continue
            process_and_insert_adjusted_kline(connection, code, name, result_none)
    except Exception as e:
        logger.error(f"Error occurred: {e}")
    finally:
        if connection:
            connection.close()
# Script entry point.
if __name__ == "__main__":
    main()

280
src/stat_yield_rate.py Normal file
View File

@ -0,0 +1,280 @@
"""
Script Name:
Description: 统计过去十年来hs300 和 sp500成分股的投资胜率年化回报率等数据。
Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0
Modification History:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
- YYYY-MM-DD [Your Name]:
"""
import pymysql
import logging
import sys
import time
import numpy as np
from datetime import datetime
import argparse
import config
# Configure logging.
config.setup_logging()
# Defaults and allowed values for the CLI arguments parsed below.
default_min_stat_years = 5
allowed_min_stat_years = [3, 5]  # statistic windows the result tables support
default_debug = False
default_market_key = "hs300"
allowed_market_keys = ['hs300', 'sp500']
# 配置命令行参数
def parse_arguments():
    """Parse the CLI flags and return (min_stat_years, debug, market_key),
    substituting module-level defaults for any flag not supplied."""
    parser = argparse.ArgumentParser(description="Run stock yield statistics.")
    parser.add_argument('--market', type=str, choices=allowed_market_keys,
                        help=f'Set market key for statistics (allowed: {allowed_market_keys}). Default is {default_market_key}.')
    parser.add_argument('--min_stat_years', type=int, choices=allowed_min_stat_years,
                        help=f'Set minimum years for statistics (allowed: {allowed_min_stat_years}). Default is {default_min_stat_years}.')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode (default: False).')
    args = parser.parse_args()
    # Fall back to the module defaults for anything the user omitted.
    years = args.min_stat_years if args.min_stat_years else default_min_stat_years
    dbg = args.debug if args.debug else default_debug
    market = args.market if args.market else default_market_key
    return years, dbg, market
# Resolve the run parameters once at import time.
min_stat_years, debug, market_key = parse_arguments()
# Per-market table names; the stats table name embeds the window length.
table_mapping = {
    "hs300": {
        "codes": "hs300",
        "his_data": "hs300_ajust_kline_202410",
        "stat_res": f"hs300_{min_stat_years}years_yield_stats_2410"
    },
    "sp500": {
        "codes": "sp500",
        "his_data": "sp500_ajust_kline_202410",
        "stat_res": f"sp500_{min_stat_years}years_yield_stats_2410"
    }
}
# Shared MySQL connection for the whole run.
connection = pymysql.connect(**config.db_config)
# 获取股票代码
def get_codes(table_mapping, index_name):
    """Return (code, code_name) rows for the market's constituents.

    :param table_mapping: per-market table-name mapping.
    :param index_name: key into table_mapping ('hs300' or 'sp500').
    :return: sequence of rows; [] on query failure.
    """
    try:
        with connection.cursor() as cursor:
            if debug:
                # Debug mode: a single row keeps the run fast.
                sql = f"SELECT code, code_name FROM {table_mapping[index_name]['codes']} LIMIT 1"
            else:
                sql = f"SELECT code, code_name FROM {table_mapping[index_name]['codes']} "
            cursor.execute(sql)
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while reading {table_mapping[index_name]['codes']} : {e}", exc_info=True)
        # BUG FIX: return an empty sequence instead of an implicit None so
        # callers can iterate the result unconditionally.
        return []
# Fetch one stock's historical k-line rows, ordered by time.
def get_historical_data(table_mapping, index_name, code):
    """Return all adjusted k-line rows for `code` as dicts, oldest first.

    Returns an empty list on database errors (instead of the previous
    implicit None) so callers can treat the result uniformly.
    """
    try:
        with connection.cursor(pymysql.cursors.DictCursor) as cursor:
            sql = f"SELECT * FROM {table_mapping[index_name]['his_data']} WHERE code = %s ORDER BY time_key"
            cursor.execute(sql, (code,))
            return cursor.fetchall()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while reading {table_mapping[index_name]['his_data']}: {e}", exc_info=True)
        return []
# Persist one aggregated statistics row (upsert) for a stock/horizon pair.
def insert_yield_stats(connection, table_mapping, index_name, code, name, diff_year, max_entry, min_entry, avg_yield, median_yield, win_rate, annual_max_entry, annual_min_entry, annual_avg_yield, annual_median_yield, max_deficit_entry, annual_yield_variance):
    """Insert (or refresh, on duplicate key) the yield statistics for
    `code` over the `diff_year` holding horizon.

    The *_entry arguments are entry dicts produced by
    calculate_yield_rate(), carrying the value together with its
    start/end timestamps.  Errors are logged, not raised.
    """
    try:
        with connection.cursor() as cursor:
            # Upsert: re-running the job refreshes every statistic column.
            sql = f"""
            INSERT INTO {table_mapping[index_name]['stat_res']}
            (code, name, year_diff, max_yield_rate, max_yield_rate_start, max_yield_rate_end,
            min_yield_rate, min_yield_rate_start, min_yield_rate_end, avg_yield_rate,
            median_yield_rate, win_rate, annual_max_yield_rate, annual_max_yield_rate_start,
            annual_max_yield_rate_end, annual_min_yield_rate, annual_min_yield_rate_start,
            annual_min_yield_rate_end, annual_avg_yield_rate, annual_median_yield_rate,
            max_deficit_days, max_deficit_start, max_deficit_end, annual_yield_variance)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
            max_yield_rate = VALUES(max_yield_rate),
            max_yield_rate_start = VALUES(max_yield_rate_start),
            max_yield_rate_end = VALUES(max_yield_rate_end),
            min_yield_rate = VALUES(min_yield_rate),
            min_yield_rate_start = VALUES(min_yield_rate_start),
            min_yield_rate_end = VALUES(min_yield_rate_end),
            avg_yield_rate = VALUES(avg_yield_rate),
            median_yield_rate = VALUES(median_yield_rate),
            win_rate = VALUES(win_rate),
            annual_max_yield_rate = VALUES(annual_max_yield_rate),
            annual_max_yield_rate_start = VALUES(annual_max_yield_rate_start),
            annual_max_yield_rate_end = VALUES(annual_max_yield_rate_end),
            annual_min_yield_rate = VALUES(annual_min_yield_rate),
            annual_min_yield_rate_start = VALUES(annual_min_yield_rate_start),
            annual_min_yield_rate_end = VALUES(annual_min_yield_rate_end),
            annual_avg_yield_rate = VALUES(annual_avg_yield_rate),
            annual_median_yield_rate = VALUES(annual_median_yield_rate),
            max_deficit_days = VALUES(max_deficit_days),
            max_deficit_start = VALUES(max_deficit_start),
            max_deficit_end = VALUES(max_deficit_end),
            annual_yield_variance = VALUES(annual_yield_variance)
            """
            # Cast numpy scalars to plain Python types for the DB driver.
            cursor.execute(sql, (
                code, name, int(diff_year),
                float(max_entry['yield_rate']), max_entry['start_time_key'], max_entry['end_time_key'],
                float(min_entry['yield_rate']), min_entry['start_time_key'], min_entry['end_time_key'],
                float(avg_yield), float(median_yield), win_rate,
                float(annual_max_entry['annual_yield_rate']), annual_max_entry['start_time_key'], annual_max_entry['end_time_key'],
                float(annual_min_entry['annual_yield_rate']), annual_min_entry['start_time_key'], annual_min_entry['end_time_key'],
                float(annual_avg_yield), float(annual_median_yield),
                max_deficit_entry['max_deficit_days'], max_deficit_entry['max_deficit_start'], max_deficit_entry['max_deficit_end'],
                annual_yield_variance
            ))
        connection.commit()
    except pymysql.MySQLError as e:
        logging.error(f"Error occurred while inserting yield stats for code {code}: {e}", exc_info=True)
# Enumerate all qualifying (buy, sell) bar pairs: yields plus drawdown length.
def calculate_yield_rate(data):
    """Sample every (start, end) bar pair at least `min_stat_years` apart.

    `data` is a list of row dicts ordered by time_key; each row must carry
    'time_key' (datetime) and 'hfq_close' (back-adjusted close).

    Returns a dict mapping the holding horizon, in whole years, to a list
    of entry dicts; sentinel key 10000 holds the combined list across all
    horizons.  NOTE: the pairwise scan is O(n^2) in the number of bars.
    """
    results = {}
    all_entries = []
    num_rows = len(data)
    for i in range(num_rows):
        for j in range(i + 1, num_rows):
            try:
                start_time_key = data[i]['time_key']
                end_time_key = data[j]['time_key']
                # Holding horizon, floored to whole years.
                time_diff = int((end_time_key - start_time_key).days / 365.0)
                if time_diff < min_stat_years:
                    continue
                # Use back-adjusted (hfq) prices: forward-adjusted series can
                # contain zero/negative closes that would corrupt the ratios.
                close_start = data[i]['hfq_close']
                close_end = data[j]['hfq_close']
                yield_rate = (close_end / close_start) - 1
                annual_yield_rate = yield_rate * 365 / (end_time_key - start_time_key).days
                # "Time under water": days from entry until the price first
                # closes above the entry price again, within [i, j].
                max_deficit_days = 0
                max_deficit_start = start_time_key
                max_deficit_end = end_time_key
                for k in range(i + 1, j):
                    if data[k]['hfq_close'] > close_start:
                        deficit_days = (data[k]['time_key'] - start_time_key).days
                        max_deficit_days = deficit_days
                        max_deficit_end = data[k]['time_key']
                        break
                # No recovery inside the window: the whole holding period
                # counts as the drawdown stretch.
                if max_deficit_days == 0:
                    max_deficit_days = (end_time_key - start_time_key).days
                    max_deficit_end = end_time_key
                entry = {
                    'diff_year': time_diff,
                    'start_time_key': start_time_key,
                    'end_time_key': end_time_key,
                    'yield_rate': yield_rate,
                    'annual_yield_rate': annual_yield_rate,
                    'max_deficit_days': max_deficit_days,
                    'max_deficit_start': max_deficit_start,
                    'max_deficit_end': max_deficit_end
                }
                all_entries.append(entry)
                if time_diff not in results:
                    results[time_diff] = []
                results[time_diff].append(entry)
            except ZeroDivisionError:
                logging.warning(f"Zero division error for code {data[i]['code']}")
            except Exception as e:
                logging.error(f"Error occurred while calculating yield rate: {e}", exc_info=True)
    # Sentinel bucket 10000 aggregates entries across every horizon.
    results[10000] = all_entries  # all-horizons summary
    return results
# Aggregate per-horizon yield samples and persist one stats row per horizon.
def compute_statistics(connection, table_mapping, index_name, code, name, results):
    """Summarise the sampled holding-period entries for each horizon and
    write the aggregates via insert_yield_stats().

    `results` maps a horizon in whole years (10000 = the all-horizons
    bucket) to entry dicts from calculate_yield_rate().  Horizons with no
    samples are skipped.
    """
    for horizon, samples in results.items():
        if not samples:
            continue
        plain = [s['yield_rate'] for s in samples]
        annual = [s['annual_yield_rate'] for s in samples]
        # Extremes keep their full entry so start/end dates can be stored.
        best = max(samples, key=lambda s: s['yield_rate'])
        worst = min(samples, key=lambda s: s['yield_rate'])
        annual_best = max(samples, key=lambda s: s['annual_yield_rate'])
        annual_worst = min(samples, key=lambda s: s['annual_yield_rate'])
        # Share of sampled holding periods that ended with a gain.
        win_rate = sum(1 for r in plain if r > 0) / len(plain)
        # Dispersion of the annualised yields.
        annual_variance = np.var(annual)
        # Longest observed "time under water" stretch among all samples.
        deepest = max(samples, key=lambda s: s['max_deficit_days'])
        insert_yield_stats(connection, table_mapping, index_name, code, name, horizon,
                           best, worst, np.mean(plain), np.median(plain), win_rate,
                           annual_best, annual_worst, np.mean(annual), np.median(annual),
                           deepest, annual_variance)
# Entry point: iterate the market's universe and compute/store yield stats.
def main(index_name):
    """Run the yield-statistics pipeline for every stock in `index_name`.

    For each constituent: load its adjusted k-line history, sample all
    qualifying holding periods, aggregate them and persist the result.
    The shared DB connection is always closed on exit.
    """
    try:
        codes = get_codes(table_mapping, index_name)
        if not codes:
            # Guard: get_codes() can come back empty (or None in older
            # versions) after a DB error; without this check the loop
            # below would blow up on a non-iterable result.
            logging.warning(f"no codes found for market {index_name}, nothing to process")
            return
        for code_row in codes:
            code, name = code_row[0], code_row[1]
            logging.info(f"开始处理 {code} ({name}) 的数据")
            data = get_historical_data(table_mapping, index_name, code)
            if not data:
                logging.warning(f"未找到 {code} 的历史数据")
                continue
            results = calculate_yield_rate(data)
            compute_statistics(connection, table_mapping, index_name, code, name, results)
            logging.info(f"完成 {code} 的处理")
            time.sleep(1)  # throttle between stocks to go easy on the DB
    except Exception as e:
        logging.error(f"处理过程中出现错误: {e}", exc_info=True)
    finally:
        connection.close()
# Script entry point: run the statistics for the market chosen on the CLI.
if __name__ == "__main__":
    main(market_key)

145
src/test_hs300_quant.py Normal file
View File

@ -0,0 +1,145 @@
import pandas as pd
import pymysql
import backtrader as bt
import config
# Pull raw HS300 k-line data from MySQL into a DataFrame.
def fetch_data_from_mysql():
    """Load HS300 daily bars (from 2023-01-01 on) into a Backtrader-ready frame.

    Returns a DataFrame with columns code, time_key, open, high, low,
    close, volume plus a zero 'openinterest' column.  The connection is
    closed even if the query raises.
    """
    connection = pymysql.connect(**config.db_config)
    try:
        # Backtrader expects ['datetime', 'open', 'high', 'low', 'close', 'volume', 'openinterest'].
        query = "SELECT code, time_key, open, high, low, close, volume FROM hs300_his_kline_none where time_key>='2023-01-01 00:00:00' "
        data = pd.read_sql(query, connection)
    finally:
        # Previously the connection leaked whenever read_sql raised.
        connection.close()
    data['time_key'] = pd.to_datetime(data['time_key'], errors='coerce')  # ensure datetime dtype
    data['openinterest'] = 0  # Backtrader requires the column; equities have no OI
    return data
# Load all bars once and split them into one frame per stock code, so
# each code can be registered with Backtrader as its own data feed.
data = fetch_data_from_mysql()
data_by_code = {code: df for code, df in data.groupby('code')}
class BestPortfolioStrategy(bt.Strategy):
    """Monthly-rebalanced momentum portfolio: at each rebalance, hold the
    `max_stocks` names with the best trailing 20-bar return.
    """
    params = dict(
        max_stocks=30,           # number of names to hold after each rebalance
        rebalance_monthly=True,  # NOTE(review): declared but never read below
        commission=0.0003,       # NOTE(review): duplicated — actually set on the broker outside
        slippage=0.005,          # NOTE(review): duplicated — actually set on the broker outside
        maperiod = 15,           # NOTE(review): declared but never read below
        printlog = False         # when True, log() prints order executions
    )
    def __init__(self):
        self.stocks = []  # names currently held
        self.rebalance_date = None  # next scheduled rebalance date
    def next(self):
        # Skip until the scheduled rebalance date has been reached.
        if self.rebalance_date and self.data.datetime.date(0) < self.rebalance_date.date():
            return
        print(f"rebalance date: {self.rebalance_date}")
        # Schedule the next rebalance for the 1st of the following month.
        self.rebalance_date = self.data.datetime.date(0).replace(day=1) + pd.DateOffset(months=1)
        # Selection: rank all feeds by trailing 20-bar return, keep the top names.
        # NOTE(review): close[-20] assumes at least 20 bars of history exist
        # for every feed at this point — confirm feeds are long enough.
        returns = {}
        for data in self.datas:
            returns[data._name] = (data.close[0] / data.close[-20]) - 1  # ~1-month trailing return
        # Sort by return (descending) and take the best `max_stocks`.
        sorted_stocks = sorted(returns, key=returns.get, reverse=True)[:self.params.max_stocks]
        print(sorted_stocks)
        # Rebalance: close dropped names, open newly selected ones.
        self.rebalance(sorted_stocks)
    def rebalance(self, selected_stocks):
        # Sell everything no longer selected, then buy the new entrants.
        for stock in self.stocks:
            if stock not in selected_stocks:
                self.close(self.getdatabyname(stock))
        for stock in selected_stocks:
            if stock not in self.stocks:
                self.buy(self.getdatabyname(stock), size=100)
        self.stocks = selected_stocks
    # Trade log helper (silent unless printlog/doprint is set).
    def log(self, txt, dt=None, doprint=False):
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print(f'{dt.isoformat()},{txt}')
    def notify_order(self, order):
        # Orders still pending — nothing to report yet.
        if order.status in [order.Submitted, order.Accepted]:
            return
        # Orders that reached a terminal state.
        if order.status in [order.Completed, order.Canceled, order.Margin]:
            if order.isbuy():
                self.log(
                    'BUY EXECUTED, ref:%.0fPrice: %.2f, Cost: %.2f, Comm %.2f, Size: %.2f, Stock: %s' %
                    (order.ref,              # order id
                     order.executed.price,   # fill price
                     order.executed.value,   # fill value
                     order.executed.comm,    # commission paid
                     order.executed.size,    # fill size
                     order.data._name))      # instrument name
            else:  # Sell
                self.log('SELL EXECUTED, ref:%.0f, Price: %.2f, Cost: %.2f, Comm %.2f, Size: %.2f, Stock: %s' %
                         (order.ref,
                          order.executed.price,
                          order.executed.value,
                          order.executed.comm,
                          order.executed.size,
                          order.data._name))
# --- Backtest wiring ---------------------------------------------------------
cerebro = bt.Cerebro()
cerebro.broker.setcash(1_000_000)                # starting capital: 1,000,000
cerebro.broker.setcommission(commission=0.0003)  # commission: 3 bps per trade
cerebro.broker.set_slippage_perc(0.005)          # slippage: 0.5%
# Register one data feed per stock code.
for code, df in data_by_code.items():
    # Drop rows whose timestamp failed to parse.
    df = df.dropna(subset=['time_key'])
    # Wrap the DataFrame in Backtrader's pandas feed.
    data_feed = bt.feeds.PandasData(
        dataname=df,
        datetime='time_key',
        openinterest=None  # no open-interest column for equities
    )
    cerebro.adddata(data_feed, name=code)
cerebro.addanalyzer(bt.analyzers.TimeReturn, _name='pnl')              # per-period return series
cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='_AnnualReturn')  # annualised return
cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='_SharpeRatio')    # Sharpe ratio
cerebro.addanalyzer(bt.analyzers.DrawDown, _name='_DrawDown')          # drawdown stats
# Attach the strategy.
cerebro.addstrategy(BestPortfolioStrategy, printlog=True)
# Run the backtest exactly ONCE.  The original called cerebro.run() twice
# (once "to run", once "to get results"), which executed the entire
# backtest a second time with broker state carried over.
results = cerebro.run()
strat = results[0]
# Daily return series from the TimeReturn analyzer.
daily_return = pd.Series(strat.analyzers.pnl.get_analysis())
# Print the evaluation metrics.
print("--------------- AnnualReturn -----------------")
print(strat.analyzers._AnnualReturn.get_analysis())
print("--------------- SharpeRatio -----------------")
print(strat.analyzers._SharpeRatio.get_analysis())
print("--------------- DrawDown -----------------")
print(strat.analyzers._DrawDown.get_analysis())

83
src/test_quant.py Normal file
View File

@ -0,0 +1,83 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as sco
import yfinance as yf
# Download historical prices for the asset universe.
assets = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']  # ticker symbols
# NOTE(review): newer yfinance versions auto-adjust prices and may not
# return an 'Adj Close' column — confirm the installed version.
data = yf.download(assets, start="2020-01-01", end="2023-01-01")['Adj Close']
print(data)
# Daily simple returns.
returns = data.pct_change().dropna()
print(returns)
# Annualised expected returns and covariance (252 trading days/year).
annual_returns = returns.mean() * 252
cov_matrix = returns.cov() * 252
print(annual_returns)
print(cov_matrix)
# Expected return and risk (standard deviation) of a weighted portfolio.
def portfolio_performance(weights, mean_returns, cov_matrix):
    """Return (expected_return, volatility) for the given weight vector."""
    expected = np.sum(weights * mean_returns)
    variance = np.dot(weights.T, np.dot(cov_matrix, weights))
    return expected, np.sqrt(variance)
# Optimiser objective: minimise portfolio risk (volatility).
def minimize_volatility(weights, mean_returns, cov_matrix):
    """Return only the volatility of the portfolio defined by `weights`."""
    _, volatility = portfolio_performance(weights, mean_returns, cov_matrix)
    return volatility
# Equality constraint: portfolio weights must total exactly 1.
def constraint_sum(weights):
    """Return the excess of the weight sum over 1 (zero when feasible)."""
    total = np.sum(weights)
    return total - 1
# Weight bounds (each asset in [0, 1]) and an equal-weight starting point.
num_assets = len(assets)
bounds = tuple((0, 1) for asset in range(num_assets))
initial_weights = num_assets * [1. / num_assets]  # equal weights as the initial guess
# Equality constraint: weights must sum to 1.
constraints = ({'type': 'eq', 'fun': constraint_sum})
# Minimise portfolio variance subject to the bounds and constraint (SLSQP).
opt_result = sco.minimize(minimize_volatility, initial_weights, args=(annual_returns, cov_matrix),
                          method='SLSQP', bounds=bounds, constraints=constraints)
# Optimal weight vector.
optimal_weights = opt_result.x
# Return/risk of the optimal portfolio.
optimal_return, optimal_volatility = portfolio_performance(optimal_weights, annual_returns, cov_matrix)
# Report the optimal allocation.
print("最优资产组合权重:")
for i, asset in enumerate(assets):
    print(f"{asset}: {optimal_weights[i]:.2%}")
print(f"\n最优组合的年化预期收益: {optimal_return:.2%}")
print(f"最优组合的年化风险(标准差): {optimal_volatility:.2%}")
# Visualise the efficient frontier via random portfolio sampling.
def plot_efficient_frontier(mean_returns, cov_matrix, num_portfolios=10000):
    """Scatter `num_portfolios` random portfolios in risk/return space,
    coloured by Sharpe ratio, and mark the optimised portfolio with a star.

    NOTE(review): relies on the module globals `num_assets`,
    `optimal_return` and `optimal_volatility` computed above — confirm
    call order before reusing this function elsewhere.
    """
    results = np.zeros((3, num_portfolios))
    for i in range(num_portfolios):
        # Random weights normalised to sum to 1.
        weights = np.random.random(num_assets)
        weights /= np.sum(weights)
        portfolio_return, portfolio_volatility = portfolio_performance(weights, mean_returns, cov_matrix)
        results[0, i] = portfolio_return
        results[1, i] = portfolio_volatility
        results[2, i] = results[0, i] / results[1, i]  # Sharpe ratio (risk-free rate taken as 0)
    plt.figure(figsize=(10, 6))
    plt.scatter(results[1, :], results[0, :], c=results[2, :], cmap='viridis')
    plt.colorbar(label='Sharpe Ratio')
    plt.scatter(optimal_volatility, optimal_return, c='red', marker='*', s=200)  # the optimised portfolio
    plt.title('Efficient Frontier')
    plt.xlabel('Volatility (Risk)')
    plt.ylabel('Return')
    plt.show()
# Draw the frontier for the annualised inputs computed above.
plot_efficient_frontier(annual_returns, cov_matrix)