modify scripts

This commit is contained in:
oscarz
2025-03-22 10:01:30 +08:00
parent 564c7bd442
commit 0ce1ff3657
5 changed files with 1540 additions and 0 deletions

129
src/crawler/bak_em/codes.py Normal file
View File

@ -0,0 +1,129 @@
"""
Date: 2022/6/19 15:26
Desc: 东方财富网-行情首页-沪深京 A 股
"""
import requests
import pandas as pd
import time
import json
from functools import lru_cache
# JSONP callback name sent as the ``cb`` query parameter; every preset below
# carries the same value.
_EM_JSONP_CALLBACK = 'jQuery37103053011545475828_1742564157141'

# Preset name -> value of the ``fs`` (market filter) query parameter.
_EM_MARKET_FILTERS = {
    'china_all': "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",  # all A-shares
    'hk_all': "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
    'us_all': "m:105,m:106,m:107",
    'us_china': "b:MK0201",
    'hk_connect': "b:DLMK0146,b:DLMK0144",
    'hk_china_corps': "b:DLMK0112",
}

# Preset name -> {'fs': <market filter>, 'cb': <JSONP callback>}, in the same
# order as the filter table above.
em_market_config = {
    preset: {'fs': market_filter, 'cb': _EM_JSONP_CALLBACK}
    for preset, market_filter in _EM_MARKET_FILTERS.items()
}
# Configuration section.
# Query-string parameters of the default request.
_TEMPLATE_PARAMS = {
    'np': 1,
    'fltt': 1,
    'invt': 2,
    'cb': 'jQuery37103053011545475828_1742564157141',
    'fs': 'm:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048',
    'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23',
    'fid': 'f3',
    'pn': 1,    # page number
    'pz': 100,  # page size
    'po': 1,
    'dect': 1,
    'ut': 'fa5fd1943c7b386f172d6893dbfba10b',
    # Millisecond timestamp, evaluated once when this module is imported.
    '_': int(time.time() * 1000),
}

# HTTP headers sent with every request.
_TEMPLATE_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    'Referer': 'https://quote.eastmoney.com/center/gridlist.html',
}

# Request template: endpoint, query parameters, headers and retry policy
# (number of attempts and seconds to wait between them).
config_template = {
    'url': 'https://push2.eastmoney.com/api/qt/clist/get',
    'params': _TEMPLATE_PARAMS,
    'headers': _TEMPLATE_HEADERS,
    'max_retries': 3,
    'retry_delay': 5,
}
# Fetch data with retries and validate the result.
def fetch_data(config):
    """Request one JSONP page and return its ``data`` object.

    Args:
        config: dict with the keys ``url``, ``params`` (must contain the
            JSONP callback name under ``cb``), ``headers``, ``max_retries``
            and ``retry_delay`` (seconds). An optional ``timeout`` key
            (seconds, default 10) bounds each HTTP request.

    Returns:
        The ``data`` dict of the decoded response (it always contains a
        ``diff`` key), or ``None`` when every attempt failed.
    """
    retries = 0
    while retries < config['max_retries']:
        try:
            callback = config['params']['cb']
            response = requests.get(
                config['url'],
                params=config['params'],
                headers=config['headers'],
                # Without a timeout a stalled connection would block forever.
                timeout=config.get('timeout', 10),
            )
            response.raise_for_status()
            body = response.text
            # Validate the response body.
            if callback not in body:
                raise ValueError("Callback not found in response")
            # Extract the JSON wrapped as ``callback(<json>);``.
            json_data = body.split(callback + '(')[-1].rstrip(');')
            data = json.loads(json_data)
            payload = data.get('data') if isinstance(data, dict) else None
            # ``data`` may be present but null; treat that as a bad response
            # instead of letting a TypeError escape the retry loop.
            if not isinstance(payload, dict) or 'diff' not in payload:
                raise ValueError("Invalid data format")
            return payload
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching data: {e}")
            retries += 1
            # No point in waiting once the last attempt has failed.
            if retries < config['max_retries']:
                time.sleep(config['retry_delay'])
    return None
# Fetch the code list.
def get_market_codes(fs, cb):
    """Page through one market listing and collect every code and name.

    Args:
        fs: value for the ``fs`` (market filter) query parameter.
        cb: JSONP callback name for the ``cb`` query parameter.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dicts built from the ``f12``
        and ``f14`` fields of each row. If a page cannot be fetched after a
        few rounds of ``fetch_data``, paging stops and the rows collected so
        far are returned.
    """
    # Work on a private copy so the module-level template is never mutated.
    config = dict(config_template)
    config['params'] = dict(config_template['params'])
    config['params'].update({'cb': cb, 'fs': fs, 'fields': 'f12,f14'})

    page_size = int(config['params']['pz'])
    fetch_rounds = 3      # calls to fetch_data per page before giving up
    max_pages = 100000    # upper bound until the first page reports 'total'
    page = 1
    codes = []
    while page <= max_pages:
        config['params']['pn'] = page
        data = None
        for _ in range(fetch_rounds):
            data = fetch_data(config)
            if data:
                break
        if not data:
            # Bounded instead of retrying forever on a persistent failure.
            print(f"Error fetching data: giving up on page {page}")
            break
        if page == 1:
            total = data.get('total', 1000000)
            max_pages = (int(total) + page_size - 1) // page_size
        page += 1
        codes.extend(
            {'code': row['f12'], 'name': row['f14']}
            for row in data.get('diff', [])
        )
    return codes
if __name__ == "__main__":
    # Demo run: list the codes of the 'hk_connect' preset and print them.
    preset = em_market_config['hk_connect']
    fetched = get_market_codes(preset['fs'], preset['cb'])
    collected = list(fetched) if fetched else []
    print(json.dumps(collected, indent=4, ensure_ascii=False))
    print(f'total codes: {len(collected)}')

View File

@ -0,0 +1,523 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/6/19 15:26
Desc: 东方财富网-行情首页-沪深京 A 股
"""
import requests
import pandas as pd
import time
from functools import lru_cache
def fetch_with_retries_em(url, params, max_retries=3, delay=2):
    """Send a GET request and return the decoded JSON body, retrying on failure.

    Args:
        url: endpoint to request.
        params: query-string parameters.
        max_retries: maximum number of attempts.
        delay: seconds to wait between two attempts.

    Returns:
        The decoded JSON body, or ``None`` when every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=5)
            response.raise_for_status()
            return response.json()
        # ValueError covers a body that is not valid JSON; depending on the
        # installed requests version that error is not a RequestException.
        except (requests.RequestException, ValueError) as e:
            print(f"请求失败,第 {attempt + 1} 次重试: {e}")
            # Sleeping after the final attempt would only delay the caller.
            if attempt + 1 < max_retries:
                time.sleep(delay)
    return None
def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', pz=100) -> pd.DataFrame:
    """
    Eastmoney - Shanghai/Shenzhen/Beijing A-shares - real-time quotes.
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Pages through the listing ``pz`` rows at a time until the page count
    derived from the ``total`` reported on the first page is reached, or a
    page comes back empty or unusable.

    :param fs: value for the ``fs`` (market filter) query parameter
    :param pz: page size
    :return: one row per instrument with named columns; an empty DataFrame
        when nothing could be fetched
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    # Only "pn" changes between pages, so the dict is built once.
    params = {
        "pn": "1",
        "pz": str(pz),
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": fs,
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }
    pn = 1          # current page
    pn_max = 10000  # provisional upper bound until the first page reports 'total'
    all_data = []
    while pn <= pn_max:
        params["pn"] = str(pn)
        data_json = fetch_with_retries_em(url, params)
        payload = data_json.get("data") if isinstance(data_json, dict) else None
        # "data" can be present but null; stop instead of raising TypeError.
        if not isinstance(payload, dict):
            break
        diff_data = payload.get("diff")
        if not diff_data:
            break
        all_data.extend(diff_data)
        # Use the reported total to compute the real number of pages.
        if pn == 1:
            total = payload.get("total", 0)
            pn_max = (total + pz - 1) // pz
            print(f'total pages: {pn_max}, total data lines: {total}, curr lines: {len(diff_data)}, page size: {pz}')
        pn += 1
        if pn <= pn_max:
            time.sleep(0.5)  # throttle between pages
    if not all_data:
        return pd.DataFrame()

    column_map = {
        "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率",
        "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低",
        "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅",
        "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份",
        "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润",
        "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产",
        "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期"
    }
    temp_df = pd.DataFrame(all_data).rename(columns=column_map)
    numeric_columns = [
        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌", "60日涨跌幅",
        "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产", "每股公积金", "每股未分配利润",
        "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "总股本", "已流通股份",
        "总市值", "流通市值"
    ]
    # Values that cannot be parsed become NaN / NaT rather than raising.
    for col in numeric_columns:
        temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")
    for col in ("报告期", "上市时间"):
        temp_df[col] = pd.to_datetime(temp_df[col], format='%Y%m%d', errors="coerce")
    return temp_df
@lru_cache()
def code_id_map_em() -> dict:
    """Build a mapping from instrument code (``f12``) to market id (``f13``).

    Pages through the china_a, hk and us listings, 200 rows per request.
    The result is memoized by ``lru_cache``, so the listings are downloaded
    at most once per process and every caller receives the same dict object.

    :return: code -> market id; a market whose pages cannot be fetched
        contributes only the rows fetched before the failure
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    pz = 200  # fixed page size
    params = {
        "pn": "1",
        "pz": str(pz),
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "",
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {
        "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
        "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
        "us": "m:105,m:106,m:107"
    }
    code_id_dict = {}
    for market_id, fs in market_fs.items():
        params["fs"] = fs
        pn = 1          # every market starts from the first page
        pn_max = 10000  # reset per market; refined once 'total' is known
        total = 0
        fetched_cnt = 0
        while pn <= pn_max:
            params["pn"] = str(pn)
            data_json = fetch_with_retries_em(url, params)
            payload = data_json.get("data") if isinstance(data_json, dict) else None
            # "data" can be present but null; treat it like a failed fetch.
            if not isinstance(payload, dict) or "diff" not in payload:
                print(f"市场 {market_id} 数据获取失败或为空,跳过。")
                break
            rows = payload["diff"]
            if pn == 1 and "total" in payload:
                total = int(payload["total"])
                # Ceiling division: no extra request when total is an exact
                # multiple of the page size.
                pn_max = (total + pz - 1) // pz
                print(f"市场 {market_id} 总数据量: {total}, 需要页数: {pn_max}, 当前获取数量: {len(rows)}, 每页最大拉取行数: {pz}")
            for row in rows:
                code_id_dict[row["f12"]] = row["f13"]
            fetched_cnt += len(rows)
            pn += 1  # next page
        print(f'获取 {market_id} 已获取总股票数: {fetched_cnt}, 总股票数: {total}')
    return code_id_dict
@lru_cache()
def code_id_map_em2() -> dict:
    """
    Eastmoney - instrument codes and their market ids.
    http://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Issues a single request (page size 50000) per market instead of paging.
    The result is memoized by ``lru_cache``.

    :return: code (``f12``) -> market id (``f13``); an empty dict as soon as
        one market returns no rows
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "m:1 t:2,m:1 t:23",  # placeholder; overwritten for each market
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
                 "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
                 "us": "m:105,m:106,m:107"}
    code_id_dict = dict()
    for market_id, fs in market_fs.items():
        params['fs'] = fs
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url, params=params, timeout=10)
        data_json = r.json()
        payload = data_json.get("data") if isinstance(data_json, dict) else None
        # "data" can be present but null; handle it like an empty listing.
        rows = payload.get("diff") if isinstance(payload, dict) else None
        if not rows:
            return dict()
        temp_df = pd.DataFrame(rows)
        # Store the codes under the market id (f13) of their group.
        for market_code, group in temp_df.groupby('f13'):
            code_id_dict.update(dict.fromkeys(group["f12"], market_code))
            print(f'get {market_id} stock list. f13: {market_code}, stock count: {len(group)}')
    return code_id_dict
def stock_zh_a_hist(
    symbol: str = "000001",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home - Shanghai/Shenzhen/Beijing A-shares - historical bars.
    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: instrument code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param period: choice of {'daily', 'weekly', 'monthly'}
    :type period: str
    :param start_date: first date, sent as the ``beg`` parameter
    :type start_date: str
    :param end_date: last date, sent as the ``end`` parameter
    :type end_date: str
    :param adjust: choice of {"qfq", "hfq", ""}; "" requests unadjusted prices
    :type adjust: str
    :return: one row per bar; an empty DataFrame when the response has no bars
    :rtype: pandas.DataFrame
    :raises KeyError: for an unknown ``symbol``, ``period`` or ``adjust``
    """
    code_id_dict = code_id_map_em()
    adjust_dict = {"qfq": "1", "hfq": "2", "": "0"}
    period_dict = {"daily": "101", "weekly": "102", "monthly": "103"}
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": period_dict[period],
        "fqt": adjust_dict[adjust],
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "beg": start_date,
        "end": end_date,
        "_": "1623766962675",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=10)
    data_json = r.json()
    payload = data_json.get("data") if isinstance(data_json, dict) else None
    klines = payload.get("klines") if isinstance(payload, dict) else None
    if not klines:
        return pd.DataFrame()
    # Each kline is one comma-separated record.
    temp_df = pd.DataFrame([item.split(",") for item in klines])
    temp_df.columns = [
        "日期",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    # Every column except the date is numeric.
    for col in temp_df.columns[1:]:
        temp_df[col] = pd.to_numeric(temp_df[col])
    return temp_df
def _hist_min_em_frame(rows, columns, start_date, end_date) -> pd.DataFrame:
    """Turn comma-separated rows into a typed frame limited to [start_date, end_date]."""
    frame = pd.DataFrame([item.split(",") for item in rows])
    frame.columns = columns
    frame.index = pd.to_datetime(frame["时间"])
    # Copy the slice so the column assignments below write to an independent
    # frame rather than to a view of the unsliced data.
    frame = frame.loc[start_date:end_date].copy()
    frame.reset_index(drop=True, inplace=True)
    for col in columns[1:]:
        frame[col] = pd.to_numeric(frame[col])
    frame["时间"] = pd.to_datetime(frame["时间"]).astype(str)
    return frame


def stock_zh_a_hist_min_em(
    symbol: str = "000001",
    start_date: str = "1979-09-01 09:32:00",
    end_date: str = "2222-01-01 09:32:00",
    period: str = "5",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home - Shanghai/Shenzhen/Beijing A-shares - intraday bars.
    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: instrument code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param start_date: lower bound applied to the returned timestamps
    :type start_date: str
    :param end_date: upper bound applied to the returned timestamps
    :type end_date: str
    :param period: choice of {'1', '5', '15', '30', '60'}; '1' uses the
        trends endpoint, every other value the kline endpoint
    :type period: str
    :param adjust: choice of {'', 'qfq', 'hfq'}; only read when period != '1'
    :type adjust: str
    :return: one row per bar; an empty DataFrame when the response has no rows
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    secid = f"{code_id_dict[symbol]}.{symbol}"
    one_minute = period == "1"
    if one_minute:
        url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "ndays": "5",
            "iscr": "0",
            "secid": secid,
            "_": "1623766962675",
        }
        rows_key = "trends"
        columns = ["时间", "开盘", "收盘", "最高", "最低", "成交量", "成交额", "最新价"]
    else:
        adjust_map = {
            "": "0",
            "qfq": "1",
            "hfq": "2",
        }
        url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "klt": period,
            "fqt": adjust_map[adjust],
            "secid": secid,
            "beg": "0",
            "end": "20500000",
            "_": "1630930917857",
        }
        rows_key = "klines"
        columns = [
            "时间", "开盘", "收盘", "最高", "最低", "成交量", "成交额",
            "振幅", "涨跌幅", "涨跌额", "换手率",
        ]

    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=10)
    data_json = r.json()
    payload = data_json.get("data") if isinstance(data_json, dict) else None
    rows = payload.get(rows_key) if isinstance(payload, dict) else None
    # Same contract as stock_zh_a_hist: no data yields an empty frame.
    if not rows:
        return pd.DataFrame()

    temp_df = _hist_min_em_frame(rows, columns, start_date, end_date)
    if not one_minute:
        # Column order returned for kline periods.
        temp_df = temp_df[
            [
                "时间",
                "开盘",
                "收盘",
                "最高",
                "最低",
                "涨跌幅",
                "涨跌额",
                "成交量",
                "成交额",
                "振幅",
                "换手率",
            ]
        ]
    return temp_df
def stock_zh_a_hist_pre_min_em(
    symbol: str = "000001",
    start_time: str = "09:00:00",
    end_time: str = "15:50:00",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home - Shanghai/Shenzhen/Beijing A-shares - intraday
    bars including pre-market data.
    http://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: instrument code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param start_time: first time of day to keep
    :type start_time: str
    :param end_time: last time of day to keep
    :type end_time: str
    :return: one row per bar on the date of the first returned record; an
        empty DataFrame when the response has no rows
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    url = "https://push2.eastmoney.com/api/qt/stock/trends2/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
        "ut": "fa5fd1943c7b386f172d6893dbfba10b",
        "ndays": "1",
        "iscr": "1",
        "iscca": "0",
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "_": "1623766962675",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=10)
    data_json = r.json()
    payload = data_json.get("data") if isinstance(data_json, dict) else None
    trends = payload.get("trends") if isinstance(payload, dict) else None
    # Nothing to slice: index[0] below would fail on an empty frame.
    if not trends:
        return pd.DataFrame()
    temp_df = pd.DataFrame([item.split(",") for item in trends])
    temp_df.columns = [
        "时间",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "最新价",
    ]
    temp_df.index = pd.to_datetime(temp_df["时间"])
    # The time window is applied on the date of the first record.
    date_format = temp_df.index[0].date().isoformat()
    # Copy the slice so the assignments below write to an independent frame.
    temp_df = temp_df.loc[
        date_format + " " + start_time : date_format + " " + end_time
    ].copy()
    temp_df.reset_index(drop=True, inplace=True)
    for col in ("开盘", "收盘", "最高", "最低", "成交量", "成交额", "最新价"):
        temp_df[col] = pd.to_numeric(temp_df[col])
    temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
    return temp_df
def _run_extended_examples():
    """Run further sample calls; the script entry point below does not call this."""
    stock_zh_a_spot_em_df = stock_zh_a_spot_em()
    print(stock_zh_a_spot_em_df)
    code_id_map_em_df = code_id_map_em()
    print(code_id_map_em_df)
    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="430090",
        period="daily",
        start_date="20220516",
        end_date="20220722",
        adjust="hfq",
    )
    print(stock_zh_a_hist_df)
    stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em(symbol="833454", period="1")
    print(stock_zh_a_hist_min_em_df)
    stock_zh_a_hist_pre_min_em_df = stock_zh_a_hist_pre_min_em(symbol="833454")
    print(stock_zh_a_hist_pre_min_em_df)
    stock_zh_a_spot_em_df = stock_zh_a_spot_em()
    print(stock_zh_a_spot_em_df)
    stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em(
        symbol="000001", period='1'
    )
    print(stock_zh_a_hist_min_em_df)
    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="833454",
        period="daily",
        start_date="20170301",
        end_date="20211115",
        adjust="hfq",
    )
    print(stock_zh_a_hist_df)


if __name__ == "__main__":
    # The script runs this single example and then ends with exit status 0.
    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="000858",
        period="daily",
        start_date="20220516",
        end_date="20220722",
        adjust="",
    )
    print(stock_zh_a_hist_df)

View File

View File

@ -0,0 +1,69 @@
import time
# Configuration section.
# Query-string parameters of the china_all request.
_CHINA_ALL_PARAMS = {
    'np': 1,
    'fltt': 1,
    'invt': 2,
    'cb': 'jQuery37103053011545475828_1742564157141',
    'fs': 'm:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048',
    'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23',
    'fid': 'f3',
    'pn': 1,   # page number
    'pz': 20,  # page size
    'po': 1,
    'dect': 1,
    'ut': 'fa5fd1943c7b386f172d6893dbfba10b',
    # Millisecond timestamp, evaluated once when this module is imported.
    '_': int(time.time() * 1000),
}

# HTTP headers sent with the china_all request.
_CHINA_ALL_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    'Referer': 'https://quote.eastmoney.com/center/gridlist.html',
}

# Endpoint, query parameters, headers and retry policy.
china_all_config = {
    'url': 'https://push2.eastmoney.com/api/qt/clist/get',
    'params': _CHINA_ALL_PARAMS,
    'headers': _CHINA_ALL_HEADERS,
    'max_retries': 3,
    'retry_delay': 5,
}
'''
curl 'https://push2.eastmoney.com/api/qt/clist/get?np=1&fltt=1&invt=2&cb=jQuery37103053011545475828_1742564157141&fs=m%3A128%2Bt%3A3%2Cm%3A128%2Bt%3A4%2Cm%3A128%2Bt%3A1%2Cm%3A128%2Bt%3A2&fields=f12%2Cf13%2Cf14%2Cf19%2Cf1%2Cf2%2Cf4%2Cf3%2Cf152%2Cf17%2Cf18%2Cf15%2Cf16%2Cf5%2Cf6&fid=f3&pn=2&pz=20&po=1&dect=1&ut=fa5fd1943c7b386f172d6893dbfba10b&wbp2u=%7C0%7C0%7C0%7Cweb&_=1742564157184' \
-H 'Accept: */*' \
-H 'Accept-Language: zh-CN,zh;q=0.9' \
-H 'Connection: keep-alive' \
-b 'sid=173318833; vtpst=%7c; st_si=63334912574582; qgqp_b_id=5107797c7296e8e7fc529ab2daa8bf8b; AUTH_FUND.EASTMONEY.COM_GSJZ=AUTH*TTJJ*TOKEN; fullscreengg=1; fullscreengg2=1; xsb_history=831566%7C%u76DB%u5927%u5728%u7EBF%2C874086%7C%u5C0F%u5510%u79D1%u6280; HAList=ty-116-00700-%u817E%u8BAF%u63A7%u80A1%2Cty-124-HSTECH-%u6052%u751F%u79D1%u6280%u6307%u6570%2Cty-1-000300-%u6CAA%u6DF1300%2Cty-0-159995-%u82AF%u7247ETF%2Cty-106-RDDT-Reddit%20Inc-A%2Cty-116-08321-%u6CF0%u9526%u63A7%u80A1%2Cty-105-AAPL-%u82F9%u679C%2Cty-105-NLSPW-NLS%20Pharmaceutics%20Ltd%20Wt%2Cty-116-09890-%u4E2D%u65ED%u672A%u6765%2Cty-116-00396-%u5174%u5229%28%u9999%u6E2F%29%u63A7%u80A1; has_jump_to_web=1; st_asi=delete; st_pvi=05050221710102; st_sp=2022-01-20%2014%3A22%3A55; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=525; st_psi=20250322081050269-113200301321-1939683963' \
-H 'Referer: https://quote.eastmoney.com/center/gridlist.html' \
-H 'Sec-Fetch-Dest: script' \
-H 'Sec-Fetch-Mode: no-cors' \
-H 'Sec-Fetch-Site: same-site' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36' \
-H 'sec-ch-ua: "Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"'
'''
# Endpoint, query parameters, headers and retry policy for the hk_all request.
hk_all_config = {
    'url': 'https://push2.eastmoney.com/api/qt/clist/get',
    'params': {
        'np': 1,
        'fltt': 1,
        'invt': 2,
        'cb': 'jQuery37103053011545475828_1742564157141',
        # No leading space: the filter must start directly with "m:128".
        'fs': 'm:128+t:3,m:128+t:4,m:128+t:1,m:128+t:2',
        'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23',
        'fid': 'f3',
        'pn': 1,   # page number
        'pz': 20,  # page size
        'po': 1,
        'dect': 1,
        'ut': 'fa5fd1943c7b386f172d6893dbfba10b',
        # Millisecond timestamp, evaluated once when this module is imported.
        '_': int(time.time() * 1000)
    },
    'headers': {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'Referer': 'https://quote.eastmoney.com/center/gridlist.html'
    },
    'max_retries': 3,
    'retry_delay': 5
}

819
src/crawler/em/stock.py Normal file
View File

@ -0,0 +1,819 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/6/19 15:26
Desc: 东方财富网-行情首页-沪深京 A 股
"""
import requests
import pandas as pd
import time
from functools import lru_cache
def fetch_with_retries_em(url, params, max_retries=3, delay=2):
    """Send a GET request and return the decoded JSON body, retrying on failure.

    Args:
        url: endpoint to request.
        params: query-string parameters.
        max_retries: maximum number of attempts.
        delay: seconds to wait between two attempts.

    Returns:
        The decoded JSON body, or ``None`` when every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=5)
            response.raise_for_status()
            return response.json()
        # ValueError covers a body that is not valid JSON; depending on the
        # installed requests version that error is not a RequestException.
        except (requests.RequestException, ValueError) as e:
            print(f"请求失败,第 {attempt + 1} 次重试: {e}")
            # Sleeping after the final attempt would only delay the caller.
            if attempt + 1 < max_retries:
                time.sleep(delay)
    return None
def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', pz=100) -> pd.DataFrame:
    """
    Eastmoney - Shanghai/Shenzhen/Beijing A-shares - real-time quotes.
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Pages through the listing ``pz`` rows at a time until the page count
    derived from the ``total`` reported on the first page is reached, or a
    page comes back empty or unusable.

    :param fs: value for the ``fs`` (market filter) query parameter
    :param pz: page size
    :return: one row per instrument with named columns; an empty DataFrame
        when nothing could be fetched
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    # Only "pn" changes between pages, so the dict is built once.
    params = {
        "pn": "1",
        "pz": str(pz),
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": fs,
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }
    pn = 1          # current page
    pn_max = 10000  # provisional upper bound until the first page reports 'total'
    all_data = []
    while pn <= pn_max:
        params["pn"] = str(pn)
        data_json = fetch_with_retries_em(url, params)
        payload = data_json.get("data") if isinstance(data_json, dict) else None
        # "data" can be present but null; stop instead of raising TypeError.
        if not isinstance(payload, dict):
            break
        diff_data = payload.get("diff")
        if not diff_data:
            break
        all_data.extend(diff_data)
        # Use the reported total to compute the real number of pages.
        if pn == 1:
            total = payload.get("total", 0)
            pn_max = (total + pz - 1) // pz
            print(f'total pages: {pn_max}, total data lines: {total}, curr lines: {len(diff_data)}, page size: {pz}')
        pn += 1
        if pn <= pn_max:
            time.sleep(0.5)  # throttle between pages
    if not all_data:
        return pd.DataFrame()

    column_map = {
        "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率",
        "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低",
        "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅",
        "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份",
        "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润",
        "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产",
        "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期"
    }
    temp_df = pd.DataFrame(all_data).rename(columns=column_map)
    numeric_columns = [
        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌", "60日涨跌幅",
        "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产", "每股公积金", "每股未分配利润",
        "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "总股本", "已流通股份",
        "总市值", "流通市值"
    ]
    # Values that cannot be parsed become NaN / NaT rather than raising.
    for col in numeric_columns:
        temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")
    for col in ("报告期", "上市时间"):
        temp_df[col] = pd.to_datetime(temp_df[col], format='%Y%m%d', errors="coerce")
    return temp_df
def stock_zh_a_spot_em_old(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame:
    """
    Eastmoney - Shanghai/Shenzhen/Beijing A-shares - real-time quotes.
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Fetches the whole listing with one request (page size 50000).

    :param fs: value for the ``fs`` (market filter) query parameter
    :return: real-time quotes, one row per instrument; an empty DataFrame
        when the response carries no rows
    :rtype: pandas.DataFrame
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": fs,
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=10)
    data_json = r.json()
    payload = data_json.get("data") if isinstance(data_json, dict) else None
    # "data" can be present but null; handle it like an empty listing.
    rows = payload.get("diff") if isinstance(payload, dict) else None
    if not rows:
        return pd.DataFrame()

    # Rename by field id instead of by position, so the result does not
    # depend on the key order of the JSON records.
    column_map = {
        "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率",
        "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低",
        "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅",
        "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份",
        "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润",
        "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产",
        "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期"
    }
    # Column order of the returned frame.
    output_columns = [
        "代码",
        "名称",
        "最新价",
        "涨跌幅",
        "涨跌额",
        "成交量",
        "成交额",
        "振幅",
        "换手率",
        "量比",
        "今开",
        "最高",
        "最低",
        "昨收",
        "涨速",
        "5分钟涨跌",
        "60日涨跌幅",
        "年初至今涨跌幅",
        "市盈率动",
        "市盈率TTM",
        "市盈率静",
        "市净率",
        "每股收益",
        "每股净资产",
        "每股公积金",
        "每股未分配利润",
        "加权净资产收益率",
        "毛利率",
        "资产负债率",
        "营业收入",
        "营业收入同比增长",
        "归属净利润",
        "归属净利润同比增长",
        "报告期",
        "总股本",
        "已流通股份",
        "总市值",
        "流通市值",
        "所处行业",
        "上市时间",
    ]
    date_columns = ("报告期", "上市时间")
    text_columns = ("代码", "名称", "所处行业")

    # Copy the selection so the assignments below write to an independent frame.
    temp_df = pd.DataFrame(rows).rename(columns=column_map)[output_columns].copy()
    # Values that cannot be parsed become NaN / NaT rather than raising.
    for col in output_columns:
        if col in date_columns:
            temp_df[col] = pd.to_datetime(temp_df[col], format='%Y%m%d', errors="coerce")
        elif col not in text_columns:
            temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")
    return temp_df
# Original version; its implementation is rather verbose and a simplified
# version later in this file replaces it.
#@lru_cache()
def code_id_map_em_older() -> dict:
    """
    Eastmoney - instrument codes and their market ids.
    http://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Requests six listings one after another. The first five are stored under
    a fixed market id; the last one is stored under the ``f13`` value of each
    row.

    :return: code (``f12``) -> market id; an empty dict as soon as one
        listing returns no rows
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    base_params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "",
        "fields": "f12",
        "_": "1623833739532",
    }
    # (fs filter, requested fields, market id); a market id of None means
    # "use the f13 value of each row".
    listings = [
        ("m:1 t:2,m:1 t:23", "f12", 1),
        ("m:0 t:6,m:0 t:80", "f12", 0),
        ("m:0 t:81 s:2048", "f12", 0),
        ("m:128 t:3", "f12", 116),
        ("m:128 t:4", "f12", 116),
        ("m:105,m:106,m:107", "f12,f13", None),
    ]
    code_id_dict = {}
    for fs, fields, market_id in listings:
        params = {**base_params, "fs": fs, "fields": fields}
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url, params=params, timeout=10)
        data_json = r.json()
        payload = data_json.get("data") if isinstance(data_json, dict) else None
        # "data" can be present but null; handle it like an empty listing.
        rows = payload.get("diff") if isinstance(payload, dict) else None
        if not rows:
            return dict()
        for row in rows:
            # Map every code to the complete market id. Zipping the codes
            # with str(id) would pair them with single characters of the id
            # and drop all codes beyond its length.
            code_id_dict[row["f12"]] = row["f13"] if market_id is None else market_id
    return code_id_dict
@lru_cache()
def code_id_map_em() -> dict:
    """Build a mapping from instrument code (``f12``) to market id (``f13``).

    Pages through the china_a, hk and us listings, 200 rows per request.
    The result is memoized by ``lru_cache``, so the listings are downloaded
    at most once per process and every caller receives the same dict object.

    :return: code -> market id; a market whose pages cannot be fetched
        contributes only the rows fetched before the failure
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    pz = 200  # fixed page size
    params = {
        "pn": "1",
        "pz": str(pz),
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "",
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {
        "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
        "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
        "us": "m:105,m:106,m:107"
    }
    code_id_dict = {}
    for market_id, fs in market_fs.items():
        params["fs"] = fs
        pn = 1          # every market starts from the first page
        pn_max = 10000  # reset per market; refined once 'total' is known
        total = 0
        fetched_cnt = 0
        while pn <= pn_max:
            params["pn"] = str(pn)
            data_json = fetch_with_retries_em(url, params)
            payload = data_json.get("data") if isinstance(data_json, dict) else None
            # "data" can be present but null; treat it like a failed fetch.
            if not isinstance(payload, dict) or "diff" not in payload:
                print(f"市场 {market_id} 数据获取失败或为空,跳过。")
                break
            rows = payload["diff"]
            if pn == 1 and "total" in payload:
                total = int(payload["total"])
                # Ceiling division: no extra request when total is an exact
                # multiple of the page size.
                pn_max = (total + pz - 1) // pz
                print(f"市场 {market_id} 总数据量: {total}, 需要页数: {pn_max}, 当前获取数量: {len(rows)}, 每页最大拉取行数: {pz}")
            for row in rows:
                code_id_dict[row["f12"]] = row["f13"]
            fetched_cnt += len(rows)
            pn += 1  # next page
        print(f'获取 {market_id} 已获取总股票数: {fetched_cnt}, 总股票数: {total}')
    return code_id_dict
@lru_cache()
def code_id_map_em2() -> dict:
    """
    Eastmoney - instrument codes and their market ids.
    http://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Issues a single request (page size 50000) per market instead of paging.
    The result is memoized by ``lru_cache``.

    :return: code (``f12``) -> market id (``f13``); an empty dict as soon as
        one market returns no rows
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "m:1 t:2,m:1 t:23",  # placeholder; overwritten for each market
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
                 "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
                 "us": "m:105,m:106,m:107"}
    code_id_dict = dict()
    for market_id, fs in market_fs.items():
        params['fs'] = fs
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url, params=params, timeout=10)
        data_json = r.json()
        payload = data_json.get("data") if isinstance(data_json, dict) else None
        # "data" can be present but null; handle it like an empty listing.
        rows = payload.get("diff") if isinstance(payload, dict) else None
        if not rows:
            return dict()
        temp_df = pd.DataFrame(rows)
        # Store the codes under the market id (f13) of their group.
        for market_code, group in temp_df.groupby('f13'):
            code_id_dict.update(dict.fromkeys(group["f12"], market_code))
            print(f'get {market_id} stock list. f13: {market_code}, stock count: {len(group)}')
    return code_id_dict
def stock_zh_a_hist(
    symbol: str = "000001",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home page - Shanghai/Shenzhen/Beijing A shares - historical bars
    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: stock code; must be a key of the ``code_id_map_em()`` result
    :type symbol: str
    :param period: choice of {'daily', 'weekly', 'monthly'}
    :type period: str
    :param start_date: start date, sent to the API as ``beg``
    :type start_date: str
    :param end_date: end date, sent to the API as ``end``
    :type end_date: str
    :param adjust: choice of {"qfq": forward-adjusted, "hfq": backward-adjusted, "": unadjusted}
    :type adjust: str
    :return: one row per bar with numeric price/volume columns; an empty
        DataFrame when the response carries no kline data
    :rtype: pandas.DataFrame
    :raises KeyError: if ``period``, ``adjust`` or ``symbol`` is not a known key
    """
    market_by_code = code_id_map_em()
    fqt_by_adjust = {"qfq": "1", "hfq": "2", "": "0"}
    klt_by_period = {"daily": "101", "weekly": "102", "monthly": "103"}
    response = requests.get(
        "http://push2his.eastmoney.com/api/qt/stock/kline/get",
        params={
            "fields1": "f1,f2,f3,f4,f5,f6",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "klt": klt_by_period[period],
            "fqt": fqt_by_adjust[adjust],
            "secid": f"{market_by_code[symbol]}.{symbol}",
            "beg": start_date,
            "end": end_date,
            "_": "1623766962675",
        },
    )
    data_json = response.json()
    # Nothing to parse when "data" is empty/null or holds no klines.
    if not data_json["data"] or not data_json["data"]["klines"]:
        return pd.DataFrame()

    value_columns = [
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    # Each kline is one comma-separated string; split it into cells.
    bars = pd.DataFrame(
        [line.split(",") for line in data_json["data"]["klines"]]
    )
    bars.columns = ["日期"] + value_columns
    # The datetime index is assigned and then dropped again; the date column
    # itself stays a string.
    bars.index = pd.to_datetime(bars["日期"])
    bars.reset_index(inplace=True, drop=True)
    # Every column except the date arrives as text and is converted to numbers.
    for column in value_columns:
        bars[column] = pd.to_numeric(bars[column])
    return bars
def stock_zh_a_hist_min_em(
    symbol: str = "000001",
    start_date: str = "1979-09-01 09:32:00",
    end_date: str = "2222-01-01 09:32:00",
    period: str = "5",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home page - Shanghai/Shenzhen/Beijing A shares - intraday bars
    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    ``period == "1"`` reads the ``trends2`` endpoint (requested with
    ``ndays=5``); any other period reads the ``kline`` endpoint with the
    requested adjustment. Rows are then restricted to
    ``start_date`` .. ``end_date``.

    :param symbol: stock code; must be a key of the ``code_id_map_em()`` result
    :type symbol: str
    :param start_date: first timestamp to keep
    :type start_date: str
    :param end_date: last timestamp to keep
    :type end_date: str
    :param period: choice of {'1', '5', '15', '30', '60'}
    :type period: str
    :param adjust: choice of {'', 'qfq', 'hfq'}; only used when period is not '1'
    :type adjust: str
    :return: intraday bars with numeric value columns and the time as a string;
        an empty DataFrame when the response carries no rows
    :rtype: pandas.DataFrame
    :raises KeyError: if ``symbol`` is unknown, or ``adjust`` is not a valid
        choice for a non-'1' period
    """
    code_id_dict = code_id_map_em()
    adjust_map = {
        "": "0",
        "qfq": "1",
        "hfq": "2",
    }
    is_one_minute = period == "1"
    if is_one_minute:
        url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "ndays": "5",
            "iscr": "0",
            "secid": f"{code_id_dict[symbol]}.{symbol}",
            "_": "1623766962675",
        }
        rows_key = "trends"
        columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "最新价",
        ]
    else:
        url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "klt": period,
            "fqt": adjust_map[adjust],
            "secid": f"{code_id_dict[symbol]}.{symbol}",
            "beg": "0",
            "end": "20500000",
            "_": "1630930917857",
        }
        rows_key = "klines"
        columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "振幅",
            "涨跌幅",
            "涨跌额",
            "换手率",
        ]

    r = requests.get(url, params=params)
    data_json = r.json()
    # Same contract as stock_zh_a_hist: no data means an empty frame rather
    # than a TypeError from indexing a null payload.
    data = data_json.get("data")
    if not (data and data.get(rows_key)):
        return pd.DataFrame()

    temp_df = pd.DataFrame([item.split(",") for item in data[rows_key]])
    temp_df.columns = columns
    # Index by timestamp only to slice the requested window; reset_index
    # returns a fresh frame, so the assignments below do not write to a slice.
    temp_df.index = pd.to_datetime(temp_df["时间"])
    temp_df = temp_df.loc[start_date:end_date].reset_index(drop=True)
    # Everything except the timestamp arrives as text.
    for column in columns[1:]:
        temp_df[column] = pd.to_numeric(temp_df[column])
    # Normalise the timestamp text through datetime and back to str.
    temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
    if not is_one_minute:
        # Kline output places the change columns right after the prices.
        temp_df = temp_df[
            [
                "时间",
                "开盘",
                "收盘",
                "最高",
                "最低",
                "涨跌幅",
                "涨跌额",
                "成交量",
                "成交额",
                "振幅",
                "换手率",
            ]
        ]
    return temp_df
def stock_zh_a_hist_pre_min_em(
    symbol: str = "000001",
    start_time: str = "09:00:00",
    end_time: str = "15:50:00",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home page - Shanghai/Shenzhen/Beijing A shares - intraday
    bars including pre-market data
    http://quote.eastmoney.com/concept/sh603777.html?from=classic

    Requests one day of ``trends2`` data (``ndays=1``, ``iscr=1``) and keeps
    the rows between ``start_time`` and ``end_time`` on the date of the first
    returned row.

    :param symbol: stock code; must be a key of the ``code_id_map_em()`` result
    :type symbol: str
    :param start_time: first time of day to keep
    :type start_time: str
    :param end_time: last time of day to keep
    :type end_time: str
    :return: intraday bars with numeric value columns and the time as a string;
        an empty DataFrame when the response carries no rows
    :rtype: pandas.DataFrame
    :raises KeyError: if ``symbol`` is unknown
    """
    code_id_dict = code_id_map_em()
    url = "https://push2.eastmoney.com/api/qt/stock/trends2/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
        "ut": "fa5fd1943c7b386f172d6893dbfba10b",
        "ndays": "1",
        "iscr": "1",
        "iscca": "0",
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "_": "1623766962675",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    # Same contract as stock_zh_a_hist: no data means an empty frame. This
    # also protects the index[0] lookup below from an empty index.
    data = data_json.get("data")
    if not (data and data.get("trends")):
        return pd.DataFrame()

    columns = [
        "时间",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "最新价",
    ]
    temp_df = pd.DataFrame([item.split(",") for item in data["trends"]])
    temp_df.columns = columns
    temp_df.index = pd.to_datetime(temp_df["时间"])
    # The window is anchored to the date of the first returned row.
    date_format = temp_df.index[0].date().isoformat()
    window_start = date_format + " " + start_time
    window_end = date_format + " " + end_time
    # reset_index returns a fresh frame, so the assignments below do not
    # write to a slice of the original.
    temp_df = temp_df.loc[window_start:window_end].reset_index(drop=True)
    # Everything except the timestamp arrives as text.
    for column in columns[1:]:
        temp_df[column] = pd.to_numeric(temp_df[column])
    temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
    return temp_df
if __name__ == "__main__":
    # Quick smoke test: unadjusted daily bars for one symbol.
    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="000858",
        period="daily",
        start_date="20220516",
        end_date="20220722",
        adjust="",
    )
    print(stock_zh_a_hist_df)
    # Stop after the quick check. SystemExit is raised directly because the
    # ``exit`` helper is injected by the ``site`` module and is not available
    # under ``python -S`` or in frozen builds.
    raise SystemExit(0)

    # NOTE: everything below is unreachable while the early exit above is in
    # place; remove the exit to run the extended manual checks.
    stock_zh_a_spot_em_df = stock_zh_a_spot_em()
    print(stock_zh_a_spot_em_df)

    code_id_map_em_df = code_id_map_em()
    print(code_id_map_em_df)

    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="430090",
        period="daily",
        start_date="20220516",
        end_date="20220722",
        adjust="hfq",
    )
    print(stock_zh_a_hist_df)

    stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em(symbol="833454", period="1")
    print(stock_zh_a_hist_min_em_df)

    stock_zh_a_hist_pre_min_em_df = stock_zh_a_hist_pre_min_em(symbol="833454")
    print(stock_zh_a_hist_pre_min_em_df)

    stock_zh_a_spot_em_df = stock_zh_a_spot_em()
    print(stock_zh_a_spot_em_df)

    stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em(
        symbol="000001", period='1'
    )
    print(stock_zh_a_hist_min_em_df)

    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="833454",
        period="daily",
        start_date="20170301",
        end_date="20211115",
        adjust="hfq",
    )
    print(stock_zh_a_hist_df)