diff --git a/src/crawler/bak_em/codes.py b/src/crawler/bak_em/codes.py new file mode 100644 index 0000000..d761b33 --- /dev/null +++ b/src/crawler/bak_em/codes.py @@ -0,0 +1,129 @@ + +""" +Date: 2022/6/19 15:26 +Desc: 东方财富网-行情首页-沪深京 A 股 +""" +import requests +import pandas as pd +import time +import json + +from functools import lru_cache + +em_market_config = { + 'china_all' : { # 全部A股 + 'fs' : "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048", + 'cb' : 'jQuery37103053011545475828_1742564157141' + }, + 'hk_all' : { + 'fs' : "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2", + 'cb' : 'jQuery37103053011545475828_1742564157141' + }, + 'us_all' : { + 'fs' : "m:105,m:106,m:107", + 'cb' : 'jQuery37103053011545475828_1742564157141' + }, + 'us_china': { + 'fs' : "b:MK0201", + 'cb' : 'jQuery37103053011545475828_1742564157141' + }, + 'hk_connect': { + 'fs' : "b:DLMK0146,b:DLMK0144", + 'cb' : 'jQuery37103053011545475828_1742564157141' + }, + 'hk_china_corps': { + 'fs' : "b:DLMK0112", + 'cb' : 'jQuery37103053011545475828_1742564157141' + } + +} + +# 配置部分 +config_template = { + 'url': 'https://push2.eastmoney.com/api/qt/clist/get', + 'params': { + 'np': 1, + 'fltt': 1, + 'invt': 2, + 'cb': 'jQuery37103053011545475828_1742564157141', + 'fs': 'm:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048', + 'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23', + 'fid': 'f3', + 'pn': 1, + 'pz': 100, + 'po': 1, + 'dect': 1, + 'ut': 'fa5fd1943c7b386f172d6893dbfba10b', + '_': int(time.time() * 1000) + }, + 'headers': { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', + 'Referer': 'https://quote.eastmoney.com/center/gridlist.html' + }, + 'max_retries': 3, + 'retry_delay': 5 +} + +# 获取数据,带重试,并且对结果进行判断 +def fetch_data(config): + retries = 0 + while retries < config['max_retries']: + try: + response = requests.get(config['url'], params=config['params'], headers=config['headers']) + 
# Fetch the full code list of one market.
def get_market_codes(fs, cb, page_attempts=3):
    """Download every (code, name) pair of one Eastmoney market listing.

    Pages through the endpoint described by ``config_template`` requesting
    only fields ``f12`` (code) and ``f14`` (name), and stops once the page
    count derived from the ``total`` reported on page 1 has been reached.

    :param fs: market filter string (see ``em_market_config[...]['fs']``)
    :param cb: JSONP callback name that ``fetch_data`` looks for in the body
    :param page_attempts: how many times ``fetch_data`` (which already
        retries internally) is called for a single page before giving up
    :return: list of ``{'code': ..., 'name': ...}`` dicts; if a page cannot
        be downloaded the list contains only the pages fetched so far
    """
    # Work on a private copy.  The previous code aliased ``config_template``
    # and permanently overwrote its pn/cb/fs/fields for every other user of
    # the template.
    config = dict(config_template)
    config['params'] = dict(config_template['params'], cb=cb, fs=fs, fields='f12,f14')
    page_size = int(config['params']['pz'])

    codes = []
    page = 1
    max_pages = 100000  # placeholder until page 1 reports the real total
    while page <= max_pages:
        config['params']['pn'] = page

        # Bounded retry: the previous ``while True`` spun forever whenever
        # fetch_data() kept returning None (network down, page out of range).
        data = None
        for _ in range(max(1, page_attempts)):
            data = fetch_data(config)
            if data:
                break
        if not data:
            print(f"Error fetching data: giving up on page {page}")
            break

        if page == 1:
            total = data.get('total', 1000000)
            max_pages = (int(total) + page_size - 1) // page_size  # ceiling division

        codes.extend(
            {'code': row['f12'], 'name': row['f14']}
            for row in data.get('diff', [])
        )
        page += 1
    return codes
def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', pz=100) -> pd.DataFrame:
    """
    Eastmoney - Shanghai/Shenzhen/Beijing A shares - real-time quotes.
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Downloads the listing page by page through ``fetch_with_retries_em`` and
    returns one row per security with Chinese column names.

    :param fs: market filter string sent as the ``fs`` query parameter
    :param pz: requested page size
    :return: quotes table; an empty DataFrame when nothing could be fetched.
        If a later page fails, the rows collected so far are returned.
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": str(pz),
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": fs,
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }

    pn = 1           # current page
    pn_max = 10000   # upper bound until page 1 reports the real total
    all_data = []

    while pn <= pn_max:
        params["pn"] = str(pn)
        data_json = fetch_with_retries_em(url, params)

        # "data" can be JSON null; the old `"diff" not in data_json["data"]`
        # raised TypeError in that case instead of ending the loop.
        payload = (data_json or {}).get("data") or {}
        diff_data = payload.get("diff")
        if not diff_data:
            break

        all_data.extend(diff_data)

        # Derive the page count from page 1.
        if pn == 1:
            total = int(payload.get("total") or 0)
            # Divide by the number of rows actually delivered rather than the
            # requested pz, so that a server returning fewer rows per page
            # than asked for does not truncate the result.
            page_len = len(diff_data)
            pn_max = (total + page_len - 1) // page_len
            print(f'total pages: {pn_max}, total data lines: {total}, curr lines: {len(diff_data)}, page size: {pz}')

        pn += 1
        time.sleep(0.5)  # throttle requests

    if not all_data:
        return pd.DataFrame()

    temp_df = pd.DataFrame(all_data)
    column_map = {
        "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率",
        "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低",
        "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅",
        "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份",
        "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润",
        "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产",
        "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期"
    }
    temp_df.rename(columns=column_map, inplace=True)

    numeric_columns = [
        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌", "60日涨跌幅",
        "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产", "每股公积金", "每股未分配利润",
        "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "总股本", "已流通股份",
        "总市值", "流通市值"
    ]
    # Convert only the columns that are present, so a field missing from the
    # response does not abort the whole call with KeyError.
    for col in numeric_columns:
        if col in temp_df.columns:
            temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")

    for col in ("报告期", "上市时间"):
        if col in temp_df.columns:
            temp_df[col] = pd.to_datetime(temp_df[col], format='%Y%m%d', errors="coerce")

    return temp_df
int(data_json["data"]["total"]) + pn_max = (total // pz) + 1 # 计算最大页数 + print(f"市场 {market_id} 总数据量: {total}, 需要页数: {pn_max}, 当前获取数量: {len(temp_df)}, 每页最大拉取行数: {pz}") + + # 按 f13 进行分组并存入字典 + grouped = temp_df.groupby('f13') + for id, group in grouped: + code_id_dict.update(dict.fromkeys(group["f12"], id)) + fetched_cnt += len(group) + # print(f'获取 {market_id} 股票列表,f13: {id}, 股票数: {len(group)}, 已获取总股票数: {fetched_cnt}, 总股票数: {total}') + + pn += 1 # 翻页继续 + + print(f'获取 {market_id} 已获取总股票数: {fetched_cnt}, 总股票数: {total}') + + return code_id_dict + +@lru_cache() +def code_id_map_em2() -> dict: + """ + 东方财富-股票和市场代码 + http://quote.eastmoney.com/center/gridlist.html#hs_a_board + :return: 股票和市场代码 + :rtype: dict + """ + url = "http://80.push2.eastmoney.com/api/qt/clist/get" + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:1 t:2,m:1 t:23", + "fields": "f12,f13", + "_": "1623833739532", + } + market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048", + "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2", + "us": "m:105,m:106,m:107"} + code_id_dict = dict() + + for market_id, fs in market_fs.items(): + params['fs'] = fs + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df = pd.DataFrame(data_json["data"]["diff"]) + temp_df["market_id"] = 1 + # 把数据保存到字典中。按照f13的值分别存储 + grouped = temp_df.groupby('f13') + for id, group in grouped: + temp_df[f"{market_id}_{id}"] = id + #code_id_dict.update(dict(zip(group["f12"], str(id)))) + code_id_dict.update(dict.fromkeys(group["f12"], id)) + print(f'get {market_id} stock list. 
def stock_zh_a_hist(
    symbol: str = "000001",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - A shares - historical K-line (daily / weekly / monthly).
    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: security code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param period: choice of {'daily', 'weekly', 'monthly'}
    :type period: str
    :param start_date: first date, ``YYYYMMDD``
    :type start_date: str
    :param end_date: last date, ``YYYYMMDD``
    :type end_date: str
    :param adjust: choice of {"qfq": forward-adjusted, "hfq": backward-adjusted, "": unadjusted}
    :type adjust: str
    :return: one row per bar; empty DataFrame when the API returns no bars
    :rtype: pandas.DataFrame
    :raises KeyError: for an unknown ``symbol``, ``period`` or ``adjust``
    :raises requests.RequestException: on network failure, timeout or HTTP error status
    """
    code_id_dict = code_id_map_em()
    adjust_dict = {"qfq": "1", "hfq": "2", "": "0"}
    period_dict = {"daily": "101", "weekly": "102", "monthly": "103"}
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": period_dict[period],
        "fqt": adjust_dict[adjust],
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "beg": start_date,
        "end": end_date,
        "_": "1623766962675",
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    r = requests.get(url, params=params, timeout=10)
    r.raise_for_status()
    data_json = r.json()

    # "data" may be missing or null for unknown or empty securities.
    klines = (data_json.get("data") or {}).get("klines")
    if not klines:
        return pd.DataFrame()

    columns = [
        "日期",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    # fields2 asks for 12 fields (f116 last) but only 11 are named here;
    # keep the named prefix so an extra trailing value cannot break the
    # column assignment.
    temp_df = pd.DataFrame(
        [item.split(",")[: len(columns)] for item in klines],
        columns=columns,
    )

    # Every column except the date is numeric.
    for col in columns[1:]:
        temp_df[col] = pd.to_numeric(temp_df[col])

    return temp_df
def stock_zh_a_hist_pre_min_em(
    symbol: str = "000001",
    start_time: str = "09:00:00",
    end_time: str = "15:50:00",
) -> pd.DataFrame:
    """
    Eastmoney - A shares - intraday minute bars including pre-market data.
    http://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: security code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param start_time: first time of day to keep, ``HH:MM:SS``
    :type start_time: str
    :param end_time: last time of day to keep, ``HH:MM:SS``
    :type end_time: str
    :return: minute bars of the first date present in the response, limited
        to ``start_time``..``end_time``; empty DataFrame when the API
        returns no bars
    :rtype: pandas.DataFrame
    :raises KeyError: for an unknown ``symbol``
    :raises requests.RequestException: on network failure, timeout or HTTP error status
    """
    code_id_dict = code_id_map_em()
    url = "https://push2.eastmoney.com/api/qt/stock/trends2/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
        "ut": "fa5fd1943c7b386f172d6893dbfba10b",
        "ndays": "1",
        "iscr": "1",
        "iscca": "0",
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "_": "1623766962675",
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    r = requests.get(url, params=params, timeout=10)
    r.raise_for_status()
    data_json = r.json()

    # A null "data" or empty "trends" used to crash (TypeError on the
    # subscript, or IndexError on index[0]); return an empty frame instead,
    # as stock_zh_a_hist does.
    trends = (data_json.get("data") or {}).get("trends")
    if not trends:
        return pd.DataFrame()

    columns = [
        "时间",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "最新价",
    ]
    temp_df = pd.DataFrame([item.split(",") for item in trends])
    temp_df.columns = columns

    # Index by timestamp so the requested time window can be label-sliced.
    temp_df.index = pd.to_datetime(temp_df["时间"])
    day = temp_df.index[0].date().isoformat()
    temp_df = temp_df.loc[f"{day} {start_time}":f"{day} {end_time}"].reset_index(drop=True)

    for col in columns[1:]:
        temp_df[col] = pd.to_numeric(temp_df[col])
    # Normalise the timestamp text to pandas' string form.
    temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
    return temp_df
stock_zh_a_hist_pre_min_em(symbol="833454") + print(stock_zh_a_hist_pre_min_em_df) + + stock_zh_a_spot_em_df = stock_zh_a_spot_em() + print(stock_zh_a_spot_em_df) + + stock_zh_a_hist_min_em_df = stock_zh_a_hist_min_em( + symbol="000001", period='1' + ) + print(stock_zh_a_hist_min_em_df) + + stock_zh_a_hist_df = stock_zh_a_hist( + symbol="833454", + period="daily", + start_date="20170301", + end_date="20211115", + adjust="hfq", + ) + print(stock_zh_a_hist_df) + diff --git a/src/crawler/bak_em/net_utils.py b/src/crawler/bak_em/net_utils.py new file mode 100644 index 0000000..e69de29 diff --git a/src/crawler/bak_em/url_config.py b/src/crawler/bak_em/url_config.py new file mode 100644 index 0000000..b04c377 --- /dev/null +++ b/src/crawler/bak_em/url_config.py @@ -0,0 +1,69 @@ +import time + +# 配置部分 +china_all_config = { + 'url': 'https://push2.eastmoney.com/api/qt/clist/get', + 'params': { + 'np': 1, + 'fltt': 1, + 'invt': 2, + 'cb': 'jQuery37103053011545475828_1742564157141', + 'fs': 'm:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048', + 'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23', + 'fid': 'f3', + 'pn': 1, + 'pz': 20, + 'po': 1, + 'dect': 1, + 'ut': 'fa5fd1943c7b386f172d6893dbfba10b', + '_': int(time.time() * 1000) + }, + 'headers': { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', + 'Referer': 'https://quote.eastmoney.com/center/gridlist.html' + }, + 'max_retries': 3, + 'retry_delay': 5 +} + +''' + curl 'https://push2.eastmoney.com/api/qt/clist/get?np=1&fltt=1&invt=2&cb=jQuery37103053011545475828_1742564157141&fs=m%3A128%2Bt%3A3%2Cm%3A128%2Bt%3A4%2Cm%3A128%2Bt%3A1%2Cm%3A128%2Bt%3A2&fields=f12%2Cf13%2Cf14%2Cf19%2Cf1%2Cf2%2Cf4%2Cf3%2Cf152%2Cf17%2Cf18%2Cf15%2Cf16%2Cf5%2Cf6&fid=f3&pn=2&pz=20&po=1&dect=1&ut=fa5fd1943c7b386f172d6893dbfba10b&wbp2u=%7C0%7C0%7C0%7Cweb&_=1742564157184' \ + -H 'Accept: */*' \ + -H 'Accept-Language: zh-CN,zh;q=0.9' \ + -H 
'Connection: keep-alive' \ + -b 'sid=173318833; vtpst=%7c; st_si=63334912574582; qgqp_b_id=5107797c7296e8e7fc529ab2daa8bf8b; AUTH_FUND.EASTMONEY.COM_GSJZ=AUTH*TTJJ*TOKEN; fullscreengg=1; fullscreengg2=1; xsb_history=831566%7C%u76DB%u5927%u5728%u7EBF%2C874086%7C%u5C0F%u5510%u79D1%u6280; HAList=ty-116-00700-%u817E%u8BAF%u63A7%u80A1%2Cty-124-HSTECH-%u6052%u751F%u79D1%u6280%u6307%u6570%2Cty-1-000300-%u6CAA%u6DF1300%2Cty-0-159995-%u82AF%u7247ETF%2Cty-106-RDDT-Reddit%20Inc-A%2Cty-116-08321-%u6CF0%u9526%u63A7%u80A1%2Cty-105-AAPL-%u82F9%u679C%2Cty-105-NLSPW-NLS%20Pharmaceutics%20Ltd%20Wt%2Cty-116-09890-%u4E2D%u65ED%u672A%u6765%2Cty-116-00396-%u5174%u5229%28%u9999%u6E2F%29%u63A7%u80A1; has_jump_to_web=1; st_asi=delete; st_pvi=05050221710102; st_sp=2022-01-20%2014%3A22%3A55; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=525; st_psi=20250322081050269-113200301321-1939683963' \ + -H 'Referer: https://quote.eastmoney.com/center/gridlist.html' \ + -H 'Sec-Fetch-Dest: script' \ + -H 'Sec-Fetch-Mode: no-cors' \ + -H 'Sec-Fetch-Site: same-site' \ + -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36' \ + -H 'sec-ch-ua: "Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"' \ + -H 'sec-ch-ua-mobile: ?0' \ + -H 'sec-ch-ua-platform: "macOS"' +''' + +hk_all_config = { + 'url': 'https://push2.eastmoney.com/api/qt/clist/get', + 'params': { + 'np': 1, + 'fltt': 1, + 'invt': 2, + 'cb': 'jQuery37103053011545475828_1742564157141', + 'fs': ' m:128+t:3,m:128+t:4,m:128+t:1,m:128+t:2', + 'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23', + 'fid': 'f3', + 'pn': 1, + 'pz': 20, + 'po': 1, + 'dect': 1, + 'ut': 'fa5fd1943c7b386f172d6893dbfba10b', + '_': int(time.time() * 1000) + }, + 'headers': { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', + 'Referer': 
def fetch_with_retries_em(url, params, max_retries=3, delay=2):
    """GET ``url`` and return the decoded JSON body, retrying on failure.

    :param url: endpoint to request
    :param params: query parameters passed to ``requests.get``
    :param max_retries: total number of attempts
    :param delay: seconds to wait between two attempts
    :return: the parsed JSON value, or ``None`` when every attempt failed
        (or when ``max_retries`` is 0)
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=5)
            response.raise_for_status()
            return response.json()
        # ValueError is caught as well: depending on the installed requests
        # version an invalid JSON body surfaces as a plain ValueError, which
        # previously escaped the retry loop.
        except (requests.RequestException, ValueError) as e:
            print(f"请求失败,第 {attempt + 1} 次重试: {e}")
            # No point in sleeping after the last attempt.
            if attempt + 1 < max_retries:
                time.sleep(delay)
    return None
def stock_zh_a_spot_em_old(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame:
    """
    Eastmoney - Shanghai/Shenzhen/Beijing A shares - real-time quotes
    (legacy single-request variant).
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Sends one request with ``pz=50000``, names the returned columns by
    position, reorders them and converts their types.

    :param fs: market filter string sent as the ``fs`` query parameter
    :return: real-time quotes; empty DataFrame when ``diff`` is empty
    :rtype: pandas.DataFrame
    """
    query = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": fs,
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }
    payload = requests.get("http://82.push2.eastmoney.com/api/qt/clist/get", params=query).json()
    records = payload["data"]["diff"]
    if not records:
        return pd.DataFrame()

    frame = pd.DataFrame(records)

    # Column names in the positional order the response delivers them.
    frame.columns = [
        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "市盈率动",
        "量比", "5分钟涨跌", "代码", "名称", "最高", "最低", "今开", "昨收",
        "总市值", "流通市值", "涨速", "市净率", "60日涨跌幅", "年初至今涨跌幅", "上市时间", "加权净资产收益率",
        "总股本", "已流通股份", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "每股未分配利润", "毛利率",
        "资产负债率", "每股公积金", "所处行业", "每股收益", "每股净资产", "市盈率静", "市盈率TTM", "报告期",
    ]

    # Presentation order of the result.
    output_order = [
        "代码", "名称", "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅",
        "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌",
        "60日涨跌幅", "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产",
        "每股公积金", "每股未分配利润", "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润",
        "归属净利润同比增长", "报告期", "总股本", "已流通股份", "总市值", "流通市值", "所处行业", "上市时间",
    ]
    frame = frame[output_order]

    untouched = ("代码", "名称", "所处行业")   # kept exactly as received
    as_dates = ("报告期", "上市时间")           # YYYYMMDD values
    for label in output_order:
        if label in untouched:
            continue
        if label in as_dates:
            frame[label] = pd.to_datetime(frame[label], format='%Y%m%d', errors="coerce")
        else:
            frame[label] = pd.to_numeric(frame[label], errors="coerce")

    return frame
""" + url = "http://80.push2.eastmoney.com/api/qt/clist/get" + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:1 t:2,m:1 t:23", + "fields": "f12", + "_": "1623833739532", + } + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df = pd.DataFrame(data_json["data"]["diff"]) + temp_df["market_id"] = 1 + temp_df.columns = ["sh_code", "sh_id"] + code_id_dict = dict(zip(temp_df["sh_code"], temp_df["sh_id"])) + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:0 t:6,m:0 t:80", + "fields": "f12", + "_": "1623833739532", + } + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df_sz = pd.DataFrame(data_json["data"]["diff"]) + temp_df_sz["sz_id"] = 0 + code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["sz_id"]))) + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:0 t:81 s:2048", + "fields": "f12", + "_": "1623833739532", + } + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df_sz = pd.DataFrame(data_json["data"]["diff"]) + temp_df_sz["bj_id"] = 0 + code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["bj_id"]))) + + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:128 t:3", + "fields": "f12", + "_": "1623833739532", + } + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df_sz = pd.DataFrame(data_json["data"]["diff"]) + 
temp_df_sz["hk_main"] = 116 + code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["hk_main"]))) + + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:128 t:4", + "fields": "f12", + "_": "1623833739532", + } + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df_sz = pd.DataFrame(data_json["data"]["diff"]) + temp_df_sz["hk_cyb"] = 116 + code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["hk_cyb"]))) + + params = { + "pn": "1", + "pz": "50000", + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "m:105,m:106,m:107", + "fields": "f12,f13", + "_": "1623833739532", + } + r = requests.get(url, params=params) + data_json = r.json() + if not data_json["data"]["diff"]: + return dict() + temp_df_sz = pd.DataFrame(data_json["data"]["diff"]) + # 把数据保存到字典中。按照f13的值分别存储 + grouped = temp_df_sz.groupby('f13') + for id, group in grouped: + temp_df_sz[f"us_all_{id}"] = id + code_id_dict.update(dict(zip(group["f12"], str(id)))) + #print(f"分组 f13 = {id}:") + #print(group) + #temp_df_sz["us_all"] = 105 + #code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["us_all"]))) + print(code_id_dict) + return code_id_dict + +@lru_cache() +def code_id_map_em() -> dict: + url = "http://80.push2.eastmoney.com/api/qt/clist/get" + pz = 200 # 固定每页 200 条 + pn = 1 # 初始页码 + pn_max = 10000 # 预设一个较大的初始值 + + params = { + "pn": str(pn), + "pz": str(pz), + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "", + "fields": "f12,f13", + "_": "1623833739532", + } + + market_fs = { + "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048", + "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2", + "us": "m:105,m:106,m:107" + } + + code_id_dict = {} + + for market_id, fs 
@lru_cache()
def code_id_map_em2() -> dict:
    """
    Eastmoney - security code to market id map, one large request per market.

    http://quote.eastmoney.com/center/gridlist.html#hs_a_board

    Unlike the paged variant, this asks for up to 50000 rows in a single
    request for each market filter (mainland A shares, Hong Kong, US).

    If any market returns no rows the function returns an empty dict and
    drops what was collected so far. Because of ``lru_cache`` that empty
    result is then served to every later caller.

    :return: ``{f12: f13}`` for every returned row, or ``{}`` if a market
        came back empty
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": "1",
        "pz": "50000",
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f3",
        "fs": "",  # filled in per market below
        "fields": "f12,f13",
        "_": "1623833739532",
    }
    market_fs = {
        "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
        "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
        "us": "m:105,m:106,m:107",
    }
    code_id_dict = {}

    for market_id, fs in market_fs.items():
        params["fs"] = fs
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url, params=params, timeout=30)
        data_json = r.json()
        # "data" can be null; treat that like an empty row list instead of
        # failing with a TypeError on the subscript.
        rows = (data_json.get("data") or {}).get("diff")
        if not rows:
            return {}
        temp_df = pd.DataFrame(rows)
        # Store the codes of each f13 group under that f13 value.
        for market_code, group in temp_df.groupby("f13"):
            code_id_dict.update(dict.fromkeys(group["f12"], market_code))
            print(f'get {market_id} stock list. f13: {market_code}, stock count: {len(group)}')

    return code_id_dict
def stock_zh_a_hist(
    symbol: str = "000001",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home - Shanghai/Shenzhen/Beijing A shares - daily bars.

    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    :param symbol: security code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param period: choice of {'daily', 'weekly', 'monthly'}
    :type period: str
    :param start_date: first date, ``YYYYMMDD``
    :type start_date: str
    :param end_date: last date, ``YYYYMMDD``
    :type end_date: str
    :param adjust: choice of {"qfq": forward-adjusted, "hfq":
        backward-adjusted, "": unadjusted}
    :type adjust: str
    :return: one row per bar with the date column as string and all other
        columns numeric; an empty DataFrame when the service has no bars
    :rtype: pandas.DataFrame
    :raises KeyError: if ``period`` or ``adjust`` is not a listed choice, or
        ``symbol`` is missing from the code map
    """
    adjust_dict = {"qfq": "1", "hfq": "2", "": "0"}
    period_dict = {"daily": "101", "weekly": "102", "monthly": "103"}

    # Validate the purely local arguments first, so that a typo fails at once
    # instead of after the slow remote code-map download.
    if period not in period_dict:
        raise KeyError(
            f"period must be one of {sorted(period_dict)}, got {period!r}"
        )
    if adjust not in adjust_dict:
        raise KeyError(
            f"adjust must be one of {sorted(adjust_dict)}, got {adjust!r}"
        )

    code_id_dict = code_id_map_em()
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": period_dict[period],
        "fqt": adjust_dict[adjust],
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "beg": start_date,
        "end": end_date,
        "_": "1623766962675",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=30)
    data_json = r.json()
    payload = data_json.get("data")
    if not (payload and payload.get("klines")):
        return pd.DataFrame()

    columns = [
        "日期",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "振幅",
        "涨跌幅",
        "涨跌额",
        "换手率",
    ]
    # Each kline is one comma-separated string; split it into fields.
    temp_df = pd.DataFrame([item.split(",") for item in payload["klines"]])
    temp_df.columns = columns

    # Everything except the date column is numeric.
    numeric_columns = columns[1:]
    temp_df[numeric_columns] = temp_df[numeric_columns].apply(pd.to_numeric)

    return temp_df
def _minute_bars_to_frame(rows, columns, start_date, end_date):
    """Parse comma-separated bar strings into a typed frame clipped to [start_date, end_date]."""
    time_column = columns[0]
    frame = pd.DataFrame([item.split(",") for item in rows])
    frame.columns = columns
    # Index by timestamp only to slice the requested window, then drop it.
    frame.index = pd.to_datetime(frame[time_column])
    frame = frame.loc[start_date:end_date].reset_index(drop=True)
    value_columns = columns[1:]
    frame[value_columns] = frame[value_columns].apply(pd.to_numeric)
    # Round-trip through datetime to normalise the timestamp text.
    frame[time_column] = pd.to_datetime(frame[time_column]).astype(str)
    return frame


def stock_zh_a_hist_min_em(
    symbol: str = "000001",
    start_date: str = "1979-09-01 09:32:00",
    end_date: str = "2222-01-01 09:32:00",
    period: str = "5",
    adjust: str = "",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home - Shanghai/Shenzhen/Beijing A shares - intraday bars.

    https://quote.eastmoney.com/concept/sh603777.html?from=classic

    ``period == "1"`` reads the ``trends2`` endpoint (last 5 days) and does
    not use ``adjust``; every other period reads the ``kline`` endpoint.

    :param symbol: security code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param start_date: start of the returned window, ``YYYY-MM-DD HH:MM:SS``
    :type start_date: str
    :param end_date: end of the returned window, ``YYYY-MM-DD HH:MM:SS``
    :type end_date: str
    :param period: choice of {'1', '5', '15', '30', '60'}
    :type period: str
    :param adjust: choice of {'', 'qfq', 'hfq'}
    :type adjust: str
    :return: bars inside the window with the time column as string and all
        other columns numeric; an empty DataFrame when the service has no bars
    :rtype: pandas.DataFrame
    :raises KeyError: if ``adjust`` is not a listed choice (kline path only),
        or ``symbol`` is missing from the code map
    """
    adjust_map = {
        "": "0",
        "qfq": "1",
        "hfq": "2",
    }
    one_minute = period == "1"
    # Fail fast on a bad adjust value before any network traffic. The
    # 1-minute path never used adjust, so it stays unchecked there.
    if not one_minute and adjust not in adjust_map:
        raise KeyError(
            f"adjust must be one of {sorted(adjust_map)}, got {adjust!r}"
        )

    code_id_dict = code_id_map_em()
    secid = f"{code_id_dict[symbol]}.{symbol}"

    if one_minute:
        url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "ndays": "5",
            "iscr": "0",
            "secid": secid,
            "_": "1623766962675",
        }
        rows_key = "trends"
        columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "最新价",
        ]
    else:
        url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "klt": period,
            "fqt": adjust_map[adjust],
            "secid": secid,
            "beg": "0",
            "end": "20500000",
            "_": "1630930917857",
        }
        rows_key = "klines"
        columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "振幅",
            "涨跌幅",
            "涨跌额",
            "换手率",
        ]

    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=30)
    data_json = r.json()
    # "data" can be null; return an empty frame like stock_zh_a_hist does
    # instead of failing on the subscript.
    rows = (data_json.get("data") or {}).get(rows_key)
    if not rows:
        return pd.DataFrame()

    temp_df = _minute_bars_to_frame(rows, columns, start_date, end_date)
    if one_minute:
        return temp_df

    # The kline path returns its columns in a fixed presentation order.
    return temp_df[
        [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "涨跌幅",
            "涨跌额",
            "成交量",
            "成交额",
            "振幅",
            "换手率",
        ]
    ]
def stock_zh_a_hist_pre_min_em(
    symbol: str = "000001",
    start_time: str = "09:00:00",
    end_time: str = "15:50:00",
) -> pd.DataFrame:
    """
    Eastmoney - quotes home - Shanghai/Shenzhen/Beijing A shares - intraday
    bars including pre-market data.

    http://quote.eastmoney.com/concept/sh603777.html?from=classic

    Requests one day of ``trends2`` data and keeps the rows between
    ``start_time`` and ``end_time`` on the date of the first returned row.

    :param symbol: security code; must be a key of ``code_id_map_em()``
    :type symbol: str
    :param start_time: start of the window, ``HH:MM:SS``
    :type start_time: str
    :param end_time: end of the window, ``HH:MM:SS``
    :type end_time: str
    :return: bars inside the window with the time column as string and all
        other columns numeric; an empty DataFrame when the service has no bars
    :rtype: pandas.DataFrame
    :raises KeyError: if ``symbol`` is missing from the code map
    """
    code_id_dict = code_id_map_em()
    url = "https://push2.eastmoney.com/api/qt/stock/trends2/get"
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
        "ut": "fa5fd1943c7b386f172d6893dbfba10b",
        "ndays": "1",
        "iscr": "1",
        "iscca": "0",
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "_": "1623766962675",
    }
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=30)
    data_json = r.json()
    # "data" can be null and "trends" can be empty; both would otherwise
    # fail below (subscript on None / index[0] on an empty index).
    trends = (data_json.get("data") or {}).get("trends")
    if not trends:
        return pd.DataFrame()

    columns = [
        "时间",
        "开盘",
        "收盘",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "最新价",
    ]
    temp_df = pd.DataFrame([item.split(",") for item in trends])
    temp_df.columns = columns
    temp_df.index = pd.to_datetime(temp_df["时间"])

    # Build the window bounds from the date of the first returned bar.
    day = temp_df.index[0].date().isoformat()
    temp_df = temp_df.loc[
        day + " " + start_time : day + " " + end_time
    ].reset_index(drop=True)

    numeric_columns = columns[1:]
    temp_df[numeric_columns] = temp_df[numeric_columns].apply(pd.to_numeric)
    # Round-trip through datetime to normalise the timestamp text.
    temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
    return temp_df
if __name__ == "__main__":
    # Manual smoke test: download a short window of unadjusted daily bars for
    # one symbol and print it. The original script terminated right after
    # this call, so the statements that followed never ran and are dropped.
    stock_zh_a_hist_df = stock_zh_a_hist(
        symbol="000858",
        period="daily",
        start_date="20220516",
        end_date="20220722",
        adjust="",
    )
    print(stock_zh_a_hist_df)