Files
stock/src/crawler/bak_em/codes.py
2025-03-22 10:01:30 +08:00

129 lines
3.8 KiB
Python

"""
Date: 2022/6/19 15:26
Desc: 东方财富网-行情首页-沪深京 A 股
"""
import requests
import pandas as pd
import time
import json
from functools import lru_cache
# Per-market request settings: each entry carries the `fs` filter string for
# that market plus the JSONP callback name (`cb`), which is the same for all.
em_market_config = {
    market: {
        'fs': fs_filter,
        'cb': 'jQuery37103053011545475828_1742564157141',
    }
    for market, fs_filter in (
        # all A-shares
        ('china_all', "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"),
        ('hk_all', "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2"),
        ('us_all', "m:105,m:106,m:107"),
        ('us_china', "b:MK0201"),
        ('hk_connect', "b:DLMK0146,b:DLMK0144"),
        ('hk_china_corps', "b:DLMK0112"),
    )
}
# Configuration section: default request settings consumed by fetch_data().
config_template = {
    'url': 'https://push2.eastmoney.com/api/qt/clist/get',
    'params': {
        # Opaque API parameters kept exactly as found.
        # NOTE(review): presumably copied from the website's own request - confirm.
        'np': 1,
        'fltt': 1,
        'invt': 2,
        # JSONP callback name; fetch_data() looks for it in the response body.
        'cb': 'jQuery37103053011545475828_1742564157141',
        # Market filter. Terms are separated by plain spaces: `requests`
        # URL-encodes parameter values itself, so a literal '+' here would
        # be sent as '%2B' rather than as an encoded space.
        'fs': 'm:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048',
        # Fields requested for every row.
        'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23',
        'fid': 'f3',
        'pn': 1,    # page number
        'pz': 100,  # page size
        'po': 1,
        'dect': 1,
        'ut': 'fa5fd1943c7b386f172d6893dbfba10b',
        # Millisecond timestamp. NOTE: evaluated once at import time, so every
        # request built from this template shares it unless the caller
        # overrides it.
        '_': int(time.time() * 1000)
    },
    'headers': {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'Referer': 'https://quote.eastmoney.com/center/gridlist.html'
    },
    'max_retries': 3,  # attempts made by fetch_data() before giving up
    'retry_delay': 5   # seconds slept between attempts
}
# Decode a JSONP response body and validate its shape.
def _parse_jsonp_payload(text, callback):
    """Return the ``data`` object wrapped in a ``callback(...)`` JSONP body.

    Raises:
        ValueError: if the callback name is absent, the wrapped text is not
            valid JSON, or the decoded object has no dict ``data`` that
            contains ``diff``.
    """
    if callback not in text:
        raise ValueError("Callback not found in response")
    # Drop the "callback(" prefix, then the trailing ");" (tolerating any
    # surrounding whitespace/newline that would otherwise break json.loads).
    body = text.split(callback + '(')[-1].strip().rstrip(');')
    decoded = json.loads(body)  # JSONDecodeError is a ValueError subclass
    payload = decoded.get('data') if isinstance(decoded, dict) else None
    # `data` may be present but null; treat that as an invalid response
    # instead of letting a TypeError escape the retry loop.
    if not isinstance(payload, dict) or 'diff' not in payload:
        raise ValueError("Invalid data format")
    return payload


# Fetch data with retries, validating the result.
def fetch_data(config):
    """Request one JSONP page and return its decoded ``data`` payload.

    Args:
        config: dict with ``url``, ``params`` (including the JSONP callback
            name under ``cb``), ``headers``, ``max_retries`` and
            ``retry_delay`` (seconds between attempts). An optional
            ``timeout`` (seconds, default 10) bounds each HTTP request so a
            stalled connection cannot hang forever.

    Returns:
        The response's ``data`` dict (guaranteed to contain ``diff``), or
        ``None`` if every attempt failed.
    """
    max_retries = config['max_retries']
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(
                config['url'],
                params=config['params'],
                headers=config['headers'],
                timeout=config.get('timeout', 10),
            )
            response.raise_for_status()
            return _parse_jsonp_payload(response.text, config['params']['cb'])
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching data: {e}")
            # Only wait when another attempt will actually follow.
            if attempt < max_retries:
                time.sleep(config['retry_delay'])
    return None
# Fetch the code list.
def get_market_codes(fs, cb):
    """Collect the code and name of every row in a market by paging the list API.

    Args:
        fs: value sent as the ``fs`` request parameter (market filter).
        cb: JSONP callback name sent as ``cb`` and expected in the response.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dicts, taken from fields
        ``f12`` and ``f14`` of each returned row, in page order.
    """
    page_size = int(config_template['params']['pz'])
    # Provisional cap; replaced by the real page count once page 1 reports
    # `total`.
    max_pages = 100000
    page = 1
    codes = []
    while page <= max_pages:
        # NOTE: a page is retried until fetch_data() succeeds; there is no
        # upper bound on the number of rounds.
        while True:
            # Build a fresh config for each request instead of mutating the
            # shared module-level template, and refresh the `_` timestamp.
            config = {
                **config_template,
                'params': {
                    **config_template['params'],
                    'pn': page,
                    'cb': cb,
                    'fs': fs,
                    'fields': 'f12,f14',
                    '_': int(time.time() * 1000),
                },
            }
            data = fetch_data(config)
            if data:
                break
        if page == 1:
            total = data.get('total', 1000000)
            # Ceiling division: number of pages needed to cover `total` rows.
            max_pages = (int(total) + page_size - 1) // page_size
        page += 1
        codes.extend(
            {'code': row['f12'], 'name': row['f14']}
            for row in data.get('diff', [])
        )
    return codes
if __name__ == "__main__":
    # Script entry: fetch the codes for the 'hk_connect' market entry, print
    # them as pretty-printed JSON, then print how many were collected.
    market = em_market_config['hk_connect']
    fetched = get_market_codes(market['fs'], market['cb'])
    all_data = list(fetched) if fetched else []
    print(json.dumps(all_data, indent=4, ensure_ascii=False))
    print(f'total codes: {len(all_data)}')