129 lines
3.8 KiB
Python
129 lines
3.8 KiB
Python
|
|
"""
|
|
Date: 2022/6/19 15:26
|
|
Desc: 东方财富网-行情首页-沪深京 A 股
|
|
"""
|
|
import requests
|
|
import pandas as pd
|
|
import time
|
|
import json
|
|
|
|
from functools import lru_cache
|
|
|
|
# Market presets keyed by market name. Every preset is a dict holding the
# ``fs`` filter string and the ``cb`` JSONP callback name for that market.
em_market_config = {
    market: {
        'fs': fs_filter,
        'cb': 'jQuery37103053011545475828_1742564157141',
    }
    for market, fs_filter in (
        # all A-shares
        ('china_all', "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"),
        ('hk_all', "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2"),
        ('us_all', "m:105,m:106,m:107"),
        ('us_china', "b:MK0201"),
        ('hk_connect', "b:DLMK0146,b:DLMK0144"),
        ('hk_china_corps', "b:DLMK0112"),
    )
}
|
|
|
|
# Configuration: request template for the Eastmoney list endpoint.
# NOTE: this is a shared module-level object; mutating it (or its nested
# ``params`` dict) affects every later request built from it.
config_template = {
    'url': 'https://push2.eastmoney.com/api/qt/clist/get',
    'params': {
        'np': 1,
        'fltt': 1,
        'invt': 2,
        # JSONP callback name; the response body is wrapped in ``<cb>(...)``.
        'cb': 'jQuery37103053011545475828_1742564157141',
        # Filter clauses are space-separated. requests form-encodes a space
        # as "+", whereas a literal "+" here would be sent as "%2B".
        'fs': 'm:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048',
        'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23',
        'fid': 'f3',
        'pn': 1,    # page number
        'pz': 100,  # page size
        'po': 1,
        'dect': 1,
        'ut': 'fa5fd1943c7b386f172d6893dbfba10b',
        # Millisecond timestamp; evaluated once, when the module is imported.
        '_': int(time.time() * 1000),
    },
    'headers': {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'Referer': 'https://quote.eastmoney.com/center/gridlist.html',
    },
    'max_retries': 3,  # attempts per request
    'retry_delay': 5,  # seconds to wait between attempts
}
|
|
|
|
def fetch_data(config):
    """Fetch one page from the list API, with retries and result validation.

    The response is expected to be JSONP of the form ``<cb>(<json>);``. The
    callback wrapper is stripped, the JSON is decoded and the payload is
    checked for a ``data`` object containing ``diff``. Any failed attempt
    (network/HTTP error, missing callback, undecodable or incomplete JSON)
    is reported with ``print`` and retried.

    Args:
        config: Mapping with the keys ``url``, ``params`` (must include
            ``cb``), ``headers``, ``max_retries`` and ``retry_delay``
            (seconds). An optional ``timeout`` key (seconds, default 10)
            bounds each HTTP request.

    Returns:
        The ``data`` dict of the decoded response (always containing
        ``diff``), or ``None`` if every attempt failed.
    """
    max_retries = config['max_retries']
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(
                config['url'],
                params=config['params'],
                headers=config['headers'],
                # Without a timeout a stalled connection would block forever.
                timeout=config.get('timeout', 10),
            )
            response.raise_for_status()

            # Validate the response: it must open with the JSONP callback.
            callback = config['params']['cb']
            _, opener, wrapped = response.text.partition(callback + '(')
            if not opener:
                raise ValueError("Callback not found in response")

            # Extract the JSON by dropping the trailing ");" (tolerating
            # surrounding whitespace) without touching the payload itself.
            json_text = wrapped.strip().rstrip(';').rstrip()
            if json_text.endswith(')'):
                json_text = json_text[:-1]
            payload = json.loads(json_text)

            # ``data`` may be null (e.g. no rows), so check its type before
            # looking inside it instead of letting a TypeError escape.
            page_data = payload.get('data') if isinstance(payload, dict) else None
            if not isinstance(page_data, dict) or 'diff' not in page_data:
                raise ValueError("Invalid data format")
            return page_data
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching data: {e}")
            # Only wait when another attempt will actually follow.
            if attempt < max_retries:
                time.sleep(config['retry_delay'])
    return None
|
|
|
|
def get_market_codes(fs, cb):
    """Fetch the code and name of every security matching a market filter.

    Pages through the list API with ``fetch_data``, requesting only the
    ``f12`` (code) and ``f14`` (name) fields. The page count is derived from
    the ``total`` reported on the first page and the template's page size.

    Args:
        fs: Filter string sent as the ``fs`` query parameter (for example the
            ``fs`` value of an ``em_market_config`` entry).
        cb: JSONP callback name sent as the ``cb`` query parameter.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dicts in the order the API
        returned them. If a page still cannot be fetched after
        ``fetch_data`` has exhausted its retries, paging stops and the codes
        collected so far are returned.
    """
    # Build a private copy of the request config so the shared module-level
    # template (and its nested params) is never mutated.
    params = dict(config_template['params'])
    params.update({'cb': cb, 'fs': fs, 'fields': 'f12,f14'})
    config = dict(config_template, params=params)
    page_size = int(params['pz'])

    codes = []
    page = 1
    max_pages = 1  # replaced by the real page count once page 1 arrives
    while page <= max_pages:
        params['pn'] = page
        # Refresh the millisecond timestamp for every request.
        params['_'] = int(time.time() * 1000)

        data = fetch_data(config)
        if not data:
            # fetch_data already retried; looping again here could spin
            # forever on a persistent failure.
            print(f"Stopped at page {page}: no data after retries")
            break

        if page == 1:
            total = int(data.get('total', 1000000))
            max_pages = (total + page_size - 1) // page_size  # ceiling division

        codes.extend(
            {'code': row['f12'], 'name': row['f14']}
            for row in data.get('diff', [])
        )
        page += 1
    return codes
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: list every code in the 'hk_connect' preset as pretty-printed
    # JSON, followed by the number of codes found.
    preset = em_market_config['hk_connect']
    fetched = get_market_codes(preset['fs'], preset['cb'])
    all_data = [*fetched] if fetched else []

    print(json.dumps(all_data, indent=4, ensure_ascii=False))
    print(f'total codes: {len(all_data)}')