#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/11/19 12:00
Desc: Sina Finance - Dragon-Tiger List (龙虎榜)
https://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml
"""
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm


def stock_lhb_detail_daily_sina(
    trade_date: str = "20200730", symbol: str = "当日无价格涨跌幅限制的A股,出现异常波动停牌的股票"
) -> pd.DataFrame:
    """
    Dragon-Tiger List (龙虎榜) - daily detail
    http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml
    :param trade_date: trade date, e.g., trade_date="20200729"
    :type trade_date: str
    :param symbol: title of the table to fetch; pass "返回当前交易日所有可查询的指标"
        to get the list of titles available on that trade date
    :type symbol: str
    :return: daily detail for the requested table; a list of table titles when
        symbol == "返回当前交易日所有可查询的指标"
    :rtype: pandas.DataFrame
    """
    # Convert "20200730" into the "2020-07-30" format expected by the site.
    trade_date = "-".join([trade_date[:4], trade_date[4:6], trade_date[6:]])
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml"
    params = {"tradedate": trade_date}
    r = requests.get(url, params=params)
    soup = BeautifulSoup(r.text, "lxml")
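    # Each data table on the page is titled by a bold 14px <span>; collect the
    # non-empty titles so the requested symbol can be matched to its table.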
    table_name_list = [
        item.get_text().strip()
        for item in soup.find_all(
            "span", attrs={"style": "font-weight:bold;font-size:14px;"}
        )
        if item.get_text().strip() != ""
    ]
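    # A special symbol value asks for the list of available table titles rather
    # than a data table.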
    if symbol == "返回当前交易日所有可查询的指标":
        return table_name_list
    else:
        position_num = table_name_list.index(symbol)
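        # Parse the page once; when the requested title is the last one, its
        # data is read from three consecutive tables and stitched together.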
        all_table_list = pd.read_html(r.text, flavor="bs4", header=1)
        if len(table_name_list) == position_num + 1:
            temp_df = pd.concat(
                [
                    all_table_list[position_num],
                    all_table_list[position_num + 1],
                    all_table_list[position_num + 2],
                ],
                ignore_index=True,
            )
        else:
            temp_df = all_table_list[position_num].iloc[0:, :]
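        # read_html parses the stock codes as integers; restore the 6-digit form.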
temp_df["股票代码"] = temp_df["股票代码"].astype(str).str.zfill(6)
|
||
del temp_df["查看详情"]
|
||
temp_df.columns = ["序号", "股票代码", "股票名称", "收盘价", "对应值", "成交量", "成交额"]
|
||
return temp_df
|
||
|
||
|
||
def _find_last_page(url: str, recent_day: str = "60") -> int:
    """
    Find the number of the last page of a paginated Sina LHB listing.
    :param url: listing url, e.g.,
        http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml
    :type url: str
    :param recent_day: choice of {"5", "10", "30", "60"}
    :type recent_day: str
    :return: number of the last page
    :rtype: int
    """
    params = {
        "last": recent_day,
        "p": "1",
    }
    r = requests.get(url, params=params)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        # The second-to-last "page" link carries the highest page number shown.
        previous_page = int(soup.find_all(attrs={"class": "page"})[-2].text)
    except (IndexError, ValueError):
        previous_page = 1
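    # The pagination bar may only show part of the page range, so keep jumping
    # to the highest visible page until that number stops advancing.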
    if previous_page != 1:
        while True:
            params = {
                "last": recent_day,
                "p": previous_page,
            }
            r = requests.get(url, params=params)
            soup = BeautifulSoup(r.text, "lxml")
            last_page = int(soup.find_all(attrs={"class": "page"})[-2].text)
            if last_page != previous_page:
                previous_page = last_page
                continue
            else:
                break
    return previous_page


def stock_lhb_ggtj_sina(recent_day: str = "30") -> pd.DataFrame:
    """
    Dragon-Tiger List (龙虎榜) - per-stock appearance statistics
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml
    :param recent_day: choice of {"5": last 5 days; "10": last 10 days; "30": last 30 days; "60": last 60 days}
    :type recent_day: str
    :return: per-stock appearance statistics
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml"
    last_page_num = _find_last_page(url, recent_day)
    big_df = pd.DataFrame()
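    # Fetch every result page and append each page's table to one DataFrame.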
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "last": recent_day,
            "p": page,
        }
        r = requests.get(url, params=params)
        temp_df = pd.read_html(r.text)[0].iloc[0:, :]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    big_df.columns = ["股票代码", "股票名称", "上榜次数", "累积购买额", "累积卖出额", "净额", "买入席位数", "卖出席位数"]
    return big_df


def stock_lhb_yytj_sina(recent_day: str = "5") -> pd.DataFrame:
    """
    Dragon-Tiger List (龙虎榜) - brokerage branch (营业部) appearance statistics
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml
    :param recent_day: choice of {"5": last 5 days; "10": last 10 days; "30": last 30 days; "60": last 60 days}
    :type recent_day: str
    :return: brokerage branch appearance statistics
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml"
    last_page_num = _find_last_page(url, recent_day)
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "last": recent_day,
            "p": page,
        }
        r = requests.get(url, params=params)
        temp_df = pd.read_html(r.text)[0].iloc[0:, :]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df.columns = ["营业部名称", "上榜次数", "累积购买额", "买入席位数", "累积卖出额", "卖出席位数", "买入前三股票"]
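    # Coerce the count columns to numbers; non-numeric cells become NaN.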
    big_df["上榜次数"] = pd.to_numeric(big_df["上榜次数"], errors="coerce")
    big_df["买入席位数"] = pd.to_numeric(big_df["买入席位数"], errors="coerce")
    big_df["卖出席位数"] = pd.to_numeric(big_df["卖出席位数"], errors="coerce")
    return big_df


def stock_lhb_jgzz_sina(recent_day: str = "5") -> pd.DataFrame:
    """
    Dragon-Tiger List (龙虎榜) - institutional seat tracking
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml
    :param recent_day: choice of {"5": last 5 days; "10": last 10 days; "30": last 30 days; "60": last 60 days}
    :type recent_day: str
    :return: institutional seat tracking
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml"
    last_page_num = _find_last_page(url, recent_day)
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "last": recent_day,
            "p": page,
        }
        r = requests.get(url, params=params)
        temp_df = pd.read_html(r.text)[0].iloc[0:, :]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
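    # Drop the live quote columns (当前价, 涨跌幅); only the seat statistics are kept.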
    del big_df["当前价"]
    del big_df["涨跌幅"]
    big_df.columns = ["股票代码", "股票名称", "累积买入额", "买入次数", "累积卖出额", "卖出次数", "净额"]
    big_df["买入次数"] = pd.to_numeric(big_df["买入次数"], errors="coerce")
    big_df["卖出次数"] = pd.to_numeric(big_df["卖出次数"], errors="coerce")
    return big_df


def stock_lhb_jgmx_sina() -> pd.DataFrame:
    """
    Dragon-Tiger List (龙虎榜) - institutional seat trade details
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml
    :return: institutional seat trade details
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml"
    params = {
        "p": "1",
    }
    r = requests.get(url, params=params)
    soup = BeautifulSoup(r.text, "lxml")
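    # This listing has no recent_day filter, so read the last page number
    # directly from the first page's pagination links instead of _find_last_page.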
    try:
        last_page_num = int(soup.find_all(attrs={"class": "page"})[-2].text)
    except (IndexError, ValueError):
        last_page_num = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "p": page,
        }
        r = requests.get(url, params=params)
        temp_df = pd.read_html(r.text)[0].iloc[0:, :]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    return big_df


if __name__ == "__main__":
    indicator_name_list = stock_lhb_detail_daily_sina(
        trade_date="20221118", symbol="返回当前交易日所有可查询的指标"
    )
    print(indicator_name_list)

    stock_lhb_detail_daily_sina_df = stock_lhb_detail_daily_sina(
        trade_date="20221118", symbol="换手率达20%的证券"
    )
    print(stock_lhb_detail_daily_sina_df)

    stock_lhb_ggtj_sina_df = stock_lhb_ggtj_sina(recent_day="60")
    print(stock_lhb_ggtj_sina_df)

    stock_lhb_yytj_sina_df = stock_lhb_yytj_sina(recent_day="60")
    print(stock_lhb_yytj_sina_df)

    stock_lhb_jgzz_sina_df = stock_lhb_jgzz_sina(recent_day="30")
    print(stock_lhb_jgzz_sina_df)

    stock_lhb_jgmx_sina_df = stock_lhb_jgmx_sina()
    print(stock_lhb_jgmx_sina_df)