Files
stock/stockapp/src/crawling/stock_lhb_sina.py
2024-10-23 12:11:11 +08:00

217 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/11/19 12:00
Desc: 新浪财经-龙虎榜
https://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml
"""
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
def stock_lhb_detail_daily_sina(
    trade_date: str = "20200730", symbol: str = "当日无价格涨跌幅限制的A股出现异常波动停牌的股票"
) -> pd.DataFrame:
    """
    Sina Finance dragon-tiger list (龙虎榜) - daily detail.
    http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml
    :param trade_date: trading day, e.g. trade_date="20200729"
    :type trade_date: str
    :param symbol: table title to extract; pass "返回当前交易日所有可查询的指标"
        to get the list of titles available on that trading day
    :type symbol: str
    :return: daily detail table; NOTE: for the special symbol above a plain
        list of titles is returned instead (kept for backward compatibility)
    :rtype: pandas.DataFrame
    :raises ValueError: if symbol is not among the day's table titles
    """
    # Convert "YYYYMMDD" into the "YYYY-MM-DD" form the endpoint expects.
    trade_date = "-".join([trade_date[:4], trade_date[4:6], trade_date[6:]])
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/lhb/index.phtml"
    params = {"tradedate": trade_date}
    r = requests.get(url, params=params, timeout=15)  # timeout so a stalled server can't hang us
    soup = BeautifulSoup(r.text, "lxml")
    # Table titles are rendered as bold 14px <span> elements on the page.
    table_name_list = [
        item.get_text().strip()
        for item in soup.find_all(
            "span", attrs={"style": "font-weight:bold;font-size:14px;"}
        )
        if item.get_text().strip() != ""
    ]
    if symbol == "返回当前交易日所有可查询的指标":
        return table_name_list
    position_num = table_name_list.index(symbol)  # ValueError for unknown titles
    # Parse the page once; the original re-parsed the full HTML per table lookup.
    all_tables = pd.read_html(r.text, flavor="bs4", header=1)
    if len(table_name_list) == position_num + 1:
        # The last indicator is split across three consecutive HTML tables.
        temp_df = pd.concat(
            all_tables[position_num : position_num + 3], ignore_index=True
        )
    else:
        temp_df = all_tables[position_num]
    # Re-pad stock codes: read_html parses them as ints and drops leading zeros.
    temp_df["股票代码"] = temp_df["股票代码"].astype(str).str.zfill(6)
    del temp_df["查看详情"]
    temp_df.columns = ["序号", "股票代码", "股票名称", "收盘价", "对应值", "成交量", "成交额"]
    return temp_df
def _find_last_page(url: str = None, recent_day: str = "60") -> int:
    """
    Find the last pagination page number of a Sina LHB listing page.
    :param url: listing page URL; defaults to the ggtj listing when None.
        BUGFIX: the original shadowed this parameter with the hard-coded ggtj
        URL, so callers passing yytj/jgzz URLs got page counts from the wrong page.
    :type url: str
    :param recent_day: look-back window in days ("5", "10", "30", "60")
    :type recent_day: str
    :return: last page number (>= 1)
    :rtype: int
    """
    if url is None:
        url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml"
    params = {
        "last": recent_day,
        "p": "1",
    }
    r = requests.get(url, params=params, timeout=15)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        # The second-to-last pagination link holds the highest page number shown.
        previous_page = int(soup.find_all(attrs={"class": "page"})[-2].text)
    except (IndexError, ValueError):
        # No pagination links at all -> single-page result set.
        previous_page = 1
    # The pager only shows a window of pages; keep jumping to the highest
    # visible page until the number stops growing.
    while previous_page != 1:
        params = {
            "last": recent_day,
            "p": previous_page,
        }
        r = requests.get(url, params=params, timeout=15)
        soup = BeautifulSoup(r.text, "lxml")
        last_page = int(soup.find_all(attrs={"class": "page"})[-2].text)
        if last_page == previous_page:
            break
        previous_page = last_page
    return previous_page
def stock_lhb_ggtj_sina(recent_day: str = "30") -> pd.DataFrame:
    """
    Sina Finance dragon-tiger list - per-stock appearance statistics (个股上榜统计).
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml
    :param recent_day: choice of {"5": last 5 days; "10": last 10 days;
        "30": last 30 days; "60": last 60 days}
    :type recent_day: str
    :return: per-stock appearance statistics
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/ggtj/index.phtml"
    last_page_num = _find_last_page(url, recent_day)
    # Collect per-page frames and concat once (the incremental concat in the
    # original copies the accumulated frame on every iteration).
    frames = []
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "last": recent_day,
            "p": page,
        }
        r = requests.get(url, params=params, timeout=15)  # avoid hanging on a stalled server
        frames.append(pd.read_html(r.text)[0])
    big_df = pd.concat(frames, ignore_index=True)
    # Re-pad stock codes that read_html parsed as ints (leading zeros lost).
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    big_df.columns = ["股票代码", "股票名称", "上榜次数", "累积购买额", "累积卖出额", "净额", "买入席位数", "卖出席位数"]
    return big_df
def stock_lhb_yytj_sina(recent_day: str = "5") -> pd.DataFrame:
    """
    Sina Finance dragon-tiger list - broker-branch appearance statistics (营业部上榜统计).
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml
    :param recent_day: choice of {"5": last 5 days; "10": last 10 days;
        "30": last 30 days; "60": last 60 days}
    :type recent_day: str
    :return: broker-branch appearance statistics
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/yytj/index.phtml"
    last_page_num = _find_last_page(url, recent_day)
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            # BUGFIX: the original hard-coded "5" here, so every page was
            # fetched for the 5-day window regardless of recent_day.
            "last": recent_day,
            "p": page,
        }
        r = requests.get(url, params=params, timeout=15)
        temp_df = pd.read_html(r.text)[0]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    big_df.columns = ["营业部名称", "上榜次数", "累积购买额", "买入席位数", "累积卖出额", "卖出席位数", "买入前三股票"]
    # Numeric coercion; malformed cells become NaN instead of raising.
    big_df['上榜次数'] = pd.to_numeric(big_df['上榜次数'], errors="coerce")
    big_df['买入席位数'] = pd.to_numeric(big_df['买入席位数'], errors="coerce")
    big_df['卖出席位数'] = pd.to_numeric(big_df['卖出席位数'], errors="coerce")
    return big_df
def stock_lhb_jgzz_sina(recent_day: str = "5") -> pd.DataFrame:
    """
    Sina Finance dragon-tiger list - institutional seat tracking (机构席位追踪).
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml
    :param recent_day: choice of {"5": last 5 days; "10": last 10 days;
        "30": last 30 days; "60": last 60 days}
    :type recent_day: str
    :return: institutional seat tracking table
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgzz/index.phtml"
    total_pages = _find_last_page(url, recent_day)
    # Fetch every page, then stitch the per-page tables together.
    frames = []
    for page_num in tqdm(range(1, total_pages + 1), leave=False):
        query = {
            "last": recent_day,
            "p": page_num,
        }
        resp = requests.get(url, params=query)
        frames.append(pd.read_html(resp.text)[0])
    big_df = pd.concat(frames, ignore_index=True)
    # Stock codes came back as ints; restore the 6-digit zero-padded form.
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    # Price/percent-change columns are not part of this table's contract.
    del big_df["当前价"]
    del big_df["涨跌幅"]
    big_df.columns = ["股票代码", "股票名称", "累积买入额", "买入次数", "累积卖出额", "卖出次数", "净额"]
    for count_col in ('买入次数', '卖出次数'):
        big_df[count_col] = pd.to_numeric(big_df[count_col], errors="coerce")
    return big_df
def stock_lhb_jgmx_sina() -> pd.DataFrame:
    """
    Sina Finance dragon-tiger list - institutional seat trade detail (机构席位成交明细).
    http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml
    :return: institutional seat trade detail
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/go.php/vLHBData/kind/jgmx/index.phtml"
    params = {
        "p": "1",
    }
    r = requests.get(url, params=params, timeout=15)  # avoid hanging on a stalled server
    soup = BeautifulSoup(r.text, "lxml")
    try:
        # Second-to-last pagination link holds the highest visible page number.
        last_page_num = int(soup.find_all(attrs={"class": "page"})[-2].text)
    except (IndexError, ValueError):
        # Narrowed from a bare except: only "no pager / non-numeric text"
        # means a single-page result; anything else should surface.
        last_page_num = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, last_page_num + 1), leave=False):
        params = {
            "p": page,
        }
        r = requests.get(url, params=params, timeout=15)
        temp_df = pd.read_html(r.text)[0]
        big_df = pd.concat([big_df, temp_df], ignore_index=True)
    # Re-pad stock codes that read_html parsed as ints (leading zeros lost).
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    return big_df
if __name__ == "__main__":
indicator_name_list = stock_lhb_detail_daily_sina(
trade_date="20221118", symbol="返回当前交易日所有可查询的指标"
)
print(indicator_name_list)
stock_lhb_detail_daily_sina_df = stock_lhb_detail_daily_sina(
trade_date="20221118", symbol="换手率达20%的证券"
)
print(stock_lhb_detail_daily_sina_df)
stock_lhb_ggtj_sina_df = stock_lhb_ggtj_sina(recent_day="60")
print(stock_lhb_ggtj_sina_df)
stock_lhb_yytj_sina_df = stock_lhb_yytj_sina(recent_day="60")
print(stock_lhb_yytj_sina_df)
stock_lhb_jgzz_sina_df = stock_lhb_jgzz_sina(recent_day="30")
print(stock_lhb_jgzz_sina_df)
stock_lhb_jgmx_sina_df = stock_lhb_jgmx_sina()
print(stock_lhb_jgmx_sina_df)