准备工作
1. 获取专属token
- 在tushare官网注册:
https://tushare.pro/
- 注册成功后获取专属的token:
https://tushare.pro/user/token
2. 安装tushare
# 使用 pip 进行安装
pip install tushare
在国内使用pip安装Python库时,由于原服务器在国外,下载速度可能会很慢,因此需要使用国内的镜像源进行下载安装。以下是一些常用的pip国内镜像源:
- 清华大学镜像源:
https://pypi.tuna.tsinghua.edu.cn/simple
- 阿里云镜像源:
https://mirrors.aliyun.com/pypi/simple
- 豆瓣镜像源:
https://pypi.douban.com/simple
- 中科大镜像源:
https://pypi.mirrors.ustc.edu.cn/simple
# 比如使用豆瓣镜像源下载
pip install tushare -i https://pypi.douban.com/simple
正式使用
注: 本文使用Python SDK进行获取数据
1. 获取股票列表数据
1.1 输入参数
名称 | 类型 | 必选 | 描述 |
---|---|---|---|
is_hs | str | N | 是否沪深港通标的,N否 H沪股通 S深股通 |
list_status | str | N | 上市状态 L上市 D退市 P暂停上市,默认是L |
exchange | str | N | 交易所 SSE上交所 SZSE深交所 BSE北交所 |
ts_code | str | N | TS股票代码 |
market | str | N | 市场类别 (主板/创业板/科创板/CDR/北交所) |
limit | int | N | |
offset | int | N | |
name | str | N | 名称 |
1.2 输出参数
名称 | 类型 | 默认显示 | 描述 |
---|---|---|---|
ts_code | str | Y | TS代码 |
symbol | str | Y | 股票代码 |
name | str | Y | 股票名称 |
area | str | Y | 地域 |
industry | str | Y | 所属行业 |
fullname | str | N | 股票全称 |
enname | str | N | 英文全称 |
cnspell | str | N | 拼音缩写 |
market | str | Y | 市场类型(主板/创业板/科创板/CDR) |
exchange | str | N | 交易所代码 |
curr_type | str | N | 交易货币 |
list_status | str | N | 上市状态 L上市 D退市 P暂停上市 |
list_date | str | Y | 上市日期 |
delist_date | str | N | 退市日期 |
is_hs | str | N | 是否沪深港通标的,N否 H沪股通 S深股通 |
1.3 获取数据
import tushare as ts

if __name__ == '__main__':
    # Replace 'token' with the personal token obtained in the preparation step.
    pro = ts.pro_api('token')

    # Every query parameter is passed as an empty string.
    filters = dict.fromkeys(
        ["ts_code", "name", "exchange", "market",
         "is_hs", "list_status", "limit", "offset"],
        "",
    )
    # Output columns requested from the stock_basic interface.
    wanted_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs",
    ]

    df = pro.stock_basic(**filters, fields=wanted_fields)
    print(df)
1.4 保存数据到MongoDB数据库
1.4.1 安装MongoDB
1.4.1.1 linux安装MongoDB
1、下载安装包
curl -O https://fastdl.mongodb.org/linux/mongodb-linux-x86_64-3.2.12.tgz
2、解压
tar -zxvf mongodb-linux-x86_64-3.2.12.tgz
3、移动到指定位置
mv mongodb-linux-x86_64-3.2.12/ /usr/local/mongodb
4、创建数据目录和日志目录(需与下一步配置中的dbpath、logpath一致)
mkdir -p /data/db
mkdir -p /data/logs
5、在/usr/local/mongodb/bin下新建配置
vi mongodb.conf
dbpath = /data/db
logpath = /data/logs/mongodb.log
port = 27017
fork = true
nohttpinterface = true
auth=true
bind_ip=0.0.0.0
6、环境变量配置
vi /etc/profile
export MONGODB_HOME=/usr/local/mongodb
export PATH=$PATH:$MONGODB_HOME/bin
保存后,执行以下命令使环境变量生效
source /etc/profile
7、启动
在/usr/local/mongodb/bin下
mongod -f mongodb.conf 或 ./mongod -f mongodb.conf
8、关闭
mongod -f ./mongodb.conf --shutdown 或./mongod -f ./mongodb.conf --shutdown
1.4.1.2 Windows安装MongoDB
Windows安装MongoDB可以参考以下文章
https://www.jianshu.com/p/71929a1606fc
1.4.2 安装Pymongo
pip install pymongo -i https://pypi.douban.com/simple
Python操作数据库之MongoDB可以参考以下文章
https://www.jianshu.com/p/71929a1606fc
1.4.3 保存数据
import tushare as ts
from pymongo import MongoClient
class MongoDBClient:
    """Hold a pymongo ``MongoClient`` for the given server.

    Args:
        host: MongoDB host name; defaults to ``'localhost'``.
        port: MongoDB port; defaults to ``27017``.
    """

    def __init__(self, host='localhost', port=27017):
        self.host = host
        self.port = port
        self.client = MongoClient(host=self.host, port=self.port)
class TushareApi:
    """Expose a Tushare pro API handle as ``self.pro``.

    Args:
        token: Personal Tushare token obtained in the preparation step.
            The default ``'token'`` is only a placeholder.
    """

    def __init__(self, token='token'):
        self.pro = ts.pro_api(token)
def insert_dataframe_to_mongodb(client, df):
    """Store every row of *df* as a document in ``tushare.stock_basic``.

    Args:
        client: Object whose ``client`` attribute can be indexed by database
            name and then by collection name (``MongoDBClient`` in this script).
        df: DataFrame to store; each row becomes one document.
    """
    records = df.to_dict('records')
    # insert_many refuses an empty document list, so an empty frame is a no-op.
    if not records:
        return
    collection = client.client['tushare']['stock_basic']
    collection.insert_many(records)
def retrieve_and_insert_stock_data(api, client):
    """Query ``stock_basic`` through *api* and store the result in MongoDB."""
    # Every query parameter is passed as an empty string.
    filters = dict.fromkeys(
        ["ts_code", "name", "exchange", "market",
         "is_hs", "list_status", "limit", "offset"],
        "",
    )
    wanted_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs",
    ]
    frame = api.pro.stock_basic(**filters, fields=wanted_fields)
    insert_dataframe_to_mongodb(client, frame)
if __name__ == '__main__':
    # Create the MongoDB client first, then the Tushare handle, and run the import.
    mongo_client, tushare_api = MongoDBClient(), TushareApi()
    retrieve_and_insert_stock_data(tushare_api, mongo_client)
爬取后数据如下:总共获得5132条股票数据
1.5 保存数据到Postgresql数据库
1.5.1 安装PostgreSQL数据库
略
1.5.2 安装psycopg2
pip install psycopg2 -i https://pypi.douban.com/simple
1.5.3 创建对应的表
-- Table holding one row per stock, keyed by "ts_code".
-- Column descriptions are attached below through COMMENT ON statements.
CREATE TABLE "public"."stock_basic" (
"ts_code" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
"symbol" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"name" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
"area" VARCHAR ( 50 ) COLLATE "pg_catalog"."default",
"industry" VARCHAR ( 50 ) COLLATE "pg_catalog"."default",
"fullname" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
"enname" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
"cnspell" VARCHAR ( 50 ) COLLATE "pg_catalog"."default" NOT NULL,
"market" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"exchange" VARCHAR ( 10 ) COLLATE "pg_catalog"."default" NOT NULL,
"curr_type" VARCHAR ( 10 ) COLLATE "pg_catalog"."default" NOT NULL,
"list_status" VARCHAR ( 10 ) COLLATE "pg_catalog"."default",
"list_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"delist_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"is_hs" VARCHAR ( 10 ) COLLATE "pg_catalog"."default",
CONSTRAINT "stock_basic_pkey" PRIMARY KEY ( "ts_code" )
);
-- The table is owned by the "postgres" role.
ALTER TABLE "public"."stock_basic" OWNER TO "postgres";
COMMENT ON COLUMN "public"."stock_basic"."ts_code" IS 'TS代码';
COMMENT ON COLUMN "public"."stock_basic"."symbol" IS '股票代码';
COMMENT ON COLUMN "public"."stock_basic"."name" IS '股票名称';
COMMENT ON COLUMN "public"."stock_basic"."area" IS '地域';
COMMENT ON COLUMN "public"."stock_basic"."industry" IS '所属行业';
COMMENT ON COLUMN "public"."stock_basic"."fullname" IS '股票全称';
COMMENT ON COLUMN "public"."stock_basic"."enname" IS '英文全称';
COMMENT ON COLUMN "public"."stock_basic"."cnspell" IS '拼音缩写';
COMMENT ON COLUMN "public"."stock_basic"."market" IS '市场类型';
COMMENT ON COLUMN "public"."stock_basic"."exchange" IS '交易所代码';
COMMENT ON COLUMN "public"."stock_basic"."curr_type" IS '交易货币';
COMMENT ON COLUMN "public"."stock_basic"."list_status" IS '上市状态 L上市 D退市 P暂停上市';
COMMENT ON COLUMN "public"."stock_basic"."list_date" IS '上市日期';
COMMENT ON COLUMN "public"."stock_basic"."delist_date" IS '退市日期';
COMMENT ON COLUMN "public"."stock_basic"."is_hs" IS '是否沪深港通标的,N否 H沪股通 S深股通';
1.5.4 保存数据
1.5.4.1 execute逐行插入
import tushare as ts
import psycopg2
class PostgreSQLClient:
    """Open and hold a psycopg2 connection as ``self.conn``.

    Args:
        host: Server host name; defaults to ``'localhost'``.
        port: Server port; defaults to ``5432``.
        user: Role used to log in; defaults to ``'postgres'``.
        password: Password for *user*; the default is the tutorial's
            sample value and should be overridden outside local testing.
        database: Database to connect to; defaults to ``'tushare'``.
    """

    def __init__(self, host='localhost', port=5432, user='postgres',
                 password='123456', database='tushare'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.conn = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database,
        )
class TushareApi:
    """Expose a Tushare pro API handle as ``self.pro``."""

    def __init__(self):
        # 'token' stands for the personal token obtained in the preparation step.
        token = 'token'
        self.pro = ts.pro_api(token)
def insert_dataframe_to_postgresql(client, df):
    """Insert the rows of *df* into ``stock_basic``, one ``execute`` per row.

    All fifteen table columns are written: the table definition marks
    ``enname``, ``cnspell``, ``exchange`` and ``curr_type`` as NOT NULL, so
    leaving them out of the INSERT makes every row fail.

    Args:
        client: Object exposing a DB-API connection as ``client.conn``.
        df: DataFrame containing at least the fifteen ``stock_basic`` columns.

    Raises:
        KeyError: If *df* lacks one of the required columns.
        Exception: Any database error is re-raised after a rollback.
    """
    columns = [
        "ts_code", "symbol", "name", "area", "industry",
        "fullname", "enname", "cnspell", "market", "exchange",
        "curr_type", "list_status", "list_date", "delist_date", "is_hs",
    ]
    sql = "INSERT INTO stock_basic ({}) VALUES ({})".format(
        ", ".join(columns), ", ".join(["%s"] * len(columns))
    )
    cur = client.conn.cursor()
    try:
        # Select by name so values follow the INSERT column list exactly.
        for row in df[columns].itertuples(index=False, name=None):
            cur.execute(sql, row)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()
def retrieve_and_insert_stock_data(api, client):
    """Query ``stock_basic`` through *api* and store the result in PostgreSQL."""
    # Every query parameter is passed as an empty string.
    filters = dict.fromkeys(
        ["ts_code", "name", "exchange", "market",
         "is_hs", "list_status", "limit", "offset"],
        "",
    )
    wanted_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs",
    ]
    frame = api.pro.stock_basic(**filters, fields=wanted_fields)
    insert_dataframe_to_postgresql(client, frame)
if __name__ == '__main__':
    # Connect to PostgreSQL first, then create the Tushare handle and run the import.
    postgresql_client, tushare_api = PostgreSQLClient(), TushareApi()
    retrieve_and_insert_stock_data(tushare_api, postgresql_client)
1.5.4.2 execute_values批量插入
- 减少开销:使用 execute_values 批量插入多行数据时,执行许多单独的插入语句的开销会减少。当插入大量数据时,这可以带来显著的性能提升。
- 简化代码:使用 execute_values 可以简化代码,减少插入数据到数据库所需的样板代码。这可以使代码更易于阅读和维护。
- 改进错误处理:execute_values 提供比逐行插入更好的错误处理,因为它可以更有效地检测和报告错误。这可以帮助更快地识别和解决问题。
- 总的来说,使用 execute_values 是提高代码效率和可读性的好方法,特别是在处理大量数据时。
# Rows are taken positionally, so the DataFrame's column order has to match
# the column list written in the INSERT statement.
insert_sql = "INSERT INTO stock_basic (ts_code, symbol, name, area, industry, fullname, enname, cnspell, market, exchange, curr_type, list_status, list_date, delist_date, is_hs) VALUES %s"
values = list(map(tuple, df.to_numpy()))
execute_values(cur, insert_sql, values)
也可以指定插入列的顺序
# Fix the column order once, then read each row's values in that same order.
columns = [
    "ts_code", "symbol", "name", "area", "industry",
    "fullname", "enname", "cnspell", "market", "exchange",
    "curr_type", "list_status", "list_date", "delist_date", "is_hs",
]
column_list = ", ".join(columns)
query = f"INSERT INTO stock_basic ({column_list}) VALUES %s"
values = [tuple(record[name] for name in columns) for _, record in df.iterrows()]
execute_values(cur, query, values)
注:遇到某列超长时可能会导致插入失败,详细错误如下:psycopg2.errors.StringDataRightTruncation: 错误: 对于可变字符类型来说,值太长了(50)
可以使用以下代码检测哪一列过长
# 执行过程中, 判断那些列超长
# Report every cell whose string form is longer than 50 characters.
for _, record in df.iterrows():
    for column, cell in record.items():
        text = str(cell)
        if len(text) > 50:
            print("too long", column, text)
完整代码如下
import tushare as ts
import psycopg2
from psycopg2.extras import execute_values
class PostgreSQLClient:
    """Open and hold a psycopg2 connection as ``self.conn``."""

    def __init__(self):
        # Connection settings are fixed values for this tutorial's local setup.
        self.host, self.port = 'localhost', 5432
        self.user, self.password = 'postgres', '123456'
        self.database = 'tushare'
        settings = {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
            "database": self.database,
        }
        self.conn = psycopg2.connect(**settings)
class TushareApi:
    """Expose a Tushare pro API handle as ``self.pro``."""

    def __init__(self):
        # 'token' stands for the personal token obtained in the preparation step.
        token = 'token'
        self.pro = ts.pro_api(token)
def insert_dataframe_to_postgresql(client, df):
    """Bulk-insert the rows of *df* into ``stock_basic`` via ``execute_values``.

    Columns are selected by name, so the values line up with the INSERT
    column list whatever order the DataFrame holds them in.

    Args:
        client: Object exposing a psycopg2 connection as ``client.conn``.
        df: DataFrame containing at least the fifteen ``stock_basic`` columns.

    Raises:
        KeyError: If *df* lacks one of the required columns.
        Exception: Any database error is re-raised after a rollback.
    """
    columns = [
        "ts_code", "symbol", "name", "area", "industry",
        "fullname", "enname", "cnspell", "market", "exchange",
        "curr_type", "list_status", "list_date", "delist_date", "is_hs",
    ]
    query = "INSERT INTO stock_basic ({}) VALUES %s".format(", ".join(columns))
    values = [tuple(row) for row in df[columns].to_numpy()]

    cur = client.conn.cursor()
    try:
        # One multi-row statement instead of one execute() call per row.
        execute_values(cur, query, values)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()
def retrieve_and_insert_stock_data(api, client):
    """Query ``stock_basic`` through *api* and store the result in PostgreSQL."""
    # Every query parameter is passed as an empty string.
    filters = dict.fromkeys(
        ["ts_code", "name", "exchange", "market",
         "is_hs", "list_status", "limit", "offset"],
        "",
    )
    wanted_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs",
    ]
    frame = api.pro.stock_basic(**filters, fields=wanted_fields)
    insert_dataframe_to_postgresql(client, frame)
if __name__ == '__main__':
    # Connect to PostgreSQL first, then create the Tushare handle and run the import.
    postgresql_client, tushare_api = PostgreSQLClient(), TushareApi()
    retrieve_and_insert_stock_data(tushare_api, postgresql_client)
爬取后数据如下:总共获得5132条股票数据
2. 获取交易日数据
2.1 输入参数
名称 | 类型 | 必选 | 描述 |
---|---|---|---|
exchange | str | N | 交易所 SSE上交所,SZSE深交所,CFFEX 中金所,SHFE 上期所,CZCE 郑商所,DCE 大商所,INE 上能源 |
start_date | str | N | 开始日期 (格式:YYYYMMDD 下同) |
end_date | str | N | 结束日期 |
is_open | str | N | 是否交易 '0'休市 '1'交易 |
2.2 输出参数
名称 | 类型 | 默认显示 | 描述 |
---|---|---|---|
exchange | str | Y | 交易所 SSE上交所 SZSE深交所 |
cal_date | str | Y | 日历日期 |
is_open | str | Y | 是否交易 0休市 1交易 |
pretrade_date | str | Y | 上一个交易日 |
2.3 创建对应的表
-- Trading calendar: one row per exchange and calendar date.
CREATE TABLE "public"."trade_cal" (
  -- SERIAL makes PostgreSQL create a sequence owned by this column, instead of
  -- depending on "daily_id_seq", which is not created anywhere in this guide.
  "id" SERIAL NOT NULL,
  "exchange" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
  "cal_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
  "is_open" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
  "pretrade_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
  CONSTRAINT "trade_pkey" PRIMARY KEY ( "id" )
);
ALTER TABLE "public"."trade_cal" OWNER TO "postgres";
COMMENT ON COLUMN "public"."trade_cal"."exchange" IS '交易所 SSE上交所 SZSE深交所';
COMMENT ON COLUMN "public"."trade_cal"."cal_date" IS '日历日期';
COMMENT ON COLUMN "public"."trade_cal"."is_open" IS '是否交易 0休市 1交易';
COMMENT ON COLUMN "public"."trade_cal"."pretrade_date" IS '上一个交易日';
2.4 获取数据并保存到Postgresql数据库
import tushare as ts
import psycopg2
from psycopg2.extras import execute_values
class PostgreSQLClient:
    """Open and hold a psycopg2 connection as ``self.conn``."""

    def __init__(self):
        # Connection settings are fixed values for this tutorial's local setup.
        self.host, self.port = 'localhost', 5432
        self.user, self.password = 'postgres', '123456'
        self.database = 'tushare'
        settings = {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
            "database": self.database,
        }
        self.conn = psycopg2.connect(**settings)
class TushareApi:
    """Expose a Tushare pro API handle as ``self.pro``."""

    def __init__(self):
        # 'token' stands for the personal token obtained in the preparation step.
        token = 'token'
        self.pro = ts.pro_api(token)
def insert_dataframe_to_postgresql(client, df):
    """Bulk-insert the rows of *df* into ``trade_cal`` via ``execute_values``.

    Columns are selected by name, so the values line up with the INSERT
    column list whatever order the DataFrame holds them in.

    Args:
        client: Object exposing a psycopg2 connection as ``client.conn``.
        df: DataFrame containing ``exchange``, ``cal_date``, ``is_open`` and
            ``pretrade_date`` columns.

    Raises:
        KeyError: If *df* lacks one of the required columns.
        Exception: Any database error is re-raised after a rollback.
    """
    columns = ["exchange", "cal_date", "is_open", "pretrade_date"]
    query = "INSERT INTO trade_cal ({}) VALUES %s".format(", ".join(columns))
    values = [tuple(row) for row in df[columns].to_numpy()]

    cur = client.conn.cursor()
    try:
        execute_values(cur, query, values)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()
def get_trade_cal(api, client):
    """Query ``trade_cal`` through *api* and store the result in PostgreSQL."""
    # All parameters are empty strings except is_open, which is passed as 1.
    params = dict.fromkeys(
        ["exchange", "cal_date", "start_date", "end_date",
         "is_open", "limit", "offset"],
        "",
    )
    params["is_open"] = 1
    wanted_fields = ["exchange", "cal_date", "is_open", "pretrade_date"]
    calendar = api.pro.trade_cal(**params, fields=wanted_fields)
    insert_dataframe_to_postgresql(client, calendar)
if __name__ == '__main__':
    # Connect to PostgreSQL first, then create the Tushare handle and load the calendar.
    postgresql_client, tushare_api = PostgreSQLClient(), TushareApi()
    get_trade_cal(tushare_api, postgresql_client)
爬取后数据如下:总共获得7644条交易日数据
3. 获取股票日线行情数据
3.1 输入参数
名称 | 类型 | 必选 | 描述 |
---|---|---|---|
ts_code | str | N | 股票代码(支持多个股票同时提取,逗号分隔) |
trade_date | str | N | 交易日期(YYYYMMDD) |
start_date | str | N | 开始日期(YYYYMMDD) |
end_date | str | N | 结束日期(YYYYMMDD) |
注:日期都填YYYYMMDD格式,比如20181010
3.2 输出参数
名称 | 类型 | 描述 |
---|---|---|
ts_code | str | 股票代码 |
trade_date | str | 交易日期 |
open | float | 开盘价 |
high | float | 最高价 |
low | float | 最低价 |
close | float | 收盘价 |
pre_close | float | 昨收价(前复权) |
change | float | 涨跌额 |
pct_chg | float | 涨跌幅 (未复权,如果是复权请用 通用行情接口 ) |
vol | float | 成交量 (手) |
amount | float | 成交额 (千元) |
3.3 创建对应的表
-- NOTE(review): this statement creates "stock_basic" again, identical to the
-- DDL in section 1.5.3, while the insert code of this section writes to a
-- table named "daily" whose DDL is not shown here -- confirm which DDL was
-- intended before running this.
CREATE TABLE "public"."stock_basic" (
"ts_code" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
"symbol" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"name" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
"area" VARCHAR ( 50 ) COLLATE "pg_catalog"."default",
"industry" VARCHAR ( 50 ) COLLATE "pg_catalog"."default",
"fullname" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
"enname" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
"cnspell" VARCHAR ( 50 ) COLLATE "pg_catalog"."default" NOT NULL,
"market" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"exchange" VARCHAR ( 10 ) COLLATE "pg_catalog"."default" NOT NULL,
"curr_type" VARCHAR ( 10 ) COLLATE "pg_catalog"."default" NOT NULL,
"list_status" VARCHAR ( 10 ) COLLATE "pg_catalog"."default",
"list_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"delist_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
"is_hs" VARCHAR ( 10 ) COLLATE "pg_catalog"."default",
CONSTRAINT "stock_basic_pkey" PRIMARY KEY ( "ts_code" )
);
-- The table is owned by the "postgres" role.
ALTER TABLE "public"."stock_basic" OWNER TO "postgres";
COMMENT ON COLUMN "public"."stock_basic"."ts_code" IS 'TS代码';
COMMENT ON COLUMN "public"."stock_basic"."symbol" IS '股票代码';
COMMENT ON COLUMN "public"."stock_basic"."name" IS '股票名称';
COMMENT ON COLUMN "public"."stock_basic"."area" IS '地域';
COMMENT ON COLUMN "public"."stock_basic"."industry" IS '所属行业';
COMMENT ON COLUMN "public"."stock_basic"."fullname" IS '股票全称';
COMMENT ON COLUMN "public"."stock_basic"."enname" IS '英文全称';
COMMENT ON COLUMN "public"."stock_basic"."cnspell" IS '拼音缩写';
COMMENT ON COLUMN "public"."stock_basic"."market" IS '市场类型';
COMMENT ON COLUMN "public"."stock_basic"."exchange" IS '交易所代码';
COMMENT ON COLUMN "public"."stock_basic"."curr_type" IS '交易货币';
COMMENT ON COLUMN "public"."stock_basic"."list_status" IS '上市状态 L上市 D退市 P暂停上市';
COMMENT ON COLUMN "public"."stock_basic"."list_date" IS '上市日期';
COMMENT ON COLUMN "public"."stock_basic"."delist_date" IS '退市日期';
COMMENT ON COLUMN "public"."stock_basic"."is_hs" IS '是否沪深港通标的,N否 H沪股通 S深股通';
3.4 注意
tushare短时间内大量请求,会出现报错,提示远程主机强迫关闭了一个现有的连接,报错详情如下
解决方法
- 可以在每个请求之前加入一定的延时
- 该接口是由日期倒序执行的,可以在报错之后,在end_date上减去一天 比如在20201231日接口报错终止,则修改end_date为20201230,然后重启程序
- 添加重试机制
3.4.1 固定重试3次
定义
import functools
import time


def retry(func):
    """Retry *func* up to three times, waiting one second between attempts.

    An attempt fails when *func* raises or returns ``None``. The wrapper
    returns the first non-``None`` result. If the last attempt raises, that
    exception propagates; if it returns ``None``, the wrapper returns ``None``.
    """
    attempts = 3

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for attempt in range(attempts):
            is_last = attempt == attempts - 1
            try:
                result = func(*args, **kwargs)
            except Exception:
                # Dropped connections surface as exceptions, so they must be
                # retried too; only the final failure is allowed to escape.
                if is_last:
                    raise
            else:
                if result is not None:
                    return result
            # No point in waiting once there is no further attempt.
            if not is_last:
                time.sleep(1)
        return None

    return wrapper
引用
@retry
def my_function():
    """Placeholder for an operation that might fail."""
    # do something that might fail
    return None
3.4.2 自定义重试N次
定义
def retry(n, delay=None):
    """Build a decorator that retries the wrapped function up to *n* times.

    An attempt fails when the function raises or returns ``None``. The
    wrapper returns the first non-``None`` result. If the last attempt
    raises, that exception propagates; if it returns ``None``, the wrapper
    returns ``None``.

    Args:
        n: Maximum number of attempts.
        delay: Seconds to wait between attempts; defaults to *n*.
    """
    from functools import wraps  # local import keeps the snippet self-contained

    pause = n if delay is None else delay

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(n):
                is_last = attempt == n - 1
                try:
                    result = func(*args, **kwargs)
                except Exception:
                    # Dropped connections surface as exceptions, so they must
                    # be retried too; only the final failure may escape.
                    if is_last:
                        raise
                else:
                    if result is not None:
                        return result
                # No point in waiting once there is no further attempt.
                if not is_last:
                    time.sleep(pause)
            return None

        return wrapper

    return decorator
使用
@retry(3)
def my_function():
    """Placeholder for an operation that might fail."""
    # do something that might fail
    return None
3.5 获取数据并保存到Postgresql数据库
import tushare as ts
import psycopg2
import time
from psycopg2.extras import execute_values
class PostgreSQLClient:
    """Open and hold a psycopg2 connection as ``self.conn``."""

    def __init__(self):
        # Connection settings are fixed values for this tutorial's local setup.
        self.host, self.port = 'localhost', 5432
        self.user, self.password = 'postgres', '123456'
        self.database = 'tushare'
        settings = {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
            "database": self.database,
        }
        self.conn = psycopg2.connect(**settings)
class TushareApi:
    """Expose a Tushare pro API handle as ``self.pro``."""

    def __init__(self):
        # 'token' stands for the personal token obtained in the preparation step.
        token = 'token'
        self.pro = ts.pro_api(token)
def retry(n, delay=None):
    """Build a decorator that retries the wrapped function up to *n* times.

    An attempt fails when the function raises or returns ``None``. The
    wrapper returns the first non-``None`` result. If the last attempt
    raises, that exception propagates; if it returns ``None``, the wrapper
    returns ``None``.

    Args:
        n: Maximum number of attempts.
        delay: Seconds to wait between attempts; defaults to *n*.
    """
    from functools import wraps  # local import: functools is not imported at file level

    pause = n if delay is None else delay

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(n):
                is_last = attempt == n - 1
                try:
                    result = func(*args, **kwargs)
                except Exception:
                    # Dropped connections surface as exceptions, so they must
                    # be retried too; only the final failure may escape.
                    if is_last:
                        raise
                else:
                    if result is not None:
                        return result
                # No point in waiting once there is no further attempt.
                if not is_last:
                    time.sleep(pause)
            return None

        return wrapper

    return decorator
def insert_dataframe_to_postgresql(client, df):
    """Bulk-insert the rows of *df* into ``daily`` via ``execute_values``.

    Columns are selected by name, so the values line up with the INSERT
    column list whatever order the DataFrame holds them in.

    Args:
        client: Object exposing a psycopg2 connection as ``client.conn``.
        df: DataFrame containing the eleven daily-quote columns listed below.

    Raises:
        KeyError: If *df* lacks one of the required columns.
        Exception: Any database error is re-raised after a rollback.
    """
    columns = [
        "ts_code", "trade_date", "open", "high", "low", "close",
        "pre_close", "change", "pct_chg", "vol", "amount",
    ]
    query = "INSERT INTO daily ({}) VALUES %s".format(", ".join(columns))
    values = [tuple(row) for row in df[columns].to_numpy()]

    cur = client.conn.cursor()
    try:
        execute_values(cur, query, values)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()
def get_trade_date(api, client, end_date='20230330'):
    """Fetch and store daily quotes for every SSE trading day up to *end_date*.

    Args:
        api: Object exposing the Tushare pro handle as ``api.pro``.
        client: Database client passed on to ``insert_dataframe_to_postgresql``.
        end_date: Last calendar date to query, in ``YYYYMMDD`` form.

    Raises:
        RuntimeError: If no data could be fetched for a trading day; the
            message names the date so the run can be resumed from there.
    """
    calendar = api.pro.trade_cal(exchange='SSE', is_open='1',
                                 start_date='',
                                 end_date=end_date,
                                 fields='cal_date')
    for date in calendar['cal_date'].values:
        daily = get_daily(api, date)
        # get_daily yields None once its retries are used up; stop with a
        # clear message rather than failing later inside the insert.
        if daily is None:
            raise RuntimeError(
                "no daily data returned for trade date {}".format(date)
            )
        print("date", date, daily)
        insert_dataframe_to_postgresql(client, daily)
@retry(5)
def get_daily(api, date):
    """Return the daily quotes of all stocks for the trading day *date*."""
    # Only trade_date is set; the remaining parameters are empty strings.
    params = dict.fromkeys(
        ["ts_code", "trade_date", "start_date", "end_date", "offset", "limit"],
        "",
    )
    params["trade_date"] = date
    wanted_fields = [
        "ts_code", "trade_date", "open", "high", "low", "close",
        "pre_close", "change", "pct_chg", "vol", "amount",
    ]
    return api.pro.daily(**params, fields=wanted_fields)
if __name__ == '__main__':
    # Connect to PostgreSQL first, then create the Tushare handle and load daily quotes.
    postgresql_client, tushare_api = PostgreSQLClient(), TushareApi()
    get_trade_date(tushare_api, postgresql_client)
爬取后数据如下:总共获得13628761条股票数据
4.校验数据
4.1 交易日期
共有7886条数据,数据范围为:19901219 - 20230330
4.2 股票日线数据
按照交易日期分组共有7886条数据