Python使用tushare获取全部股票数据

606 阅读11分钟

准备工作

1. 获取专属token

  1. 在tushare官网 https://tushare.pro/ 注册
  2. 注册成功后获取专属的token:https://tushare.pro/user/token

image.png

2. 安装tushare

# 使用 pip 进行安装
pip install tushare

在国内使用pip安装Python库时,由于原服务器在国外,下载速度可能会很慢,因此需要使用国内的镜像源进行下载安装。以下是一些常用的pip国内镜像源:

  1. 清华大学镜像源:https://pypi.tuna.tsinghua.edu.cn/simple
  2. 阿里云镜像源:https://mirrors.aliyun.com/pypi/simple
  3. 豆瓣镜像源:https://pypi.douban.com/simple
  4. 中科大镜像源:https://pypi.mirrors.ustc.edu.cn/simple
# 比如使用豆瓣镜像源下载
pip install tushare -i https://pypi.douban.com/simple

正式使用

注: 本文使用Python SDK进行获取数据

1. 获取股票列表数据

1.1 输入参数

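| 名称 | 类型 | 必选 | 描述 |
| --- | --- | --- | --- |
| is_hs | str | N | 是否沪深港通标的,N否 H沪股通 S深股通 |
| list_status | str | N | 上市状态 L上市 D退市 P暂停上市,默认是L |
| exchange | str | N | 交易所 SSE上交所 SZSE深交所 BSE北交所 |
| ts_code | str | N | TS股票代码 |
| market | str | N | 市场类别 (主板/创业板/科创板/CDR/北交所) |
| limit | int | N | |
| offset | int | N | |
| name | str | N | 名称 |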

1.2 输出参数

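| 名称 | 类型 | 默认显示 | 描述 |
| --- | --- | --- | --- |
| ts_code | str | Y | TS代码 |
| symbol | str | Y | 股票代码 |
| name | str | Y | 股票名称 |
| area | str | Y | 地域 |
| industry | str | Y | 所属行业 |
| fullname | str | N | 股票全称 |
| enname | str | N | 英文全称 |
| cnspell | str | N | 拼音缩写 |
| market | str | Y | 市场类型(主板/创业板/科创板/CDR) |
| exchange | str | N | 交易所代码 |
| curr_type | str | N | 交易货币 |
| list_status | str | N | 上市状态 L上市 D退市 P暂停上市 |
| list_date | str | Y | 上市日期 |
| delist_date | str | N | 退市日期 |
| is_hs | str | N | 是否沪深港通标的,N否 H沪股通 S深股通 |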

1.3 获取数据


import tushare as ts



if __name__ == '__main__':
    # 'token' is a placeholder: use the token obtained in step 1.
    pro = ts.pro_api('token')

    # Input parameters of the stock_basic interface, all left as empty strings.
    filters = {
        "ts_code": "",
        "name": "",
        "exchange": "",
        "market": "",
        "is_hs": "",
        "list_status": "",
        "limit": "",
        "offset": ""
    }

    # Output columns requested from the interface.
    wanted_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs"
    ]

    df = pro.stock_basic(**filters, fields=wanted_fields)
    print(df)

1.4 保存数据到MongoDB数据库

1.4.1 安装MongoDB
1.4.1.1 linux安装MongoDB
1、下载安装包

 curl -O https://fastdl.mongodb.org/linux/mongodb-linux-x86_64-3.2.12.tgz

 2、解压

 tar -zxvf mongodb-linux-x86_64-3.2.12.tgz

 3、移动到指定位置

 mv  mongodb-linux-x86_64-3.2.12/ /usr/local/mongodb

 4、创建数据目录和日志目录(需与下面mongodb.conf中的dbpath、logpath保持一致)

 mkdir -p /data/db

 mkdir -p /data/logs

 5、在/usr/local/mongodb/bin下新建配置

 vi mongodb.conf

 dbpath = /data/db
 logpath = /data/logs/mongodb.log
 port = 27017
 fork = true
 nohttpinterface = true
 auth=true

 bind_ip=0.0.0.0

 6、环境变量配置

 vi /etc/profile 

 export MONGODB_HOME=/usr/local/mongodb
 export PATH=$PATH:$MONGODB_HOME/bin

 保存后,执行以下命令使环境变量生效

 source /etc/profile
 7、启动

 在/usr/local/mongodb/bin下

 mongod -f mongodb.conf 或 ./mongod -f mongodb.conf

 8、关闭

 mongod -f ./mongodb.conf --shutdown  或./mongod -f ./mongodb.conf --shutdown
1.4.1.2 Windows安装MongoDB

Windows安装MongoDB可以参考以下文章

https://www.jianshu.com/p/71929a1606fc

1.4.2 安装Pymongo
pip install pymongo -i https://pypi.douban.com/simple

Python操作数据库之MongoDB可以参考以下文章

https://www.jianshu.com/p/71929a1606fc

1.4.3 保存数据
import tushare as ts
from pymongo import MongoClient


class MongoDBClient:
    """Hold a pymongo ``MongoClient`` exposed as ``self.client``.

    Args:
        host: MongoDB server host; defaults to the tutorial's 'localhost'.
        port: MongoDB server port; defaults to 27017.
    """

    def __init__(self, host='localhost', port=27017):
        self.host = host
        self.port = port
        self.client = MongoClient(host=self.host, port=self.port)


class TushareApi:
    """Hold the Tushare Pro API handle exposed as ``self.pro``.

    Args:
        token: Tushare token. The default 'token' is only a placeholder;
            pass the token obtained in step 1.
    """

    def __init__(self, token='token'):
        self.pro = ts.pro_api(token)


def insert_dataframe_to_mongodb(client, df):
    """Insert every row of ``df`` into the ``tushare.stock_basic`` collection.

    Args:
        client: object whose ``client`` attribute is a pymongo ``MongoClient``.
        df: pandas DataFrame; each row becomes one document.
    """
    records = df.to_dict('records')
    if not records:
        # insert_many rejects an empty document list, so an empty frame
        # is treated as "nothing to do" instead of an error.
        return

    collection = client.client['tushare']['stock_basic']
    collection.insert_many(records)


def retrieve_and_insert_stock_data(api, client):
    """Fetch the stock list via ``api.pro.stock_basic`` and store it in MongoDB.

    Args:
        api: object exposing the Tushare handle as ``api.pro``.
        client: storage client forwarded to ``insert_dataframe_to_mongodb``.
    """
    # Input parameters, all passed as empty strings
    # (presumably "no filter" -- confirm against the Tushare docs).
    query_params = {
        "ts_code": "",
        "name": "",
        "exchange": "",
        "market": "",
        "is_hs": "",
        "list_status": "",
        "limit": "",
        "offset": ""
    }
    output_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs"
    ]

    stock_frame = api.pro.stock_basic(**query_params, fields=output_fields)
    insert_dataframe_to_mongodb(client, stock_frame)


if __name__ == '__main__':
    # Create the MongoDB holder, then the Tushare handle, then run the import.
    storage = MongoDBClient()
    source = TushareApi()
    retrieve_and_insert_stock_data(source, storage)

爬取后数据如下:总共获得5132条股票数据

image.png

1.5 保存数据到Postgresql数据库

1.5.1 安装PostgreSQL数据库

1.5.2 安装psycopg2
pip install psycopg2 -i https://pypi.douban.com/simple
1.5.3 创建对应的表
-- Table holding one row per stock, keyed by the Tushare code "ts_code".
-- Every column is stored as text (VARCHAR), including the date columns.
CREATE TABLE "public"."stock_basic" (
	"ts_code" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
	"symbol" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
	"name" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
	"area" VARCHAR ( 50 ) COLLATE "pg_catalog"."default",
	"industry" VARCHAR ( 50 ) COLLATE "pg_catalog"."default",
	"fullname" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
	"enname" VARCHAR ( 100 ) COLLATE "pg_catalog"."default" NOT NULL,
	"cnspell" VARCHAR ( 50 ) COLLATE "pg_catalog"."default" NOT NULL,
	"market" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
	"exchange" VARCHAR ( 10 ) COLLATE "pg_catalog"."default" NOT NULL,
	"curr_type" VARCHAR ( 10 ) COLLATE "pg_catalog"."default" NOT NULL,
	"list_status" VARCHAR ( 10 ) COLLATE "pg_catalog"."default",
	"list_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
	"delist_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
	"is_hs" VARCHAR ( 10 ) COLLATE "pg_catalog"."default",
	CONSTRAINT "stock_basic_pkey" PRIMARY KEY ( "ts_code" ) 
);
ALTER TABLE "public"."stock_basic" OWNER TO "postgres";
-- Column descriptions stored in the database catalog.
COMMENT ON COLUMN "public"."stock_basic"."ts_code" IS 'TS代码';
COMMENT ON COLUMN "public"."stock_basic"."symbol" IS '股票代码';
COMMENT ON COLUMN "public"."stock_basic"."name" IS '股票名称';
COMMENT ON COLUMN "public"."stock_basic"."area" IS '地域';
COMMENT ON COLUMN "public"."stock_basic"."industry" IS '所属行业';
COMMENT ON COLUMN "public"."stock_basic"."fullname" IS '股票全称';
COMMENT ON COLUMN "public"."stock_basic"."enname" IS '英文全称';
COMMENT ON COLUMN "public"."stock_basic"."cnspell" IS '拼音缩写';
COMMENT ON COLUMN "public"."stock_basic"."market" IS '市场类型';
COMMENT ON COLUMN "public"."stock_basic"."exchange" IS '交易所代码';
COMMENT ON COLUMN "public"."stock_basic"."curr_type" IS '交易货币';
COMMENT ON COLUMN "public"."stock_basic"."list_status" IS '上市状态 L上市 D退市 P暂停上市';
COMMENT ON COLUMN "public"."stock_basic"."list_date" IS '上市日期';
COMMENT ON COLUMN "public"."stock_basic"."delist_date" IS '退市日期';
COMMENT ON COLUMN "public"."stock_basic"."is_hs" IS '是否沪深港通标的,N否 H沪股通 S深股通';
1.5.4 保存数据
1.5.4.1 execute逐行插入
import tushare as ts
import psycopg2


class PostgreSQLClient:
    """Open a psycopg2 connection and expose it as ``self.conn``.

    The defaults are the tutorial's local settings; pass your own values
    (in particular a real password) when connecting anywhere else.

    Args:
        host: database server host.
        port: database server port.
        user: database user name.
        password: password for ``user``.
        database: name of the database to connect to.
    """

    def __init__(self, host='localhost', port=5432, user='postgres',
                 password='123456', database='tushare'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.conn = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )


class TushareApi:
    """Hold the Tushare Pro API handle exposed as ``self.pro``.

    Args:
        token: Tushare token. The default 'token' is only a placeholder;
            pass the token obtained in step 1.
    """

    def __init__(self, token='token'):
        self.pro = ts.pro_api(token)


def insert_dataframe_to_postgresql(client, df):
    """Insert ``df`` into the ``stock_basic`` table, one INSERT per row.

    All fifteen table columns are written. The table declares enname,
    cnspell, exchange and curr_type as NOT NULL, so leaving them out of
    the INSERT makes every row fail.

    Args:
        client: object whose ``conn`` attribute is an open psycopg2 connection.
        df: pandas DataFrame containing the columns listed below.

    Raises:
        KeyError: if ``df`` lacks one of the expected columns.
        Exception: any database error is re-raised after a rollback.
    """
    columns = [
        "ts_code", "symbol", "name", "area", "industry",
        "fullname", "enname", "cnspell", "market", "exchange",
        "curr_type", "list_status", "list_date", "delist_date", "is_hs"
    ]
    statement = "INSERT INTO stock_basic ({}) VALUES ({})".format(
        ", ".join(columns), ", ".join(["%s"] * len(columns)))

    cur = client.conn.cursor()
    try:
        # Selecting by name keeps each value aligned with its column,
        # whatever order the DataFrame's columns are in.
        for row in df[columns].itertuples(index=False, name=None):
            cur.execute(statement, row)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()


def retrieve_and_insert_stock_data(api, client):
    """Fetch the stock list via ``api.pro.stock_basic`` and store it in PostgreSQL.

    Args:
        api: object exposing the Tushare handle as ``api.pro``.
        client: storage client forwarded to ``insert_dataframe_to_postgresql``.
    """
    # Input parameters, all passed as empty strings
    # (presumably "no filter" -- confirm against the Tushare docs).
    query_params = {
        "ts_code": "",
        "name": "",
        "exchange": "",
        "market": "",
        "is_hs": "",
        "list_status": "",
        "limit": "",
        "offset": ""
    }
    output_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs"
    ]

    stock_frame = api.pro.stock_basic(**query_params, fields=output_fields)
    insert_dataframe_to_postgresql(client, stock_frame)


if __name__ == '__main__':
    # Connect to PostgreSQL, create the Tushare handle, then run the import.
    storage = PostgreSQLClient()
    source = TushareApi()
    retrieve_and_insert_stock_data(source, storage)
1.5.4.2 execute_values批量插入
  • 减少开销:使用 execute_values 批量插入多行数据时,执行许多单独的插入语句的开销会减少。当插入大量数据时,这可以带来显著的性能提升。
  • 简化代码:使用 execute_values 可以简化代码,减少插入数据到数据库所需的样板代码。这可以使代码更易于阅读和维护。
  • 改进错误处理:execute_values 提供比逐行插入更好的错误处理,因为它可以更有效地检测和报告错误。这可以帮助更快地识别和解决问题。
  • 总的来说,使用 execute_values 是提高代码效率和可读性的好方法,特别是在处理大量数据时。
# Each DataFrame row becomes one tuple. Values are matched to the INSERT
# column list by position, so the frame's column order must match it.
values = list(map(tuple, df.to_numpy()))
insert_sql = "INSERT INTO stock_basic (ts_code, symbol, name, area, industry, fullname, enname, cnspell, market, exchange, curr_type, list_status, list_date, delist_date, is_hs) VALUES %s"
execute_values(cur, insert_sql, values)

也可以指定插入列的顺序

# Explicit column order: values are picked from each row by column name.
columns = ["ts_code", "symbol", "name", "area", "industry", "fullname", "enname", "cnspell", "market", "exchange", "curr_type", "list_status", "list_date", "delist_date", "is_hs"]
values = []
for _, row in df.iterrows():
    values.append(tuple(row[col] for col in columns))
query = "INSERT INTO stock_basic (" + ", ".join(columns) + ") VALUES %s"
execute_values(cur, query, values)

注:遇到某列超长时可能会导致插入失败,详细错误如下:psycopg2.errors.StringDataRightTruncation: 错误: 对于可变字符类型来说,值太长了(50) 可以使用以下代码检测哪一列过长

# Report every cell whose string form is longer than 50 characters.
for _, record in df.iterrows():
    for column in record.index:
        text = str(record[column])
        if len(text) > 50:
            print("too long", column, text)

完整代码如下

import tushare as ts
import psycopg2
from psycopg2.extras import execute_values


class PostgreSQLClient:
    """Open a psycopg2 connection and expose it as ``self.conn``.

    The defaults are the tutorial's local settings; pass your own values
    (in particular a real password) when connecting anywhere else.

    Args:
        host: database server host.
        port: database server port.
        user: database user name.
        password: password for ``user``.
        database: name of the database to connect to.
    """

    def __init__(self, host='localhost', port=5432, user='postgres',
                 password='123456', database='tushare'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.conn = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )


class TushareApi:
    """Hold the Tushare Pro API handle exposed as ``self.pro``.

    Args:
        token: Tushare token. The default 'token' is only a placeholder;
            pass the token obtained in step 1.
    """

    def __init__(self, token='token'):
        self.pro = ts.pro_api(token)


def insert_dataframe_to_postgresql(client, df):
    """Bulk-insert ``df`` into the ``stock_basic`` table with ``execute_values``.

    A single ``execute_values`` call replaces one ``cur.execute`` per row,
    which cuts per-statement overhead on large frames.

    Args:
        client: object whose ``conn`` attribute is an open psycopg2 connection.
        df: pandas DataFrame containing the columns listed below.

    Raises:
        KeyError: if ``df`` lacks one of the expected columns.
        Exception: any database error is re-raised after a rollback, e.g.
            StringDataRightTruncation when a value exceeds its VARCHAR limit.
    """
    # The column order is fixed here and the values are selected by name,
    # so they line up with the INSERT column list whatever order the
    # DataFrame's columns arrive in.
    columns = [
        "ts_code", "symbol", "name", "area", "industry",
        "fullname", "enname", "cnspell", "market", "exchange",
        "curr_type", "list_status", "list_date", "delist_date", "is_hs"
    ]
    query = "INSERT INTO stock_basic ({}) VALUES %s".format(", ".join(columns))
    values = [tuple(row) for row in df[columns].to_numpy()]

    cur = client.conn.cursor()
    try:
        execute_values(cur, query, values)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()


def retrieve_and_insert_stock_data(api, client):
    """Fetch the stock list via ``api.pro.stock_basic`` and store it in PostgreSQL.

    Args:
        api: object exposing the Tushare handle as ``api.pro``.
        client: storage client forwarded to ``insert_dataframe_to_postgresql``.
    """
    # Input parameters, all passed as empty strings
    # (presumably "no filter" -- confirm against the Tushare docs).
    query_params = {
        "ts_code": "",
        "name": "",
        "exchange": "",
        "market": "",
        "is_hs": "",
        "list_status": "",
        "limit": "",
        "offset": ""
    }
    output_fields = [
        "ts_code", "symbol", "name", "area", "industry",
        "market", "list_date", "fullname", "enname", "cnspell",
        "exchange", "curr_type", "list_status", "delist_date", "is_hs"
    ]

    stock_frame = api.pro.stock_basic(**query_params, fields=output_fields)
    insert_dataframe_to_postgresql(client, stock_frame)


if __name__ == '__main__':
    # Connect to PostgreSQL, create the Tushare handle, then run the import.
    storage = PostgreSQLClient()
    source = TushareApi()
    retrieve_and_insert_stock_data(source, storage)

爬取后数据如下:总共获得5132条股票数据

image.png

2. 获取交易日数据

2.1 输入参数

| 名称 | 类型 | 必选 | 描述 |
| --- | --- | --- | --- |
| exchange | str | N | 交易所 SSE上交所,SZSE深交所,CFFEX 中金所,SHFE 上期所,CZCE 郑商所,DCE 大商所,INE 上能源 |
| start_date | str | N | 开始日期 (格式:YYYYMMDD 下同) |
| end_date | str | N | 结束日期 |
| is_open | str | N | 是否交易 '0'休市 '1'交易 |

2.2 输出参数

| 名称 | 类型 | 默认显示 | 描述 |
| --- | --- | --- | --- |
| exchange | str | Y | 交易所 SSE上交所 SZSE深交所 |
| cal_date | str | Y | 日历日期 |
| is_open | str | Y | 是否交易 0休市 1交易 |
| pretrade_date | str | Y | 上一个交易日 |

2.3 创建对应的表

-- Trading-calendar table. "id" is SERIAL so the table owns its own sequence
-- instead of depending on a sequence created for another table.
CREATE TABLE "public"."trade_cal" (
	"id" SERIAL NOT NULL,
	"exchange" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
	"cal_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
	"is_open" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
	"pretrade_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default",
	CONSTRAINT "trade_pkey" PRIMARY KEY ( "id" ) 
);
ALTER TABLE "public"."trade_cal" OWNER TO "postgres";
COMMENT ON COLUMN "public"."trade_cal"."exchange" IS '交易所 SSE上交所 SZSE深交所';
COMMENT ON COLUMN "public"."trade_cal"."cal_date" IS '日历日期';
COMMENT ON COLUMN "public"."trade_cal"."is_open" IS '是否交易 0休市 1交易';
COMMENT ON COLUMN "public"."trade_cal"."pretrade_date" IS '上一个交易日';

2.4 获取数据并保存到Postgresql数据库

import tushare as ts
import psycopg2
from psycopg2.extras import execute_values


class PostgreSQLClient:
    """Open a psycopg2 connection and expose it as ``self.conn``.

    The defaults are the tutorial's local settings; pass your own values
    (in particular a real password) when connecting anywhere else.

    Args:
        host: database server host.
        port: database server port.
        user: database user name.
        password: password for ``user``.
        database: name of the database to connect to.
    """

    def __init__(self, host='localhost', port=5432, user='postgres',
                 password='123456', database='tushare'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.conn = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )


class TushareApi:
    """Hold the Tushare Pro API handle exposed as ``self.pro``.

    Args:
        token: Tushare token. The default 'token' is only a placeholder;
            pass the token obtained in step 1.
    """

    def __init__(self, token='token'):
        self.pro = ts.pro_api(token)




def insert_dataframe_to_postgresql(client, df):
    """Bulk-insert ``df`` into the ``trade_cal`` table with ``execute_values``.

    Args:
        client: object whose ``conn`` attribute is an open psycopg2 connection.
        df: pandas DataFrame containing the columns listed below.

    Raises:
        KeyError: if ``df`` lacks one of the expected columns.
        Exception: any database error is re-raised after a rollback.
    """
    # Values are selected by name so they line up with the INSERT column
    # list whatever order the DataFrame's columns arrive in.
    columns = ["exchange", "cal_date", "is_open", "pretrade_date"]
    query = "INSERT INTO trade_cal ({}) VALUES %s".format(", ".join(columns))
    values = [tuple(row) for row in df[columns].to_numpy()]

    cur = client.conn.cursor()
    try:
        execute_values(cur, query, values)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()


def get_trade_cal(api, client):
    """Fetch the trading calendar via ``api.pro.trade_cal`` and store it.

    Args:
        api: object exposing the Tushare handle as ``api.pro``.
        client: storage client forwarded to ``insert_dataframe_to_postgresql``.
    """
    # Only "is_open" carries a value (1); every other parameter is an empty string.
    query_params = {
        "exchange": "",
        "cal_date": "",
        "start_date": "",
        "end_date": "",
        "is_open": 1,
        "limit": "",
        "offset": ""
    }
    output_fields = ["exchange", "cal_date", "is_open", "pretrade_date"]

    calendar_frame = api.pro.trade_cal(**query_params, fields=output_fields)
    insert_dataframe_to_postgresql(client, calendar_frame)


if __name__ == '__main__':
    # Connect to PostgreSQL, create the Tushare handle, then load the calendar.
    storage = PostgreSQLClient()
    source = TushareApi()
    get_trade_cal(source, storage)

爬取后数据如下:总共获得7644条交易日数据

image.png

3. 获取股票日线行情数据

3.1 输入参数

| 名称 | 类型 | 必选 | 描述 |
| --- | --- | --- | --- |
| ts_code | str | N | 股票代码(支持多个股票同时提取,逗号分隔) |
| trade_date | str | N | 交易日期(YYYYMMDD) |
| start_date | str | N | 开始日期(YYYYMMDD) |
| end_date | str | N | 结束日期(YYYYMMDD) |

注:日期都填YYYYMMDD格式,比如20181010

3.2 输出参数

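| 名称 | 类型 | 描述 |
| --- | --- | --- |
| ts_code | str | 股票代码 |
| trade_date | str | 交易日期 |
| open | float | 开盘价 |
| high | float | 最高价 |
| low | float | 最低价 |
| close | float | 收盘价 |
| pre_close | float | 昨收价(前复权) |
| change | float | 涨跌额 |
| pct_chg | float | 涨跌幅 (未复权,如果是复权请用 通用行情接口 ) |
| vol | float | 成交量 (手) |
| amount | float | 成交额 (千元) |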

3.3 创建对应的表

-- Daily quotes table: the code in this section inserts into "daily",
-- so this is the table the section needs.
-- "id" is SERIAL because the INSERT statement does not supply it.
CREATE TABLE "public"."daily" (
	"id" SERIAL NOT NULL,
	"ts_code" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
	"trade_date" VARCHAR ( 20 ) COLLATE "pg_catalog"."default" NOT NULL,
	"open" FLOAT8,
	"high" FLOAT8,
	"low" FLOAT8,
	"close" FLOAT8,
	"pre_close" FLOAT8,
	"change" FLOAT8,
	"pct_chg" FLOAT8,
	"vol" FLOAT8,
	"amount" FLOAT8,
	CONSTRAINT "daily_pkey" PRIMARY KEY ( "id" )
);
ALTER TABLE "public"."daily" OWNER TO "postgres";
COMMENT ON COLUMN "public"."daily"."ts_code" IS '股票代码';
COMMENT ON COLUMN "public"."daily"."trade_date" IS '交易日期';
COMMENT ON COLUMN "public"."daily"."open" IS '开盘价';
COMMENT ON COLUMN "public"."daily"."high" IS '最高价';
COMMENT ON COLUMN "public"."daily"."low" IS '最低价';
COMMENT ON COLUMN "public"."daily"."close" IS '收盘价';
COMMENT ON COLUMN "public"."daily"."pre_close" IS '昨收价(前复权)';
COMMENT ON COLUMN "public"."daily"."change" IS '涨跌额';
COMMENT ON COLUMN "public"."daily"."pct_chg" IS '涨跌幅';
COMMENT ON COLUMN "public"."daily"."vol" IS '成交量 (手)';
COMMENT ON COLUMN "public"."daily"."amount" IS '成交额 (千元)';

3.4 注意

tushare短时间内大量请求,会出现报错,提示远程主机强迫关闭了一个现有的连接,报错详情如下

57586cf4beb44b15d5d63ff3e3b8f32.png

解决方法
  1. 可以在每个请求之前加入一定的延时
  2. 该接口是由日期倒序执行的,可以在报错之后,在end_date上减去一天 比如在20201231日接口报错终止,则修改end_date为20201230,然后重启程序
  3. 添加重试机制
    3.1 固定重试3次

定义

import time

def retry(func, attempts=3, delay=1):
    """Decorate ``func`` so it is retried when it raises or returns None.

    Used bare (``@retry``) it makes three attempts, one second apart.

    Args:
        func: the callable to wrap.
        attempts: maximum number of calls to ``func``.
        delay: seconds to sleep between two attempts.

    Returns:
        A wrapper returning the first non-None result of ``func``, or None
        when every attempt returned None.

    Raises:
        Exception: whatever ``func`` raised on the final attempt.
    """
    import functools  # local import keeps this snippet self-contained

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for attempt in range(1, attempts + 1):
            try:
                result = func(*args, **kwargs)
            except Exception:
                # Deliberately broad: a dropped connection surfaces as an
                # exception, not as None. The last failure is re-raised.
                if attempt == attempts:
                    raise
            else:
                if result is not None:
                    return result
            if attempt < attempts:
                # No point sleeping after the final attempt.
                time.sleep(delay)
        return None

    return wrapper

引用

@retry
def my_function():
    """Placeholder for an operation that might fail; it yields None here."""
    return

3.2 自定义重试N次

定义

def retry(n, delay=None):
    """Build a decorator that retries a function up to ``n`` times.

    An attempt counts as failed when the function raises or returns None.

    Args:
        n: maximum number of calls to the decorated function.
        delay: seconds to sleep between two attempts; defaults to ``n``.

    Returns:
        A decorator. The wrapped function returns the first non-None
        result, or None when every attempt returned None.

    Raises:
        Exception: whatever the function raised on the final attempt.
    """
    import functools  # local import keeps this snippet self-contained

    pause = n if delay is None else delay

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, n + 1):
                try:
                    result = func(*args, **kwargs)
                except Exception:
                    # Deliberately broad: a dropped connection surfaces as
                    # an exception, not as None. The last failure is re-raised.
                    if attempt == n:
                        raise
                else:
                    if result is not None:
                        return result
                if attempt < n:
                    # No point sleeping after the final attempt.
                    time.sleep(pause)
            return None

        return wrapper

    return decorator

使用

@retry(3)
def my_function():
    """Placeholder for an operation that might fail; it yields None here."""
    return

3.5 获取数据并保存到Postgresql数据库

import tushare as ts
import psycopg2
import time
from psycopg2.extras import execute_values


class PostgreSQLClient:
    """Open a psycopg2 connection and expose it as ``self.conn``.

    The defaults are the tutorial's local settings; pass your own values
    (in particular a real password) when connecting anywhere else.

    Args:
        host: database server host.
        port: database server port.
        user: database user name.
        password: password for ``user``.
        database: name of the database to connect to.
    """

    def __init__(self, host='localhost', port=5432, user='postgres',
                 password='123456', database='tushare'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.conn = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )


class TushareApi:
    """Hold the Tushare Pro API handle exposed as ``self.pro``.

    Args:
        token: Tushare token. The default 'token' is only a placeholder;
            pass the token obtained in step 1.
    """

    def __init__(self, token='token'):
        self.pro = ts.pro_api(token)


def retry(n, delay=None):
    """Build a decorator that retries a function up to ``n`` times.

    An attempt counts as failed when the function raises or returns None.

    Args:
        n: maximum number of calls to the decorated function.
        delay: seconds to sleep between two attempts; defaults to ``n``.

    Returns:
        A decorator. The wrapped function returns the first non-None
        result, or None when every attempt returned None.

    Raises:
        Exception: whatever the function raised on the final attempt.
    """
    import functools  # local import keeps this block self-contained

    pause = n if delay is None else delay

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, n + 1):
                try:
                    result = func(*args, **kwargs)
                except Exception:
                    # Deliberately broad: a dropped connection surfaces as
                    # an exception, not as None. The last failure is re-raised.
                    if attempt == n:
                        raise
                else:
                    if result is not None:
                        return result
                if attempt < n:
                    # No point sleeping after the final attempt.
                    time.sleep(pause)
            return None

        return wrapper

    return decorator


def insert_dataframe_to_postgresql(client, df):
    """Bulk-insert ``df`` into the ``daily`` table with ``execute_values``.

    Args:
        client: object whose ``conn`` attribute is an open psycopg2 connection.
        df: pandas DataFrame containing the columns listed below.

    Raises:
        KeyError: if ``df`` lacks one of the expected columns.
        Exception: any database error is re-raised after a rollback.
    """
    # Values are selected by name so they line up with the INSERT column
    # list whatever order the DataFrame's columns arrive in.
    columns = [
        "ts_code", "trade_date", "open", "high", "low", "close",
        "pre_close", "change", "pct_chg", "vol", "amount"
    ]
    query = "INSERT INTO daily ({}) VALUES %s".format(", ".join(columns))
    values = [tuple(row) for row in df[columns].to_numpy()]

    cur = client.conn.cursor()
    try:
        execute_values(cur, query, values)
        client.conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        client.conn.rollback()
        raise
    finally:
        cur.close()


def get_trade_date(api, client):
    """Download and store the daily quotes of every SSE trading day.

    Fetches the open days up to 20230330 from ``trade_cal``, then for each
    day calls ``get_daily`` and writes the result through
    ``insert_dataframe_to_postgresql``.

    Args:
        api: object exposing the Tushare handle as ``api.pro``.
        client: storage client forwarded to ``insert_dataframe_to_postgresql``.

    Raises:
        RuntimeError: if ``get_daily`` yields None for a date, i.e. its
            retries were exhausted. The message names the date so the run
            can be resumed from there.
    """
    calendar = api.pro.trade_cal(exchange='SSE', is_open='1',
                                 start_date='',
                                 end_date='20230330',
                                 fields='cal_date')
    for date in calendar['cal_date'].values:
        daily_df = get_daily(api, date)
        if daily_df is None:
            # Fail with a clear message instead of an AttributeError
            # raised from inside the insert helper.
            raise RuntimeError(
                "no daily data returned for trade_date {}; "
                "adjust end_date and restart".format(date))
        print("date", date, daily_df)
        insert_dataframe_to_postgresql(client, daily_df)


@retry(5)
def get_daily(api, date):
    """Return the ``api.pro.daily`` DataFrame for one trade date.

    Args:
        api: object exposing the Tushare handle as ``api.pro``.
        date: value passed as the ``trade_date`` parameter.
    """
    # Only "trade_date" carries a value; the other parameters are empty strings.
    query_params = {
        "ts_code": "",
        "trade_date": date,
        "start_date": "",
        "end_date": "",
        "offset": "",
        "limit": ""
    }
    output_fields = [
        "ts_code", "trade_date", "open", "high", "low", "close",
        "pre_close", "change", "pct_chg", "vol", "amount"
    ]
    return api.pro.daily(**query_params, fields=output_fields)


if __name__ == '__main__':
    # Connect to PostgreSQL, create the Tushare handle, then load daily quotes.
    storage = PostgreSQLClient()
    source = TushareApi()
    get_trade_date(source, storage)

爬取后数据如下:总共获得13628761条股票数据

image.png

4.校验数据

4.1 交易日期

共有7886条数据,数据范围为:19901219 - 20230330 image.png

4.2 股票日线数据

按照交易日期分组共有7886条数据

image.png