前情提要
前文了解了python连接redshift的api操作方式,如果有大批量的插入操作,还是使用copy命令,从S3进行数据导入。这样会节省大量的时间,并且S3也可以看作数仓的ODS层。下面来聊一聊,S3的API,boto3的使用方式。
- 第一步获取S3的连接 连接S3有两个官方提供的方法
boto3.client
boto3.resource
# 两个方法的区别在于 client 会返回字典信息,如果想要获取详细的内容,需要使用get方法获取key对应的值
# 而resource会将S3返回的内容做一层封装,想要获取相应的内容,直接调用属性或者方法即可得到
# 看下来是resource方法更加的高级易用,符合面向对象的特点,由于笔者看到的是采用
# client获取连接的API,所以下文主要介绍在client获取连接基础上实现的一些功能,同样的resource应该可以实现
- 第二步就可以具体操作S3了 操作的方式差不多就是增删改查、权限设置,此外API中还提到了一种生成链接(presigned URL)供外部请求的方法。
因为S3中是没有文件夹的概念的,所有的内容都当作对象来使用和查看,所以操作文件的时候注意文件的命名,/结尾会默认为一个可以存储文件的节点(文件夹)
下面是详细代码
import json
import boto3
from botocore.exceptions import ClientError
import logging
import os
import requests
class S3:
    """Convenience wrapper around a low-level boto3 S3 client.

    Covers the common operations used in this article: listing buckets and
    objects, creating buckets, uploading/downloading files, generating
    presigned URLs / presigned POSTs, and reading/writing bucket policies.
    Every method takes the client as an argument so one connection can be
    reused across calls.
    """

    def __init__(self):
        pass

    def conn_s3(self):
        """Create and return a low-level S3 client.

        boto3 offers two entry points: ``client`` (returns plain dicts you
        index with ``get``) and ``resource`` (an object-oriented wrapper).
        This class is built on ``client``.

        FIX: the original passed ``aws_access_key_id=''`` /
        ``aws_secret_access_key=''`` / ``region_name=''`` explicitly; empty
        strings are treated as *provided* credentials and break
        authentication. Omitting them lets boto3 resolve credentials through
        its default chain (env vars, ~/.aws/credentials, instance profile).
        """
        return boto3.client(service_name='s3')

    def showAllBucket(self, s3):
        """Print the name of every bucket visible to *s3*.

        (camelCase name kept for backward compatibility with callers.)
        """
        response = s3.list_buckets()
        print('Existing buckets:')
        for bucket in response['Buckets']:
            print(f' {bucket["Name"]}')

    def list_bucket_keys(self, s3, bucket_name):
        """Print and return the keys of every object in *bucket_name*.

        :param s3: boto3 S3 client
        :param bucket_name: bucket to inspect
        :return: list of object keys; empty list when the bucket is empty
        """
        # 'Contents' is absent from the response for an empty bucket; the
        # original iterated None and crashed -- fall back to [] instead.
        contents = s3.list_objects(Bucket=bucket_name).get('Contents') or []
        keys = []
        for entry in contents:
            key = entry.get('Key')
            keys.append(key)
            print(key)
        return keys

    def create_bucket(self, bucket_name, s3, region=None):
        """Create an S3 bucket in a specified region.

        If a region is not specified, the bucket is created in the S3 default
        region (us-east-1) via the supplied client.

        :param bucket_name: Bucket to create
        :param s3: boto3 S3 client used for the default-region case
        :param region: String region to create bucket in, e.g., 'us-west-2'
        :return: True if bucket created, else False
        """
        try:
            if region is None:
                s3.create_bucket(Bucket=bucket_name)
            else:
                # SECURITY FIX: the original hard-coded a long-lived AWS
                # access key and secret here. Build the regional client from
                # the default credential chain instead; never embed keys in
                # source code.
                regional = boto3.client(service_name='s3', region_name=region)
                regional.create_bucket(
                    Bucket=bucket_name,
                    CreateBucketConfiguration={'LocationConstraint': region})
        except ClientError as e:
            logging.error(e)
            return False
        return True

    def upload_file(self, s3, file_name, bucket, object_name=None):
        """Upload a local file to an S3 bucket.

        :param s3: boto3 S3 client
        :param file_name: File to upload
        :param bucket: Bucket to upload to
        :param object_name: S3 object name. If not specified, the base name
            of *file_name* is used
        :return: True if file was uploaded, else False
        """
        if object_name is None:
            object_name = os.path.basename(file_name)
        try:
            # upload_file returns None; the original bound it to an unused
            # local, dropped here.
            s3.upload_file(file_name, bucket, object_name)
            print('文件上传成功')
        except ClientError as e:
            logging.error(e)
            return False
        return True

    def download_file(self, s3, bucket_name, object, file):
        """Download *object* from *bucket_name* into local path *file*.

        Best-effort: failures are printed, not re-raised.
        NOTE(review): the *object* parameter shadows the builtin; the name is
        kept because keyword callers depend on it.
        """
        try:
            s3.download_file(Bucket=bucket_name, Key=object, Filename=file)
            print('下载成功')
        except Exception as e:
            print('下载失败')
            print(e)

    def create_presigned_url(self, s3, bucket_name, object_name, expiration=3600):
        """Generate a presigned URL to share an S3 object.

        :param bucket_name: string
        :param object_name: string
        :param expiration: Time in seconds for the presigned URL to remain valid
        :return: Presigned URL as string. If error, returns None.
        """
        try:
            response = s3.generate_presigned_url(
                'get_object',
                Params={'Bucket': bucket_name, 'Key': object_name},
                ExpiresIn=expiration)
        except ClientError as e:
            logging.error(e)
            return None
        # The response contains the presigned URL.
        return response

    def create_presigned_url_expanded(self, s3, client_method_name,
                                      method_parameters=None,
                                      expiration=3600, http_method='GET'):
        """Generate a presigned URL to invoke an arbitrary S3.Client method.

        Not all the client methods provided in the AWS Python SDK are
        supported.

        :param client_method_name: Name of the S3.Client method, e.g., 'list_buckets'
        :param method_parameters: Dictionary of parameters to send to the method
        :param expiration: Time in seconds for the presigned URL to remain valid
        :param http_method: HTTP method to use (GET, etc.)
        :return: Presigned URL as string. If error, returns None.
        """
        try:
            response = s3.generate_presigned_url(ClientMethod=client_method_name,
                                                 Params=method_parameters,
                                                 ExpiresIn=expiration,
                                                 HttpMethod=http_method)
        except ClientError as e:
            logging.error(e)
            return None
        return response

    def create_presigned_post(self, s3, bucket_name, object_name,
                              fields=None, conditions=None, expiration=3600):
        """Generate a presigned S3 POST request to upload a file.

        :param bucket_name: string
        :param object_name: string
        :param fields: Dictionary of prefilled form fields
        :param conditions: List of conditions to include in the policy
        :param expiration: Time in seconds for the presigned URL to remain valid
        :return: Dictionary with keys ``url`` (URL to post to) and ``fields``
            (form fields/values to submit with the POST); None on error.
        """
        try:
            response = s3.generate_presigned_post(bucket_name,
                                                  object_name,
                                                  Fields=fields,
                                                  Conditions=conditions,
                                                  ExpiresIn=expiration)
        except ClientError as e:
            logging.error(e)
            return None
        return response

    def generate_post_demo(self, s3):
        """Demo: obtain a presigned POST and upload a local file through it."""
        object_name = 'mindao2.csv'
        response = self.create_presigned_post(s3, 'myawsbucket98765', object_name)
        # BUG FIX: guard *before* dereferencing -- the original printed
        # response['url'] first, so a failed presign raised TypeError instead
        # of reaching this exit.
        if response is None:
            exit(1)
        print(response['url'], response['fields'])
        # Any other program holding the presigned URL can upload the file.
        with open(object_name, 'rb') as f:
            files = {'file': (object_name, f)}
            http_response = requests.post(response['url'],
                                          data=response['fields'], files=files)
        # A successful upload returns HTTP status code 204.
        logging.info(f'File upload HTTP status code: {http_response.status_code}')

    def get_bucket_policy(self, s3, bucket):
        """Return the policy document (JSON string) attached to *bucket*."""
        result = s3.get_bucket_policy(Bucket=bucket)
        return result['Policy']

    def put_bucket_policy(self, s3, bucket, policy):
        """Attach *policy* (a JSON string) to *bucket*."""
        s3.put_bucket_policy(Bucket=bucket, Policy=policy)

    def put_bucket_policy_demo(self):
        """Demo: grant anonymous read (s3:GetObject) on every object of the
        demo bucket."""
        bucket_name = 'myawsbucket98765'
        bucket_policy = {
            'Version': '2012-10-17',
            'Statement': [{
                'Sid': 'AddPerm',
                'Effect': 'Allow',
                'Principal': '*',
                'Action': ['s3:GetObject'],
                'Resource': f'arn:aws:s3:::{bucket_name}/*'
            }]
        }
        self.put_bucket_policy(self.conn_s3(), bucket=bucket_name,
                               policy=json.dumps(bucket_policy))
if __name__ == '__main__':
    # Build the helper and a shared client, then demonstrate a couple of
    # read-only operations; the commented calls below are further examples.
    helper = S3()
    client = helper.conn_s3()
    # print(helper.list_bucket_keys(client, bucket_name='myawsbucket98765'))
    helper.showAllBucket(client)
    # url = helper.create_presigned_url(client, 'myawsbucket98765', 'favorite-pics/2006年第三季度業績報告.pdf', expiration=3600)
    # url2 = helper.create_presigned_url_expanded(client, 'showAllBucket')
    # print(url2)
    # helper.upload_file(client, '2006年第三季度業績報告.pdf', 'myawsbucket98765', 'favorite-pics/2006年第三季度業績報告.pdf')
    # helper.download_file(client, bucket_name='myawsbucket98765', object='favorite-pics/2006年第三季度業績報告.pdf', file='down_pdf.pdf')
    # helper.generate_post_demo(client)
    policy_doc = helper.get_bucket_policy(client, 'myawsbucket98765')
    print(policy_doc)
    # helper.put_bucket_policy_demo()
以下是一些可以参考的文档
Available services — Boto3 Docs 1.24.2 documentation (amazonaws.com) An Introduction to boto’s S3 interface — boto v2.49.0
以下是一些参考的文章
AWS S3 - python 从S3下载数据到本地 - 掘金 (juejin.cn)
boto3 - 使用Python访问AWS S3 (02) - 掘金 (juejin.cn)
使用boto3批量上传图片到S3以及工作中两个小总结 - 掘金 (juejin.cn)
boto3连接aws的s3及注意事项 - 掘金 (juejin.cn)