获取 OSS 文件,并入库

750 阅读3分钟

需求

1、获取oss中的文件,文件夹名不固定,路径形如:oss://tyron_demo/odps_out/person/20220605-demo/.odps/20220605084131255g4sny8ys/test1111

2、获取到的文件,逐行读取,字段分隔符:\u0001;

3、将解析到的数据逐行入库;

代码实现

参考地址:help.aliyun.com/document_de…

需求分解:

  • 从oss上下载文件,阿里云官网上有多种实现方式:流式下载、下载到本地文件、范围下载、断点续传下载等等。此处涉及的文件相对不大,暂时不用断点续传;其次读取到的文件用于入库,不需要下载到本地。综上,此次使用流式下载方式下载文件;
  • 官网的例子中使用的是固定路径文件,此处的 20220605084131255g4sny8ys 并非固定文件路径,参考实现:help.aliyun.com/document_de…
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.model.ListObjectsRequest;
import com.aliyun.oss.model.OSSObject;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
​
public class Demo {
    public static void main(String[] args) {
        // Endpoint以华东1(杭州)为例,其它Region请按实际情况填写。关于其他Region对应的Endpoint信息,请参见访问域名和数据中心。
        String endpoint = "https://oss-cn-hangzhou.aliyuncs.com";
        // 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户。
        String accessKeyId = "yourAccessKeyId";
        String accessKeySecret = "yourAccessKeySecret";
        // 填写Bucket名称,例如examplebucket。
        String bucketName = "tyron_demo";
​
        // 创建OSSClient实例。
        OSS ossClient = new OSSClientBuilder().build(endpoint, accessKeyId, accessKeySecret);
        try {
            // 20220605
            String localDateStr = LocalDateUtils.format(LocalDate.now(), LocalDateUtils.UNSIGNED_DATE_PATTERN);
            // 获取文件夹下的文件
            List<String> paths = getOssFilePaths(ossClient, bucketName, "odps_out", "person", localDateStr);
            System.out.println("path.size=" + paths.size());
            for (String path : paths) {
                System.out.println("path:" + path);
​
                // ossObject包含文件所在的存储空间名称、文件名称、文件元信息以及一个输入流。
                OSSObject ossObject = ossClient.getObject(bucketName, path);
​
                // 读取文件内容。
                System.out.println("Object content:");
                BufferedReader reader = new BufferedReader(new InputStreamReader(ossObject.getObjectContent()));
                while (true) {
                    String line = reader.readLine();
                    if (line == null) {
                        break;
                    }
​
                    System.out.println("\n" + line);
                }
                // 数据读取完成后,获取的流必须关闭,否则会造成连接泄漏,导致请求无连接可用,程序无法正常工作。
                reader.close();
                // ossObject对象使用完毕后必须关闭,否则会造成连接泄漏,导致请求无连接可用,程序无法正常工作。
                ossObject.close();
            }
        } catch (OSSException oe) {
            System.out.println("Caught an OSSException, which means your request made it to OSS, "
                    + "but was rejected with an error response for some reason.");
            System.out.println("Error Message:" + oe.getErrorMessage());
            System.out.println("Error Code:" + oe.getErrorCode());
            System.out.println("Request ID:" + oe.getRequestId());
            System.out.println("Host ID:" + oe.getHostId());
        } catch (Throwable ce) {
            System.out.println("Caught an ClientException, which means the client encountered "
                    + "a serious internal problem while trying to communicate with OSS, "
                    + "such as not being able to access the network.");
            System.out.println("Error Message:" + ce.getMessage());
        } finally {
            if (ossClient != null) {
                ossClient.shutdown();
            }
        }
    }
​
    public static List<String> getOssFilePaths(OSS ossClient, String ossBucketName, String ossDirPrefix, String bizName, String partition) {
        String ossDir = ossDirPrefix + "/" + bizName + "/" + partition + "/" + ".odps" + "/";
        List<String> paths = new ArrayList<>();
        ObjectListing commonPrefixListing = null;
        do {
            ListObjectsRequest listCommonPrefixObjectsRequest = new ListObjectsRequest(ossBucketName);
            listCommonPrefixObjectsRequest.setDelimiter("/");
            if (!ossDir.endsWith("/")) {
                ossDir = ossDir + "/";
            }
            listCommonPrefixObjectsRequest.setPrefix(ossDir);
            if (commonPrefixListing != null) {
                listCommonPrefixObjectsRequest.setMarker(commonPrefixListing.getNextMarker());
            }
            commonPrefixListing = ossClient.listObjects(listCommonPrefixObjectsRequest);
​
            for (String commonPrefix : commonPrefixListing.getCommonPrefixes()) {
                paths.addAll(getObjectSummaryKeys(ossClient, ossBucketName, commonPrefix));
            }
        } while (commonPrefixListing.isTruncated());
        return paths;
    }
​
    private static List<String> getObjectSummaryKeys(OSS ossClient, String ossBucketName, String commonPrefix) {
        List<String> paths = new ArrayList<>(1);
        ObjectListing objectListing = null;
        do {
            ListObjectsRequest listObjectsRequest = new ListObjectsRequest(ossBucketName);
            listObjectsRequest.setDelimiter("/");
            listObjectsRequest.setPrefix(commonPrefix);
            if (objectListing != null) {
                listObjectsRequest.setMarker(objectListing.getNextMarker());
            }
            objectListing = ossClient.listObjects(listObjectsRequest);
            for (OSSObjectSummary objectSummary : objectListing.getObjectSummaries()) {
                if (objectSummary.getSize() > 0) {
                    paths.add(objectSummary.getKey());
                }
            }
        } while (objectListing.isTruncated());
        return paths;
    }
}

输出结果:

16:57:47.294 [main] DEBUG org.apache.http.impl.conn.PoolingHttpClientConnectionManager - Connection released: [id: 0][route: {s}->https://oss-cn-hangzhou.aliyuncs.com:443][total kept alive: 1; route allocated: 1 of 1024; total allocated: 1 of 1024]
path.size=1
path:odps_out/person/20220605-demo/.odps/20220605084131255g4sny8ys/test1111
Object content:2c3eff4d38085ed287•DEMO••DEMO@demo.com.au••[{"test": "demo"}]

注:此文分享解析流程,入库流程完善后分享。