需求
1、获取oss中的文件,文件夹名不固定,路径形如:oss://tyron_demo/odps_out/person/20220605-demo/.odps/20220605084131255g4sny8ys/test1111
2、获取到的文件,逐行读取,字段分隔符:\u0001;
3、将解析到的数据逐行入库;
代码实现
参考地址:help.aliyun.com/document_de…
需求分解:
- 从oss上下载文件,阿里云官网上有多种实现方式:流式下载、下载到本地文件、范围下载、断点续传下载等等,此处涉及的文件相对不大,暂时不用断点续传,其次读取到的文件用于入库,不需要下载到本地;综上,此次使用流式下载方式将文件进行下载;
- 官网的例子中使用的是固定路径文件,此处的 20220605084131255g4sny8ys 并非固定文件路径,参考实现:help.aliyun.com/document_de…
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.model.ListObjectsRequest;
import com.aliyun.oss.model.OSSObject;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
/**
 * Demo: locates the export files under a date-partitioned OSS directory whose intermediate
 * folder name is not known in advance, then streams every file to standard output line by line.
 */
public class Demo {

    /**
     * Lists today's export files in the bucket and prints the content of each one.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Endpoint of the China East 1 (Hangzhou) region; use the endpoint of your own region.
        String endpoint = "https://oss-cn-hangzhou.aliyuncs.com";
        // An Alibaba Cloud account AccessKey can call every API, which is risky.
        // Prefer the AccessKey of a RAM user created in the RAM console.
        String accessKeyId = "yourAccessKeyId";
        String accessKeySecret = "yourAccessKeySecret";
        // Name of the bucket to read from.
        String bucketName = "tyron_demo";
        // Create the OSS client instance.
        OSS ossClient = new OSSClientBuilder().build(endpoint, accessKeyId, accessKeySecret);
        try {
            // Current date rendered by the project helper, e.g. 20220605.
            String localDateStr = LocalDateUtils.format(LocalDate.now(), LocalDateUtils.UNSIGNED_DATE_PATTERN);
            // Collect the keys of the files below the partition directory.
            List<String> paths = getOssFilePaths(ossClient, bucketName, "odps_out", "person", localDateStr);
            System.out.println("path.size=" + paths.size());
            for (String path : paths) {
                System.out.println("path:" + path);
                printObjectContent(ossClient, bucketName, path);
            }
        } catch (OSSException oe) {
            System.out.println("Caught an OSSException, which means your request made it to OSS, "
                    + "but was rejected with an error response for some reason.");
            System.out.println("Error Message:" + oe.getErrorMessage());
            System.out.println("Error Code:" + oe.getErrorCode());
            System.out.println("Request ID:" + oe.getRequestId());
            System.out.println("Host ID:" + oe.getHostId());
        } catch (IOException ioe) {
            // Reading or closing the object stream failed; report it as what it is instead of
            // mislabelling it as a client-side SDK exception.
            System.out.println("Caught an IOException while reading the object content.");
            System.out.println("Error Message:" + ioe.getMessage());
        } catch (Exception ce) {
            // Boundary catch for the remaining failures. JVM Errors are deliberately no longer
            // swallowed here.
            System.out.println("Caught an ClientException, which means the client encountered "
                    + "a serious internal problem while trying to communicate with OSS, "
                    + "such as not being able to access the network.");
            System.out.println("Error Message:" + ce.getMessage());
        } finally {
            ossClient.shutdown();
        }
    }

    /**
     * Streams one object and prints each of its lines, preceded by an empty line.
     *
     * <p>The reader and the OSS object are closed even when reading fails; otherwise the
     * underlying connection would leak and later requests could run out of connections.
     *
     * @param ossClient  client used to fetch the object
     * @param bucketName bucket that holds the object
     * @param key        full key of the object
     * @throws IOException if reading or closing the object stream fails
     */
    private static void printObjectContent(OSS ossClient, String bucketName, String key) throws IOException {
        // The OSSObject carries the bucket name, key, metadata and an input stream.
        OSSObject ossObject = ossClient.getObject(bucketName, key);
        try {
            System.out.println("Object content:");
            // Decode explicitly as UTF-8 rather than the platform default charset.
            // NOTE(review): assumes the exported files are UTF-8 encoded - confirm.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(ossObject.getObjectContent(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println("\n" + line);
                }
            }
        } finally {
            ossObject.close();
        }
    }

    /**
     * Returns the keys of all non-empty files that sit exactly one directory level below
     * {@code <ossDirPrefix>/<bizName>/<partition>/.odps/}.
     *
     * @param ossClient     client used for the listing requests
     * @param ossBucketName bucket to list
     * @param ossDirPrefix  first path segment of the directory
     * @param bizName       second path segment of the directory
     * @param partition     third path segment of the directory
     * @return the collected object keys; empty when nothing matches
     */
    public static List<String> getOssFilePaths(OSS ossClient, String ossBucketName, String ossDirPrefix, String bizName, String partition) {
        // The prefix always ends with "/", so it is built once outside the paging loop.
        String ossDir = ossDirPrefix + "/" + bizName + "/" + partition + "/.odps/";
        List<String> paths = new ArrayList<>();
        ObjectListing commonPrefixListing = null;
        do {
            ListObjectsRequest request = new ListObjectsRequest(ossBucketName);
            // With "/" as delimiter the sub-directories come back as common prefixes.
            request.setDelimiter("/");
            request.setPrefix(ossDir);
            if (commonPrefixListing != null) {
                // Continue after the last entry of the previous page.
                request.setMarker(commonPrefixListing.getNextMarker());
            }
            commonPrefixListing = ossClient.listObjects(request);
            for (String commonPrefix : commonPrefixListing.getCommonPrefixes()) {
                paths.addAll(getObjectSummaryKeys(ossClient, ossBucketName, commonPrefix));
            }
        } while (commonPrefixListing.isTruncated());
        return paths;
    }

    /**
     * Returns the keys of the objects listed under {@code commonPrefix} whose size is greater
     * than zero.
     *
     * @param ossClient     client used for the listing requests
     * @param ossBucketName bucket to list
     * @param commonPrefix  directory prefix to list
     * @return the keys of the non-empty objects; empty when there are none
     */
    private static List<String> getObjectSummaryKeys(OSS ossClient, String ossBucketName, String commonPrefix) {
        List<String> paths = new ArrayList<>(1);
        ObjectListing objectListing = null;
        do {
            ListObjectsRequest request = new ListObjectsRequest(ossBucketName);
            request.setDelimiter("/");
            request.setPrefix(commonPrefix);
            if (objectListing != null) {
                // Continue after the last entry of the previous page.
                request.setMarker(objectListing.getNextMarker());
            }
            objectListing = ossClient.listObjects(request);
            for (OSSObjectSummary objectSummary : objectListing.getObjectSummaries()) {
                // Entries with size 0 are skipped.
                if (objectSummary.getSize() > 0) {
                    paths.add(objectSummary.getKey());
                }
            }
        } while (objectListing.isTruncated());
        return paths;
    }
}
输出结果:
16:57:47.294 [main] DEBUG org.apache.http.impl.conn.PoolingHttpClientConnectionManager - Connection released: [id: 0][route: {s}->https://oss-cn-hangzhou.aliyuncs.com:443][total kept alive: 1; route allocated: 1 of 1024; total allocated: 1 of 1024]
path.size=1
path:odps_out/person/20220605-demo/.odps/20220605084131255g4sny8ys/test1111
Object content:2c3eff4d38085ed287•DEMO••DEMO@demo.com.au••[{"test": "demo"}]
注:此文分享解析流程,入库流程完善后分享。