1. POM dependency
<dependency>
    <groupId>com.amazonaws</groupId>
    <artifactId>aws-java-sdk-s3</artifactId>
    <version>{latest version}</version>
</dependency>
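The sample code in the following sections relies on these SDK classes; for reference, the main imports are (plus the usual java.util / java.io / java.util.concurrent imports):

import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.SignerFactory;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.iterable.S3Objects;
import com.amazonaws.services.s3.model.*;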
2. Creating the client
public static AmazonS3 createClient(String endPoint, String accessKey, String secretKey) {
    ClientConfiguration conf = new ClientConfiguration();
    conf.withProtocol(Protocol.HTTP);
    // SigV4 signing with an unsigned payload (saves hashing large bodies)
    conf.setSignerOverride(SignerFactory.VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER);
    conf.setMaxConnections(5000);
    conf.setConnectionTimeout(30000);
    conf.setSocketTimeout(60000);
    return AmazonS3ClientBuilder.standard()
            .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)))
            .withClientConfiguration(conf)
            // null region: the endpoint decides; typical for S3-compatible storage
            .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endPoint, null))
            // path-style URLs (http://host/bucket/key), usual for self-hosted stores
            .withPathStyleAccessEnabled(true)
            .build();
}
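A minimal call sketch, assuming an S3-compatible endpoint (the address and keys below are placeholders):

// Placeholders only -- point these at your own S3/MinIO/Ceph RGW endpoint
AmazonS3 client = createClient("http://127.0.0.1:9000", "myAccessKey", "mySecretKey");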
3. Bucket operations
public static Boolean createBucket(String bucketName, AmazonS3 client) {
    if (bucketCheck(bucketName, client)) {
        return true;
    }
    CreateBucketRequest createBucketRequest = new CreateBucketRequest(bucketName);
    createBucketRequest.setCannedAcl(CannedAccessControlList.PublicRead);
    Bucket bucket = client.createBucket(createBucketRequest);
    return Objects.nonNull(bucket);
}

public static void delBucket(String bucketName, AmazonS3 client) {
    client.deleteBucket(bucketName);
}

public static List<String> getBucketList(AmazonS3 client) {
    List<String> result = new ArrayList<>();
    for (Bucket bucket : client.listBuckets()) {
        result.add(bucket.getName());
    }
    return result;
}
public static boolean bucketCheck(String bucketName, AmazonS3 client) {
    try {
        client.headBucket(new HeadBucketRequest(bucketName));
        return true;
    } catch (AmazonServiceException e) {
        // A 404 means the bucket does not exist; any other error (e.g. 403)
        // is also treated as "not usable" here and reported as absent
        return false;
    }
}
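A short usage sketch for the bucket helpers above (the bucket name is a placeholder):

// Assumes "client" was created as in section 2
if (createBucket("demo-bucket", client)) {        // true if created or already present
    System.out.println(getBucketList(client));    // print all bucket names
    delBucket("demo-bucket", client);             // delete again (bucket must be empty)
}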
4. Object operations
public static void putObject(String bucketName, String objectKey, String content, AmazonS3 client) {
    client.putObject(bucketName, objectKey, content);
    client.setObjectAcl(bucketName, objectKey, CannedAccessControlList.PublicRead);
}
public static void putObject(String bucketName, String objectKey, File file, AmazonS3 client) {
    PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, objectKey, file);
    putObjectRequest.setCannedAcl(CannedAccessControlList.PublicRead);
    client.putObject(putObjectRequest);
}

public static void delObject(String bucketName, String key, AmazonS3 client) {
    client.deleteObject(new DeleteObjectRequest(bucketName, key));
}
// S3 has no rename: this copies to the new key (delete the source afterwards for a true rename)
public static void modifyObjectName(String sourceBucket, String sourceKey, String targetBucket, String targetKey, AmazonS3 client) {
    CopyObjectRequest copyObjRequest = new CopyObjectRequest()
            .withSourceBucketName(sourceBucket)
            .withSourceKey(sourceKey)
            .withDestinationBucketName(targetBucket)
            .withDestinationKey(targetKey)
            .withCannedAccessControlList(CannedAccessControlList.PublicRead);
    client.copyObject(copyObjRequest);
}
public static S3Object getObject(String bucketName, String keyName, AmazonS3 client) {
    return client.getObject(bucketName, keyName);
}

public static List<S3ObjectSummary> getAllObjects(String bucketName, String prefix, int pageSize, AmazonS3 client) {
    List<S3ObjectSummary> list = new ArrayList<>();
    S3Objects.withPrefix(client, bucketName, prefix).withBatchSize(pageSize).forEach(list::add);
    return list;
}

public static List<String> getAllObjectKey(String bucketName, String prefix, int pageSize, AmazonS3 client) {
    List<String> result = new ArrayList<>();
    for (S3ObjectSummary summary : getAllObjects(bucketName, prefix, pageSize, client)) {
        String key = summary.getKey();
        // Skip "directory" placeholder objects
        if (key.endsWith("/")) {
            continue;
        }
        result.add(key);
    }
    return result;
}

public static ObjectMetadata getS3ObjectMetadata(String bucket, String key, AmazonS3 client) {
    return client.getObjectMetadata(bucket, key);
}
5. Multipart upload of large files with resume support
public static void main(String[] args) {
    // Serial upload (call arguments omitted here; see the signature below)
    bigFileUploadS3(/* filePath, bucket, key, client */);
    // Concurrent upload
    concurrentBigFileUploadS3(/* filePath, bucket, key, client */);
}
/**
 * Upload a large file from disk to S3: multipart, serial, with resume support.
 *
 * @param filePath file path
 * @param bucket   bucket name
 * @param key      object key
 * @param client   S3 client
 */
private static void bigFileUploadS3(String filePath, String bucket, String key, AmazonS3 client) {
    try {
        // Compute the file's MD5 (fast-md5 library)
        String md5 = MD5.asHex(MD5.getHash(new File(filePath)));
        // Total file length
        File file = new File(filePath);
        long size = file.length();
        System.out.println("File size: " + size);
        // Part size; tune it to your situation, 100 MB here
        long minPartSize = 100 * 1024 * 1024;
        System.out.println("Part size: " + minPartSize);
        List<Long> positions = getPositions(size, minPartSize);
        System.out.println("Split into " + positions.size() + " parts");
        // Look up the upload record in MySQL by md5 and status "INCOMPLETE"
        FileUploadInfo fileUploadInfo = fileUploadInfoService.queryByMd5(md5);
        // Each multipart upload gets its own uploadId, shared by all of its parts;
        // re-uploading the same file produces a different uploadId each time
        String uploadId;
        if (Objects.isNull(fileUploadInfo)) {
            // No record: the file was never uploaded, start fresh with a new uploadId
            uploadId = getUploadId(bucket, key, client);
            // Save the basic upload info
            saveUploadInfo(md5, uploadId, size, minPartSize, positions.size(), filePath, key);
            // Upload the parts serially
            for (int i = 0; i < positions.size(); i++) {
                uploadTask(bucket, key, uploadId, i, positions, file, minPartSize, size, client);
            }
        } else {
            // Reuse the unfinished uploadId
            uploadId = fileUploadInfo.getUploadId();
            // Ask S3 which parts of this uploadId were already uploaded
            List<Integer> partIndex = finishPartIndex(client, bucket, key, uploadId);
            if (CollectionUtils.isEmpty(partIndex)) {
                // Listing failed or came back empty: the orphan parts were already removed
                // (S3 can be configured to purge them periodically), so treat this as a new
                // upload: generate a new uploadId and a new record in MySQL
                uploadId = getUploadId(bucket, key, client);
                saveUploadInfo(md5, uploadId, size, minPartSize, positions.size(), filePath, key);
            }
            for (int i = 0; i < positions.size(); i++) {
                if (!CollectionUtils.isEmpty(partIndex) && partIndex.contains(i + 1)) {
                    System.out.println("Part " + (i + 1) + " already uploaded");
                    continue;
                }
                uploadTask(bucket, key, uploadId, i, positions, file, minPartSize, size, client);
            }
        }
        // Merge the parts
        mergePart(bucket, key, uploadId, client, positions.size());
        // Mark the record as completed
        fileUploadInfoService.updataStatu("COMPLETED", uploadId);
    } catch (Exception e) {
        System.out.println("File upload failed: " + e);
    }
}
/**
 * Upload a large file from disk to S3: multipart, concurrent, with resume support.
 *
 * @param filePath file path
 * @param bucket   bucket name
 * @param key      object key
 * @param client   S3 client
 */
public static void concurrentBigFileUploadS3(String filePath, String bucket, String key, AmazonS3 client) {
    ThreadPoolExecutor taskPool = new ThreadPoolExecutor(10, 20, 1, TimeUnit.SECONDS,
            new ArrayBlockingQueue<>(10000), new ThreadPoolExecutor.CallerRunsPolicy());
    try {
        // Compute the file's MD5 (fast-md5 library)
        String md5 = MD5.asHex(MD5.getHash(new File(filePath)));
        // Total file length
        File file = new File(filePath);
        long size = file.length();
        System.out.println("File size: " + size);
        // Part size; tune it to your situation, 100 MB here
        long minPartSize = 100 * 1024 * 1024;
        System.out.println("Part size: " + minPartSize);
        List<Long> positions = getPositions(size, minPartSize);
        System.out.println("Split into " + positions.size() + " parts");
        // One future per part
        CompletableFuture[] futures = new CompletableFuture[positions.size()];
        FileUploadInfo fileUploadInfo = fileUploadInfoService.queryByMd5(md5);
        String uploadId;
        if (Objects.isNull(fileUploadInfo)) {
            // No record: the file was never uploaded, start fresh with a new uploadId
            uploadId = getUploadId(bucket, key, client);
            // Save the basic upload info
            saveUploadInfo(md5, uploadId, size, minPartSize, positions.size(), filePath, key);
            for (int i = 0; i < positions.size(); i++) {
                int finalI = i;
                String finalUploadId = uploadId;
                futures[i] = CompletableFuture.runAsync(
                        () -> uploadTask(bucket, key, finalUploadId, finalI, positions, file, minPartSize, size, client),
                        taskPool);
            }
        } else {
            // Reuse the unfinished uploadId
            uploadId = fileUploadInfo.getUploadId();
            // Ask S3 which parts of this uploadId were already uploaded
            List<Integer> partIndex = finishPartIndex(client, bucket, key, uploadId);
            if (CollectionUtils.isEmpty(partIndex)) {
                // Orphan parts were already purged (S3 can be configured to do so periodically);
                // treat as a new upload: new uploadId and a new record in MySQL
                uploadId = getUploadId(bucket, key, client);
                saveUploadInfo(md5, uploadId, size, minPartSize, positions.size(), filePath, key);
            }
            for (int i = 0; i < positions.size(); i++) {
                if (!CollectionUtils.isEmpty(partIndex) && partIndex.contains(i + 1)) {
                    System.out.println("Part " + (i + 1) + " already uploaded");
                    // Fill the slot so allOf() below does not hit a null element
                    futures[i] = CompletableFuture.completedFuture(null);
                    continue;
                }
                int finalI = i;
                String finalUploadId = uploadId;
                futures[i] = CompletableFuture.runAsync(
                        () -> uploadTask(bucket, key, finalUploadId, finalI, positions, file, minPartSize, size, client),
                        taskPool);
            }
        }
        CompletableFuture<Void> allFuture = CompletableFuture.allOf(futures);
        // Wait for every task to finish (or for one to throw)
        System.out.println("Waiting for the upload tasks to finish");
        allFuture.join();
        // Merge the parts
        mergePart(bucket, key, uploadId, client, positions.size());
        // Mark the record as completed
        fileUploadInfoService.updataStatu("COMPLETED", uploadId);
        taskPool.shutdown();
    } catch (Exception e) {
        System.out.println("File upload failed: " + e);
        taskPool.shutdownNow();
    }
}
/**
 * @param bucketName bucket name
 * @param s3Key      object key
 * @param uploadId   upload id
 * @param partCount  expected number of parts
 */
public static void mergePart(String bucketName, String s3Key, String uploadId, AmazonS3 client, int partCount) {
    List<PartETag> partETags = getUploadPart(bucketName, s3Key, uploadId, client);
    if (partCount == partETags.size()) {
        CompleteMultipartUploadRequest compRequest =
                new CompleteMultipartUploadRequest(bucketName, s3Key, uploadId, partETags);
        client.completeMultipartUpload(compRequest);
    } else {
        System.out.println("Uploaded part count does not match the part count to merge on S3");
        throw new RuntimeException("Uploaded part count does not match the part count to merge on S3");
    }
}
/**
 * Fetch the parts already uploaded for the given uploadId.
 * (Pagination of the listing is ignored for brevity.)
 */
public static List<PartETag> getUploadPart(String bucketName, String s3Key, String uploadId, AmazonS3 client) {
    PartListing partListing = client.listParts(new ListPartsRequest(bucketName, s3Key, uploadId));
    List<PartETag> partETags = new ArrayList<>();
    for (PartSummary partSummary : partListing.getParts()) {
        partETags.add(new PartETag(partSummary.getPartNumber(), partSummary.getETag()));
    }
    return partETags;
}
/**
 * Query the indexes of the parts that were already uploaded.
 */
public static List<Integer> finishPartIndex(AmazonS3 amazonS3Client, String bucket, String key, String uploadId) {
    try {
        PartListing partListing = amazonS3Client.listParts(new ListPartsRequest(bucket, key, uploadId));
        return partListing.getParts().stream().map(PartSummary::getPartNumber).collect(Collectors.toList());
    } catch (Exception e) {
        // Listing fails when the uploadId no longer exists; callers treat null as "start over"
        return null;
    }
}
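A related housekeeping note: when a resumable upload is abandoned for good, its parts keep occupying storage until the upload is aborted (or a bucket lifecycle rule purges it, as the comments above mention). A minimal sketch using the SDK v1 API, assuming you really do want to drop every unfinished upload in a bucket:

// Abort every unfinished multipart upload in a bucket (use with care;
// pagination of the listing is ignored for brevity)
public static void abortUnfinishedUploads(String bucket, AmazonS3 client) {
    MultipartUploadListing listing = client.listMultipartUploads(new ListMultipartUploadsRequest(bucket));
    for (MultipartUpload upload : listing.getMultipartUploads()) {
        client.abortMultipartUpload(new AbortMultipartUploadRequest(bucket, upload.getKey(), upload.getUploadId()));
    }
}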
/**
 * Upload one part.
 *
 * @param bucket      bucket name
 * @param key         object key
 * @param uploadId    upload id
 * @param i           part index (0-based; S3 part numbers are 1-based)
 * @param positions   start offset of each part
 * @param file        source file
 * @param minPartSize part size
 * @param size        total file length
 * @param amazonS3Client S3 client
 */
private static void uploadTask(String bucket, String key, String uploadId, int i, List<Long> positions,
                               File file, long minPartSize, long size, AmazonS3 amazonS3Client) {
    long time1 = System.currentTimeMillis();
    UploadPartRequest uploadRequest = new UploadPartRequest()
            .withBucketName(bucket)
            .withKey(key)
            .withUploadId(uploadId)
            .withPartNumber(i + 1)
            .withFileOffset(positions.get(i))
            .withFile(file)
            // The last part may be shorter than minPartSize
            .withPartSize(Math.min(minPartSize, (size - positions.get(i))));
    // Upload this part
    PartETag partEtag = amazonS3Client.uploadPart(uploadRequest).getPartETag();
    long time2 = System.currentTimeMillis();
    //log.info("Part {} took {} ms, partEtag: {}", i + 1, (time2 - time1), partEtag.getETag() + "---" + partEtag.getPartNumber());
}
// Save the upload record for the file
private static void saveUploadInfo(String md5, String uploadId, long size, long minPartSize,
                                   int chunkCount, String originFile, String targetFile) {
    FileUploadInfo fileUploadInfo = new FileUploadInfo();
    fileUploadInfo.setMd5(md5);
    fileUploadInfo.setUploadId(uploadId);
    fileUploadInfo.setSize(size);
    fileUploadInfo.setChunkSize(minPartSize);
    fileUploadInfo.setChunkCount(chunkCount);
    fileUploadInfo.setOriginFile(originFile);
    fileUploadInfo.setTargetFile(targetFile);
    fileUploadInfo.setCreatedAt(new Date());
    fileUploadInfo.setStatus("INCOMPLETE");
    fileUploadInfoService.save(fileUploadInfo);
}
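The FileUploadInfo entity and fileUploadInfoService are not shown in this article; a minimal POJO consistent with the setters used above would look roughly like this (field types are assumptions):

public class FileUploadInfo {
    private String md5;         // file MD5, used to find resumable uploads
    private String uploadId;    // S3 multipart uploadId
    private long size;          // total file size in bytes
    private long chunkSize;     // part size in bytes
    private int chunkCount;     // number of parts
    private String originFile;  // local source path
    private String targetFile;  // S3 object key
    private Date createdAt;
    private String status;      // "INCOMPLETE" / "COMPLETED"
    // getters and setters omitted
}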
/**
 * Initiate a multipart upload and return its uploadId.
 */
public static String getUploadId(String bucketName, String s3Key, AmazonS3 client) {
    InitiateMultipartUploadRequest initRequest = new InitiateMultipartUploadRequest(bucketName, s3Key);
    InitiateMultipartUploadResult initResponse = client.initiateMultipartUpload(initRequest);
    return initResponse.getUploadId();
}
/**
 * Compute the start offset of every part.
 * The list length is the number of parts; each entry is the byte offset where that part starts.
 *
 * @param size        total file size
 * @param minPartSize part size
 */
public static List<Long> getPositions(long size, long minPartSize) {
    List<Long> positions = new ArrayList<>();
    long filePosition = 0;
    while (filePosition < size) {
        positions.add(filePosition);
        filePosition += Math.min(minPartSize, (size - filePosition));
    }
    return positions;
}
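A quick worked example: a 250 MB file with 100 MB parts yields three offsets, so three parts (note that S3 requires every part except the last to be at least 5 MB):

// size = 250 MB, minPartSize = 100 MB
List<Long> positions = getPositions(250L * 1024 * 1024, 100L * 1024 * 1024);
// positions = [0, 104857600, 209715200] -> parts of 100 MB, 100 MB and 50 MB
System.out.println(positions);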
6. Multipart download of large files with resume support
public static void main(String[] args) {
    // Serial download (call arguments omitted here; see the signature below)
    downFileFromS3(/* bucket, key, path, fileName, client */);
    // Concurrent download
    concurrentDownFileFromS3(/* bucket, key, path, fileName, client */);
}
/**
 * Download an object from S3 to a local directory: chunked, resumable, serial.
 *
 * @param bucket   bucket name
 * @param key      object key
 * @param path     target directory, ending with "/"
 * @param fileName target file name
 * @param client   S3 client
 */
public static void downFileFromS3(String bucket, String key, String path, String fileName, AmazonS3 client) {
    long t = System.currentTimeMillis();
    // Temporary directory for the chunk files and the metadata file
    String partTmp = path + "part/";
    ObjectMetadata objectMetadata = client.getObjectMetadata(bucket, key);
    // Assumes the uploader stored the file's SHA-256 in the object's user metadata
    String sha256 = objectMetadata.getUserMetadata().get("sha256");
    // Total object length
    long size = objectMetadata.getContentLength();
    // Chunk size
    long minPartSize = 100 * 1024 * 1024;
    // E.g. data split into 10 chunks produces 11 split points
    List<Long> positions = getSplitPoint(size, minPartSize);
    //log.info("Total size: {}, split into {} chunks", size, positions.size() - 1);
    // Check whether the directory already holds a metadata file
    List<String> fileNameList = getFileName(partTmp);
    if (CollectionUtils.isEmpty(fileNameList)) {
        // Nothing downloaded yet: write the metadata file into the same directory
        FileMetaData fileMetaData = new FileMetaData();
        fileMetaData.setMd5(sha256);
        fileMetaData.setSize(size);
        fileMetaData.setChunkCount(positions.size() - 1);
        fileMetaData.setChunkSize(minPartSize);
        FileUtil.writeData(partTmp, sha256 + ".tmp", JSON.toJSON(fileMetaData));
    }
    for (int i = 1; i < positions.size(); i++) {
        String partFileName = sha256 + ".part" + i;
        long tempFileSize = new File(partTmp + partFileName).length();
        // Skip chunks that are already complete (the last chunk, which may be shorter, is always re-fetched)
        if (!CollectionUtils.isEmpty(fileNameList) && fileNameList.contains(partFileName) && tempFileSize >= minPartSize) {
            //log.info("File {}, chunk {} already exists", key, i);
            continue;
        }
        long start = positions.get(i - 1);
        long end = positions.get(i);
        downPart(i, start, end, client, bucket, key, partTmp + partFileName);
    }
    // Merge the chunks
    mergeFiles(partTmp, positions.size() - 1, sha256, path, fileName);
    // Remove the chunk files and the metadata file
    deleteDirectory(new File(partTmp));
    //log.info("File {} downloaded, took: {}", key, System.currentTimeMillis() - t);
}
/**
 * Download an object from S3 to a local directory: chunked, resumable, concurrent.
 *
 * @param bucket   bucket name
 * @param key      object key
 * @param path     target directory, ending with "/"
 * @param fileName target file name
 * @param client   S3 client
 */
public static void concurrentDownFileFromS3(String bucket, String key, String path, String fileName, AmazonS3 client) {
    long t = System.currentTimeMillis();
    // Temporary directory for the chunk files and the metadata file
    String partTmp = path + "part" + CommonUtil.getSeparator();
    ObjectMetadata objectMetadata = client.getObjectMetadata(bucket, key);
    // Assumes the uploader stored the file's SHA-256 in the object's user metadata
    String sha256 = objectMetadata.getUserMetadata().get("sha256");
    // Total object length
    long size = objectMetadata.getContentLength();
    // Chunk size
    long minPartSize = 100 * 1024 * 1024;
    // E.g. data split into 10 chunks produces 11 split points
    List<Long> positions = getSplitPoint(size, minPartSize);
    //log.info("Total size: {}, split into {} chunks", size, positions.size() - 1);
    // Check whether the directory already holds a metadata file
    List<String> fileNameList = getFileName(partTmp);
    if (CollectionUtils.isEmpty(fileNameList)) {
        // Nothing downloaded yet: write the metadata file into the same directory
        FileMetaData fileMetaData = new FileMetaData();
        fileMetaData.setMd5(sha256);
        fileMetaData.setSize(size);
        fileMetaData.setChunkCount(positions.size() - 1);
        fileMetaData.setChunkSize(minPartSize);
        FileUtil.writeData(partTmp, sha256 + ".tmp", JSON.toJSON(fileMetaData));
    }
    Map<Integer, String> dataChunk = getDataChunk(positions);
    ThreadPoolExecutor taskPool = new ThreadPoolExecutor(10, 20, 1, TimeUnit.SECONDS,
            new ArrayBlockingQueue<>(10000), new ThreadPoolExecutor.CallerRunsPolicy());
    CompletableFuture[] futures = new CompletableFuture[dataChunk.size()];
    for (int i = 1; i <= dataChunk.size(); i++) {
        String partFileName = sha256 + ".part" + i;
        long tempFileSize = new File(partTmp + partFileName).length();
        if (!CollectionUtils.isEmpty(fileNameList) && fileNameList.contains(partFileName) && tempFileSize >= minPartSize) {
            //log.info("File {}, chunk {} already exists", key, i);
            // Fill the slot so allOf() below does not hit a null element
            futures[i - 1] = CompletableFuture.completedFuture(null);
            continue;
        }
        String position = dataChunk.get(i);
        long start = Long.parseLong(position.split("_")[0]);
        long end = Long.parseLong(position.split("_")[1]);
        int finalI = i;
        futures[i - 1] = CompletableFuture.runAsync(
                () -> downPart(finalI, start, end, client, bucket, key, partTmp + partFileName), taskPool);
    }
    CompletableFuture<Void> allFuture = CompletableFuture.allOf(futures);
    // Wait for every task to finish (or for one to throw)
    //log.info("Waiting for the download tasks to finish");
    allFuture.join();
    taskPool.shutdown();
    // Merge the chunks
    mergeFiles(partTmp, positions.size() - 1, sha256, path, fileName);
    // Remove the chunk files and the metadata file
    deleteDirectory(new File(partTmp));
    //log.info("File {} downloaded, took: {}", key, System.currentTimeMillis() - t);
}
/**
 * Map each chunk index to its "start_end" byte range.
 */
public static Map<Integer, String> getDataChunk(List<Long> positions) {
    Map<Integer, String> map = new HashMap<>();
    for (int i = 0; i < positions.size() - 1; i++) {
        map.put(i + 1, positions.get(i) + "_" + positions.get(i + 1));
    }
    return map;
}
/**
 * Compute the split points of the object.
 * Unlike getPositions, the end of the file is appended as a final point,
 * so n chunks produce n + 1 points.
 *
 * @param size        total file size
 * @param minPartSize chunk size
 */
public static List<Long> getSplitPoint(long size, long minPartSize) {
    List<Long> positions = new ArrayList<>();
    long filePosition = 0;
    while (filePosition < size) {
        positions.add(filePosition);
        filePosition += Math.min(minPartSize, (size - filePosition));
    }
    positions.add(size);
    return positions;
}
// List the plain file names (not directories) under a directory
public static List<String> getFileName(String path) {
    List<String> list = new ArrayList<>();
    File[] files = new File(path).listFiles();
    if (files == null) {
        return list;
    }
    for (File file : files) {
        if (!file.isDirectory()) {
            list.add(file.getName());
        }
    }
    return list;
}
private static void downPart(int partIndex, long start, long end, AmazonS3 client, String bucket, String key, String partFile) {
    long time = System.currentTimeMillis();
    GetObjectRequest request = new GetObjectRequest(bucket, key);
    // The HTTP range is inclusive on both ends
    request.setRange(start, end - 1);
    try (S3Object s3Object = client.getObject(request);
         S3ObjectInputStream objectInputStream = s3Object.getObjectContent();
         FileOutputStream fos = new FileOutputStream(new File(partFile))) {
        byte[] buffer = new byte[1024];
        int readLen;
        while ((readLen = objectInputStream.read(buffer)) != -1) {
            fos.write(buffer, 0, readLen);
        }
        //log.info("Chunk [{}], range {}--{} downloaded, took: {}", partIndex, start, end - 1, System.currentTimeMillis() - time);
    } catch (Exception e) {
        //log.error("File [{}], chunk [{}] download failed, {}", key, partIndex, e);
    }
}
/**
 * @param sourceDir      directory holding the chunk files
 * @param numParts       number of chunks
 * @param fileName       base name of the chunk files
 * @param targetDir      directory for the merged file
 * @param targetFileName name of the merged file
 */
public static void mergeFiles(String sourceDir, int numParts, String fileName, String targetDir, String targetFileName) {
    // The merged file
    String targetFile = targetDir + "/" + targetFileName;
    try (FileOutputStream fos = new FileOutputStream(targetFile);
         BufferedOutputStream mergingStream = new BufferedOutputStream(fos)) {
        // Append the chunks in order
        for (int i = 1; i <= numParts; i++) {
            String partFileName = sourceDir + "/" + fileName + ".part" + i;
            try (FileInputStream fis = new FileInputStream(partFileName);
                 BufferedInputStream bis = new BufferedInputStream(fis)) {
                byte[] buffer = new byte[1024];
                int read;
                while ((read = bis.read(buffer, 0, 1024)) != -1) {
                    mergingStream.write(buffer, 0, read);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Delete a directory and everything in it.
 */
public static void deleteDirectory(File directory) {
    try {
        if (directory.exists()) {
            FileUtils.deleteDirectory(directory);
        }
    } catch (IOException e) {
        //log.error("Failed to delete the directory: {}", e);
    }
}
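Since this design stores the file's SHA-256 in the object's user metadata, the merged file can be verified after download. A minimal sketch using the JDK's MessageDigest; compare the result with objectMetadata.getUserMetadata().get("sha256"):

// Compute the hex SHA-256 of a local file, for comparison with the
// "sha256" entry in the object's user metadata
public static String sha256Hex(String filePath) throws Exception {
    MessageDigest digest = MessageDigest.getInstance("SHA-256");
    try (InputStream in = new FileInputStream(filePath)) {
        byte[] buf = new byte[8192];
        int n;
        while ((n = in.read(buf)) != -1) {
            digest.update(buf, 0, n);
        }
    }
    StringBuilder sb = new StringBuilder();
    for (byte b : digest.digest()) {
        sb.append(String.format("%02x", b));
    }
    return sb.toString();
}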