什么是断点续传?
就是上传文件时,不必从头开始上传,而是从指定的位置继续上传,这样的功能就叫做断点续传。
如何完成断点续传
要完成断点续传,需要对文件进行分割,就是按照分片大小将文件分成多个分片,将这些分片进行上传,再次上传的时候就需要知道已经上传了哪些分片,然后再次上传就只用上传未完成的分片即可。
阿里云oss断点续传
func main() {
// 创建OSSClient实例。
// yourEndpoint填写Bucket对应的Endpoint,以华东1(杭州)为例,填写为https://oss-cn-hangzhou.aliyuncs.com。其它Region请按实际情况填写。
// 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户。
client, err := oss.New("yourEndpoint", "yourAccessKeyId", "yourAccessKeySecret")
if err != nil {
fmt.Println("Error:", err) os.Exit(-1)
}
// 填写Bucket名称,例如examplebucket。
bucket, err := client.Bucket("examplebucket")
if err != nil {
fmt.Println("Error:", err) os.Exit(-1)
}
// 设置分片大小为100 KB(100*1024),指定分片上传并发数为3,并开启断点续传上传。
// yourObjectName填写Object完整路径,完整路径中不能包含Bucket名称,例如exampledir/exampleobject.txt。
// yourLocalFile填写本地文件的完整路径,例如D:\\localpath\\examplefile.txt。如果未指定本地路径,则默认从示例程序所属项目对应本地路径中上传文件。
err = bucket.UploadFile("exampledir/exampleobject.txt", "D:\\localpath\\examplefile.txt", 100*1024, oss.Routines(3), oss.Checkpoint(true, ""))
if err != nil {
fmt.Println("Error:", err) os.Exit(-1) }
}
从阿里云的sdk来看,使用非常简单
使用minio sdk兼容
但是,我们的产品需要使用开源的minio,好消息是minio和阿里oss都是兼容s3
Amazon Simple Storage Service(Amazon S3)是一种对象存储服务,提供行业领先的可扩展性、数据可用性、安全性和性能。各种规模和行业的客户都可以使用 Amazon S3 存储和保护任意数量的数据,用于数据湖、网站、移动应用程序、备份和恢复、归档、企业应用程序、IoT 设备和大数据分析。Amazon S3 提供了管理功能,使您可以优化、组织和配置对数据的访问,以满足您的特定业务、组织和合规性要求。
minio 几个重要的接口
- 创建分片上传
// NewMultipartUpload - Initiates a new multipart upload and returns the
// server-assigned uploadID that all subsequent part uploads must reference.
func (c Core) NewMultipartUpload(ctx context.Context, bucket, object string, opts PutObjectOptions) (uploadID string, err error) {
result, err := c.initiateMultipartUpload(ctx, bucket, object, opts)
return result.UploadID, err
}
- 获取已上传分片
// ListObjectParts - List uploaded parts of an incomplete upload.
// partNumberMarker/maxParts page through the results (the server caps a
// single listing, so callers loop until the result is no longer truncated).
func (c Core) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker int, maxParts int) (result ListObjectPartsResult, err error) {
return c.listObjectPartsQuery(ctx, bucket, object, uploadID, partNumberMarker, maxParts)
}
- 合并分片
// CompleteMultipartUpload - Concatenate uploaded parts and commit to an object.
// The supplied parts (number + ETag pairs) tell the server which uploaded
// pieces make up the final object; the object's ETag is returned.
func (c Core) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []CompletePart, opts PutObjectOptions) (string, error) {
res, err := c.completeMultipartUpload(ctx, bucket, object, uploadID, completeMultipartUpload{
Parts: parts,
}, opts)
return res.ETag, err
}
- 上传分片
// PutObjectPart - Upload an object part.
// data/size describe one part's payload; md5Base64/sha256Hex are optional
// integrity checksums, and sse configures server-side encryption.
func (c Core) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data io.Reader, size int64, md5Base64, sha256Hex string, sse encrypt.ServerSide) (ObjectPart, error) {
streamSha256 := true
return c.uploadPart(ctx, bucket, object, uploadID, data, partID, md5Base64, sha256Hex, size, sse, streamSha256)
}
分片上传主要流程
本地存在checkPoint文件
1. 检查是否相同文件
2. 获取已上传分片
3. 继续上传未上传分片
4. 合并分片
不存在checkPoint文件
1. 创建分片上传
2. 对文件进行分片
3. 上传分片
4. 合并分片
主要代码片段
import (
"bytes"
"context"
"encoding/json"
"io/fs"
"io/ioutil"
"os"
"sort"
"strings"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
"github.com/minio/minio-go/v7/pkg/encrypt"
"github.com/sirupsen/logrus"
)
// helper is the package-level singleton initialised by InitMinio and
// returned by GetMinioHelper.
var helper *MinioHelper
const (
// ChunkPartSize is the multipart chunk size: 5 MiB per part.
ChunkPartSize = 1024 * 1024 * 5
// FilePermMode is the permission used when writing checkpoint files.
FilePermMode = os.FileMode(0664)
)
// InitMinio creates the package-level MinIO Core client used by all upload
// helpers. It panics on failure, so call it once during startup.
// NOTE(review): endPoint, AccessKeyID, SecretAccessKey and Secure are
// presumably package-level configuration values declared elsewhere — confirm.
func InitMinio() {
core, err := minio.NewCore(endPoint, &minio.Options{
Creds: credentials.NewStaticV4(AccessKeyID, SecretAccessKey, ""),
Secure: Secure,
})
if err != nil {
panic(err.Error())
}
helper = &MinioHelper{
MinioCore: core,
}
}
// MultipartFile is the checkpoint record for one resumable multipart upload.
// It is serialised to "<file>.cp" by dump and restored by load.
type MultipartFile struct {
// UploadId is the server-side multipart upload identifier.
UploadId string `json:"uploadId"`
BucketName string `json:"bucketName"`
// NOTE(review): os.FileInfo is an interface; encoding/json cannot
// unmarshal into an interface-typed field, so load's json.Unmarshal will
// error whenever a checkpoint containing this field is read back —
// consider persisting name/size/modTime as plain fields instead. Confirm.
FileInfo os.FileInfo `json:"fileInfo"`
// CompletedParts is rebuilt from the server (ListObjectParts) on resume,
// hence excluded from the JSON checkpoint.
CompletedParts []minio.CompletePart `json:"-"`
}
// fileChunk describes one part of the file as a half-open byte range
// [Start, End) plus its 1-based multipart part number.
type fileChunk struct {
PartNumber int
Start int
End int
}
// getChunks records the parts already present on the server into
// cp.CompletedParts and returns the chunks that still need uploading.
//
// If listing the parts fails (e.g. the stored uploadId is stale or unknown),
// a brand-new multipart upload is started and EVERY chunk is returned as
// pending. (The original returned an empty slice in that case, so nothing
// was uploaded and the later CompleteMultipartUpload could not succeed.)
func (cp *MultipartFile) getChunks(ctx context.Context, chunkCount int) ([]fileChunk, error) {
	// One slot per part; slots stay zero-valued until the part is uploaded.
	cp.CompletedParts = make([]minio.CompletePart, chunkCount)
	// List against cp.BucketName so the listing and the upload always target
	// the same bucket (the original listed the globally-configured bucket,
	// which could diverge from the bucket this upload was created in).
	partInfos, err := listObjectParts(ctx, cp.BucketName, cp.FileInfo.Name(), cp.UploadId)
	if err != nil {
		uploadId, newErr := helper.MinioCore.NewMultipartUpload(ctx, cp.BucketName, cp.FileInfo.Name(), minio.PutObjectOptions{})
		if newErr != nil {
			logrus.Errorf("NewMultipartUpload error: %v", newErr)
			return nil, newErr
		}
		cp.UploadId = uploadId
		partInfos = nil // fresh upload: no parts exist yet, all chunks pending
	}
	uploaded := make(map[int]struct{}, len(partInfos))
	for _, partInfo := range partInfos {
		// Guard against part numbers outside our expected range (e.g. the
		// remote upload was made with a different chunk size).
		if partInfo.PartNumber < 1 || partInfo.PartNumber > chunkCount {
			continue
		}
		cp.CompletedParts[partInfo.PartNumber-1] = minio.CompletePart{PartNumber: partInfo.PartNumber, ETag: partInfo.ETag}
		uploaded[partInfo.PartNumber] = struct{}{}
	}
	chunks := make([]fileChunk, 0, chunkCount-len(uploaded))
	for i := 0; i < chunkCount; i++ {
		if _, ok := uploaded[i+1]; ok {
			continue
		}
		chunk := fileChunk{
			PartNumber: i + 1,
			Start:      i * ChunkPartSize,
			End:        (i + 1) * ChunkPartSize,
		}
		// The last chunk absorbs the remainder of the file.
		if i == chunkCount-1 {
			chunk.End = int(cp.FileInfo.Size())
		}
		chunks = append(chunks, chunk)
	}
	return chunks, nil
}
// load restores multipart-upload state from the checkpoint file next to
// filePath ("<filePath>.cp"). If the checkpoint is missing/unreadable, or it
// describes a different file or bucket, a fresh multipart upload is created
// instead. It returns the checkpoint file path for later dump/remove calls.
func (cp *MultipartFile) load(ctx context.Context, filePath string, bucketName string, fileInfo os.FileInfo) (string, error) {
	cpFilePath := filePath + ".cp"
	// os.ReadFile replaces the original Open + ioutil.ReadAll, and also fixes
	// the `defer cpFile.Close()` that ran before the open error was checked.
	cpFileBytes, err := os.ReadFile(cpFilePath)
	if err != nil {
		// No usable checkpoint: start a new multipart upload from scratch.
		logrus.Errorf("open cpFile failed: %v, newMultipart", err)
		cp.FileInfo = fileInfo
		if err = cp.newMultipart(ctx, bucketName); err != nil {
			logrus.Errorf("newMultipart failed: %v", err)
			return "", err
		}
		return cpFilePath, nil
	}
	if err = json.Unmarshal(cpFileBytes, cp); err != nil {
		logrus.Errorf("unmarshal cpFile failed: %v", err)
		return "", err
	}
	// If the checkpoint refers to a different file (or bucket), it cannot be
	// resumed — create a new multipart upload instead.
	if !cp.isSameFile(fileInfo, bucketName) {
		cp.FileInfo = fileInfo
		if err = cp.newMultipart(ctx, bucketName); err != nil {
			logrus.Errorf("newMultipart failed: %v", err)
			return "", err
		}
	}
	return cpFilePath, nil
}
// newMultipart starts a brand-new multipart upload for the file tracked by
// cp and records the resulting uploadId and bucket on the checkpoint.
func (cp *MultipartFile) newMultipart(ctx context.Context, bucketName string) error {
	id, err := helper.MinioCore.NewMultipartUpload(ctx, bucketName, cp.FileInfo.Name(), minio.PutObjectOptions{})
	if err != nil {
		logrus.Errorf("NewMultipartUpload error: %v", err)
		return err
	}
	cp.BucketName = bucketName
	cp.UploadId = id
	return nil
}
// dump persists the checkpoint state to cpFilePath as JSON so an interrupted
// upload can be resumed later. Returns any marshal or write error.
func (cp *MultipartFile) dump(cpFilePath string) error {
	data, err := json.Marshal(cp)
	if err != nil {
		return err
	}
	// os.WriteFile replaces the deprecated ioutil.WriteFile (Go 1.16+,
	// already in use via the io/fs import).
	return os.WriteFile(cpFilePath, data, FilePermMode)
}
// isSameFile reports whether the loaded checkpoint describes the same local
// file and target bucket as the upload being resumed: name, size and
// modification time must all match, and the bucket must be identical.
//
// Bug fix: the original compared the bucket with != , so a checkpoint was
// only considered "the same file" when the bucket DIFFERED — every genuine
// resume attempt was rejected, defeating resumable upload entirely.
func (cp *MultipartFile) isSameFile(fileInfo fs.FileInfo, bucketName string) bool {
	return cp.FileInfo.Name() == fileInfo.Name() &&
		cp.FileInfo.Size() == fileInfo.Size() &&
		cp.FileInfo.ModTime().Equal(fileInfo.ModTime()) &&
		cp.BucketName == bucketName
}
// MinioHelper wraps a minio.Core client and exposes the upload entry points.
type MinioHelper struct {
MinioCore *minio.Core
}
// Upload uploads the file at filePath to the configured bucket. Files that
// span at most one full chunk are sent in a single PutObject call; larger
// files use a resumable multipart upload driven by a checkpoint file.
func (helper *MinioHelper) Upload(ctx context.Context, filePath string) error {
	bucketName := config.GetConfig().Minio.BucketName
	openFile, err := os.Open(filePath)
	if err != nil {
		logrus.Errorf("openfile failed: %v", err)
		return err
	}
	// Deferred after the error check (the original deferred Close on a
	// possibly-nil *os.File before checking err).
	defer openFile.Close()
	fileInfo, err := openFile.Stat()
	if err != nil {
		logrus.Errorf("get openfile statInfo error: %v", err)
		return err
	}
	// Integer division: any remainder is folded into the last chunk by
	// getChunks, so chunkCount full chunks cover the whole file.
	chunkCount := int(fileInfo.Size()) / ChunkPartSize
	if chunkCount <= 1 {
		return uploadFile(ctx, bucketName, filePath, fileInfo)
	}
	return uploadFileWithCP(ctx, filePath, bucketName, chunkCount)
}
// GetMinioHelper returns the package-level helper created by InitMinio
// (nil if InitMinio has not been called).
func GetMinioHelper() *MinioHelper {
return helper
}
// isObjectExist reports whether an object with the given name already exists
// in the bucket and has exactly the expected size.
func isObjectExist(ctx context.Context, bucketName string, objectName string, fileSize int64) bool {
	info, statErr := helper.MinioCore.StatObject(ctx, bucketName, objectName, minio.StatObjectOptions{})
	if statErr != nil {
		// A stat failure (including "not found") counts as "does not exist".
		logrus.Errorf("get object error, bucket: %s, obj: %s, error: %s", bucketName, objectName, statErr)
		return false
	}
	logrus.Infoln("exist file info: ", info)
	return info.Size == fileSize
}
// listObjectParts 列出已上传的分片
func listObjectParts(ctx context.Context, bucketName, objectName, uploadID string) (map[int]minio.ObjectPart, error) {
// Part number marker for the next batch of request.
var nextPartNumberMarker int
var partsInfo = make(map[int]minio.ObjectPart)
for {
// Get list of uploaded parts a maximum of 1000 per request.
listObjPartsResult, err := helper.MinioCore.ListObjectParts(ctx, bucketName, objectName, uploadID, nextPartNumberMarker, 1000)
if err != nil {
return nil, err
}
for _, part := range listObjPartsResult.ObjectParts {
// Trim off the odd double quotes from ETag in the beginning and end.
part.ETag = strings.TrimPrefix(part.ETag, """)
part.ETag = strings.TrimSuffix(part.ETag, """)
partsInfo[part.PartNumber] = part
}
// Keep part number marker, for the next iteration.
nextPartNumberMarker = listObjPartsResult.NextPartNumberMarker
// Listing ends result is not truncated, return right here.
if !listObjPartsResult.IsTruncated {
break
}
}
// Return all the parts.
return partsInfo, nil
}
// uploadFileWithCP performs a resumable multipart upload driven by a
// checkpoint file stored next to the source file. Already-uploaded parts are
// skipped; remaining chunks are uploaded by a small worker pool, and the
// checkpoint is rewritten after every completed part.
func uploadFileWithCP(ctx context.Context, filePath, bucketName string, chunkCount int) error {
	openFile, err := os.Open(filePath)
	if err != nil {
		return err
	}
	// Deferred after the error check (the original deferred Close on a
	// possibly-nil *os.File).
	defer openFile.Close()
	fileInfo, err := openFile.Stat()
	if err != nil {
		return err
	}
	// Restore (or create) the multipart-upload state.
	var cpFile = new(MultipartFile)
	cpFilePath, err := cpFile.load(ctx, filePath, bucketName, fileInfo)
	if err != nil {
		return err
	}
	// Determine which chunks the server is still missing.
	chunks, err := cpFile.getChunks(ctx, chunkCount)
	if err != nil {
		return err
	}
	// NOTE(review): the entire file is read into memory; fine for moderate
	// sizes, but very large files would be better read chunk-by-chunk.
	fileBytes, err := ioutil.ReadAll(openFile)
	if err != nil {
		return err
	}
	jobs := make(chan fileChunk, len(chunks))
	results := make(chan minio.CompletePart, len(chunks))
	// Buffered so that a second failing worker never blocks forever after
	// the coordinator has already returned (the original was unbuffered).
	failed := make(chan error, len(chunks))
	die := make(chan bool)
	// Upload the chunks with at most 5 concurrent workers.
	routines := 5
	if len(chunks) < routines {
		routines = len(chunks)
	}
	for i := 0; i < routines; i++ {
		go work(ctx, jobs, fileBytes, bucketName, fileInfo.Name(), cpFile.UploadId, failed, results, die)
	}
	go scheduler(jobs, chunks)
	for completed := 0; completed < len(chunks); completed++ {
		select {
		case part := <-results:
			cpFile.CompletedParts[part.PartNumber-1] = part
			// Best-effort persistence: a failed checkpoint write only costs
			// resumability, so log it instead of silently ignoring it.
			if dumpErr := cpFile.dump(cpFilePath); dumpErr != nil {
				logrus.Errorf("dump checkpoint failed: %v", dumpErr)
			}
		case err := <-failed:
			close(die) // stop the remaining workers
			return err
		}
	}
	return complete(ctx, bucketName, cpFile, cpFilePath)
}
// uploadFile uploads the whole file in a single PutObject call; used when
// the file is small enough not to need a multipart upload.
func uploadFile(ctx context.Context, bucketName, filePath string, fileInfo os.FileInfo) error {
	openFile, err := os.Open(filePath)
	if err != nil {
		return err
	}
	// Deferred after the error check (the original deferred Close on a
	// possibly-nil *os.File).
	defer openFile.Close()
	_, err = helper.MinioCore.PutObject(ctx, bucketName, fileInfo.Name(), openFile, fileInfo.Size(), "", "", minio.PutObjectOptions{})
	if err != nil {
		logrus.Errorf("put object error: %v", err)
		return err
	}
	return nil
}
// chunkUpload uploads one part of a multipart upload and converts the server
// response into the CompletePart needed by CompleteMultipartUpload.
func chunkUpload(ctx context.Context, buf []byte, bucketName string, fileName, uploadId string, partNumber int) (minio.CompletePart, error) {
	// bytes.NewReader reads directly from buf, avoiding the extra full copy
	// the original made by writing buf into a bytes.Buffer first.
	objectPart, err := helper.MinioCore.PutObjectPart(ctx, bucketName, fileName, uploadId, partNumber, bytes.NewReader(buf), int64(len(buf)), "", "", encrypt.NewSSE())
	if err != nil {
		// Corrected log text: the failure comes from PutObjectPart, not from
		// reading the buffer as the original message claimed.
		logrus.Errorf("put object part err: %s", err)
		return minio.CompletePart{}, err
	}
	logrus.Infoln("upload chunk success, objectPart PartNumber:", objectPart.PartNumber)
	return minio.CompletePart{
		ETag:       objectPart.ETag,
		PartNumber: objectPart.PartNumber,
	}, nil
}
// work consumes chunks from parts and uploads each one, reporting successes
// on results and the first failure on failed. Closing die makes every worker
// stop before starting its next chunk.
func work(ctx context.Context, parts <-chan fileChunk, fileBytes []byte, bucketName, fileName string, uploadId string, failed chan error, results chan minio.CompletePart, die chan bool) {
	for part := range parts {
		// Check die BEFORE uploading: the original only checked after the
		// upload finished, wasting a full chunk upload per worker once a
		// failure had already been reported.
		select {
		case <-die:
			return
		default:
		}
		logrus.Infoln("upload chunk chunkNumber:", part.PartNumber)
		completePart, err := chunkUpload(ctx, fileBytes[part.Start:part.End], bucketName, fileName, uploadId, part.PartNumber)
		if err != nil {
			logrus.Errorf("upload chunk error: %s, chunkNumber: %d", err, part.PartNumber)
			// Don't block forever if the coordinator already gave up
			// (e.g. this is the second failure and die is closed).
			select {
			case failed <- err:
			case <-die:
			}
			return
		}
		results <- completePart
	}
}
// scheduler feeds every chunk into the jobs channel, then closes it so the
// workers' range loops terminate once all chunks are taken.
func scheduler(jobs chan fileChunk, chunks []fileChunk) {
	for i := range chunks {
		jobs <- chunks[i]
	}
	close(jobs)
}
// complete sorts the collected parts by part number, asks the server to
// concatenate them into the final object, and removes the checkpoint file.
func complete(ctx context.Context, bucketName string, cpFile *MultipartFile, cpFilePath string) error {
// CompleteMultipartUpload requires parts in ascending part-number order.
sort.Slice(cpFile.CompletedParts, func(i, j int) bool {
return cpFile.CompletedParts[i].PartNumber < cpFile.CompletedParts[j].PartNumber
})
_, err := helper.MinioCore.CompleteMultipartUpload(ctx, bucketName, cpFile.FileInfo.Name(), cpFile.UploadId, cpFile.CompletedParts, minio.PutObjectOptions{})
if err != nil {
logrus.Errorf("CompleteMultipartUpload err: %s", err)
return err
}
// NOTE(review): a failed Remove makes an otherwise-successful upload look
// failed to the caller; consider logging instead of returning this error.
err = os.Remove(cpFilePath)
if err != nil {
return err
}
return nil
}