阿里云oss存储,在golang中使用minio的sdk做断点续传

1,438 阅读4分钟

什么是断点续传?

就是上传文件时,不必从头开始上传,而是从上次中断的位置继续上传,这样的功能就叫做断点续传。

如何完成断点续传

要完成断点续传,需要对文件进行分割:按照分片大小将文件分成多个分片,将这些分片逐个上传;再次上传时需要先知道已经上传了哪些分片,然后只上传未完成的分片即可。

阿里云oss断点续传

func main() { 
// 创建OSSClient实例。 
// yourEndpoint填写Bucket对应的Endpoint,以华东1(杭州)为例,填写为https://oss-cn-hangzhou.aliyuncs.com。其它Region请按实际情况填写。 
// 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户。 
client, err := oss.New("yourEndpoint", "yourAccessKeyId", "yourAccessKeySecret") 
if err != nil {
    fmt.Println("Error:", err) os.Exit(-1) 
} 
// 填写Bucket名称,例如examplebucket。 
bucket, err := client.Bucket("examplebucket") 
if err != nil { 
    fmt.Println("Error:", err) os.Exit(-1)
} 
// 设置分片大小为100 KB(100*1024),指定分片上传并发数为3,并开启断点续传上传。 
// yourObjectName填写Object完整路径,完整路径中不能包含Bucket名称,例如exampledir/exampleobject.txt。 
// yourLocalFile填写本地文件的完整路径,例如D:\\localpath\\examplefile.txt。如果未指定本地路径,则默认从示例程序所属项目对应本地路径中上传文件。 
err = bucket.UploadFile("exampledir/exampleobject.txt", "D:\\localpath\\examplefile.txt", 100*1024, oss.Routines(3), oss.Checkpoint(true, ""))
if err != nil { 
    fmt.Println("Error:", err) os.Exit(-1) } 
}

从阿里云的sdk来看,使用非常简单

使用minio sdk兼容

但是,我们的产品需要使用开源的 minio。好消息是 minio 和阿里云 oss 都兼容 S3 协议。

Amazon Simple Storage Service(Amazon S3)是一种对象存储服务,提供行业领先的可扩展性、数据可用性、安全性和性能。各种规模和行业的客户都可以使用 Amazon S3 存储和保护任意数量的数据,用于数据湖、网站、移动应用程序、备份和恢复、归档、企业应用程序、IoT 设备和大数据分析。Amazon S3 提供了管理功能,使您可以优化、组织和配置对数据的访问,以满足您的特定业务、组织和合规性要求。

minio 几个重要的接口

  1. 创建分片上传
// NewMultipartUpload initiates a multipart upload on the server and returns
// the uploadID that identifies it in all subsequent part/complete calls.
func (c Core) NewMultipartUpload(ctx context.Context, bucket, object string, opts PutObjectOptions) (uploadID string, err error) {
   result, err := c.initiateMultipartUpload(ctx, bucket, object, opts)
   return result.UploadID, err
}
  2. 获取已上传分片
// ListObjectParts - List uploaded parts of an incomplete upload.
// partNumberMarker/maxParts page through the result set (at most maxParts
// entries per call).
func (c Core) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker int, maxParts int) (result ListObjectPartsResult, err error) {
   return c.listObjectPartsQuery(ctx, bucket, object, uploadID, partNumberMarker, maxParts)
}
  3. 合并分片
// CompleteMultipartUpload - Concatenate uploaded parts and commit to an object.
// parts must list every uploaded part (number + ETag); the server merges them
// into the final object and returns its ETag.
func (c Core) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []CompletePart, opts PutObjectOptions) (string, error) {
   res, err := c.completeMultipartUpload(ctx, bucket, object, uploadID, completeMultipartUpload{
      Parts: parts,
   }, opts)
   return res.ETag, err
}
  4. 上传分片
// PutObjectPart - Upload an object part.
// data supplies the part's bytes (size of them); md5Base64/sha256Hex are
// optional content checksums and sse the server-side encryption settings.
func (c Core) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data io.Reader, size int64, md5Base64, sha256Hex string, sse encrypt.ServerSide) (ObjectPart, error) {
   streamSha256 := true
   return c.uploadPart(ctx, bucket, object, uploadID, data, partID, md5Base64, sha256Hex, size, sse, streamSha256)
}

分片上传主要流程

本地存在checkPoint文件

1. 检查是否相同文件
2. 获取已上传分片
3. 继续上传未上传分片
4. 合并分片

不存在checkPoint文件

1. 创建分片上传
2. 对文件进行分片
3. 上传分片
4. 合并分片 

主要代码片段

import (
   "bytes"
   "context"
   "encoding/json"
   "io/fs"
   "io/ioutil"
   "os"
   "sort"
   "strings"

   "github.com/minio/minio-go/v7"
   "github.com/minio/minio-go/v7/pkg/credentials"
   "github.com/minio/minio-go/v7/pkg/encrypt"
   "github.com/sirupsen/logrus"
)

// helper is the package-level singleton populated by InitMinio.
var helper *MinioHelper

const (
   // ChunkPartSize is the multipart chunk size: 5 MiB.
   ChunkPartSize = 1024 * 1024 * 5
   // FilePermMode is the permission used when writing the checkpoint file.
   FilePermMode  = os.FileMode(0664)
)

// InitMinio builds the minio Core client from the configured endpoint and
// credentials and stores it in the package-level helper singleton.
// It panics when the client cannot be constructed.
func InitMinio() {
   opts := &minio.Options{
      Creds:  credentials.NewStaticV4(AccessKeyID, SecretAccessKey, ""),
      Secure: Secure,
   }
   core, err := minio.NewCore(endPoint, opts)
   if err != nil {
      panic(err.Error())
   }
   helper = &MinioHelper{MinioCore: core}
}

// MultipartFile is the resumable-upload checkpoint persisted to "<file>.cp".
type MultipartFile struct {
   UploadId       string               `json:"uploadId"`
   BucketName     string               `json:"bucketName"`
   // NOTE(review): FileInfo is an interface (os.FileInfo); json.Unmarshal
   // cannot decode into a nil interface value, so restoring a checkpoint from
   // disk likely fails here — confirm, and consider persisting name/size/
   // modTime as plain fields instead.
   FileInfo       os.FileInfo          `json:"fileInfo"`
   CompletedParts []minio.CompletePart `json:"-"`
}

// fileChunk describes one not-yet-uploaded byte range [Start, End) of the
// local file; PartNumber is 1-based as required by the S3 multipart API.
type fileChunk struct {
   PartNumber int
   Start      int
   End        int
}

// getChunks returns the chunks that still need uploading and records the
// parts the server already holds in cp.CompletedParts.
//
// If the checkpointed uploadId is unknown to the server (listing its parts
// fails), a fresh multipart upload is created and ALL chunks are returned so
// the caller re-uploads the whole file. (The original returned an empty chunk
// list in that branch, which made the caller complete the upload with zero
// parts.)
func (cp *MultipartFile) getChunks(ctx context.Context, chunkCount int) ([]fileChunk, error) {
   // One slot per part; slots stay zero-valued until their part completes.
   cp.CompletedParts = make([]minio.CompletePart, chunkCount)

   completedPartMap := make(map[int]minio.ObjectPart)
   partInfos, err := listObjectParts(ctx, config.GetConfig().Minio.BucketName, cp.FileInfo.Name(), cp.UploadId)
   if err != nil {
      // Stale or unknown uploadId: start over with a new multipart upload and
      // leave completedPartMap empty so every chunk is (re-)uploaded.
      uploadId, newErr := helper.MinioCore.NewMultipartUpload(ctx, cp.BucketName, cp.FileInfo.Name(), minio.PutObjectOptions{})
      if newErr != nil {
         logrus.Errorf("NewMultipartUpload error: %v", newErr)
         return nil, newErr
      }
      cp.UploadId = uploadId
   } else {
      for _, partInfo := range partInfos {
         // Ignore parts outside the current chunk count (e.g. file shrank).
         if partInfo.PartNumber < 1 || partInfo.PartNumber > chunkCount {
            continue
         }
         cp.CompletedParts[partInfo.PartNumber-1] = minio.CompletePart{PartNumber: partInfo.PartNumber, ETag: partInfo.ETag}
         completedPartMap[partInfo.PartNumber] = partInfo
      }
   }

   chunks := make([]fileChunk, 0, chunkCount-len(completedPartMap))
   for i := 0; i < chunkCount; i++ {
      if _, ok := completedPartMap[i+1]; ok {
         continue // already on the server
      }
      chunk := fileChunk{
         PartNumber: i + 1,
         Start:      i * ChunkPartSize,
         End:        (i + 1) * ChunkPartSize,
      }
      // The final chunk absorbs the remainder of the file.
      if i == chunkCount-1 {
         chunk.End = int(cp.FileInfo.Size())
      }
      chunks = append(chunks, chunk)
   }
   return chunks, nil
}

// load restores the checkpoint state from "<filePath>.cp". When the
// checkpoint file is missing, unreadable, or describes a different file, a
// brand-new multipart upload is started instead. It returns the checkpoint
// file path to which progress should be dumped.
func (cp *MultipartFile) load(ctx context.Context, filePath string, bucketName string, fileInfo os.FileInfo) (string, error) {
   cpFilePath := filePath + ".cp"
   cpFile, err := os.Open(cpFilePath)
   if err != nil {
      // No usable checkpoint: start from scratch.
      logrus.Errorf("open cpFile failed: %v, newMultipart", err)
      cp.FileInfo = fileInfo
      if err = cp.newMultipart(ctx, bucketName); err != nil {
         logrus.Errorf("newMultipart failed: %v", err)
         return "", err
      }
      return cpFilePath, nil
   }
   defer cpFile.Close()

   cpFileBytes, err := ioutil.ReadAll(cpFile)
   if err != nil {
      logrus.Errorf("read cpFile failed: %v", err)
      return "", err
   }

   if err = json.Unmarshal(cpFileBytes, cp); err != nil {
      logrus.Errorf("unmarshal cpFile failed: %v", err)
      return "", err
   }

   // If the checkpoint belongs to a different file or bucket, discard it and
   // create a fresh multipart upload.
   if !cp.isSameFile(fileInfo, bucketName) {
      cp.FileInfo = fileInfo
      if err = cp.newMultipart(ctx, bucketName); err != nil {
         logrus.Errorf("newMultipart failed: %v", err)
         return "", err
      }
   }
   return cpFilePath, nil
}

// newMultipart starts a fresh multipart upload for the checkpointed file and
// records its uploadId and target bucket on the checkpoint.
func (cp *MultipartFile) newMultipart(ctx context.Context, bucketName string) error {
   id, initErr := helper.MinioCore.NewMultipartUpload(ctx, bucketName, cp.FileInfo.Name(), minio.PutObjectOptions{})
   if initErr != nil {
      logrus.Errorf("NewMultipartUpload error: %v", initErr)
      return initErr
   }
   cp.UploadId = id
   cp.BucketName = bucketName
   return nil
}

// dump serialises the checkpoint to cpFilePath so an interrupted upload can
// be resumed later.
func (cp *MultipartFile) dump(cpFilePath string) error {
   data, err := json.Marshal(cp)
   if err != nil {
      return err
   }
   return ioutil.WriteFile(cpFilePath, data, FilePermMode)
}

// isSameFile reports whether the checkpoint describes exactly the upload in
// progress: same file name, size and modification time, AND the same target
// bucket.
//
// BUG FIX: the original compared the bucket with "!=", so a checkpoint for
// the same file going to the same bucket was never recognised and the resume
// path could never trigger.
func (cp *MultipartFile) isSameFile(fileInfo fs.FileInfo, bucketName string) bool {
   return cp.FileInfo.Name() == fileInfo.Name() &&
      cp.FileInfo.Size() == fileInfo.Size() &&
      cp.FileInfo.ModTime().Equal(fileInfo.ModTime()) &&
      cp.BucketName == bucketName
}

type MinioHelper struct {
   MinioCore *minio.Core
}

// Upload uploads filePath to the configured bucket. Files smaller than two
// chunks are sent with a single PutObject; larger files go through the
// resumable multipart path.
func (helper *MinioHelper) Upload(ctx context.Context, filePath string) error {
   bucketName := config.GetConfig().Minio.BucketName
   openFile, err := os.Open(filePath)
   if err != nil {
      logrus.Errorf("openfile failed: %v", err)
      return err
   }
   defer openFile.Close()

   fileInfo, err := openFile.Stat()
   if err != nil {
      logrus.Errorf("get openfile statInfo error: %v", err)
      return err
   }

   // Floor division: the last chunk absorbs the remainder, so it may be up to
   // just under 2*ChunkPartSize bytes.
   chunkCount := int(fileInfo.Size()) / ChunkPartSize
   if chunkCount <= 1 {
      return uploadFile(ctx, bucketName, filePath, fileInfo)
   }

   return uploadFileWithCP(ctx, filePath, bucketName, chunkCount)
}

// GetMinioHelper returns the package-level helper singleton; it is nil until
// InitMinio has been called.
func GetMinioHelper() *MinioHelper {
   return helper
}

// isObjectExist reports whether an object with the given name already exists
// in the bucket and has exactly fileSize bytes.
func isObjectExist(ctx context.Context, bucketName string, objectName string, fileSize int64) bool {
   objInfo, statErr := helper.MinioCore.StatObject(ctx, bucketName, objectName, minio.StatObjectOptions{})
   if statErr != nil {
      logrus.Errorf("get object error, bucket: %s, obj: %s, error: %s", bucketName, objectName, statErr)
      return false
   }
   logrus.Infoln("exist file info: ", objInfo)
   return objInfo.Size == fileSize
}

// listObjectParts 列出已上传的分片
func listObjectParts(ctx context.Context, bucketName, objectName, uploadID string) (map[int]minio.ObjectPart, error) {
   // Part number marker for the next batch of request.
   var nextPartNumberMarker int
   var partsInfo = make(map[int]minio.ObjectPart)
   for {
      // Get list of uploaded parts a maximum of 1000 per request.
      listObjPartsResult, err := helper.MinioCore.ListObjectParts(ctx, bucketName, objectName, uploadID, nextPartNumberMarker, 1000)
      if err != nil {
         return nil, err
      }
      for _, part := range listObjPartsResult.ObjectParts {
         // Trim off the odd double quotes from ETag in the beginning and end.
         part.ETag = strings.TrimPrefix(part.ETag, """)
         part.ETag = strings.TrimSuffix(part.ETag, """)
         partsInfo[part.PartNumber] = part
      }
      // Keep part number marker, for the next iteration.
      nextPartNumberMarker = listObjPartsResult.NextPartNumberMarker
      // Listing ends result is not truncated, return right here.
      if !listObjPartsResult.IsTruncated {
         break
      }
   }

   // Return all the parts.
   return partsInfo, nil
}

// uploadFileWithCP 通过checkPoint文件上传
func uploadFileWithCP(ctx context.Context, filePath, bucketName string, chunkCount int) error {
   openFile, err := os.Open(filePath)
   defer openFile.Close()
   if err != nil {
      return err
   }
   fileInfo, err := openFile.Stat()
   if err != nil {
      return err
   }

   // 获取分片上传信息
   var cpFile = new(MultipartFile)
   cpFilePath, err := cpFile.load(ctx, filePath, bucketName, fileInfo)
   if err != nil {
      return err
   }

   // 查询已上传分片
   chunks, err := cpFile.getChunks(ctx, chunkCount)
   if err != nil {
      return err
   }

   fileBytes, err := ioutil.ReadAll(openFile)
   if err != nil {
      return err
   }

   // 继续上传未上传分片
   var jobs = make(chan fileChunk, len(chunks))
   var results = make(chan minio.CompletePart, len(chunks))
   var failed = make(chan error)
   var die = make(chan bool)

   // 分片上传
   routines := 5
   if len(chunks) < 5 {
      routines = len(chunks)
   }
   for i := 0; i < routines; i++ {
      go work(ctx, jobs, fileBytes, bucketName, fileInfo.Name(), cpFile.UploadId, failed, results, die)
   }

   go scheduler(jobs, chunks)

   completed := 0
   for completed < len(chunks) {
      select {
      case part := <-results:
         completed++
         cpFile.CompletedParts[part.PartNumber-1] = part
         cpFile.dump(cpFilePath)
      case err := <-failed:
         close(die) // 停止worker
         return err
      }

      if completed >= len(chunks) {
         break
      }
   }

   return complete(ctx, bucketName, cpFile, cpFilePath)
}

// uploadFile 不分片直接上传
func uploadFile(ctx context.Context, bucketName, filePath string, fileInfo os.FileInfo) error {
   // 只有一片直接上传
   openFile, err := os.Open(filePath)
   defer openFile.Close()
   if err != nil {
      return err
   }
   _, err = helper.MinioCore.PutObject(ctx, bucketName, fileInfo.Name(), openFile, fileInfo.Size(), "", "", minio.PutObjectOptions{})
   if err != nil {
      logrus.Errorf("put object error: %v", err)
      return err
   }
   return nil
}

// chunkUpload 上传分片
func chunkUpload(ctx context.Context, buf []byte, bucketName string, fileName, uploadId string, partNumber int) (minio.CompletePart, error) {
   buffer := bytes.Buffer{}
   buffer.Write(buf)
   objectPart, err := helper.MinioCore.PutObjectPart(ctx, bucketName, fileName, uploadId, partNumber, &buffer, int64(buffer.Len()), "", "", encrypt.NewSSE())
   if err != nil {
      logrus.Errorf("read buff err: %s", err)
      return minio.CompletePart{}, err
   }
   logrus.Infoln("upload chunk success, objectPart PartNumber:", objectPart.PartNumber)
   return minio.CompletePart{
      ETag:       objectPart.ETag,
      PartNumber: objectPart.PartNumber,
   }, nil
}

// work 分片上传worker
func work(ctx context.Context, parts <-chan fileChunk, fileBytes []byte, bucketName, fileName string, uploadId string, failed chan error, results chan minio.CompletePart, die chan bool) {
   for part := range parts {
      logrus.Infoln("upload chunk chunkNumber:", part.PartNumber)
      completePart, err := chunkUpload(ctx, fileBytes[part.Start:part.End], bucketName, fileName, uploadId, part.PartNumber)
      if err != nil {
         logrus.Errorf("upload chunk error: %s, chunkNumber: %d", err, part.PartNumber)
         failed <- err
         break
      }
      select {
      case <-die:
         return
      default:
      }
      results <- completePart
   }

}

// scheduler function
func scheduler(jobs chan fileChunk, chunks []fileChunk) {
   for _, chunk := range chunks {
      jobs <- chunk
   }
   close(jobs)
}

// complete 合并分片
func complete(ctx context.Context, bucketName string, cpFile *MultipartFile, cpFilePath string) error {
   sort.Slice(cpFile.CompletedParts, func(i, j int) bool {
      return cpFile.CompletedParts[i].PartNumber < cpFile.CompletedParts[j].PartNumber
   })
   _, err := helper.MinioCore.CompleteMultipartUpload(ctx, bucketName, cpFile.FileInfo.Name(), cpFile.UploadId, cpFile.CompletedParts, minio.PutObjectOptions{})
   if err != nil {
      logrus.Errorf("CompleteMultipartUpload err: %s", err)
      return err
   }
   err = os.Remove(cpFilePath)
   if err != nil {
      return err
   }
   return nil
}