esbuild源码分析(二)chunk构建流程

74 阅读5分钟

前文已经深入探讨了esbuild的Token解析和AST构建过程,相信您已经对esbuild的工作流程有了相当的了解。本文将进一步讨论这一流程的另一关键环节:chunk构建和输出bundle文件。

test.png

这一阶段的构建过程发生在bundle.Compile函数中,下面来看一下Compile函数的执行过程(这里只保留核心代码)

func (b *Bundle) Compile(log logger.Log, timer *helpers.Timer, mangleCache map[string]interface{}, link Linker) ([]graph.OutputFile, string) {


 files := make([]graph.InputFile, len(b.files))
 for i, file := range b.files {
  files[i] = file.inputFile
 }

 // 获取所有可达文件
 ancestor of all entry points
 allReachableFiles := findReachableFiles(files, b.entryPoints)

 // 生成sourceMap
 dataForSourceMaps := b.computeDataForSourceMapsInParallel(&options, allReachableFiles)

 var resultGroups [][]graph.OutputFile
 // 根据不同的config执行link流程,link是构建依赖关系及将文件合并为chunk的主要函数
 if options.CodeSplitting || len(b.entryPoints) == 1 {
  // 对于单入口文件会走下面的逻辑
  resultGroups = [][]graph.OutputFile{link(&options, timer, log, b.fs, b.res,
   files, b.entryPoints, b.uniqueKeyPrefix, allReachableFiles, dataForSourceMaps)}
 } else {
  // 多入口文件构建
  waitGroup := sync.WaitGroup{}

  serializer := helpers.MakeSerializer(len(b.entryPoints))
  for i, entryPoint := range b.entryPoints {
   waitGroup.Add(1)
   go func(i int, entryPoint graph.EntryPoint) {
    entryPoints := []graph.EntryPoint{entryPoint}
    forked := timer.Fork()
    resultGroups[i] = link(optionsPtr, forked, log, b.fs, b.res, files, entryPoints,
     b.uniqueKeyPrefix, findReachableFiles(files, entryPoints), dataForSourceMaps)
    timer.Join(forked)
    waitGroup.Done()
   }(i, entryPoint)
  }
  waitGroup.Wait()
 }
 // outputFiles是一个包含bundle结构的array
 return outputFiles, metafileJSON
}

扫描可达文件

// findReachableFiles returns the source indices of all files that can be
// reached from the runtime file and the given entry points by following
// import records. A file is appended only after everything it imports has
// been visited, and every file appears at most once.
func findReachableFiles(files []graph.InputFile, entryPoints []graph.EntryPoint) []uint32 {
	// seen remembers which source indices have already been walked.
	seen := make(map[uint32]bool)
	var postOrder []uint32
	var walk func(uint32)

	walk = func(index uint32) {
		if seen[index] {
			return
		}
		// Mark the file before descending so that import cycles terminate.
		seen[index] = true

		if records := files[index].Repr.ImportRecords(); records != nil {
			// Follow every import record of this file.
			for _, rec := range *records {
				switch {
				case rec.SourceIndex.IsValid():
					walk(rec.SourceIndex.GetIndex())
				case rec.CopySourceIndex.IsValid():
					walk(rec.CopySourceIndex.GetIndex())
				}
			}
		}
		postOrder = append(postOrder, index)
	}

	// The runtime file is always walked first.
	walk(runtime.SourceIndex)

	// Then start a walk from each entry point.
	for _, ep := range entryPoints {
		walk(ep.SourceIndex)
	}

	return postOrder
}

"findReachableFiles" 内部是递归流程,用于查找所有被引用文件。在函数内部,它使用一个映射(map)来确保不会重复访问文件,每当访问一个文件时,会根据该文件内import语句继续遍历其他文件。

Link

// Link builds a linkerContext from the given inputs, runs the linking steps
// in order (scan imports/exports, mark reachability, compute chunks, compute
// cross-chunk dependencies) and returns the generated output files.
func Link(
 options *config.Options,
 timer *helpers.Timer,
 log logger.Log,
 fs fs.FS,
 res *resolver.Resolver,
 inputFiles []graph.InputFile,
 entryPoints []graph.EntryPoint,
 uniqueKeyPrefix string,
 reachableFiles []uint32,
 dataForSourceMaps func() []bundler.DataForSourceMap,
) []graph.OutputFile {

 timer.Begin("Clone linker graph")
 // linkerContext is the object that every later linking step operates on.
 c := linkerContext{
  options:              options,
  timer:                timer,
  log:                  log,
  fs:                   fs,
  res:                  res,
  dataForSourceMaps:    dataForSourceMaps,
  uniqueKeyPrefix:      uniqueKeyPrefix,
  uniqueKeyPrefixBytes: []byte(uniqueKeyPrefix),
  // The graph is cloned from the input files, the reachable file list and
  // the entry points.
  graph: graph.CloneLinkerGraph(
   inputFiles,
   reachableFiles,
   entryPoints,
   options.CodeSplitting,
  ),
 }
 
 // Adjust each entry point before linking and collect the additional files
 // of copied entry points.
 var additionalFiles []graph.OutputFile
 for _, entryPoint := range entryPoints {
  file := &c.graph.Files[entryPoint.SourceIndex].InputFile
  switch repr := file.Repr.(type) {
  case *graph.JSRepr:

   // A lazily-exported entry point is treated as CommonJS in pass-through
   // mode, or in convert-format mode when the output format does not keep
   // ESM import/export syntax.
   if repr.AST.HasLazyExport && (c.options.Mode == config.ModePassThrough ||
    (c.options.Mode == config.ModeConvertFormat && !c.options.OutputFormat.KeepESMImportExportSyntax())) {
    repr.AST.ExportsKind = js_ast.ExportsCommonJS
   }
   // An entry point with an export keyword must expose its exports object
   // when the output is CommonJS, or IIFE with a global name.
   if repr.AST.ExportKeyword.Len > 0 && (options.OutputFormat == config.FormatCommonJS ||
    (options.OutputFormat == config.FormatIIFE && len(options.GlobalName) > 0)) {
    repr.AST.UsesExportsRef = true
    repr.Meta.ForceIncludeExportsForEntryPoint = true
   }

  case *graph.CopyRepr:
   additionalFiles = append(additionalFiles, file.AdditionalFiles...)
  }
 }

 // Scan import and export statements: decide which wrapper (ESM or CJS)
 // each imported file needs, and match imports with their exports.
 c.scanImportsAndExports()
 
 // Mark which files are reachable from which entry points.
 c.treeShakingAndCodeSplitting()

 // Compute the chunks, i.e. which files belong to which chunk.
 c.computeChunks()
 // Compute the dependencies between the chunks computed above.
 c.computeCrossChunkDependencies()

// Generate the output for every chunk.
 return c.generateChunksInParallel(additionalFiles)
}

Link内的主要流程包括:

  • 生成linkerContext对象,linkerContext内包含AST,config等重要信息
  • scanImportsAndExports,扫描import&export,确定产物格式
  • treeShakingAndCodeSplitting,标记文件引用关系
  • computeChunks,初步生成chunk对象,确定chunk内包含哪些文件及内容
  • computeCrossChunkDependencies,处理跨chunk依赖
  • generateChunksInParallel,并行生成各chunk的最终产物

test.png

扫描导入导出语句

func (c *linkerContext) scanImportsAndExports() 

 // 找到需要以cjs格式输出的模块
 for _, sourceIndex := range c.graph.ReachableFiles {
  file := &c.graph.Files[sourceIndex]
  additionalFiles := file.InputFile.AdditionalFiles

  switch repr := file.InputFile.Repr.(type) {
  case *graph.JSRepr:
   for importRecordIndex := range repr.AST.ImportRecords {
    record := &repr.AST.ImportRecords[importRecordIndex]
    if !record.SourceIndex.IsValid() {
     if record.CopySourceIndex.IsValid() {
      otherFile := &c.graph.Files[record.CopySourceIndex.GetIndex()]
      if otherRepr, ok := otherFile.InputFile.Repr.(*graph.CopyRepr); ok {
       record.Path.Text = otherRepr.URLForCode
       record.Path.Namespace = ""
       record.CopySourceIndex = ast.Index32{}
       record.Flags |= ast.ShouldNotBeExternalInMetafile

       additionalFiles = append(additionalFiles, otherFile.InputFile.AdditionalFiles...)
      }
     }
     continue
    }

    otherFile := &c.graph.Files[record.SourceIndex.GetIndex()]
    otherRepr := otherFile.InputFile.Repr.(*graph.JSRepr)

    switch record.Kind {
    case ast.ImportStmt:
    // 处理import *
     if (record.Flags.Has(ast.ContainsImportStar) || record.Flags.Has(ast.ContainsDefaultAlias)) &&
      otherRepr.AST.ExportsKind == js_ast.ExportsNone && !otherRepr.AST.HasLazyExport {
      otherRepr.Meta.Wrap = graph.WrapCJS
      otherRepr.AST.ExportsKind = js_ast.ExportsCommonJS
     }
    // 处理require语句
    case ast.ImportRequire:
     if otherRepr.AST.ExportsKind == js_ast.ExportsESM {
      otherRepr.Meta.Wrap = graph.WrapESM
     } else {
      otherRepr.Meta.Wrap = graph.WrapCJS
      otherRepr.AST.ExportsKind = js_ast.ExportsCommonJS
     }

    case ast.ImportDynamic:
     if !c.options.CodeSplitting {

      if otherRepr.AST.ExportsKind == js_ast.ExportsESM {
       otherRepr.Meta.Wrap = graph.WrapESM
      } else {
       otherRepr.Meta.Wrap = graph.WrapCJS
       otherRepr.AST.ExportsKind = js_ast.ExportsCommonJS
      }
     }
    }
   }

   if repr.AST.ExportsKind == js_ast.ExportsCommonJS && (!file.IsEntryPoint() ||
    c.options.OutputFormat == config.FormatIIFE || c.options.OutputFormat == config.FormatESModule) {
    repr.Meta.Wrap = graph.WrapCJS
   }
  }

  file.InputFile.AdditionalFiles = additionalFiles
 }

 // 处理export * from
 exportStarStack := make([]uint32, 0, 32)
 for _, sourceIndex := range c.graph.ReachableFiles {
  repr, ok := c.graph.Files[sourceIndex].InputFile.Repr.(*graph.JSRepr)
  if !ok {
   continue
  }

  if repr.AST.HasLazyExport {
   c.generateCodeForLazyExport(sourceIndex)
  }

  repr.Meta.ResolvedExportStar = &graph.ExportData{
   Ref:         repr.AST.ExportsRef,
   SourceIndex: sourceIndex,
  }
 }
 c.timer.End("Step 3")

 // 将import和export语句进行匹配
 c.timer.Begin("Step 4")
 for _, sourceIndex := range c.graph.ReachableFiles {
  file := &c.graph.Files[sourceIndex]
  repr, ok := file.InputFile.Repr.(*graph.JSRepr)
  if !ok {
   continue
  }

  if len(repr.AST.NamedImports) > 0 {
   c.matchImportsWithExportsForFile(uint32(sourceIndex))
  }

  // Create the wrapper part for wrapped files. This is needed by a later step.
  c.createWrapperForFile(uint32(sourceIndex))
 }
 c.timer.End("Step 4")
}

标记文件与入口文件的引用关系

因为chunk是根据entryPoint生成的,因此在bundle之前得确定哪些file被对应的entryPoint引用了。

// treeShakingAndCodeSplitting starts a reachability walk from every entry
// point. The position of the entry point in the list is used as its bit
// index, and the walk starts at distance 0.
func (c *linkerContext) treeShakingAndCodeSplitting() {
	entryPoints := c.graph.EntryPoints()
	for bit := range entryPoints {
		c.markFileReachableForCodeSplitting(entryPoints[bit].SourceIndex, uint(bit), 0)
	}
}

// markFileReachableForCodeSplitting records that the file at sourceIndex is
// reachable from the entry point identified by entryPointBit, then recurses
// into the files it imports and into the files its parts depend on.
// treeShakingAndCodeSplitting calls it once per entry point.
func (c *linkerContext) markFileReachableForCodeSplitting(sourceIndex uint32, entryPointBit uint, distanceFromEntryPoint uint32) {
	file := &c.graph.Files[sourceIndex]
	if !file.IsLive {
		return
	}

	// distanceFromEntryPoint is the number of import hops from the entry
	// point. Keep the smallest distance seen so far; finding a shorter path
	// forces another traversal even if the bit is already set.
	foundShorterPath := false
	if distanceFromEntryPoint < file.DistanceFromEntryPoint {
		file.DistanceFromEntryPoint = distanceFromEntryPoint
		foundShorterPath = true
	}
	nextDistance := distanceFromEntryPoint + 1

	// EntryBits holds one bit per entry point. If this entry point's bit is
	// already set and no shorter path was found, there is nothing left to do.
	if file.EntryBits.HasBit(entryPointBit) && !foundShorterPath {
		return
	}
	file.EntryBits.SetBit(entryPointBit)

	repr, isJS := file.InputFile.Repr.(*graph.JSRepr)
	if !isJS {
		return
	}

	// Recurse into every imported file, except external dynamic imports.
	for _, record := range repr.AST.ImportRecords {
		if record.SourceIndex.IsValid() && !c.isExternalDynamicImport(&record, sourceIndex) {
			c.markFileReachableForCodeSplitting(record.SourceIndex.GetIndex(), entryPointBit, nextDistance)
		}
	}

	// Recurse into the files that the parts of this file depend on.
	for _, part := range repr.AST.Parts {
		for _, dependency := range part.Dependencies {
			if dependency.SourceIndex != sourceIndex {
				c.markFileReachableForCodeSplitting(dependency.SourceIndex, entryPointBit, nextDistance)
			}
		}
	}
}

treeShakingAndCodeSplitting内部与findReachableFiles类似,都是通过递归import语句进行查找。

在查找过程中通过distanceFromEntryPoint记录当前文件与entryPoint之间的距离,取distance较小的一条链路作为引用关系。

在构建过程中会给每个file对象创建一个EntryBits二进制数,用于表示该文件被哪些entryPoints引用,如果存在引用关系则hasBit为true。

计算chunk

计算chunk的原理是确定该chunk包含哪些文件的哪些内容,本质上还是确定文件之间的引用关系,esbuild内部通过computeChunks和computeCrossChunkDependencies完成chunk计算过程

// computeChunks groups the live JS files into chunks. Chunks are keyed by the
// string form of an entry-bit set: one chunk is registered per JS entry
// point, and every live JS file is then added to the chunk whose key equals
// the string form of the file's own EntryBits.
func (c *linkerContext) computeChunks() {
 // Maps the string form of an entry-bit set to the chunk for that set.
 jsChunks := make(map[string]chunkInfo)

 // Create one chunk for every entry point.
 for i, entryPoint := range c.graph.EntryPoints() {
  file := &c.graph.Files[entryPoint.SourceIndex]

  // The key of an entry point chunk is a bit set with only bit i set.
  entryBits := helpers.NewBitSet(uint(len(c.graph.EntryPoints())))
  entryBits.SetBit(uint(i))
  key := entryBits.String()
  chunk := chunkInfo{
   entryBits:             entryBits,
   isEntryPoint:          true,
   sourceIndex:           entryPoint.SourceIndex,
   entryPointBit:         uint(i),
   filesWithPartsInChunk: make(map[uint32]bool),
  }

  switch file.InputFile.Repr.(type) {
  case *graph.JSRepr:
   chunkRepr := &chunkReprJS{}
   chunk.chunkRepr = chunkRepr
   // Register the entry point chunk in the map first.
   jsChunks[key] = chunk
  }
 }
 // Assign every live JS file to the chunk that matches its EntryBits. A file
 // whose EntryBits equal those of an entry point lands in that entry point's
 // chunk; for any other bit pattern a new chunk is created on first use.
 for _, sourceIndex := range c.graph.ReachableFiles {
  if file := &c.graph.Files[sourceIndex]; file.IsLive {
   if _, ok := file.InputFile.Repr.(*graph.JSRepr); ok {
    key := file.EntryBits.String()
    chunk, ok := jsChunks[key]
    if !ok {
     chunk.entryBits = file.EntryBits
     chunk.filesWithPartsInChunk = make(map[uint32]bool)
     chunk.chunkRepr = &chunkReprJS{}
     jsChunks[key] = chunk
    }
    // chunk is a copy of the map value, but filesWithPartsInChunk is a map,
    // so this write is visible through the entry stored in jsChunks.
    chunk.filesWithPartsInChunk[uint32(sourceIndex)] = true
   }
  }
 }


 // NOTE(review): cssChunks is not declared in this excerpt, and the code
 // that fills sortedChunks and sortedKeys is not shown; as written,
 // c.chunks receives an empty slice.
 sortedChunks := make([]chunkInfo, 0, len(jsChunks)+len(cssChunks))
 sortedKeys := make([]string, 0, len(jsChunks)+len(cssChunks))

 c.chunks = sortedChunks
}

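对于单一的引用关系,可以通过对比file和entryPoint的entryBits来确定:两者相等时,file直接归入该entryPoint对应的chunk。但是如果file被多个entryPoint引用,file.entryBits内会有多个entryPoint对应的bit位被置位,与任何单个entryPoint的entryBits都不相等,此时computeChunks会以该entryBits为key为它创建一个独立的共享chunk。chunk之间的相互引用关系则由computeCrossChunkDependencies来处理,例如下面这段代码会保证entryPoint chunk导入所有属于它的其他chunk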

// For an entry point chunk, visit every other JS chunk whose entryBits
// contain this chunk's entry point bit.
if chunk.isEntryPoint {
 for otherChunkIndex, otherChunk := range c.chunks {
  if _, ok := otherChunk.chunkRepr.(*chunkReprJS); ok && chunkIndex != otherChunkIndex && otherChunk.entryBits.HasBit(chunk.entryPointBit) {
   // The value is read and stored back unchanged.
   // NOTE(review): presumably importsFromOtherChunks is a map, so this
   // guarantees an entry for otherChunkIndex even when nothing is imported
   // from that chunk - confirm against the type definition.
   imports := chunkRepr.importsFromOtherChunks[uint32(otherChunkIndex)]
   chunkRepr.importsFromOtherChunks[uint32(otherChunkIndex)] = imports
  }
 }
}

生成代码

在计算完chunk信息后就需要根据ast生成代码了,这一过程相对来说没有很复杂。由于生成代码的过程比较耗时且生成chunk的过程可以独立进行,因此esbuild使用并行的方式输出代码。

将AST转换成代码的过程主要在printStmt内,因为要处理的AST节点很多,这里简单看一下处理Function的过程就可以了

// Print a function declaration statement.
case *js_ast.SFunction:
 // Record the source mapping for the statement before emitting any text.
 p.addSourceMapping(stmt.Loc)
 p.printIndent()
 p.printSpaceBeforeIdentifier()
 // Emit the optional "export" and "async" modifiers before the keyword.
 if s.IsExport {
  p.print("export ")
 }
 if s.Fn.IsAsync {
  p.print("async ")
 }
 p.print("function")
 // Generator functions get a "*" after the keyword.
 if s.Fn.IsGenerator {
  p.print("*")
  p.printSpace()
 }
 p.printSpaceBeforeIdentifier()
 // The printed name comes from the renamer, not directly from the AST.
 name := p.renamer.NameForSymbol(s.Fn.Name.Ref)
 p.addSourceMappingForName(s.Fn.Name.Loc, name, s.Fn.Name.Ref)
 p.printIdentifier(name)
 // Print the rest of the function, then end the line.
 p.printFn(s.Fn)
 p.printNewline()