WTSC 8: SIL Generation

217 阅读2分钟

In the previous post, WTSC 7: Semantic Analysis, we obtained the type-annotated AST for our helloworld example, shown below:

(source_file "Helloworld.swift"
  (top_level_code_decl range=[Helloworld.swift:1:1 - line:1:21]
    (brace_stmt implicit range=[Helloworld.swift:1:1 - line:1:21]
      (call_expr type='()' location=Helloworld.swift:1:1 range=[Helloworld.swift:1:1 - line:1:21] nothrow arg_labels=_:
        (declref_expr type='(Any..., String, String) -> ()' location=Helloworld.swift:1:1 range=[Helloworld.swift:1:1 - line:1:1] decl=Swift.(file).print(_:separator:terminator:) function_ref=single)
        (tuple_expr type='(Any..., separator: String, terminator: String)' location=Helloworld.swift:1:6 range=[Helloworld.swift:1:6 - line:1:21] names='',separator,terminator
          (vararg_expansion_expr implicit type='[Any]' location=Helloworld.swift:1:7 range=[Helloworld.swift:1:7 - line:1:7]
            (array_expr implicit type='[Any]' location=Helloworld.swift:1:7 range=[Helloworld.swift:1:7 - line:1:7] initializer=**NULL**
              (erasure_expr implicit type='Any' location=Helloworld.swift:1:7 range=[Helloworld.swift:1:7 - line:1:7]
                (string_literal_expr type='String' location=Helloworld.swift:1:7 range=[Helloworld.swift:1:7 - line:1:7] encoding=utf8 value="Hello world!" builtin_initializer=Swift.(file).String extension.init(_builtinStringLiteral:utf8CodeUnitCount:isASCII:) initializer=**NULL**))))
          (default_argument_expr implicit type='String' location=Helloworld.swift:1:6 range=[Helloworld.swift:1:6 - line:1:6] default_args_owner=Swift.(file).print(_:separator:terminator:) param=1)
          (default_argument_expr implicit type='String' location=Helloworld.swift:1:6 range=[Helloworld.swift:1:6 - line:1:6] default_args_owner=Swift.(file).print(_:separator:terminator:) param=2))))))

In this post, we are going to generate raw SIL (Swift Intermediate Language) from the above AST, as follows:

swiftc -o hello -emit-silgen hello.swift
cat hello
sil_stage raw
import Builtin
import Swift
import SwiftShims
sil [ossa] @main : $@convention(c) (Int32, UnsafeMutablePointer<Optional<UnsafeMutablePointer<Int8>>>) -> Int32 {
bb0(%0 : $Int32, %1 : $UnsafeMutablePointer<Optional<UnsafeMutablePointer<Int8>>>):
  %2 = integer_literal $Builtin.Word, 1           // user: %4
  // function_ref _allocateUninitializedArray<A>(_:)
  %3 = function_ref @$ss27_allocateUninitializedArrayySayxG_BptBwlF : $@convention(thin) <τ_0_0> (Builtin.Word) -> (@owned Array<τ_0_0>, Builtin.RawPointer) // user: %4
  %4 = apply %3<Any>(%2) : $@convention(thin) <τ_0_0> (Builtin.Word) -> (@owned Array<τ_0_0>, Builtin.RawPointer) // user: %5
  (%5, %6) = destructure_tuple %4 : $(Array<Any>, Builtin.RawPointer) // users: %24, %21, %7
  %7 = pointer_to_address %6 : $Builtin.RawPointer to [strict] $*Any // user: %14
  %8 = string_literal utf8 "hello world!"         // user: %13
  %9 = integer_literal $Builtin.Word, 12          // user: %13
  %10 = integer_literal $Builtin.Int1, -1         // user: %13
  %11 = metatype $@thin String.Type               // user: %13
  // function_ref String.init(_builtinStringLiteral:utf8CodeUnitCount:isASCII:)
  %12 = function_ref @$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String // user: %13
  %13 = apply %12(%8, %9, %10, %11) : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String // user: %15
  %14 = init_existential_addr %7 : $*Any, $String // user: %15
  store %13 to [init] %14 : $*String              // id: %15
  // function_ref default argument 1 of print(_:separator:terminator:)
  %16 = function_ref @$ss5print_9separator10terminatoryypd_S2StFfA0_ : $@convention(thin) () -> @owned String // user: %17
  %17 = apply %16() : $@convention(thin) () -> @owned String // users: %23, %21
  // function_ref default argument 2 of print(_:separator:terminator:)
  %18 = function_ref @$ss5print_9separator10terminatoryypd_S2StFfA1_ : $@convention(thin) () -> @owned String // user: %19
  %19 = apply %18() : $@convention(thin) () -> @owned String // users: %22, %21
  // function_ref print(_:separator:terminator:)
  %20 = function_ref @$ss5print_9separator10terminatoryypd_S2StF : $@convention(thin) (@guaranteed Array<Any>, @guaranteed String, @guaranteed String) -> () // user: %21
  %21 = apply %20(%5, %17, %19) : $@convention(thin) (@guaranteed Array<Any>, @guaranteed String, @guaranteed String) -> ()
  destroy_value %19 : $String                     // id: %22
  destroy_value %17 : $String                     // id: %23
  destroy_value %5 : $Array<Any>                  // id: %24
  %25 = integer_literal $Builtin.Int32, 0         // user: %26
  %26 = struct $Int32 (%25 : $Builtin.Int32)      // user: %27
  return %26 : $Int32                             // id: %27
} // end sil function 'main'

// _allocateUninitializedArray<A>(_:)
sil [serialized] [always_inline] [_semantics "array.uninitialized_intrinsic"] @$ss27_allocateUninitializedArrayySayxG_BptBwlF : $@convention(thin) <τ_0_0> (Builtin.Word) -> (@owned Array<τ_0_0>, Builtin.RawPointer)

// String.init(_builtinStringLiteral:utf8CodeUnitCount:isASCII:)
sil [serialized] [always_inline] [readonly] [_semantics "string.makeUTF8"] @$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String

// default argument 1 of print(_:separator:terminator:)
sil hidden_external [serialized] @$ss5print_9separator10terminatoryypd_S2StFfA0_ : $@convention(thin) () -> @owned String

// default argument 2 of print(_:separator:terminator:)
sil hidden_external [serialized] @$ss5print_9separator10terminatoryypd_S2StFfA1_ : $@convention(thin) () -> @owned String

// print(_:separator:terminator:)
sil @$ss5print_9separator10terminatoryypd_S2StF : $@convention(thin) (@guaranteed Array<Any>, @guaranteed String, @guaranteed String) -> ()

Let us find out how this transformation is performed.

We start from the following code snippet, which runs performCompileStepsPostSema() right after semantic analysis finishes.

/* /wtsc/swift/lib/FrontendTool/FrontendTool.cpp */
static bool performAction(CompilerInstance &Instance,
                          int &ReturnValue,
                          FrontendObserver *observer) {
  ...
  case FrontendOptions::ActionType::EmitObject:
  ...
    return withSemanticAnalysis(
        Instance, observer, [&](CompilerInstance &Instance) {
          assert(FrontendOptions::doesActionGenerateSIL(opts.RequestedAction) &&
                 "All actions not requiring SILGen must have been handled!");
          return performCompileStepsPostSema(Instance, ReturnValue, observer);
        });
  ...
}

Then we use our lovely lldb to step through the work performed to generate SIL. I will point out some important breakpoints to show how the raw SIL above is generated.

First, set a breakpoint right after the empty SIL module is created.

(lldb) b SILGen.cpp : 2037
auto silMod = SILModule::createEmptyModule(desc.context, desc.conv, desc.opts);

Then in lldb, we can show what the sil module looks like at that point.

(lldb) expr silMod->dump(true);
sil_stage raw
import Builtin
import Swift
import SwiftShims

Second, set a breakpoint at the creation of the SourceFileScope, which emits the main function into the SIL module.

(lldb) b SILGen.cpp : 1979
SourceFileScope scope(SGM, sf);
(lldb) expr SGM.M.dump(true)
sil_stage raw
import Builtin
import Swift
import SwiftShims
// main
sil [ossa] @main : $@convention(c) (Int32, UnsafeMutablePointer<Optional<UnsafeMutablePointer<Int8>>>) -> Int32 {
bb0(%0 : $Int32, %1 : $UnsafeMutablePointer<Optional<UnsafeMutablePointer<Int8>>>):
bb1:
bb2(%2 : @owned $Error):
} // end sil function 'main'

The code that actually creates this new content is inside the constructor of SourceFileScope.

      RegularLocation TopLevelLoc = RegularLocation::getModuleLocation();
      SILFunction *toplevel = sgm.emitTopLevelFunction(TopLevelLoc);

      // Assign a debug scope pointing into the void to the top level function.
      toplevel->setDebugScope(new (sgm.M) SILDebugScope(TopLevelLoc, toplevel));

      sgm.TopLevelSGF = new SILGenFunction(sgm, *toplevel, sf);
      sgm.TopLevelSGF->MagicFunctionName = sgm.SwiftModule->getName();
      auto moduleCleanupLoc = CleanupLocation::getModuleCleanupLocation();
      sgm.TopLevelSGF->prepareEpilog(false, true, moduleCleanupLoc);

      // Create the argc and argv arguments.
      auto prologueLoc = RegularLocation::getModuleLocation();
      prologueLoc.markAsPrologue();
      auto entry = sgm.TopLevelSGF->B.getInsertionBB();
      auto context = sgm.TopLevelSGF->getTypeExpansionContext();
      auto paramTypeIter = sgm.TopLevelSGF->F.getConventions()
                               .getParameterSILTypes(context)
                               .begin();
      entry->createFunctionArgument(*paramTypeIter);
      entry->createFunctionArgument(*std::next(paramTypeIter));

      scope.emplace(sgm.TopLevelSGF->Cleanups, moduleCleanupLoc);

We can use the same trick to go over the function calls to see how the SIL is generated.

One more thing: the SILModule::dump() method is worth looking over, because it iterates over a SIL module to print all of its contents. From it we can get some insight into the structure of a module. Simply put, a module contains global variables, functions, and some further information about the module. A function contains basic blocks, and each basic block in turn contains instructions.

This post gives only a shallow overview of the SIL generation process. I will take a deeper look at the whole process after this series of posts is finished.

Stay tuned.