vue模版编译源码（一）the-super-tiny-compiler

前端编译原理

编译主要分为三步，parse => transform => generate,
parse 是将代码或者模版字符串等转化成ast树，
transform 对ast进行处理，
generate 是生成代码

Vue2.6 的编译

看下vue2.6的源码关于编译的核心代码

// Compiler factory built around the base pipeline:
// parse the trimmed template -> optionally optimize the AST -> generate code.
var createCompiler = createCompilerCreator(function baseCompile (template, options) {
  var ast = parse(template.trim(), options);

  // Optimization runs unless the caller explicitly passed `optimize: false`.
  var skipOptimize = options.optimize === false;
  if (!skipOptimize) {
    optimize(ast, options);
  }

  var generated = generate(ast, options);

  // Expose the AST together with the generated render code.
  var compiled = { ast: ast };
  compiled.render = generated.render;
  compiled.staticRenderFns = generated.staticRenderFns;
  return compiled
});

其中：parse 会将 template 转成代码所需要的 ast；optimize 其实是对 ast 的优化，标注了一些静态节点；generate 会生成 render 函数，执行 render 函数能够生成 vnode 树。

文件目录

为了更好的理清楚代码结构，我们先梳理一下，vue是怎么执行到这里的。首先

// scripts/config.js
'web-full-dev': {
    entry: resolve('web/entry-runtime-with-compiler.js'),
    dest: resolve('dist/vue.js'),
    format: 'umd',
    env: 'development',
    alias: { he: './entity-decoder' },
    banner
  },

可以看到入口是web/entry-runtime-with-compiler.js文件。

// src/platforms/web/entry-runtime-with-compiler.js
import Vue from './runtime/index'
// Keep a reference to the existing Vue.prototype.$mount so the override
// below can delegate to it once a render function is available.
const mount = Vue.prototype.$mount
// Compiler-aware $mount: when the options carry no render function, resolve a
// template string, compile it with compileToFunctions, store the result on
// the options, and then call the saved mount.
Vue.prototype.$mount = function (
  el?: string | Element,
  hydrating?: boolean
): Component {
  el = el && query(el)
  const options = this.$options
  if (!options.render) {
      // Resolve the template string
    let template = options.template
    if (template) {
      if (typeof template === 'string') {
        // A string starting with '#' is resolved through idToTemplate
        if (template.charAt(0) === '#') {
          template = idToTemplate(template)
        }
      } else if (template.nodeType) {
        // A DOM node: use its innerHTML
        template = template.innerHTML
      } else {
        // Neither a string nor a node: give up and return the instance
        return this
      }
    } else if (el) {
      // No template option: fall back to getOuterHTML(el)
      template = getOuterHTML(el)
    }

    // Compile the template into render / staticRenderFns
    if (template) {
      const { render, staticRenderFns } = compileToFunctions(template, {
        outputSourceRange: process.env.NODE_ENV !== 'production',
        shouldDecodeNewlines,
        shouldDecodeNewlinesForHref,
        delimiters: options.delimiters,
        comments: options.comments
      }, this)
      options.render = render
      options.staticRenderFns = staticRenderFns
    }
  }
  // Delegate to the $mount that was saved before this override
  return mount.call(this, el, hydrating)
}

上面的mount函数实际最终执行的是 mountComponent

// src/platforms/web/runtime/index.js
// Runtime $mount: resolves `el` through query() only when `el` is truthy and
// `inBrowser` is true (otherwise it becomes undefined), then hands over to
// mountComponent.
Vue.prototype.$mount = function (
  el?: string | Element,
  hydrating?: boolean
): Component {
  el = el && inBrowser ? query(el) : undefined
  return mountComponent(this, el, hydrating)
}

上面获取render的函数compileToFunctions来自

// src/platforms/web/compiler/index.js
import { baseOptions } from './options'
import { createCompiler } from 'compiler/index'
// createCompiler 就是上面最开始提到的核心function
const { compile, compileToFunctions } = createCompiler(baseOptions)

export { compile, compileToFunctions }

Parse

对，大概文件有个了解之后，看下parse怎么将template生成ast树的

var ast = parse(template.trim(), options);

template字符串的获得

通过上面的分析，可以看出template 是这么来的

// Start from the `template` option supplied when the Vue instance was created.
let template = options.template
if (template) {
    // A string template starting with '#' is resolved through idToTemplate
    // (presumably the innerHTML of the element with that id — confirm in idToTemplate)
    if (typeof template === 'string') {
        if (template.charAt(0) === '#') {
            template = idToTemplate(template)
            /* istanbul ignore if */
            if (process.env.NODE_ENV !== 'production' && !template) {
            warn(
                `Template element not found or is empty: ${options.template}`,
                this
            )
            }
        }
    } else if (template.nodeType) {
        // template is a DOM node: use its innerHTML
        template = template.innerHTML
    } else {
        // Neither a string nor a DOM node: warn outside production and bail out
        if (process.env.NODE_ENV !== 'production') {
            warn('invalid template option:' + template, this)
        }
        return this
    }
} else if (el) {
    // No template option was given but el is available: use getOuterHTML(el)
    template = getOuterHTML(el)
}

parse

观察parse方法，看到里面定义了一些变量

stack ==> 这个栈会存储所有未关闭标签的节点，在后面我们称之为outerStack
currentParent ==> 这个是指当前未关闭标签的最近父元素
root ==> 最终返回的根节点

/**
   * Convert HTML string to AST.
   *
   * Skeleton view of parse: it declares its own state (stack, root,
   * currentParent) and helper functions, then runs parseHTML over the
   * template with four hooks (start, end, chars, comment). The hook and
   * helper bodies are elided in this excerpt.
   *
   * @param template the template string to parse
   * @param options  compiler options; the fields read here are forwarded to parseHTML
   * @returns the `root` variable (never assigned within this trimmed excerpt)
   */
  function parse (
    template,
    options
  ) {
    // Elements whose tags have not been closed yet (called outerStack in the article)
    var stack = [];
    // Root node that is finally returned
    var root;
    // Nearest parent element whose tag is still open
    var currentParent;
    function closeElement (element) {
    }

    function trimEndingWhitespace (el) {
    }

    parseHTML(template, {
      warn: warn$2,
      expectHTML: options.expectHTML,
      isUnaryTag: options.isUnaryTag,
      canBeLeftOpenTag: options.canBeLeftOpenTag,
      shouldDecodeNewlines: options.shouldDecodeNewlines,
      shouldDecodeNewlinesForHref: options.shouldDecodeNewlinesForHref,
      shouldKeepComment: options.comments,
      outputSourceRange: options.outputSourceRange,
      // Hooks handed to parseHTML
      start: function start (tag, attrs, unary, start$1, end) {
        // ...
      },
      end: function end (tag, start, end$1) {
        // ...
      },
      chars: function chars (text, start, end) {
        // ...
      },
      comment: function comment (text, start, end) {
        // ...
      }
    });
    return root
  }

根据上面的代码，可以看出parse函数主要做了两件事情：

定义了自身的一些存储变量和function，

执行了parseHTML

观察一下parseHTML的入参，里面除了传入的各个options之外，还有四个函数，这四个函数相当于钩子，在特定的情况下去执行，从而改变 outerStack 和 currentParent 的值。

parseHTML

parseHTML 可以理解为一个walk函数，它会遍历所有的template string

识别到不同的tag

处理相应tag

advance(n) 前进 n 个字符

调用相应的钩子函数，改变outer的变量

下面的代码大概看下就行，后面会结合例子逐步看

/**
 * Scan `html` from the front, consuming it piece by piece and reporting what
 * was found: text goes to options.chars, end tags to parseEndTag, and start
 * tags to parseStartTag / handleStartTag.
 *
 * Trimmed excerpt: advance, parseStartTag, handleStartTag, parseEndTag and the
 * regexes / helpers (endTag, startTagOpen, comment, conditionalComment,
 * isPlainTextElement, shouldIgnoreFirstNewline, reCache, no) are defined
 * outside the lines shown here.
 *
 * @param html    the template string to scan
 * @param options parser options and hooks (chars is called directly in this excerpt)
 */
function parseHTML (html, options) {
    // Unclosed tags seen so far (called innerStack in the article)
    var stack = [];
    // expectHTML and canBeLeftOpenTag$$1 are not used within this trimmed excerpt
    var expectHTML = options.expectHTML;
    var isUnaryTag$$1 = options.isUnaryTag || no;
    var canBeLeftOpenTag$$1 = options.canBeLeftOpenTag || no;
    // Position reached so far in the string being scanned
    var index = 0;
    var last, lastTag;
    while (html) {
      // Snapshot used at the bottom of the loop to detect "nothing was consumed"
      last = html;
      if (!lastTag || !isPlainTextElement(lastTag)) {
        var textEnd = html.indexOf('<');
        if (textEnd === 0) {
          // ... (handling of other tag kinds is elided in this excerpt)

          // End tag:
          var endTagMatch = html.match(endTag);
          if (endTagMatch) {
            var curIndex = index;
            advance(endTagMatch[0].length);
            parseEndTag(endTagMatch[1], curIndex, index);
            continue
          }

          // Start tag:
          var startTagMatch = parseStartTag();
          if (startTagMatch) {
            handleStartTag(startTagMatch);
            continue
          }
        }

        var text = (void 0), rest = (void 0), next = (void 0);
        if (textEnd >= 0) {
          rest = html.slice(textEnd);
          // A '<' that does not begin an end tag, start tag, comment or
          // conditional comment counts as plain text: keep looking for the next '<'.
          while (
            !endTag.test(rest) &&
            !startTagOpen.test(rest) &&
            !comment.test(rest) &&
            !conditionalComment.test(rest)
          ) {
            next = rest.indexOf('<', 1);
            if (next < 0) { break }
            textEnd += next;
            rest = html.slice(textEnd);
          }
          text = html.substring(0, textEnd);
        }

        // No '<' left at all: everything that remains is text
        if (textEnd < 0) {
          text = html;
        }

        if (text) {
          advance(text.length);
        }

        // Report the text together with its [start, end) range
        if (options.chars && text) {
          options.chars(text, index - text.length, index);
        }
      } else {
        // lastTag is a plain-text element (per isPlainTextElement): take
        // everything up to its closing tag in one step.
        var endTagLength = 0;
        var stackedTag = lastTag.toLowerCase();
        // One cached regex per tag name: lazily captured content + the closing tag
        var reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'));
        var rest$1 = html.replace(reStackedTag, function (all, text, endTag) {
          endTagLength = endTag.length;
          if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
            // Unwrap comment and CDATA wrappers, keeping their inner content
            text = text
              .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
              .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1');
          }
          if (shouldIgnoreFirstNewline(stackedTag, text)) {
            text = text.slice(1);
          }
          if (options.chars) {
            options.chars(text);
          }
          // Remove the matched content and closing tag from html
          return ''
        });
        // Move index forward by the number of characters that were removed
        index += html.length - rest$1.length;
        html = rest$1;
        parseEndTag(stackedTag, index - endTagLength, index);
      }

      // Nothing was consumed in this iteration: hand the remainder to chars and stop
      if (html === last) {
        options.chars && options.chars(html);
        break
      }
    }

    // Called without a tag name: closes whatever is still on the stack
    parseEndTag();
  }

观察parseHTML:

首先会看到function中也定义了一些变量 stack: 存放未关闭的 element, 因为和上面的stack同一个名字，下面我们把这个stack称为innerStack来作区分
index: 当前string遍历到的index指针
循环html

实例分析

主要看一下针对多层的dom是怎么实现ast的生成的,可以结合最后面的流程图和源代码debug看。

假设template是

    <div id="demo"><div id="child">childValue</div></div>
    new Vue({
        el: '#demo',
    })

因为第一个字符是<，所以命中start tag

    // Start tag:
    var startTagMatch = parseStartTag();
    if (startTagMatch) {
        handleStartTag(startTagMatch);
        continue
    }

这里执行了两个方法，其中parseStartTag：

var startTagOpen = new RegExp(("^<" + qnameCapture));
/**
 * Try to consume a start tag at the beginning of `html`.
 *
 * Reads the tag name, then every attribute up to the closing `>` or `/>`,
 * moving `index` forward through advance() as it goes.
 * advance(n) is expected to drop n characters from `html` and add n to
 * `index`; it is defined outside this block.
 *
 * @returns {{tagName: string, attrs: Array, start: number, unarySlash: string, end: number}|undefined}
 *   the match object when a complete start tag was consumed; undefined when
 *   `html` does not begin with a start tag or the tag is never closed.
 */
function parseStartTag () {
    var start = html.match(startTagOpen);
    // e.g. start = ["<div", "div"]
    if (start) {
        var match = {
            tagName: start[1],
            attrs: [],
            start: index
        };
        advance(start[0].length);
        var end, attr;
        // `end` and `attr` must be re-matched on EVERY iteration because
        // advance() consumes `html`. Matching them once before the loop would
        // never terminate, and `attr` would be null whenever only the plain
        // `attribute` pattern matched.
        //   end  : is the next thing `>` or `/>` ?
        //   attr : otherwise, is there another (dynamic-arg or plain) attribute?
        while (
            !(end = html.match(startTagClose)) &&
            (attr = html.match(dynamicArgAttribute) || html.match(attribute))
        ) {
            attr.start = index;
            advance(attr[0].length);
            attr.end = index;
            match.attrs.push(attr);
        }
        // For <div id="demo"> the match now looks like:
        // {
        //     "tagName": "div",
        //     "attrs": [ [" id=\"demo\"", "id", "=", "demo", undefined, undefined] ],
        //     "start": 0
        // }

        // The tag was closed with `>` or `/>`
        if (end) {
            match.unarySlash = end[1];
            advance(end[0].length);
            match.end = index;
            // For <div id="demo">: unarySlash === "" and end === 15
            return match
        }
    }
}

通过代码，可以看出 parseStartTag 其实就是

将<div id="demo">转换成object, 这个object包含了

{
    tagName,
    attrs // 属性数组还需要解析
    start // 开始的index
    end // 结束的index
}

并且parseStartTag将index指针指向了结尾

得到startTagMatch，看下接下来做了什么操作

if (startTagMatch) {
    handleStartTag(startTagMatch);
    continue
}

handleStartTag：

处理attrs

当前节点push到innerStack中

执行钩子函数start

/**
 * Post-process a start tag found by parseStartTag: prepare its attribute
 * list, push non-unary tags onto the inner stack, and fire the `start` hook.
 *
 * @param match the object returned by parseStartTag
 *              (tagName, attrs, start, end, unarySlash)
 */
function handleStartTag (match) {
    var tagName = match.tagName;
    var unarySlash = match.unarySlash;

    // A tag is unary when the platform says so (isUnaryTag$$1, set up in
    // parseHTML) or when it was written self-closing (`/>`). Without this
    // declaration the two uses of `unary` below throw a ReferenceError.
    var unary = isUnaryTag$$1(tagName) || !!unarySlash;

    var l = match.attrs.length;
    var attrs = new Array(l);
    for (var i = 0; i < l; i++) {
        // attribute processing is elided in this excerpt
    }
    // In the full implementation attrs ends up as, e.g.:
    // [ { "name": "id", "value": "demo", "start": 5, "end": 14 } ]

    if (!unary) {
        // Only tags that can have content stay open on the inner stack, e.g.:
        // [ { "tag": "div", "lowerCasedTag": "div", "attrs": [...], "start": 0, "end": 15 } ]
        stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end });
        lastTag = tagName;
    }

    // hook: run the `start` hook supplied by the caller
    if (options.start) {
        options.start(tagName, attrs, unary, match.start, match.end);
    }
}

然后看下start函数具体做了什么事情

通过createASTElement生成element

对element进行处理(指令的解析等)

当前element赋值给currentParent

element push到outerStack中

/**
 * `start` hook (simplified excerpt): builds the AST element for a start tag,
 * runs the pre-transforms and structural-directive processors on it, and
 * makes it the current parent on the outer stack.
 *
 * @param tag      tag name
 * @param attrs    attribute list prepared by handleStartTag
 * @param unary    whether the tag is unary (not used in this excerpt)
 * @param start$1  start index of the tag in the template
 * @param end      end index of the start tag in the template
 */
function start (tag, attrs, unary, start$1, end) {

    var element = createASTElement(tag, attrs, currentParent);
    // Per the article, createASTElement:
    // 1. turns attrsList into a map stored as attrsMap
    // 2. attaches the passed-in currentParent as the element's parent
    // 3. marks the node with type 1 (element node)
    // 4. produces the node shape used for children from here on

    // element  ==> 
    // {
    //     "type": 1,
    //     "tag": "div",
    //     "attrsList": [
    //         {
    //         "name": "id",
    //         "value": "demo",
    //         "start": 5,
    //         "end": 14
    //         }
    //     ],
    //     "attrsMap": {
    //         "id": "demo"
    //     },
    //     "rawAttrsMap": {},
    //     "children": []
    // }

    {
        if (options.outputSourceRange) {
        element.start = start$1;
        element.end = end;
        // Index the raw attribute entries by attribute name
        element.rawAttrsMap = element.attrsList.reduce(function (cumulated, attr) {
            cumulated[attr.name] = attr;
            return cumulated
        }, {});
        }
    }
    // start, end and rawAttrsMap are added here; rawAttrsMap looks like:
    // {
    //     "id": {
    //         "name": "id",
    //         "value": "demo",
    //         "start": 5,
    //         "end": 14
    //     }
    // }

    // apply pre-transforms
    // Each pre-transform may return a replacement element; otherwise the
    // current one is kept. (Per the article they parse and bind Vue directives.)
    for (var i = 0; i < preTransforms.length; i++) {
        element = preTransforms[i](element, options) || element;
    }

    // structural directives
    // TODO: covers v-for / v-if / v-once (how each is turned into executable code is not covered here)
    processFor(element);
    processIf(element);
    processOnce(element);

    // No root yet means this is the first node: it becomes the root
    if (!root) {
        root = element;
    }
    // The new element becomes the current parent
    currentParent = element;
    // Push the element onto the (outer) stack
    stack.push(element);
}

那到目前为止，<div id="demo">才算解析完成（我们忽略对空白字符的处理）。接下来将解析<div id="child">，流程和上面完全一致，这个时候我们来看下处理完成后的innerStack

[
  {
    "tag": "div",
    "lowerCasedTag": "div",
    "attrs": [
      {
        "name": "id",
        "value": "demo",
        "start": 5,
        "end": 14
      }
    ],
    "start": 0,
    "end": 15
  },
  {
    "tag": "div",
    "lowerCasedTag": "div",
    "attrs": [
      {
        "name": "id",
        "value": "child",
        "start": 20,
        "end": 30
      }
    ],
    "start": 15,
    "end": 31
  }
]

再看下outerStack

[
  {
    "type": 1,
    "tag": "div",
    "attrsList": [...],
    "attrsMap": {
      "id": "demo"
    },
    "rawAttrsMap": {...},
    "children": [],
    "start": 0,
    "end": 15
  },
  {
    "type": 1,
    "tag": "div",
    "attrsList": [...],
    "attrsMap": {
      "id": "child"
    },
    "rawAttrsMap": {...},
    "parent": {
      "type": 1,
      "tag": "div",
      "attrsList": [...],
      "attrsMap": {
        "id": "demo"
      },
      "rawAttrsMap": {...},
      "children": [],
      "start": 0,
      "end": 15
    },
    "children": [],
    "start": 15,
    "end": 31
  }
]

currentParent 指向child这个element。

接着解析childValue parseHTML中

index指针指向childValue结尾

执行chars钩子函数

    var textEnd = html.indexOf('<');
    if (textEnd >= 0) {
        // ...
        text = html.substring(0, textEnd);
    }
    // text == 'childValue'
    if (text) {
        advance(text.length);
    }
    // 执行钩子函数chars
    if (options.chars && text) {
        options.chars(text, index - text.length, index);
    }

chars的实现：

生成text的node

将textNode push到当前currentParent.children中

    /**
     * `chars` hook: turn a run of text into a child node of currentParent.
     *
     * Text that parseText recognises (outside v-pre) becomes a type 2 node
     * carrying the parsed expression and tokens; anything else becomes a
     * plain type 3 text node, except a single space that directly follows
     * another single-space child.
     *
     * @param text  the raw text
     * @param start start index of the text
     * @param end   end index of the text
     */
    function chars (text, start, end) {
      if (!currentParent) {
        return
      }
      var siblings = currentParent.children;
      // ... (whitespace handling of the full implementation is elided here)
      if (!text) {
        return
      }

      var node;
      var parsed = (!inVPre && text !== ' ')
        ? parseText(text, delimiters)
        : undefined;

      if (parsed) {
        node = {
          type: 2,
          expression: parsed.expression,
          tokens: parsed.tokens,
          text: text
        };
      } else {
        // A lone space is dropped only when the previous child is already a lone space
        var repeatsSpace = text === ' ' &&
          siblings.length &&
          siblings[siblings.length - 1].text === ' ';
        if (!repeatsSpace) {
          // type 3 marks a plain text node
          node = {
            type: 3,
            text: text
          };
        }
      }

      if (!node) {
        return
      }
      node.start = start;
      node.end = end;
      siblings.push(node);
    }

执行完成后 currentParent.children.push(textNode)，这个时候的currentParent是：

{
  "type": 1,
  "tag": "div",
  "attrsList": [...],
  "attrsMap": {
    "id": "child"
  },
  "rawAttrsMap": {...},
  "parent": {
    "type": 1,
    "tag": "div",
    "attrsList": [...],
    "attrsMap": {
      "id": "demo"
    },
    "rawAttrsMap": {...},
    "children": [],
    "start": 0,
    "end": 15
  },
  // 文本被push在children中
  "children": [
    {
      "type": 3,
      "text": "childValue",
      "start": 31,
      "end": 41
    }
  ],
  "start": 15,
  "end": 31
}

到这里后面都是关闭标签的了，我们继续看</div>是怎么处理的 parseHTML中:

index指针指向</div>结尾

执行parseEndTag

// End tag:
    var endTagMatch = html.match(endTag);
    // endTagMatch ==>
    // [
    //     "</div>",
    //     "div"
    // ]
    if (endTagMatch) {
        var curIndex = index;
        // index指针移到结尾
        advance(endTagMatch[0].length);
        parseEndTag(endTagMatch[1], curIndex, index);
        continue
    }

parseEndTag

找出innerStack中和当前标签名相同的距离栈顶最近的元素

把这个元素到栈顶所有的元素依次执行end钩子函数

将这些元素从innerStack中推出

/**
 * Close tags on the inner stack.
 *
 * With a tag name: find the entry nearest the top of the stack whose
 * lower-cased name matches, then close it and everything above it.
 * Without a tag name: close everything on the stack.
 * When no matching entry exists nothing happens.
 *
 * @param tagName name of the end tag (optional)
 * @param start   start index passed on to the `end` hook
 * @param end     end index passed on to the `end` hook
 */
function parseEndTag (tagName, start, end) {
  var pos = 0;
  if (tagName) {
    // Walk down from the top of the stack to the nearest entry with the same name
    var wanted = tagName.toLowerCase();
    pos = stack.length - 1;
    while (pos >= 0 && stack[pos].lowerCasedTag !== wanted) {
      pos--;
    }
  }

  // Nothing on the stack matches this end tag
  if (pos < 0) {
    return
  }

  // Fire the `end` hook for each entry from the top of the stack down to pos
  for (var i = stack.length - 1; i >= pos; i--) {
    if (options.end) {
      options.end(stack[i].tag, start, end);
    }
  }

  // Drop the closed entries and remember the tag that is now on top
  stack.length = pos;
  lastTag = pos && stack[pos - 1].tag;
}

end 钩子函数：

将outerStack栈顶element推出

currentParent指向element的父节点

对推出的element.end进行重写

执行closeElement(element)

/**
 * `end` hook: pop the element on top of the outer stack, point
 * currentParent at the new top, and close the popped element.
 *
 * @param tag    tag name of the element being closed (not used here)
 * @param start  start index of the end tag (not used here)
 * @param end$1  end index of the end tag
 */
function end (tag, start, end$1) {
    // Element currently on top of the outer stack
    var element = stack[stack.length - 1];
    // Pop it
    stack.length -= 1;
    // currentParent now points at the new top of the stack
    currentParent = stack[stack.length - 1];
    if (options.outputSourceRange) {
        // Until now element.end was the end of the start tag (e.g. <div id="child">);
        // it becomes the end of the whole element (<div id="child">XXXX</div>).
        element.end = end$1;
    }
    closeElement(element);
}

closeElement函数

处理vue的指令等，挂载在element上（忽略）

如果有currentParent，currentParent.children.push(element)

/**
 * Finish an element and attach it to the current parent.
 *
 * Unprocessed elements (outside v-pre) go through processElement first.
 * With a current parent and a non-forbidden element: v-else-if / v-else
 * elements are handed to processIfConditions; every other element is
 * registered as a scoped slot when it has a slotScope, pushed into the
 * parent's children, and gets its `parent` set.
 *
 * @param element the AST element being closed
 */
function closeElement (element) {
  var needsProcessing = !inVPre && !element.processed;
  if (needsProcessing) {
    element = processElement(element, options);
  }

  if (!currentParent || element.forbidden) {
    return
  }

  // v-else-if / v-else elements are handled by processIfConditions instead
  if (element.elseif || element.else) {
    processIfConditions(element, currentParent);
    return
  }

  // Scoped slot: register the element on the parent under its slot name
  if (element.slotScope) {
    var slotName = element.slotTarget || '"default"';
    if (!currentParent.scopedSlots) {
      currentParent.scopedSlots = {};
    }
    currentParent.scopedSlots[slotName] = element;
  }

  // Attach the element to its parent and link back to it
  currentParent.children.push(element);
  element.parent = currentParent;
}

同样最后一个</div>也是执行了上述关闭的过程。最终root也就是第一个element是

{
    attrs: [{…}]
    attrsList: [{…}]
    attrsMap: {id: "demo"}
    children: [{
        attrs: [{…}]
        attrsList: [{…}]
        attrsMap: {id: "child"}
        children: [{
            end: 41
            start: 31
            text: "childValue"
            type: 3
        }]
        end: 47
        parent: {type: 1, tag: "div", attrsList: Array(1), attrsMap: {…}, rawAttrsMap: {…}, …}
        plain: false
        rawAttrsMap: {id: {…}}
        start: 15
        tag: "div"
        type: 1
    }]
    end: 53
    parent: undefined
    plain: false
    rawAttrsMap: {id: {…}}
    start: 0
    tag: "div"
    type: 1
}

vue parse基本流程

vue parse全流程

变量绑定

上面我们讨论了父子节点怎么生成ast的，我们知道模版标签中所有的信息都会最终转变成ast中的某个key，那么vue中的变量最终的ast会是什么样子的呢？比如有这么个dom：

    <div id="demo" :class="calssName"><div v-if="c">{{a + b}}</div><div v-else>{{a-b}}</div><div v-for="item in branches">{{item}}</div></div>

最终生成的ast是下面这个样子的，可以主要关注下我指出来的部分, 其中_s = toString();

{
    attrs: [{…}]
    attrsList: [{…}]
    attrsMap: {id: "demo", :class: "calssName"}
    children: (3) [
        {
            attrsList: []
            attrsMap: {v-if: "c"}
            children: [{
                end: 57
                // 👇
                expression: "_s(a + b)"
                start: 48
                text: "{{a + b}}"
                // 👇
                tokens: [{@binding: "a + b"}]
                type: 2
            }]
            end: 63
            // 👇
            if: "c"
            // 👇
            ifConditions: [{
                // <div v-if="c">{{a + b}}</div>
                block: {type: 1, tag: "div", attrsList: Array(0), attrsMap: {…}, rawAttrsMap: {…}, …}
                exp: "c"
            }, {
                // <div v-else>{{a-b}}</div>
                block: {type: 1, tag: "div", attrsList: Array(0), attrsMap: {…}, rawAttrsMap: {…}, …}
                exp: undefined
            }]
            parent: {...}
            plain: true
            rawAttrsMap: {v-if: {…}}
            start: 34
            tag: "div"
            type: 1
        },{
            alias: "item"
            attrsList: []
            attrsMap: {v-for: "item in branches"}
            children: [{
                end: 101
                // 👇
                expression: "_s(item)"
                start: 93
                text: "{{item}}"
                // 👇
                tokens: [{@binding: "item"}]
                type: 2
            }]
            end: 107
            // 👇
            for: "branches"
            parent: {...}
            plain: true
            rawAttrsMap: {v-for: {…}}
            start: 63
            tag: "div"
            type: 1
        }
    ]
    // 👇
    classBinding: "calssName"
    end: 158
    parent: undefined
    plain: false
    rawAttrsMap: {
        :class: {name: ":class", value: "calssName", start: 15, end: 33}
        id: {name: "id", value: "demo", start: 5, end: 14}
    }
    start: 0
    tag: "div"
    type: 1
}