secondparsesecondparse 基础解析函数只能解析 <div></div> 这种简单合法的 html 字

secondparse

基础解析函数只能解析 <div></div> 这种简单合法的 html 字符串，但却是一切的基础，下面来加入对属性的解析。在这之前需要两个关键正则：

// Matches one ordinary tag attribute at the start of the remaining input.
// Capture groups: [1] attribute name, [2] the `=` sign (when a value is given),
// [3] double-quoted value, [4] single-quoted value, [5] unquoted value.
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/;

// Matches one dynamic-argument attribute at the start of the remaining input:
// a `v-xxx:`, `@`, `:` or `#` prefix followed by a bracketed argument, e.g. `:[key]`.
// The capture groups are laid out the same way as in `attribute`.
const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/

这两个正则用于匹配标签上的属性（普通属性和带动态参数的属性），有了它们，就可以获取到标签上的任何属性，首先是上篇的基础解析代码：

/**
 * Parse a minimal HTML string such as `<div></div>` into a root AST node.
 *
 * This basic version does not understand attributes or text content.
 * It expects the `startTag`, `startTagClose` and `endTag` regular expressions
 * to be in scope; they are not defined inside this function.
 *
 * @param {string} input - HTML source to parse.
 * @returns {{tagName: string} | null} The most recently opened, properly
 *     closed start tag, or `null` when the input is empty, an end tag does
 *     not pair with the current start tag, or the parser reaches input it
 *     cannot consume.
 */
function parse(input) {
    let root = null // AST node parsed so far
    let tagName = '' // name of the start tag that is waiting for its end tag
    // Keep going for as long as there is unparsed input left
    while(input) {
        const lengthBefore = input.length
        const textEnd = input.indexOf('<')
        if(textEnd === 0){
            // A leading `<` may open a start tag, open an end tag, or be a stray `<`.
            // Try the start tag first.
            const match = input.match(startTag)
            if(match){
                input = input.slice(match[0].length)
                // Check that the start tag is closed properly
                const closeStart = input.match(startTagClose)
                if(closeStart){
                    // The closing `>` / `/>` is consumed exactly once here;
                    // slicing again for self-closing tags would eat the text that follows.
                    input = input.slice(closeStart[0].length)
                    root = {
                        tagName: match[1]
                    }
                    if(closeStart[1] === '/'){
                        // Self-closing tag: there is no end tag to wait for
                        continue;
                    }
                    tagName = root.tagName
                }
            }
            const matchEnd = input.match(endTag)
            if(matchEnd){
                if(matchEnd[1] !== tagName){
                    // End tag does not pair with the start tag: invalid markup, keep nothing
                    root = null
                    break
                }
                input = input.slice(matchEnd[0].length)
            }
        }
        if(input.length === lengthBefore){
            // Nothing was consumed in this pass (plain text, a stray `<`, ...).
            // Looping again would never terminate, so treat the input as unparsable.
            root = null
            break
        }
    }
    return root
}

// Demo: run the basic parser on the simplest valid input and print the returned root node.
console.log('parse', parse('<div></div>'));

由于属性是在开始标签上的，所以这里只需要修改匹配开始标签的部分，在其中加入解析属性的代码即可：

需要注意的是，按照 vue 的规则，v-bind:[xxx] = "ee"（或简写 :[xxx] = "ee"）属于动态属性

老规矩，这里给出一段待匹配的模板 <div id="app" :b="c" v-html="d" :[xxx] = "e"></div>

/**
 * Parse a simple HTML string into a root AST node, collecting the attributes
 * found on the start tag.
 *
 * It expects the `startTag`, `startTagClose`, `endTag`, `attribute` and
 * `dynamicArgAttribute` regular expressions to be in scope; they are not
 * defined inside this function.
 *
 * @param {string} input - HTML source to parse.
 * @returns {{tagName: string, attrList: Array<{name: string, value: (string|undefined)}>} | null}
 *     The most recently opened, properly closed start tag together with its
 *     attributes. A self-closing tag is returned immediately. `null` is
 *     returned when the input is empty, an end tag does not pair with the
 *     current start tag, or the parser reaches input it cannot consume.
 */
function parse(input) {
    let root = null // AST node parsed so far
    let tagName = '' // name of the start tag that is waiting for its end tag
    // Keep going for as long as there is unparsed input left
    while(input) {
        const lengthBefore = input.length
        const textEnd = input.indexOf('<')
        if(textEnd === 0){
            // A leading `<` may open a start tag, open an end tag, or be a stray `<`.

            // Try the start tag first.
            const match = input.match(startTag)
            if(match){
                input = input.slice(match[0].length)
                let closeStart = null // match result for the end of the start tag
                let attr = null // match result for the attribute currently being read
                // The start tag may still turn out to be invalid, so build a temporary node first
                let matchNode = {
                    tagName: match[1],
                    attrList: []
                }
                // Read attributes one by one until the start tag closes or nothing matches.
                // The dynamic-argument pattern is tried before the plain attribute pattern.
                while(
                    !(closeStart = input.match(startTagClose)) &&
                    (attr = input.match(dynamicArgAttribute) || input.match(attribute))
                ){
                    // Groups 3/4/5 hold the double-quoted / single-quoted / unquoted value
                    matchNode.attrList.push({
                        name: attr[1],
                        value: attr[3] || attr[4] || attr[5]
                    })
                    input = input.slice(attr[0].length)
                }
                if(closeStart){
                    input = input.slice(closeStart[0].length)
                    // The start tag is closed properly: promote the temporary node
                    root = {
                        ...matchNode
                    }
                    if(closeStart[1] === '/'){
                        // Self-closing tag: there is no end tag to wait for
                        break;
                    }
                    tagName = root.tagName
                }
            }
            const matchEnd = input.match(endTag)
            if(matchEnd){
                if(matchEnd[1] !== tagName){
                    // End tag does not pair with the start tag: invalid markup, keep nothing
                    root = null
                    break
                }
                input = input.slice(matchEnd[0].length)
            }
        }
        if(input.length === lengthBefore){
            // Nothing was consumed in this pass (plain text, a stray `<`, ...).
            // Looping again would never terminate, so treat the input as unparsable.
            root = null
            break
        }
    }
    return root
}

// Demo: parse a start tag carrying several attributes, including a dynamic-argument one (`:[xxx]`).
console.log('parse', parse('<div id="app" :b="c" v-html="d" :[xxx] = "e"></div>'));

这样子，我们得到的目标 ast 会是下面这样子：

// Expected AST: one entry in attrList per attribute, in source order
root = {
    tagName: 'div',
    attrList: [
        { name: 'id', value: 'app' },
        { name: ':b', value: 'c' },
        { name: 'v-html', value: 'd' },
        { name: ':[xxx]', value: 'e' }
    ]
}

要得到上面的结构，这个标签必须是合法的标签。

这里解析属性的主体代码如下：

while(
    !(closeStart = input.match(startTagClose)) && // the end of the start tag has not been reached yet
    (attr = input.match(dynamicArgAttribute) || input.match(attribute))  // and an attribute can be matched
){
    // Collect the attribute; groups 3/4/5 hold the double-quoted / single-quoted / unquoted value
    matchNode.attrList.push({
        name: attr[1],
        value: attr[3] || attr[4] || attr[5]
    })
    input = input.slice(attr[0].length)
}

同时修改了开始标签的生成方式

let closeStart = null // match result for the end of the start tag
let attr = null  // match result for the attribute currently being read
let matchNode = {  // the start tag may still turn out to be invalid, so build a temporary node first
    tagName: match[1],
    attrList: []
}
// Match attributes one by one; the dynamic-argument pattern is tried before the plain one
while(
    !(closeStart = input.match(startTagClose)) && 
    (attr = input.match(dynamicArgAttribute) || input.match(attribute))
){
    // Collect the attribute; groups 3/4/5 hold the double-quoted / single-quoted / unquoted value
    matchNode.attrList.push({
        name: attr[1],
        value: attr[3] || attr[4] || attr[5]
    })
    input = input.slice(attr[0].length)
}
// Check whether the start tag was closed properly
if(closeStart){
    input = input.slice(closeStart[0].length)
    // The start tag is closed properly: promote the temporary node to root
    root = {
        ...matchNode
    }
    if(closeStart[1] === '/'){
        // Self-closing tag: leave the enclosing parse loop
        break;
    }
    tagName = root.tagName
}

总结

至此，属性的解析已经加入其中，因为有了前文的基础，所以属性解析也是手到擒来，下一篇将加入子元素的解析。需要涉及到一些栈的知识。这一节的流程图在这里