virtual-dom 梳理分析【diff 算法】

上一篇介绍了VD是怎么创建VD Tree的，以及怎么根据VD Tree生成真实的DOM。上一章链接。

这一章主要是来梳理当我们的VD有变化的时候，它的diff算法是怎么去比较生成一个diff对象的。

Diff 算法是 VD 中最核心的一个算法。输入初始状态A（VNode）和最终状态B（VNode），通过计算，就可以得到描述从A到B状态的对象（VPatch），然后再根据这个描述对象，我们就能知道哪些节点是需要新增的，哪些节点是需要删除的，哪些节点只是属性变化了需要更新的等等这些。

根据github.com/Matt-Esch/v…的源码来看，Diff 算法主要有三种情况，分别是：

VNode diff，当前 VD 节点的比较。
props diff，当前节点的属性比较。
child diff，对当前节点的子节点进行比较，其实就是递归调用 1和2 步骤。

当前节点的比较

以下文章，将前一个状态称为A，变更后的状态称为B。

/**
 * Entry point of the diff: builds the patch object that describes how to get
 * from tree `a` to tree `b`.
 *
 * @param a - previous state (root of the old tree)
 * @param b - new state
 * @returns the patch object; its `a` property holds the old tree, and the
 *          remaining entries are filled in by walk(), starting at index 0
 */
function diff(a, b) {
    // Keep a reference to the old tree on the patch itself.
    var patch = { a: a }
    walk(a, b, patch, 0)
    return patch
}

整个diff算法的入口就是上面列的函数，首先声明了一个patch对象，默认将前一个VD Tree存起来，整个 patch对象最终会被传入到walk函数，进行加工最终得到VPatch对象（描述各个节点的变化）。

/**
 * Compares old node `a` against new node `b` and records the resulting
 * patch(es) under patch[index].
 *
 * @param a - old node
 * @param b - new node (null/undefined means the node was removed)
 * @param patch - patch object being accumulated; written to in place
 * @param index - slot in `patch` that belongs to this node
 */
function walk(a, b, patch, index) {
    // Same reference: nothing changed, nothing to record.
    if (a === b) {
        return
    }
    
    // This function is called recursively while checking child nodes, so
    // first pick up whatever has already been stored for this index.
    var apply = patch[index]
    // Set when `a` is replaced by a node of a different kind; triggers the
    // clearState(a, ...) call at the end of the function.
    var applyClear = false

    if (isThunk(a) || isThunk(b)) {
        // Either side is a thunk: delegate to the thunk-specific comparison.
        thunks(a, b, patch, index)
    } else if (b == null) {
        // `b` is null or undefined: the node is removed.
        // If a is a widget we will add a remove patch for it
        // Otherwise any child widgets/hooks must be destroyed.
        // This prevents adding two remove patches for a widget.
        if (!isWidget(a)) {
            clearState(a, patch, index)
            // Re-read the slot after clearState (presumably it may have
            // written to patch[index] - confirm against clearState).
            apply = patch[index]
        }
        apply = appendPatch(apply, new VPatch(VPatch.REMOVE, a, b))
    } else if (isVNode(b)) {
        if (isVNode(a)) {
            if (a.tagName === b.tagName &&
                a.namespace === b.namespace &&
                a.key === b.key) {
                // Same tagName, namespace and key: diff the properties,
                // then descend into the children.
                var propsPatch = diffProps(a.properties, b.properties)
                if (propsPatch) {
                    apply = appendPatch(apply,
                        new VPatch(VPatch.PROPS, a, propsPatch))
                }
                apply = diffChildren(a, b, patch, apply, index)
            } else {
                // One of the three differs: record a VNODE patch carrying
                // both nodes and flag `a` for clearState.
                apply = appendPatch(apply, new VPatch(VPatch.VNODE, a, b))
                applyClear = true
            }
        } else {
            // `a` is not a VNode at all: same VNODE patch and clearState flag.
            apply = appendPatch(apply, new VPatch(VPatch.VNODE, a, b))
            applyClear = true
        }
    } else if (isVText(b)) {
        if (!isVText(a)) {
            // `a` is not a text node: VTEXT patch, and `a` is cleared.
            apply = appendPatch(apply, new VPatch(VPatch.VTEXT, a, b))
            applyClear = true
        } else if (a.text !== b.text) {
            // Both are text nodes but the text differs.
            apply = appendPatch(apply, new VPatch(VPatch.VTEXT, a, b))
        }
    } else if (isWidget(b)) {
        // A WIDGET patch is always appended when `b` is a widget; `a` is only
        // flagged for clearState when it was not a widget itself.
        if (!isWidget(a)) {
            applyClear = true
        }
        apply = appendPatch(apply, new VPatch(VPatch.WIDGET, a, b))
    }

    // Store the accumulated patch(es) for this node, if any.
    if (apply) {
        patch[index] = apply
    }

    // Runs after patch[index] has been assigned.
    if (applyClear) {
        clearState(a, patch, index)
    }
}

代码还算比较长，但是逻辑还是比较清楚，下面来对每个分支进行分析。

步骤分析

先比较A和B如果是全等，那就是节点一点都没有变更，直接结束。

if (a === b) {
    return
}

如果A或者B被判断为Thunk则使用Thunk的比较方式。这里最终还是会调用diff函数，回到节点的比较，中间会多几层判断。

if (isThunk(a) || isThunk(b)) {
   thunks(a, b, patch, index)
}

如果B为空，就会生成一个标为REMOVE的VPatch对象。

else if (b == null) {
    // If a is a widget we will add a remove patch for it
    // Otherwise any child widgets/hooks must be destroyed.
    // This prevents adding two remove patches for a widget.
    if (!isWidget(a)) {
        clearState(a, patch, index)
        apply = patch[index]
    }
    apply = appendPatch(apply, new VPatch(VPatch.REMOVE, a, b))
}

如果B是一个VD对象，接下来就开始进行比较：
1. 如果 A 也是一个VD对象，通过比较tagName、namespace和key。
2. 如果这三个都相同，则进一步去比较Props，得到props的VPatch对象（这个放到Props diff分析），比较完props之后，继续比较child子节点（放到后面讲）
3. 三个值其中有一个不同，将当前节点标记为VNODE，也就是表示该节点标记为替换。

else if (isVNode(b)) {
    if (isVNode(a)) {
         if (a.tagName === b.tagName &&
             a.namespace === b.namespace &&
             a.key === b.key) {
             var propsPatch = diffProps(a.properties, b.properties)
             if (propsPatch) {
                 apply = appendPatch(apply,
                        new VPatch(VPatch.PROPS, a, propsPatch))
             }
             apply = diffChildren(a, b, patch, apply, index)
         } else {
             apply = appendPatch(apply, new VPatch(VPatch.VNODE, a, b))
             applyClear = true
         }
     } else {
         apply = appendPatch(apply, new VPatch(VPatch.VNODE, a, b))
         applyClear = true
     }
}

如果B是文本节点，A不是文本节点，那就标记当前节点为VTEXT也就是将当前节点替换成文本节点。如果A也是文本节点，那就比较A和B节点的值，如果不同则标记替换文本节点。

else if (isVText(b)) {
     if (!isVText(a)) {
        apply = appendPatch(apply, new VPatch(VPatch.VTEXT, a, b))
        applyClear = true
     } else if (a.text !== b.text) {
        apply = appendPatch(apply, new VPatch(VPatch.VTEXT, a, b))
    }
}

6.如果B节点是Widget，就将当前节点替换成Widget元素，标记为WIDGET。

else if (isWidget(b)) {
    if (!isWidget(a)) {
        applyClear = true
    }
    apply = appendPatch(apply, new VPatch(VPatch.WIDGET, a, b))
}

将上面判断得出的结果赋值到patch[index] 中，apply就是对当前节点变动的描述对象了。

if (apply) {
    patch[index] = apply
}

上面7个步骤就是VNode的diff算法，可以看到，在B为VNode的情况下，还会去继续比较B和A的属性和子元素。

props 的比较

props的diff算法，文件地址，可以看到，整个函数主要由两个for in循环组成：先用for in循环遍历A的属性，再遍历B的属性找出新增的Key。

/**
 * Computes the difference between two property objects.
 *
 * @param a - old properties
 * @param b - new properties
 * @returns an object containing only the keys that changed (undefined for
 *          keys missing from `b`, a nested diff object for nested changes),
 *          or undefined when no difference was found
 */
function diffProps(a, b) {
    // Created lazily, so that "no changes" is reported as undefined.
    var diff

    for (var aKey in a) {
        // Key no longer present in `b`: mark it with undefined.
        if (!(aKey in b)) {
            diff = diff || {}
            diff[aKey] = undefined
        }

        var aValue = a[aKey]
        var bValue = b[aKey]

        if (aValue === bValue) {
            // Unchanged value.
            continue
        } else if (isObject(aValue) && isObject(bValue)) {
            if (getPrototype(bValue) !== getPrototype(aValue)) {
                // Different prototypes: take the new value as a whole.
                diff = diff || {}
                diff[aKey] = bValue
            } else if (isHook(bValue)) {
                // New value is a hook: take it as a whole.
                 diff = diff || {}
                 diff[aKey] = bValue
            } else {
                // Same prototype and not a hook: compare recursively and
                // record the nested diff only when it is non-empty.
                var objectDiff = diffProps(aValue, bValue)
                if (objectDiff) {
                    diff = diff || {}
                    diff[aKey] = objectDiff
                }
            }
        } else {
            // At least one side is not an object and the values differ.
            diff = diff || {}
            diff[aKey] = bValue
        }
    }

    // Keys that exist only in `b` are additions.
    for (var bKey in b) {
        if (!(bKey in a)) {
            diff = diff || {}
            diff[bKey] = b[bKey]
        }
    }

    return diff
}

如果A元素里面的属性在B元素中已经不存在了，则将diff[aKey]置为undefined，用来标记为删除。

if (!(aKey in b)) {
    diff = diff || {}
    diff[aKey] = undefined
}

获取A和B里面相同Key的值，也就是当前遍历的Key对应的值。

var aValue = a[aKey]
var bValue = b[aKey]

如果值是相等的，就直接遍历下一个Key。

if (aValue === bValue) {
    continue
}

如果A和B这两个属性都是对象，则继续往下比较。
1. 如果两个对象的原型不相同，则记录diff[aKey] = bValue。
2. 如果B的属性是Hook，则记录diff[aKey] = bValue。
3. 递归比较A和B的当前属性，这两个对象，得到的objectDiff记录到diff[aKey] = objectDiff。通过这点可以看到这个库的props的比较是深比较，会递归比较props的每一个Key。

else if (isObject(aValue) && isObject(bValue)) {
        if (getPrototype(bValue) !== getPrototype(aValue)) {
            diff = diff || {}
            diff[aKey] = bValue
        } else if (isHook(bValue)) {
             diff = diff || {}
             diff[aKey] = bValue
        } else {
            var objectDiff = diffProps(aValue, bValue)
            if (objectDiff) {
                diff = diff || {}
                diff[aKey] = objectDiff
            }
       }
}

如果当前两个值不是对象且不相等，则标记diff[aKey] = bValue。

else {
    diff = diff || {}
    diff[aKey] = bValue
}

遍历B中有但是A中没有的Key，也就是新增的Key，标记为diff[bKey] = b[bKey]。

for (var bKey in b) {
    if (!(bKey in a)) {
        diff = diff || {}
        diff[bKey] = b[bKey]
    }
}

最后函数返回当前的diff对象。

child 的比较

之前说过，child 的 diff 其实还是会递归调用节点的比较函数（也就是 walk 函数），下面我们来看看。

/**
 * Diffs the children of `a` against the children of `b`.
 *
 * @param a - old node whose `children` are compared
 * @param b - new node
 * @param patch - patch object being accumulated; child patches are written
 *                into it by walk()
 * @param apply - patches collected so far for node `a` itself
 * @param index - patch index of node `a`; incremented locally to derive the
 *                index of each child
 * @returns `apply`, extended with any INSERT / ORDER patches
 */
function diffChildren(a, b, patch, apply, index) {
    var aChildren = a.children
    // reorder() returns b's children rearranged to line up positionally with
    // a's children, plus an optional description of the moves.
    var orderedSet = reorder(aChildren, b.children)
    var bChildren = orderedSet.children

    var aLen = aChildren.length
    var bLen = bChildren.length
    // Iterate over the longer of the two lists.
    var len = aLen > bLen ? aLen : bLen

    for (var i = 0; i < len; i++) {
        var leftNode = aChildren[i]
        var rightNode = bChildren[i]
        // Advance to the patch index of this child.
        index += 1

        if (!leftNode) {
            // No old child at this position. The INSERT is appended to the
            // parent's patch list (`apply`), not stored under the child index.
            if (rightNode) {
                // Excess nodes in b need to be added
                apply = appendPatch(apply,
                    new VPatch(VPatch.INSERT, null, rightNode))
            }
        } else {
            // rightNode may be null/undefined here; walk() turns that case
            // into a REMOVE patch.
            walk(leftNode, rightNode, patch, index)
        }

        // Skip the indices taken by leftNode's subtree (presumably `count`
        // is the number of descendant nodes - confirm against VNode).
        if (isVNode(leftNode) && leftNode.count) {
            index += leftNode.count
        }
    }

    if (orderedSet.moves) {
        // Reorder nodes last
        apply = appendPatch(apply, new VPatch(
            VPatch.ORDER,
            a,
            orderedSet.moves
        ))
    }

    return apply
}

将A和B的child放在一起进行顺序调整，方便之后能更好的比较。

var aChildren = a.children
var orderedSet = reorder(aChildren, b.children)
var bChildren = orderedSet.children

获取两个元素子节点的最大长度。

var aLen = aChildren.length
var bLen = bChildren.length
var len = aLen > bLen ? aLen : bLen

开始循环遍历子节点。

for (var i = 0; i < len; i++) {
...
}

如果A节点的当前子节点是不存在的，但是B节点却有。标记为插入新节点。

if (!leftNode) {
    if (rightNode) {
        // Excess nodes in b need to be added
        apply = appendPatch(apply,
            new VPatch(VPatch.INSERT, null, rightNode))
    }
}

如果A节点的当前子节点是存在的（B对应位置的子节点可能为空，这种情况下walk函数会生成REMOVE），则递归调用walk函数操作。注意这里传入的index并不是子节点在children数组中的下标，而是一个递增的节点序号：每遍历一个子节点加1，并且会再加上leftNode.count来跳过该子节点的后代，这就是walk函数中index的来源了，主要是用来区分各个节点的。

else {
     walk(leftNode, rightNode, patch, index)
}

循环比较完节点后，来判断之前的排序算法，如果只是顺序换了一下，则标记为ORDER表示只是更换了顺序。

if (orderedSet.moves) {
    // Reorder nodes last
    apply = appendPatch(apply, new VPatch(
        VPatch.ORDER,
        a,
        orderedSet.moves
    ))
}

reorder 函数分析

这个函数就是上面第一步中，进行调整顺序的函数，里面会使用到我们经常看到React 中说同级节点需要添加的 key。

// List diff, naive left to right reordering
/**
 * Rearranges bChildren so that they line up positionally with aChildren
 * (keyed items are matched by key, un-keyed "free" items are matched in
 * order), then works out the remove/insert operations needed to get from
 * that arrangement to the real order of bChildren.
 *
 * keyIndex() and remove() are defined elsewhere. Judging by their use here,
 * keyIndex() returns `keys` (key -> index) and `free` (indices of items
 * without a key), and remove() takes the item out of `simulate` and returns
 * a record of the removal - confirm against their definitions.
 *
 * @param aChildren - old child list
 * @param bChildren - new child list
 * @returns an object with `children` and `moves`. `children` is bChildren
 *          itself when either list has no keyed items, otherwise the
 *          rearranged list (null marks an old child with no counterpart in
 *          b). `moves` is null or `{ removes, inserts }`.
 */
function reorder(aChildren, bChildren) {
    // O(M) time, O(M) memory
    var bChildIndex = keyIndex(bChildren)
    var bKeys = bChildIndex.keys
    var bFree = bChildIndex.free

    // Every item of b is free (no keys in b): nothing to reorder.
    if (bFree.length === bChildren.length) {
        return {
            children: bChildren,
            moves: null
        }
    }

    // O(N) time, O(N) memory
    var aChildIndex = keyIndex(aChildren)
    var aKeys = aChildIndex.keys
    var aFree = aChildIndex.free

    // Every item of a is free (no keys in a): nothing to reorder.
    if (aFree.length === aChildren.length) {
        return {
            children: bChildren,
            moves: null
        }
    }

    // O(MAX(N, M)) memory
    var newChildren = []

    // Position of the next unused entry in bFree.
    var freeIndex = 0
    var freeCount = bFree.length
    // Number of null placeholders pushed for items of a that have no match.
    var deletedItems = 0

    // Iterate through a and match a node in b
    // O(N) time,
    for (var i = 0 ; i < aChildren.length; i++) {
        var aItem = aChildren[i]
        // NOTE(review): itemIndex is assigned in every branch below but is
        // only read as an index into bChildren in the two "match" branches.
        var itemIndex

        // Truthiness test: a falsy key is handled as "no key".
        if (aItem.key) {
            if (bKeys.hasOwnProperty(aItem.key)) {
                // Match up the old keys
                itemIndex = bKeys[aItem.key]
                newChildren.push(bChildren[itemIndex])

            } else {
                // Remove old keyed items
                itemIndex = i - deletedItems++
                newChildren.push(null)
            }
        } else {
            // Match the item in a with the next free item in b
            if (freeIndex < freeCount) {
                itemIndex = bFree[freeIndex++]
                newChildren.push(bChildren[itemIndex])
            } else {
                // There are no free items in b to match with
                // the free items in a, so the extra free nodes
                // are deleted.
                itemIndex = i - deletedItems++
                newChildren.push(null)
            }
        }
    }

    // Index in bChildren of the first free item that was not consumed above,
    // or bChildren.length when all free items were used.
    var lastFreeIndex = freeIndex >= bFree.length ?
        bChildren.length :
        bFree[freeIndex]

    // Iterate through b and append any new keys
    // O(M) time
    for (var j = 0; j < bChildren.length; j++) {
        var newItem = bChildren[j]

        if (newItem.key) {
            if (!aKeys.hasOwnProperty(newItem.key)) {
                // Add any new keyed items
                // We are adding new items to the end and then sorting them
                // in place. In future we should insert new items in place.
                newChildren.push(newItem)
            }
        } else if (j >= lastFreeIndex) {
            // Add any leftover non-keyed items
            newChildren.push(newItem)
        }
    }

    // Working copy of newChildren; it is modified while the moves are
    // recorded, and newChildren itself is what gets returned.
    var simulate = newChildren.slice()
    var simulateIndex = 0
    var removes = []
    var inserts = []
    var simulateItem

    // k is only advanced inside the body, when the wanted item at position k
    // has been accounted for.
    for (var k = 0; k < bChildren.length;) {
        var wantedItem = bChildren[k]
        simulateItem = simulate[simulateIndex]

        // remove items
        while (simulateItem === null && simulate.length) {
            removes.push(remove(simulate, simulateIndex, null))
            simulateItem = simulate[simulateIndex]
        }

        if (!simulateItem || simulateItem.key !== wantedItem.key) {
            // if we need a key in this position...
            if (wantedItem.key) {
                if (simulateItem && simulateItem.key) {
                    // if an insert doesn't put this key in place, it needs to move
                    if (bKeys[simulateItem.key] !== k + 1) {
                        removes.push(remove(simulate, simulateIndex, simulateItem.key))
                        simulateItem = simulate[simulateIndex]
                        // if the remove didn't put the wanted item in place, we need to insert it
                        if (!simulateItem || simulateItem.key !== wantedItem.key) {
                            inserts.push({key: wantedItem.key, to: k})
                        }
                        // items are matching, so skip ahead
                        else {
                            simulateIndex++
                        }
                    }
                    else {
                        inserts.push({key: wantedItem.key, to: k})
                    }
                }
                else {
                    inserts.push({key: wantedItem.key, to: k})
                }
                k++
            }
            // a key in simulate has no matching wanted key, remove it
            else if (simulateItem && simulateItem.key) {
                removes.push(remove(simulate, simulateIndex, simulateItem.key))
            }
        }
        else {
            // Keys match at this position: advance both cursors.
            simulateIndex++
            k++
        }
    }

    // remove all the remaining nodes from simulate
    while(simulateIndex < simulate.length) {
        simulateItem = simulate[simulateIndex]
        removes.push(remove(simulate, simulateIndex, simulateItem && simulateItem.key))
    }

    // If the only moves we have are deletes then we can just
    // let the delete patch remove these items.
    if (removes.length === deletedItems && !inserts.length) {
        return {
            children: newChildren,
            moves: null
        }
    }

    return {
        children: newChildren,
        moves: {
            removes: removes,
            inserts: inserts
        }
    }
}

这个函数有点长，还是一步一步来梳理。

根据keyIndex函数获取bChildren设置了key和没有设置key的元素下标集合。如果都没有设置key就直接将bChildren返回。

var bChildIndex = keyIndex(bChildren)
var bKeys = bChildIndex.keys
var bFree = bChildIndex.free

if (bFree.length === bChildren.length) {
    return {
        children: bChildren,
        moves: null
    }
}

与第一步骤一样，根据keyIndex函数获取aChildren设置了key和没有设置key的元素下标集合。如果都没有设置key就直接将bChildren返回。

var aChildIndex = keyIndex(aChildren)
var aKeys = aChildIndex.keys
var aFree = aChildIndex.free

if (aFree.length === aChildren.length) {
    return {
        children: bChildren,
        moves: null
    }
}

遍历aChildren，分为两种情况。
1. aItem 存在key，则根据key去bChildren的keys集合中找，如果找的到，则将bChildren对应的节点 push 到 newChildren 中。找不到则 push 一个 null 到 newChildren。
2. aItem 不存在key，则去bChildren中没有keys的集合中找第一个元素，将该元素 push 到 newChildren 中，如果已经找完了或者为空，则 push 一个 null 到 newChildren。

if (aItem.key) {
  if (bKeys.hasOwnProperty(aItem.key)) {
    // Match up the old keys
    itemIndex = bKeys[aItem.key]
    newChildren.push(bChildren[itemIndex])

  } else {
    // Remove old keyed items
    itemIndex = i - deletedItems++
    newChildren.push(null)
  }
} else {
  // Match the item in a with the next free item in b
  if (freeIndex < freeCount) {
    itemIndex = bFree[freeIndex++]
    newChildren.push(bChildren[itemIndex])
  } else {
    // There are no free items in b to match with
    // the free items in a, so the extra free nodes
    // are deleted.
    itemIndex = i - deletedItems++
    newChildren.push(null)
  }
}

遍历 bChildren ，将对应在 aChildren 中没有的 key 对应的元素或者还没有被添加到 newChildren 的剩下元素 push 到 newChildren 。

for (var j = 0; j < bChildren.length; j++) {
  var newItem = bChildren[j]
  if (newItem.key) {
    if (!aKeys.hasOwnProperty(newItem.key)) {
      // Add any new keyed items
      // We are adding new items to the end and then sorting them
      // in place. In future we should insert new items in place.
      newChildren.push(newItem)
    }
  } else if (j >= lastFreeIndex) {
    // Add any leftover non-keyed items
    newChildren.push(newItem)
  }
}

经过3和4步骤，就应该得到 newChildren 数组了，最后将bChildren和新数组逐个比较，得到从新数组转换到bChildren数组的move操作patch（即remove+insert）。

for (var k = 0; k < bChildren.length;) {
...
  if (!simulateItem || simulateItem.key !== wantedItem.key) {
    ... 
      ...
      removes.push(remove(simulate, simulateIndex, simulateItem.key))
      simulateItem = simulate[simulateIndex]
      ...
   ...
  }
}

最后返回调整后的 children 数组和相关标记，新数组和move操作列表。这就可以看到 diffChildren 里面有 if (orderedSet.moves) 判断，优化比较避免新增元素。更多可以看React 源码剖析系列－不可思议的 react diff。

小结

通过上面三部分的分析，发现 diff 算法就是按照 DOM 的描述来进行比较的，在比较 children 的时候会利用 key 来优化标记，避免重复创建新 DOM。通过递归来比较 VD 得到 VPatch 对象。

上一章说了 VD 树的生成和 DOM 的创建，结合这章就可以知道当数据和页面变更后，VD 是怎么去比较的，整体来说，梳理了一遍还是明白了不少东西。

下一章就来梳理 patch 的过程了。

参考文章链接：如何实现一个虚拟 DOM——virtual-dom 源码分析 React 源码剖析系列－不可思议的 react diff

原文链接