每日源码分析-Lodash(uniq.js)

1,888 阅读3分钟

本系列使用lodash 4.17.4

前言

引用internal文件下的baseUniq.js

正文

import baseUniq from './.internal/baseUniq.js'

/**
 * Builds a new array in which every value of `array` appears only once.
 * Values are compared with
 * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero),
 * the earliest occurrence of each value is the one that survives, and the
 * surviving values keep the relative order they had in `array`.
 *
 * A `null`/`undefined` argument, or one whose `length` is falsy, produces an
 * empty array without calling `baseUniq`.
 *
 * @since 0.1.0
 * @category Array
 * @param {Array} array The array to inspect.
 * @returns {Array} Returns the new duplicate free array.
 * @see uniqBy, uniqWith
 * @example
 *
 * uniq([2, 1, 2])
 * // => [2, 1]
 */
function uniq(array) {
  const hasNothingToScan = array == null || !array.length
  if (hasNothingToScan) {
    return []
  }
  // All of the actual de-duplication work lives in the shared base helper.
  return baseUniq(array)
}

export default uniq

可以看到完完全全是调用baseUniq函数,如果这样就完的话感觉今天就比较水,所以我们来看看这个baseUniq函数

import SetCache from './SetCache.js'
import arrayIncludes from './arrayIncludes.js'
import arrayIncludesWith from './arrayIncludesWith.js'
import cacheHas from './cacheHas.js'
import createSet from './createSet.js'
import setToArray from './setToArray.js'

/** Array length at or above which the large-array code path is taken. */
const LARGE_ARRAY_SIZE = 200

/**
 * Shared duplicate-removal routine (the base implementation of `uniqBy`).
 *
 * One of three lookup strategies is chosen up front:
 *  - a `comparator` was given: every candidate is checked against the results
 *    collected so far through `arrayIncludesWith`;
 *  - no comparator and `array.length >= LARGE_ARRAY_SIZE`: without an
 *    iteratee, `createSet` is tried first and, if it yields a set, that set is
 *    converted with `setToArray` and returned immediately; otherwise seen
 *    values are tracked in a `SetCache` and queried with `cacheHas`;
 *  - otherwise: seen values are scanned inline with `===`, and values that are
 *    not equal to themselves (`NaN`) are looked up through `arrayIncludes`.
 *
 * @private
 * @param {Array} array The array to inspect.
 * @param {Function} [iteratee] Invoked with each element; its return value is
 *  what duplicates are judged by.
 * @param {Function} [comparator] Forwarded to the lookup helper to decide
 *  whether two values are equal.
 * @returns {Array} Returns the new duplicate free array.
 */
function baseUniq(array, iteratee, comparator) {
  const total = array.length
  const result = []

  let lookup = arrayIncludes
  let canScanInline = true
  // `seen` holds the values duplicates are compared against. It aliases
  // `result` unless computed values have to be tracked separately.
  let seen = result

  if (comparator) {
    canScanInline = false
    lookup = arrayIncludesWith
  } else if (total >= LARGE_ARRAY_SIZE) {
    if (!iteratee) {
      const set = createSet(array)
      if (set) {
        return setToArray(set)
      }
    }
    canScanInline = false
    lookup = cacheHas
    seen = new SetCache
  } else if (iteratee) {
    seen = []
  }

  for (let position = 0; position < total; position++) {
    const element = array[position]
    const computed = iteratee ? iteratee(element) : element
    // Without a comparator, any element that `=== 0` (so also `-0`) is stored as `0`.
    const value = (!comparator && element === 0) ? 0 : element

    let isDuplicate = false
    if (canScanInline && computed === computed) {
      // Walk the seen values from newest to oldest, stopping at the first hit.
      for (let cursor = seen.length - 1; cursor >= 0 && !isDuplicate; cursor--) {
        isDuplicate = seen[cursor] === computed
      }
    } else {
      isDuplicate = Boolean(lookup(seen, computed, comparator))
    }

    if (isDuplicate) {
      continue
    }
    // When `seen` is a separate container it has to be kept in step with `result`.
    if (seen !== result) {
      seen.push(computed)
    }
    result.push(value)
  }
  return result
}
export default baseUniq

由于这个函数还考虑了对数据的'处理器'和'比较器',如果有兴趣的话可以再仔细看看整体的实现,今天我们就只看uniq函数调用时的情况(既没有'处理器'也没有'比较器')。我做了个简化,代码如下:

import SetCache from './SetCache.js'
import arrayIncludes from './arrayIncludes.js'
import arrayIncludesWith from './arrayIncludesWith.js'
import cacheHas from './cacheHas.js'
import createSet from './createSet.js'
import setToArray from './setToArray.js'

/** Used as the size to enable large array optimizations. */
const LARGE_ARRAY_SIZE = 200

/**
 * Simplified `baseUniq` covering only the plain `uniq` case (no iteratee and
 * no comparator).
 *
 * - For arrays of `LARGE_ARRAY_SIZE` or more elements, `createSet` is tried
 *   first; if it yields a set, the result is `setToArray(set)`. Otherwise
 *   seen values are tracked in a `SetCache` and queried through `cacheHas`.
 * - For shorter arrays the result array doubles as the "seen" list and is
 *   scanned with `===`.
 *
 * @private
 * @param {Array} array The array to inspect.
 * @returns {Array} Returns the new duplicate free array.
 */
function baseUniq(array) {
  let index = -1
  let includes = arrayIncludes
  let isCommon = true

  const { length } = array
  const result = []
  let seen = result

  if (length >= LARGE_ARRAY_SIZE) {
    const set = createSet(array)
    if (set) {
      return setToArray(set)
    }
    isCommon = false
    includes = cacheHas
    seen = new SetCache
  }
  outer:
  while (++index < length) {
    let value = array[index]

    // Any value that `=== 0` (so also `-0`) is stored as `0`.
    value = value !== 0 ? value : 0
    // `NaN !== NaN`, so the `===` scan below can never find an earlier NaN.
    // Such values are routed through `includes` instead, which keeps the
    // `computed === computed` guard of the full implementation.
    // NOTE(review): this relies on `arrayIncludes` matching NaN - confirm.
    if (isCommon && value === value) {
      let seenIndex = seen.length
      while (seenIndex--) {
        if (seen[seenIndex] === value) {
          continue outer
        }
      }
      result.push(value)
    }
    else if (!includes(seen, value)) {
      // Only a separate cache needs its own push; otherwise `seen` is `result`.
      if (seen !== result) {
        seen.push(value)
      }
      result.push(value)
    }
  }
  return result
}

这样一来就很明确了。如果数组长度大于等于200(LARGE_ARRAY_SIZE),会先尝试用createSet直接生成Set,并通过setToArray转回数组后返回;只有当createSet没有返回可用的Set时,才使用cache那套(SetCache加cacheHas)来判断cache里有没有对应的数据,没有就添加进cache和结果数组。如果是长度小于200的普通数组,那么就和我们平时写的差不多了:遍历目标数组的每个值,并遍历结果数组判断该值是否已经存在,不存在就存入结果数组。

使用方式

  // The second 2 is a duplicate and is dropped; first occurrences keep their order.
  _.uniq([2, 1, 2])
  // => [2, 1]

使用场景

该函数作用是将一个数组去重。内部的相等判断采用SameValueZero(与===基本一致,只是NaN被视为与自身相等),所以只适用于基本类型的值或同一个引用的去重;像[[2],[2]]这样内容相同但引用不同的对象不会被去重。

  // `t` appears twice as the very same array reference, so the second `t` is
  // removed; the number 2 and the array [2] are different values and both stay.
  let t = [2]
  _.uniq([2, 1, 2, t, t])
  // => [2, 1, [2]]

结语

个人感觉如果只是简单地去重,可以直接使用ES6的Set达到目的:Array.from(new Set(yourArray))或者[...new Set(yourArray)]就可以了。如果要按内容(深度相等)去重,则需要递归比较元素的内容,例如使用_.uniqWith(yourArray, _.isEqual)。

补充:其实还可以使用正则来去重(只适用于元素是不含逗号的字符串的数组,并且结果是排序后的顺序,而不是原数组的顺序)。比如:

const str_arr = ["a", "b", "c", "a", "b", "c"]

/**
 * Removes duplicate strings from an array using regular expressions.
 *
 * The elements are sorted so equal strings sit next to each other, joined
 * with a double-comma separator, and runs of the same element are collapsed
 * by a back-reference pattern before the string is split apart again.
 *
 * Limitations of the approach:
 *  - the result is in sorted order, not in the order of first occurrence;
 *  - elements are converted to strings by `join`, so the result only ever
 *    contains strings;
 *  - elements must not contain a comma, because the comma is the separator.
 *
 * @param {string[]} arr The strings to de-duplicate. It is not modified.
 * @returns {string[]} A new sorted array without duplicates; an empty array
 *  when `arr` is empty, `null` or `undefined`.
 */
function unique(arr) {
    // "".split(",") yields [""], so empty input has to be answered up front.
    if (arr == null || arr.length === 0) {
        return []
    }
    // sort() works in place: sort a copy so the caller's array keeps its order.
    return [...arr].sort().join(",,").
    // Collapse "x,,x,,x" runs into a single "x", keeping the boundary commas.
    replace(/(,|^)([^,]+)(,,\2)+(,|$)/g, "$1$2$4").
    // Turn the remaining double-comma separators back into single commas.
    replace(/,,+/g, ",").
    replace(/,$/, "").
    split(",")
}
console.log(unique(str_arr)) // ["a","b","c"]