JS中的sort排序源码解读(一)

7,039 阅读4分钟
前些日子自己在对数组字符串排序的时候,习惯性的写成了
    // Sorts `arr` with Array.prototype.sort and returns the value of that call.
    // NOTE: the comparator below returns a boolean (item1 > item2) instead of a
    // negative / zero / positive number; this is the mistake the example is
    // meant to illustrate.
    function sortArr(arr){
        return arr.sort((item1, item2) => {
            return item1 > item2
        })
    }
    // Demo input used by the article.
    let arr = ['Apply', 'any', 'Amazon']
    sortArr(arr)
得到的并不是正确的排序结果,于是就纳闷了,因为以前自己对数组排序时一直是这样写的。后来才发现自己对sort的理解还没有入门,于是借着这个机会好好探究一下sort的来龙去脉。
参照MDN的解释,可以大致得出以下几个结论
  • 针对sort传入compareFunction的情况,会通过compareFunction的返回值进行排序。返回值有三种结果:-1、0、1(其实可以更宽泛地认为是负数、0、正数);
    • 对于数字排序可以直接通过item1 - item2的形式
    • 对于字符串排序,由于不能通过隐式转换为number来做减法运算,所以需要通过比较大小的方式return负数、0或正数
  • 针对sort不包括compareFunction的情况在sort源码中会自动生成一个默认的compareFunction(下文说明)
Array.sort旧版本源码解读
  • InnerArraySort方法(Array.sort的源码)
  • InsertionSort方法(插入排序,在待排序区间长度不超过10的时候执行插入排序算法)
  • GetThirdIndex方法(不同的基准元也会影响排序时的比较次数,通过GetThirdIndex获取较为合适的基准元作为快排的基准元)
  • QuickSort方法(快排实现:当区间长度大于1000时通过GetThirdIndex获得候选下标,否则直接取区间中间位置的下标;再与首、尾元素一起取三者的中位数作为基准元。这样选出的基准元通常比固定取下标0的元素作为基准元所需的比较次数更少)
源码解析:只描述主流程代码
/**
 * Main flow of the legacy Array.prototype.sort implementation (excerpt; parts
 * of the body are elided with `...`).
 *
 * Installs a default comparator when `comparefn` is not callable, moves holes
 * and undefined values out of the way, then quick-sorts the leading
 * `num_non_undefined` elements.
 *
 * @param array The array(-like) object to sort.
 * @param length Number of elements to consider; fewer than 2 returns `array` untouched.
 * @param comparefn Comparator whose sign (positive / zero / negative) decides the order;
 *     replaced by the default below when it is not callable.
 * @returns The same `array` object.
 */
function InnerArraySort(array, length, comparefn) {
  // Default comparator used when Array.sort() is called without a callable comparefn.
  if (!IS_CALLABLE(comparefn)) {
    comparefn = function (x, y) {
      if (x === y) return 0;
      if (%_IsSmi(x) && %_IsSmi(y)) {
        return %SmiLexicographicCompare(x, y);
      }
      // The default comparator converts both values with TO_STRING before
      // comparing, which produces surprising results for numbers.
      x = TO_STRING(x);
      y = TO_STRING(y);
      if (x == y) return 0;
      else return x < y ? -1 : 1;
    };
  }
  // Insertion sort over the half-open range a[from, to).
  var InsertionSort = function InsertionSort(a, from, to) {
    for (var i = from + 1; i < to; i++) {
      var element = a[i];
      // Shift every element that compares greater than `element` one slot right.
      for (var j = i - 1; j >= from; j--) {
        var tmp = a[j];
        var order = comparefn(tmp, element);
        if (order > 0) {
          a[j + 1] = tmp;
        } else {
          break;
        }
      }
      // j is now one position before the insertion point.
      a[j + 1] = element;
    }
  };

  var GetThirdIndex = function(a, from, to) {
    // Picks a suitable pivot candidate.
    // Samples a[] at a stride of 200..215, collects [index, value] pairs in
    // t_array, sorts them by value and returns the index of the middle sample.
    var t_array = new InternalArray(); 
    // Use both 'from' and 'to' to determine the pivot candidates.
    var increment = 200 + ((to - from) & 15);
    var j = 0;
    // Sampling starts at from + 1 and stops before to - 1.
    from += 1;
    to -= 1;
    for (var i = from; i < to; i += increment) {
      t_array[j] = [i, a[i]];
      j++;
    }
    t_array.sort(function(a, b) {
      return comparefn(a[1], b[1]);
    });
    var third_index = t_array[t_array.length >> 1][0];
    return third_index;
  }

  // Body elided in this excerpt.
  var QuickSort = function QuickSort(a, from, to) {
    ...
  };

  // Per the article: moves every undefined value to the end of the array,
  // filling the vacated slots with the following defined values, and returns
  // the index of the first undefined once done (body elided in this excerpt).
  var SafeRemoveArrayHoles = function SafeRemoveArrayHoles(obj) {
    // Copy defined elements from the end to fill in all holes and undefineds
    // in the beginning of the array.  Write undefineds and holes at the end
    // after loop is finished.
    ...
    return first_undefined;
  };

  if (length < 2) return array; // Fewer than two elements: nothing to sort.
  // The next two variables are not used in the portion of the code shown here.
  var is_array = IS_ARRAY(array);
  var max_prototype_element;

  // %RemoveArrayHoles returns -1 if fast removal is not supported.
  var num_non_undefined = %RemoveArrayHoles(array, length);

  if (num_non_undefined == -1) {
    // There were indexed accessors in the array.
    // Move array holes and undefineds to the end using a Javascript function
    // that is safe in the presence of accessors.
    num_non_undefined = SafeRemoveArrayHoles(array);
  }

  // Sort the range [0, num_non_undefined) of the original array; per the
  // article, everything from num_non_undefined to the last index is undefined.
  QuickSort(array, 0, num_non_undefined);
  ...
  return array;
}
QuickSort内容
  // Sorts the half-open range a[from, to) in place.
  // Relies on comparefn, InsertionSort and GetThirdIndex from the enclosing
  // scope. Recurses into the smaller partition and keeps looping on the
  // larger one.
  var QuickSort = function QuickSort(a, from, to) {
    // Basic quicksort implementation; not explained in detail by the article.
    var third_index = 0;
    while (true) {
      // Insertion sort is faster for short arrays.
      if (to - from <= 10) { // Ranges of at most 10 elements use insertion sort.
        InsertionSort(a, from, to);
        return;
      }
      if (to - from > 1000) {
        third_index = GetThirdIndex(a, from, to);
      } else {
        // Ranges of at most 1000 elements: use the middle index of the range.
        third_index = from + ((to - from) >> 1);
      }
      // Find a pivot as the median of first, last and middle element.
      var v0 = a[from];
      var v1 = a[to - 1];
      var v2 = a[third_index];
      var c01 = comparefn(v0, v1);
      // Only the sign of comparefn's result is inspected, so a comparator may
      // return any positive number, zero, or any negative number.
      if (c01 > 0) {
        // v1 < v0, so swap them.
        var tmp = v0;v0 = v1;v1 = tmp;
      } // v0 <= v1.
      var c02 = comparefn(v0, v2);
      if (c02 >= 0) {
        // v2 <= v0 <= v1.
        var tmp = v0;
        v0 = v2;v2 = v1;v1 = tmp;
      } else {
        // v0 <= v1 && v0 < v2
        var c12 = comparefn(v1, v2);
        if (c12 > 0) {
          // v0 <= v2 < v1
          var tmp = v1;v1 = v2;v2 = tmp;
        }
      }
      // v0 <= v1 <= v2
      a[from] = v0;
      a[to - 1] = v2;
      var pivot = v1;
      var low_end = from + 1;   // Upper bound of elements lower than pivot.
      var high_start = to - 1;  // Lower bound of elements greater than pivot.
      // Park the pivot at low_end; the element that was there moves to third_index.
      a[third_index] = a[low_end];
      a[low_end] = pivot;

      // From low_end to i are elements equal to pivot.
      // From i to high_start are elements that haven't been compared yet.
      for (var i = low_end + 1; i < high_start; i++) {
        var element = a[i];
        var order = comparefn(element, pivot);
        if (order < 0) {
          // Smaller than pivot: swap into the "lower" region and grow it.
          a[i] = a[low_end];
          a[low_end] = element;
          low_end++;
        } else if (order > 0) {
          // Greater than pivot: scan down from the top for an element that is
          // not greater than the pivot, then swap it with `element`.
          do {
            high_start--;
            if (high_start == i) break;
            var top_elem = a[high_start];
            order = comparefn(top_elem, pivot);
          } while (order > 0);
          a[i] = a[high_start];
          a[high_start] = element;
          if (order < 0) {
            // The element swapped in is smaller than the pivot; move it into
            // the "lower" region as well.
            element = a[i];
            a[i] = a[low_end];
            a[low_end] = element;
            low_end++;
          }
        }
      }
      // Recurse into the smaller side and continue the loop on the larger one.
      if (to - high_start < low_end - from) {
        QuickSort(a, high_start, to);
        to = low_end;
      } else {
        QuickSort(a, from, low_end);
        from = high_start;
      }
    }
  };

1.由于默认的compareFunction会对值进行TO_STRING的转换,数字会按照字符串(字典序)的比较规则进行比较,进而出现问题,例如[12,0,23,3].sort()的结果是[0,12,23,3]。所以在使用Array.sort方法对数字排序时,建议显式传入compareFunction作为比较函数

Array.sort新版本源码地址

新的sort实现与旧的sort实现在思路上有一定的差异,接下来一篇文章会对新的实现进行解释,如有不足还请大家多多指教