Swift-集合类型协议

197 阅读9分钟

日常开发中使用最多的应该就是集合类型了,本文通过查看一些源码来进一步理解下集合类型协议。

协议继承关系

image.png

  • Sequence 提供迭代能力。允许你创建一个迭代器,但是不保证序列只能被单次遍历还是支持多次遍历。

    public protocol Sequence<Element> {
    
        associatedtype Element where Self.Element == Self.Iterator.Element
    
        associatedtype Iterator : IteratorProtocol
    
        func makeIterator() -> Self.Iterator
    
            //...
    }
    
  • IteratorProtocol 每次提供序列中的一个值。IteratorProtocol 协议与Sequence 协议是紧密相关的,当你使用for-in 遍历集合类型时,内部使用的是Sequence/Collection 的iterator。

    /// Supplies the values of a sequence one at a time.
    public protocol IteratorProtocol<Element> {
    
        associatedtype Element
    
        /// The return type is optional: the next element is returned if there is one, otherwise `nil`.
        mutating func next() -> Self.Element?
    }
    
  • Collection 继承于Sequence,允许多次遍历。可以通过索引访问其中的元素,还提供了集合切片的能力。

    public protocol Collection<Element> : Sequence {
    
        subscript(position: Self.Index) -> Self.Element { get }
    
        subscript(bounds: Range<Self.Index>) -> Self.SubSequence { get }
    }
    
  • MutableCollection 继承于Collection,提供通过下标读写的能力。但是不能改变集合本身的长度,比如添加、删除操作是不允许的。

    public protocol MutableCollection<Element> : Collection where Self.SubSequence : MutableCollection {
    
        // Redeclares the Collection subscripts, adding a setter to each
        override subscript(position: Self.Index) -> Self.Element { get set }
    
        override subscript(bounds: Range<Self.Index>) -> Self.SubSequence { get set }
    }
    
  • RangeReplaceableCollection 提供使用一个集合替换当前集合任意子区间的能力。你需要实现init()和replaceSubrange(_:with:) 方法,其他的方法都是通过调用replaceSubrange(_:with:) 实现的。你也可以自行实现协议中的其他方法来替换默认实现。

    public protocol RangeReplaceableCollection<Element> : Collection where Self.SubSequence : RangeReplaceableCollection {
    
        init()
    
        mutating func replaceSubrange<C>(_ subrange: Range<Self.Index>, with newElements: C) where C : Collection, Self.Element == C.Element
    
        mutating func append(_ newElement: Self.Element)
    
        mutating func insert(_ newElement: Self.Element, at i: Self.Index)
    
        mutating func remove(at i: Self.Index) -> Self.Element
    
        //...
    }
    
  • BidirectionalCollection 提供逆向遍历的能力。

    public protocol BidirectionalCollection<Element> : Collection where Self.Indices : BidirectionalCollection, Self.SubSequence : BidirectionalCollection {
    
        func index(before i: Self.Index) -> Self.Index
    
        //...
    }
    
  • RandomAccessCollection 提供了高效的索引计算能力。要求把索引移动任意距离和计算索引间距离的时间复杂度为O(1)。

    public protocol RandomAccessCollection<Element> : BidirectionalCollection where Self.Indices : RandomAccessCollection, Self.SubSequence : RandomAccessCollection {
    
        func index(_ i: Self.Index, offsetBy distance: Int) -> Self.Index
    
        func distance(from start: Self.Index, to end: Self.Index) -> Int
    
        //...
    }
    
  • LazySequenceProtocol 只有在访问序列中的元素时才开始计算这个元素的具体值。可以避免不必要的内存分配和计算。

    public protocol LazySequenceProtocol : Sequence {
    
        associatedtype Elements : Sequence = Self where Self.Element == Self.Elements.Element
    
        // You do not need to implement this computed property: an extension provides a default implementation that simply returns self.
        // When `.lazy` is used, the lazy property wraps `elements` in a LazySequence.
        var elements: Self.Elements { get }
    }
    
  • LazyCollectionProtocol 和LazySequenceProtocol 类似。

    public protocol LazyCollectionProtocol : Collection, LazySequenceProtocol where Self.Elements : Collection {
    }
    
    extension LazyCollectionProtocol {
    
        @inlinable public var lazy: LazyCollection<Self.Elements> { get }
    }
    
    extension LazyCollectionProtocol where Self.Elements : LazyCollectionProtocol {
    
        @inlinable public var lazy: Self.Elements { get }
    }
    
    // Look closely: LazyCollection is only a typealias of LazySequence whose base type is constrained to Collection
    public typealias LazyCollection<T> = LazySequence<T> where T : Collection
    

序列

  • Sequence 协议要求非常简单,返回一个迭代器。func makeIterator() -> Self.Iterator

迭代器

  • IteratorProtocol 协议的要求也非常简单,返回序列中的下一个元素,序列被耗尽时返回nil。Sequence 中的associatedtype Element where Self.Element == Self.Iterator.Element 约束保证了序列和迭代器的元素类型是一致的。

  • 我们平时不需要关心迭代器或者直接使用迭代器,通常我们遍历序列使用的是for-in,本质上来说for-in 是下面代码的简写。

    // Obtain an iterator from the sequence once, before the loop starts
    var iterator = someSequence.makeIterator()
    // Keep pulling elements until next() returns nil
    while let element = iterator.next() {
            doSomething(with: element)
    }
    
  • 对于大部分迭代器来说都具有值语义。但是也有例外AnyIterator,它将某个复杂类型的迭代器进行包装实现类型擦除。可以看到_AnyIteratorBoxBase 是一个Class,所以AnyIterator 具有引用语义。

    /// Type-erasing iterator wrapper: the wrapped iterator is stored inside a box object.
    public struct AnyIterator<Element> {
    
      // The box type is declared as a class, so copies of this struct refer to the same box
      internal let _box: _AnyIteratorBoxBase<Element>
    
      /// Wraps `base`, whose element type must equal `Element`.
      public init<I: IteratorProtocol>(_ base: I) where I.Element == Element {
        self._box = _IteratorBox(base)
     }
    
     //...
    }
    
    internal class _AnyIteratorBoxBase<Element>: IteratorProtocol { }
    

集合类型

集合是有限的且可以被多次遍历,集合中的元素可以通过下标索引的方式访问。下标索引通常是整数,但是也有例外比如字符串的下标索引。实现Collection 协议时,应当选取适合的索引类型来表达元素在集合中的位置。

自定义集合类型

我们先来实现一个简单的数据结构-队列,然后让这个队列实现Collection 协议。

/// A first-in, first-out queue backed by two arrays.
///
/// New elements are appended to `right`. Elements are removed from the end of
/// `left`, which holds the front of the queue in reversed order.
struct Queue<E> {
    /// Front of the queue, stored reversed so the next element to dequeue is last.
    private var left: [E] = []
    /// Back of the queue, in insertion order.
    private var right: [E] = []
    
    /// Adds `element` to the back of the queue.
    mutating func enqueue(_ element: E) {
        right.append(element)
    }
    
    /// Removes and returns the element at the front of the queue.
    ///
    /// - Returns: The oldest enqueued element, or `nil` if the queue is empty.
    mutating func dequeue() -> E? {
        if left.isEmpty {
            left = right.reversed()
            // The elements now live in `left`; clear `right` so they are not
            // counted or dequeued a second time.
            right.removeAll()
        }
        return left.popLast()
    }
}

通过查看Collection 协议,我们可以看到协议中有很多的关联类型、属性、方法,那我们需要把这些全部都实现吗?当然不是。我们可以继续观察,有一部分关联类型有了默认值,还有一部分在Collection 扩展中提供了默认实现。那我们应该怎么确定至少应该实现哪些方法呢?查找对比默认实现然后把没有实现的实现了,太麻烦了。根据编译器提示去实现呢,确实可以但是编译器的提示有时候不是很有用。这时候我们应该寄希望于文档,Swift标准库的文档还是很有价值的,大家没事的时候可以看看。

/// Conforming to the Collection Protocol
/// =====================================
///
/// If you create a custom sequence that can provide repeated access to its
/// elements, make sure that its type conforms to the `Collection` protocol in
/// order to give a more useful and more efficient interface for sequence and
/// collection operations. To add `Collection` conformance to your type, you
/// must declare at least the following requirements:
///
/// - The `startIndex` and `endIndex` properties
/// - A subscript that provides at least read-only access to your type's
///   elements
/// - The `index(after:)` method for advancing an index into your collection
///
public protocol Collection<Element> : Sequence {}

实现这些方法和属性就OK了,关联类型由Swift通过类型推断得出。只需这几行简单的代码,我们的Queue 就已经获得了40多个方法和属性。

// MARK: - Collection

extension Queue: Collection {
    /// Position of the front element; always zero.
    public var startIndex: Int {
        return 0
    }

    /// Past-the-end position: the combined element count of both backing arrays.
    public var endIndex: Int {
        return left.count + right.count
    }

    /// Returns the position immediately after `i`.
    public func index(after i: Int) -> Int {
        return i + 1
    }

    /// Accesses the element at `position`, counted from the front of the queue.
    public subscript(position: Int) -> E {
        // Positions beyond `left` fall into `right`, which is in queue order.
        guard position < left.count else {
            return right[position - left.count]
        }
        // `left` is stored reversed, so count from its end.
        return left[left.count - position - 1]
    }
}

数组字面量

同时我们也希望像通过字面量创建数组一样来创建Queue,当然是让Queue实现字面量协议啦。我们先看下字面量协议,也是非常的简单。

/// Conforming to ExpressibleByArrayLiteral
/// =======================================
///
/// Add the capability to be initialized with an array literal to your own
/// custom types by declaring an `init(arrayLiteral:)` initializer. 
/// ...
public protocol ExpressibleByArrayLiteral {

    associatedtype ArrayLiteralElement

    init(arrayLiteral elements: Self.ArrayLiteralElement...)
}

怎么快速找到如何实现一个协议的方法相信大家已经很清楚了,我们来实现一下ExpressibleByArrayLiteral 协议。

// MARK: - ExpressibleByArrayLiteral

extension Queue: ExpressibleByArrayLiteral {
    /// Creates a queue from an array literal.
    ///
    /// The literal's elements are stored reversed in `left`, so the first
    /// literal element ends up at the front of the queue.
    init(arrayLiteral elements: E...) {
        let front: [E] = elements.reversed()
        self.init(left: front, right: [])
    }
}

let q: Queue = [1, 2, 3] // Build succeeded: the array literal is handed to Queue's init(arrayLiteral:)

注意这里的[1, 2, 3]并不是一个数组,它只是一个数组字面量,我们可以用它来创建实现了ExpressibleByArrayLiteral 协议的类型。

关联类型

Collection 为除了Index 和Element 之外的关联类型都提供了默认值。

  • Iterator 从Sequence 继承来的关联类型,Collection 中默认迭代器类型是IndexingIterator<Self>,它对集合进行包装,使用集合 的下标索引进行迭代。
extension Collection where Iterator == IndexingIterator<Self> {
  public func makeIterator() -> IndexingIterator<Self> {
    return IndexingIterator(_elements: self)
  }
}

public struct IndexingIterator<Elements: Collection> {
  internal let _elements: Elements
  internal var _position: Elements.Index

  public /// @testable
  init(_elements: Elements) {
    self._elements = _elements
    self._position = _elements.startIndex
  }
  //...
}

extension IndexingIterator: IteratorProtocol, Sequence {
  public typealias Element = Elements.Element
  public typealias Iterator = IndexingIterator<Elements>
  public typealias SubSequence = AnySequence<Element>

  /// Returns the element at the current position and advances the position,
  /// or returns `nil` once the position equals the collection's `endIndex`.
  public mutating func next() -> Elements.Element? {
    // Reaching endIndex means every element has been visited
    if _position == _elements.endIndex { return nil }
    let element = _elements[_position]
    // Advance using the collection's own index logic
    _elements.formIndex(after: &_position)
    return element
  }
}
  • SubSequence 表示集合中一段连续内容切片的类型。默认实现的类型是Slice<Self>,也是对集合进行包装。
extension Collection where SubSequence == Slice<Self> {
  public subscript(bounds: Range<Index>) -> Slice<Self> {
    return Slice(base: self, bounds: bounds)
  }
}

public struct Slice<Base: Collection> {
  public var _startIndex: Base.Index
  public var _endIndex: Base.Index

  internal var _base: Base

  public init(base: Base, bounds: Range<Base.Index>) {
    self._base = base
    self._startIndex = bounds.lowerBound
    self._endIndex = bounds.upperBound
  }

  //...
}
  • Indices 集合的indices 属性的类型,它是集合中所有有效的索引,并且按照升序排列。它也是对集合类型的包装。
public struct DefaultIndices<Elements: Collection> {
  internal var _elements: Elements
  internal var _startIndex: Elements.Index
  internal var _endIndex: Elements.Index

  internal init(
    _elements: Elements,
    startIndex: Elements.Index,
    endIndex: Elements.Index
  ) {
    self._elements = _elements
    self._startIndex = startIndex
    self._endIndex = endIndex
  }
}

索引

需要注意:endIndex 指向最后一个元素之后的位置(定义见下文),所以它不是一个可以用于下标访问的有效索引。

目前我们使用的索引大多数是整数,接下来我们来看下Dictionary 的索引。

索引表示了集合中的位置,每个集合都有两个特殊的索引值:

startIndex:集合中第一个元素的位置

endIndex:集合中最后一个元素的下一个位置

extension Dictionary {
  /// The position of a key-value pair in a dictionary.
  ///
  /// Dictionary has two subscripting interfaces:
  ///
  /// 1. Subscripting with a key, yielding an optional value:
  ///
  ///        v = d[k]!
  ///
  /// 2. Subscripting with an index, yielding a key-value pair:
  ///
  ///        (k, v) = d[i]
  @frozen
  public struct Index {
    // Index for native dictionary is efficient.  Index for bridged NSDictionary
    // is not, because neither NSEnumerator nor fast enumeration support moving
    // backwards.  Even if they did, there is another issue: NSEnumerator does
    // not support NSCopying, and fast enumeration does not document that it is
    // safe to copy the state.  So, we cannot implement Index that is a value
    // type for bridged NSDictionary in terms of Cocoa enumeration facilities.

    @frozen
    @usableFromInline
    internal enum _Variant {
      case native(_HashTable.Index)
#if _runtime(_ObjC)
      case cocoa(__CocoaDictionary.Index)
#endif
    }

    @usableFromInline
    internal var _variant: _Variant

    @inlinable
    @inline(__always)
    internal init(_variant: __owned _Variant) {
      self._variant = _variant
    }

    @inlinable
    @inline(__always)
    internal init(_native index: _HashTable.Index) {
      self.init(_variant: .native(index))
    }

#if _runtime(_ObjC)
    @inlinable
    @inline(__always)
    internal init(_cocoa index: __owned __CocoaDictionary.Index) {
      self.init(_variant: .cocoa(index))
    }
#endif
  }
}

可以看到Dictionary 的Index 实现还是比较复杂的,从注释中可以了解到Dictionary 有两个下标方法,我们通过索引下标访问时返回的是一个非可选值,通过键下标访问时返回的是一个可选值。这是因为通常我们使用索引下标时都是从集合获得的比如indices 属性,无效的索引下标被认为是程序员的错误。然而使用键作为下标访问时,我们并不清楚键是否有对应的值,所以返回的是可选型。

extension Dictionary: Collection {
  	// public typealias Element = (key: Key, value: Value)
	public subscript(position: Index) -> Element
}

extension Dictionary {
  	public subscript(key: Key) -> Value?
}

注意通过索引下标访问获得的类型是Element,它是一个键值对public typealias Element = (key: Key, value: Value)。最后再看下Dictionary 的迭代器。

extension Dictionary.Iterator: IteratorProtocol {
  public mutating func next() -> (key: Key, value: Value)? 
}

子序列

SubSequence 表示集合中一个连续的子区间,我们可以看到下面有很多操作都是返回集合的SubSequence。

// Excerpt of Collection operations whose result type is SubSequence
// (declarations only; bodies omitted).
extension Collection {
  func dropFirst(_ k: Int = 1) -> SubSequence
  
  func dropLast(_ k: Int = 1) -> SubSequence
  
  func drop(
    while predicate: (Element) throws -> Bool
  ) rethrows -> SubSequence
  
  func prefix(_ maxLength: Int) -> SubSequence
  
  func prefix(
    while predicate: (Element) throws -> Bool
  ) rethrows -> SubSequence
  
  func suffix(_ maxLength: Int) -> SubSequence
  
  func prefix(upTo end: Index) -> SubSequence
  
  func suffix(from start: Index) -> SubSequence
  
  func prefix(through position: Index) -> SubSequence
  
  // Splitting yields an array of subsequences rather than a single one
  func split(
    maxSplits: Int = Int.max,
    omittingEmptySubsequences: Bool = true,
    whereSeparator isSeparator: (Element) throws -> Bool
  ) rethrows -> [SubSequence]
}

而这些方法的实现都是调用public subscript(bounds: Range<Index>) -> Slice<Self>,相比于直接返回一个包含子序列中所有元素的新集合的好处是,不会造成额外的内存分配。子序列与原集合共享内部存储。但是当原始序列占用内存较大时,为了避免子序列长时间把原始序列保持在内存中,我们可以使用子序列创建一个新的集合。例如:String(substring)、Array(arraySlice)。

延迟序列

延迟意味着只有在真正需要的时候才计算出来。这里我们主要看下lazy.filter 和lazy.map 是怎么实现的。

  • lazy:lazy 属性会把原集合包装成LazyCollection 类型,LazyCollection 遵守LazyCollectionProtocol 协议。

    extension Sequence {
      public var lazy: LazySequence<Self> {
        return LazySequence(_base: self)
      }
    }
    
    public typealias LazyCollection<T: Collection> = LazySequence<T>
    
    extension LazyCollection: LazyCollectionProtocol { }
    
  • filter:lazy 调用filter 方法时,原集合被包装成了LazyFilterSequence 类型。注意这里的下标方法只是直接转发给了原集合的下标方法。

    extension LazySequenceProtocol {
      public func filter(
        _ isIncluded: @escaping (Elements.Element) -> Bool
      ) -> LazyFilterSequence<Self.Elements> {
        return LazyFilterSequence(_base: self.elements, isIncluded)
      }
    }
    
    public struct LazyFilterSequence<Base: Sequence> {
      internal var _base: Base
      internal let _predicate: (Base.Element) -> Bool
    
      public // @testable
      init(_base base: Base, _ isIncluded: @escaping (Base.Element) -> Bool) {
        self._base = base
        self._predicate = isIncluded
      }
    }
    
    extension LazyFilterSequence {
      public struct Iterator {
        public var base: Base.Iterator { return _base }
        internal var _base: Base.Iterator
        internal let _predicate: (Base.Element) -> Bool
    
        internal init(_base: Base.Iterator, _ isIncluded: @escaping (Base.Element) -> Bool) {
          self._base = _base
          self._predicate = isIncluded
        }
      }
    }
    
    extension LazyFilterSequence.Iterator: IteratorProtocol, Sequence {
      public typealias Element = Base.Element
    
      /// Returns the next base element that satisfies the predicate, or `nil`
      /// once the base iterator is exhausted.
      public mutating func next() -> Element? {
        // Elements rejected by the predicate are consumed and skipped
        while let n = _base.next() {
          if _predicate(n) {
            return n
          }
        }
        return nil
      }
    }
    
    extension LazyFilterSequence: Sequence {
      public __consuming func makeIterator() -> Iterator {
        return Iterator(_base: _base.makeIterator(), _predicate)
      }
    }
    
    // NOTE(review): `LazyFilterCollection` and `Index` are not declared in this excerpt;
    // presumably the former is LazyFilterSequence with a Collection base — confirm against the standard library source.
    extension LazyFilterCollection: Collection {
    
      /// The first base index whose element satisfies the predicate, or the
      /// base's `endIndex` when no element does.
      public var startIndex: Index {
        var index = _base.startIndex
        // Skip leading elements rejected by the predicate
        while index != _base.endIndex && !_predicate(_base[index]) {
          _base.formIndex(after: &index)
        }
        return index
      }
    
      /// Same as the base collection's `endIndex`.
      public var endIndex: Index {
        return _base.endIndex
      }
    
      /// Forwards to the base collection's subscript; the predicate is not evaluated here.
      public subscript(position: Index) -> Element {
        return _base[position]
      }
    }
    
  • map: lazy 调用map 方法时,原集合被包装成了LazyMapSequence 类型。实现原理和 LazyFilterSequence 很相似,主要差别还是在 startIndex、endIndex、Iterator、subscript(position: Base.Index) -> Element 上,大家可以对比一下。

    public struct LazyMapSequence<Base: Sequence, Element> {
      public typealias Elements = LazyMapSequence
      internal var _base: Base
      internal let _transform: (Base.Element) -> Element
    
      internal init(_base: Base, transform: @escaping (Base.Element) -> Element) {
        self._base = _base
        self._transform = transform
      }
    }
    
    extension LazyMapSequence {
      public struct Iterator {
        internal var _base: Base.Iterator
        internal let _transform: (Base.Element) -> Element
    
        public var base: Base.Iterator { return _base }
    
        internal init(
          _base: Base.Iterator, 
          _transform: @escaping (Base.Element) -> Element
        ) {
          self._base = _base
          self._transform = _transform
        }
      }
    }
    
    extension LazyMapSequence.Iterator: IteratorProtocol, Sequence {
      /// Pulls the next base element and transforms it; yields `nil` when the
      /// base iterator yields `nil`.
      public mutating func next() -> Element? {
        return _base.next().map(_transform)
      }
    }
    
    extension LazyMapSequence: LazySequenceProtocol {
      public func makeIterator() -> Iterator {
        return Iterator(_base: _base.makeIterator(), _transform: _transform)
      }
    }
    
    // NOTE(review): `LazyMapCollection` is not declared in this excerpt;
    // presumably it is LazyMapSequence with a Collection base — confirm against the standard library source.
    extension LazyMapCollection: Collection {
      // Both bounds are taken from the base collection unchanged
      public var startIndex: Base.Index { return _base.startIndex }
      public var endIndex: Base.Index { return _base.endIndex }
    
      /// Applies the transform to the base element each time the subscript is read.
      public subscript(position: Base.Index) -> Element {
        return _transform(_base[position])
      }
    }
    

补充

reversed 并不会逆序原集合的元素。而是持有原集合,并逆序了原集合的索引遍历方法。

extension BidirectionalCollection {
  public func reversed() -> ReversedCollection<Self> {
    return ReversedCollection(_base: self)
  }
}

extension ReversedCollection.Iterator: IteratorProtocol, Sequence {
  public typealias Element = Base.Element
 
  /// Steps the position one index backwards in the base collection and
  /// returns the element there; returns `nil` once the position has reached
  /// the base's `startIndex`.
  public mutating func next() -> Element? {
    guard _fastPath(_position != _base.startIndex) else { return nil }
    // Move backwards first, then read the element at the new position
    _base.formIndex(before: &_position)
    return _base[_position]
  }
}

引用

ObjC 中国 - Swift 进阶