日常开发中使用最多的应该就是集合类型了，本文通过查看一些源码来进一步理解下集合类型协议。

协议继承关系

Sequence 提供迭代能力。它允许你创建一个迭代器，但是并不保证序列是只能单次遍历还是支持多次遍历。

/// A type that can hand out an iterator over its elements.
///
/// The protocol itself makes no promise about whether the elements can be
/// traversed only once or several times.
public protocol Sequence<Element> {

    /// The element type; constrained to equal the iterator's `Element`.
    associatedtype Element where Self.Element == Self.Iterator.Element

    /// The iterator type returned by `makeIterator()`.
    associatedtype Iterator : IteratorProtocol

    /// Creates an iterator over this sequence.
    func makeIterator() -> Self.Iterator

        //...
}

IteratorProtocol 每次提供序列中的一个值。IteratorProtocol 协议与Sequence 协议是紧密相关的，当你使用for-in 遍历集合类型时，内部使用的是Sequence/Collection 的iterator。

/// Supplies the values of a sequence one at a time.
public protocol IteratorProtocol<Element> {

    /// The type of value produced by `next()`.
    associatedtype Element

    /// Returns the next element if there is one, otherwise `nil`
    /// (which is why the return type is optional).
    mutating func next() -> Self.Element?
}

Collection 继承于Sequence，允许多次遍历。可以通过索引访问其中的元素，还提供了集合切片的能力。

/// A sequence that supports repeated traversal, element access by index,
/// and slicing.
public protocol Collection<Element> : Sequence {

    /// Read-only access to the element at `position`.
    subscript(position: Self.Index) -> Self.Element { get }

    /// Read-only access to the slice covering `bounds`.
    subscript(bounds: Range<Self.Index>) -> Self.SubSequence { get }
}

MutableCollection 继承于Collection，提供通过下标读写的能力。但是不能改变集合本身的长度，比如添加、删除操作是不允许的。

/// A collection whose elements can be read and written through subscripts.
/// It does not allow changing the collection's length (no insertion or removal).
public protocol MutableCollection<Element> : Collection where Self.SubSequence : MutableCollection {

    // Redeclares the `Collection` subscripts, adding a setter to each.
    override subscript(position: Self.Index) -> Self.Element { get set }

    override subscript(bounds: Range<Self.Index>) -> Self.SubSequence { get set }
}

RangeReplaceableCollection 提供使用一个集合替换当前集合任意子区间的能力。你需要实现init() 和replaceSubrange(_:with:) 方法，其他的方法都是通过调用replaceSubrange(_:with:) 实现的。你也可以自己实现协议中的这些方法来提供自定义实现。

/// A collection that can replace an arbitrary subrange of itself with the
/// elements of another collection.
public protocol RangeReplaceableCollection<Element> : Collection where Self.SubSequence : RangeReplaceableCollection {

    /// Creates an empty collection. Conforming types must implement this.
    init()

    /// Replaces the elements in `subrange` with `newElements`.
    /// Conforming types must implement this.
    mutating func replaceSubrange<C>(_ subrange: Range<Self.Index>, with newElements: C) where C : Collection, Self.Element == C.Element

    /// Adds `newElement` to the collection.
    mutating func append(_ newElement: Self.Element)

    /// Inserts `newElement` at index `i`.
    mutating func insert(_ newElement: Self.Element, at i: Self.Index)

    /// Removes the element at index `i` and returns it.
    mutating func remove(at i: Self.Index) -> Self.Element

    //...
}

BidirectionalCollection 提供逆向遍历的能力。

/// A collection that can also be traversed backwards.
public protocol BidirectionalCollection<Element> : Collection where Self.Indices : BidirectionalCollection, Self.SubSequence : BidirectionalCollection {

    /// Returns the index immediately before `i`.
    func index(before i: Self.Index) -> Self.Index

    //...
}

RandomAccessCollection 提供了高效的索引计算能力。要求把索引移动任意距离和计算索引间距离的时间复杂度为O(1)。

/// A collection with efficient index computation: moving an index by any
/// distance and measuring the distance between two indices are required to
/// be O(1).
public protocol RandomAccessCollection<Element> : BidirectionalCollection where Self.Indices : RandomAccessCollection, Self.SubSequence : RandomAccessCollection {

    /// Returns the index that is `distance` positions away from `i`.
    func index(_ i: Self.Index, offsetBy distance: Int) -> Self.Index

    /// Returns the number of positions between `start` and `end`.
    func distance(from start: Self.Index, to end: Self.Index) -> Int

    //...
}

LazySequenceProtocol 只有在访问序列中的元素时才开始计算这个元素的具体值。可以避免不必要的内存分配和计算。

/// A sequence whose element values are computed only when they are accessed,
/// avoiding unnecessary allocation and computation.
public protocol LazySequenceProtocol : Sequence {

    /// The underlying sequence type; defaults to `Self`.
    associatedtype Elements : Sequence = Self where Self.Element == Self.Elements.Element

    // You do not need to implement this computed property yourself: an
    // extension provides a default implementation that simply returns `self`.
    // When `.lazy` is used, the `lazy` property wraps `elements` in a `LazySequence`.
    var elements: Self.Elements { get }
}

LazyCollectionProtocol 和LazySequenceProtocol 类似。

/// The collection counterpart of `LazySequenceProtocol`; it adds no
/// requirements beyond constraining `Elements` to be a `Collection`.
public protocol LazyCollectionProtocol : Collection, LazySequenceProtocol where Self.Elements : Collection {
}

extension LazyCollectionProtocol {

    /// A `LazyCollection` over this collection's `Elements` (interface only; body elided).
    @inlinable public var lazy: LazyCollection<Self.Elements> { get }
}

extension LazyCollectionProtocol where Self.Elements : LazyCollectionProtocol {

    /// When `Elements` is itself lazy, `lazy` has type `Self.Elements` rather
    /// than a new `LazyCollection` wrapper (interface only; body elided).
    @inlinable public var lazy: Self.Elements { get }
}

// Look closely here: `LazyCollection` is only an alias for `LazySequence`
// whose base is constrained to be a `Collection`.
public typealias LazyCollection<T> = LazySequence<T> where T : Collection

序列

Sequence 协议要求非常简单，返回一个迭代器。func makeIterator() -> Self.Iterator

迭代器

IteratorProtocol 协议的要求也非常简单，返回序列中的下一个元素，序列被耗尽时返回nil。associatedtype Element where Self.Element == Self.Iterator.Element 保证了序列和迭代器返回的类型是一致的。
我们平时不需要关心迭代器或者直接使用迭代器，通常我们遍历序列使用的是for-in，本质上来说for-in 是下面代码的简写。
```
// What `for element in someSequence { ... }` expands to:
// obtain an iterator once, then pull elements until it returns nil.
var elementIterator = someSequence.makeIterator()
while let nextElement = elementIterator.next() {
    doSomething(with: nextElement)
}
```

大部分迭代器都具有值语义，但是也有例外，比如AnyIterator：它对某个具体类型的迭代器进行包装，从而实现类型擦除。可以看到_AnyIteratorBoxBase 是一个class，所以AnyIterator 具有引用语义。

/// A type-erasing wrapper around an iterator.
///
/// The wrapped iterator is kept in a box whose base type,
/// `_AnyIteratorBoxBase`, is a class, which gives `AnyIterator`
/// reference semantics.
public struct AnyIterator<Element> {

  // Box that holds the wrapped iterator.
  internal let _box: _AnyIteratorBoxBase<Element>

  /// Wraps `base`, hiding its concrete iterator type.
  public init<I: IteratorProtocol>(_ base: I) where I.Element == Element {
    self._box = _IteratorBox(base)
 }

 //...
}

// Class-based box type used as `AnyIterator`'s storage; members are elided in this excerpt.
internal class _AnyIteratorBoxBase<Element>: IteratorProtocol { }

集合类型

集合是有限的且可以被多次遍历，集合中的元素可以通过下标索引的方式访问。下标索引通常是整数，但是也有例外比如字符串的下标索引。实现Collection 协议时，应当选取适合的索引类型来表达元素在集合中的位置。

自定义集合类型

我们先来实现一个简单的数据结构-队列，然后让这个队列实现Collection 协议。

/// A first-in, first-out queue backed by two arrays.
///
/// New elements are appended to `right`. Elements are dequeued from the end
/// of `left`, which holds older elements in reversed order so that removing
/// the front of the queue is a cheap `popLast()`.
struct Queue<E> {
    /// Elements ready to be dequeued, stored reversed (front of the queue last).
    private var left: [E] = []
    /// Newly enqueued elements, in insertion order.
    private var right: [E] = []
    
    /// Adds `element` to the back of the queue.
    mutating func enqueue(_ element: E) {
        right.append(element)
    }
    
    /// Removes and returns the element at the front of the queue.
    ///
    /// - Returns: The oldest element, or `nil` if the queue is empty.
    mutating func dequeue() -> E? {
        if left.isEmpty {
            left = right.reversed()
            // The elements now live in `left`; empty `right` so they are not
            // moved over (and dequeued) a second time on a later refill.
            right.removeAll()
        }
        return left.popLast()
    }
}

通过查看Collection 协议，我们可以看到协议中有很多的关联类型、属性、方法，那我们需要把这些全部都实现吗？当然不是。我们可以继续观察，有一部分关联类型有了默认值，还有一部分在Collection 扩展中提供了默认实现。那我们应该怎么确定至少应该实现哪些方法呢？逐个查找对比默认实现，然后把没有默认实现的补上，太麻烦了。根据编译器提示去实现呢？确实可以，但是编译器的提示有时候不是很有用。这时候我们应该寄希望于文档，Swift标准库的文档还是很有价值的，大家没事的时候可以看看。

/// Conforming to the Collection Protocol
/// =====================================
///
/// If you create a custom sequence that can provide repeated access to its
/// elements, make sure that its type conforms to the `Collection` protocol in
/// order to give a more useful and more efficient interface for sequence and
/// collection operations. To add `Collection` conformance to your type, you
/// must declare at least the following requirements:
///
/// - The `startIndex` and `endIndex` properties
/// - A subscript that provides at least read-only access to your type's
///   elements
/// - The `index(after:)` method for advancing an index into your collection
///
// Only the documentation matters here; the protocol's requirements are elided from this excerpt.
public protocol Collection<Element> : Sequence {}

实现下这些方法、属性就OK了，关联类型则由Swift 通过类型推断得出。实现这几行简单的代码，我们的Queue 就已经获得了40多个方法和属性了。

extension Queue: Collection {
    /// Position of the first element; the queue is indexed from zero.
    public var startIndex: Int {
        return 0
    }

    /// One past the last element: the combined size of both backing arrays.
    public var endIndex: Int {
        return left.count + right.count
    }

    /// Returns the position immediately after `i`.
    public func index(after i: Int) -> Int {
        return i + 1
    }

    /// Accesses the element at `position`, counted from the front of the queue.
    public subscript(position: Int) -> E {
        let dequeueSideCount = left.count
        // Positions past the `left` part fall into `right`, which is in insertion order.
        guard position < dequeueSideCount else {
            return right[position - dequeueSideCount]
        }
        // `left` is stored reversed, so the front of the queue sits at its end.
        return left[dequeueSideCount - position - 1]
    }
}

数组字面量

同时我们也希望像通过字面量创建数组一样来创建Queue，当然是让Queue实现字面量协议啦。我们先看下字面量协议，也是非常的简单。

/// Conforming to ExpressibleByArrayLiteral
/// =======================================
///
/// Add the capability to be initialized with an array literal to your own
/// custom types by declaring an `init(arrayLiteral:)` initializer. 
/// ...
public protocol ExpressibleByArrayLiteral {

    /// The type of the elements written inside the array literal.
    associatedtype ArrayLiteralElement

    /// Creates an instance from the elements of an array literal, which are
    /// passed as a variadic argument list.
    init(arrayLiteral elements: Self.ArrayLiteralElement...)
}

怎么快速找到如何实现一个协议的方法相信大家已经很清楚了，我们来实现一下ExpressibleByArrayLiteral 协议。

extension Queue: ExpressibleByArrayLiteral {
    /// Creates a queue whose front-to-back order matches the order of the literal.
    init(arrayLiteral elements: E...) {
        // `left` keeps its elements reversed, so flip the literal before storing it.
        let reversedElements: [E] = elements.reversed()
        self.init(left: reversedElements, right: [])
    }
}

let q: Queue = [1, 2, 3] // Build Succeeded

注意这里的[1, 2, 3]并不是一个数组，它只是一个数组字面量，我们可以用它来创建实现了ExpressibleByArrayLiteral 协议的类型。

关联类型

Collection 为除了Index 和Element 之外的关联类型都提供了默认值。

Iterator 从Sequence 继承来的关联类型，Collection 中默认迭代器类型是IndexingIterator<Self>，它对集合进行包装，使用集合的下标索引进行迭代。

/// Default `makeIterator()` for collections whose iterator type is
/// `IndexingIterator<Self>`.
extension Collection where Iterator == IndexingIterator<Self> {
  /// Returns an `IndexingIterator` wrapping this collection.
  public func makeIterator() -> IndexingIterator<Self> {
    return IndexingIterator(_elements: self)
  }
}

/// An iterator that wraps a collection and walks it using the collection's
/// own indices.
public struct IndexingIterator<Elements: Collection> {
  // The wrapped collection.
  internal let _elements: Elements
  // Index of the element that the next call to `next()` will return.
  internal var _position: Elements.Index

  /// Creates an iterator positioned at the collection's `startIndex`.
  public /// @testable
  init(_elements: Elements) {
    self._elements = _elements
    self._position = _elements.startIndex
  }
  //...
}

extension IndexingIterator: IteratorProtocol, Sequence {
  public typealias Element = Elements.Element
  public typealias Iterator = IndexingIterator<Elements>
  public typealias SubSequence = AnySequence<Element>

  /// Returns the element at the current position and advances past it;
  /// returns `nil` once the position has reached the collection's `endIndex`.
  public mutating func next() -> Elements.Element? {
    if _position == _elements.endIndex { return nil }
    let element = _elements[_position]
    // Move the stored position forward for the following call.
    _elements.formIndex(after: &_position)
    return element
  }
}

SubSequence 表示集合中一段连续内容切片的类型。默认实现的类型是Slice<Self>，也是对集合进行包装。

/// Default range subscript for collections whose `SubSequence` is `Slice<Self>`.
extension Collection where SubSequence == Slice<Self> {
  /// Returns a `Slice` that wraps this collection together with `bounds`.
  public subscript(bounds: Range<Index>) -> Slice<Self> {
    return Slice(base: self, bounds: bounds)
  }
}

/// A wrapper that stores a base collection together with the bounds of the
/// slice.
public struct Slice<Base: Collection> {
  // Lower bound of the slice.
  public var _startIndex: Base.Index
  // Upper bound of the slice.
  public var _endIndex: Base.Index

  // The wrapped base collection.
  internal var _base: Base

  /// Creates a slice of `base` delimited by `bounds`.
  public init(base: Base, bounds: Range<Base.Index>) {
    self._base = base
    self._startIndex = bounds.lowerBound
    self._endIndex = bounds.upperBound
  }

  //...
}

Indices 是集合的indices 属性的类型，它包含集合中所有有效的索引，并且按照升序排列。它也是对集合类型的包装。

/// A wrapper that stores a collection together with a start and an end index.
public struct DefaultIndices<Elements: Collection> {
  // The wrapped collection.
  internal var _elements: Elements
  // Stored start index.
  internal var _startIndex: Elements.Index
  // Stored end index.
  internal var _endIndex: Elements.Index

  // Memberwise initializer: stores the three arguments unchanged.
  internal init(
    _elements: Elements,
    startIndex: Elements.Index,
    endIndex: Elements.Index
  ) {
    self._elements = _elements
    self._startIndex = startIndex
    self._endIndex = endIndex
  }
}

索引

需要注意的是，endIndex 指向的是最后一个元素之后的位置，所以endIndex 不是一个有效的索引，不能用它来访问元素。

目前我们使用的索引大多数是整数，接下来我们来看下Dictionary 的索引。

索引表示了集合中的位置，每个集合都有两个特殊的索引值：

startIndex：集合中第一个元素的位置

endIndex：集合中最后一个元素的下一个位置

extension Dictionary {
  /// The position of a key-value pair in a dictionary.
  ///
  /// Dictionary has two subscripting interfaces:
  ///
  /// 1. Subscripting with a key, yielding an optional value:
  ///
  ///        v = d[k]!
  ///
  /// 2. Subscripting with an index, yielding a key-value pair:
  ///
  ///        (k, v) = d[i]
  @frozen
  public struct Index {
    // Index for native dictionary is efficient.  Index for bridged NSDictionary
    // is not, because neither NSEnumerator nor fast enumeration support moving
    // backwards.  Even if they did, there is another issue: NSEnumerator does
    // not support NSCopying, and fast enumeration does not document that it is
    // safe to copy the state.  So, we cannot implement Index that is a value
    // type for bridged NSDictionary in terms of Cocoa enumeration facilities.

    // The two index representations: a native hash-table index, and (only
    // when built with the Objective-C runtime) a Cocoa dictionary index.
    @frozen
    @usableFromInline
    internal enum _Variant {
      case native(_HashTable.Index)
#if _runtime(_ObjC)
      case cocoa(__CocoaDictionary.Index)
#endif
    }

    // The representation this index currently holds.
    @usableFromInline
    internal var _variant: _Variant

    // Designated initializer: stores the given variant as-is.
    @inlinable
    @inline(__always)
    internal init(_variant: __owned _Variant) {
      self._variant = _variant
    }

    // Convenience: wraps a native hash-table index.
    @inlinable
    @inline(__always)
    internal init(_native index: _HashTable.Index) {
      self.init(_variant: .native(index))
    }

#if _runtime(_ObjC)
    // Convenience: wraps a Cocoa dictionary index.
    @inlinable
    @inline(__always)
    internal init(_cocoa index: __owned __CocoaDictionary.Index) {
      self.init(_variant: .cocoa(index))
    }
#endif
  }
}

可以看到Dictionary 的Index 实现还是比较复杂的，从注释中可以了解到Dictionary 有两个下标方法：我们通过索引下标访问时返回的是一个非可选值，通过键下标访问时返回的是一个可选值。这是因为通常我们使用的索引下标都是从集合本身获得的（比如indices 属性），无效的索引下标被认为是程序员的错误。然而使用键作为下标访问时，我们并不清楚键是否有对应的值，所以返回的是可选型。

extension Dictionary: Collection {
  	// public typealias Element = (key: Key, value: Value)
	/// Index subscript: returns the (non-optional) key-value pair at `position`.
	public subscript(position: Index) -> Element
}

extension Dictionary {
  	/// Key subscript: returns an optional, because the key may have no associated value.
  	public subscript(key: Key) -> Value?
}

注意通过索引下标访问获得的类型是Element，它是一个键值对public typealias Element = (key: Key, value: Value)。最后再看下Dictionary 的迭代器。

extension Dictionary.Iterator: IteratorProtocol {
  /// Returns the next key-value pair, typed as an optional
  /// `(key: Key, value: Value)` tuple.
  public mutating func next() -> (key: Key, value: Value)? 
}

子序列

SubSequence 表示集合中一个连续的子区间，我们可以看到下面有很多操作都是返回集合的SubSequence。

/// Slicing operations on `Collection` (signatures only, bodies elided).
/// Each one returns the collection's `SubSequence`, or an array of them for
/// `split`, rather than a new collection.
extension Collection {
  func dropFirst(_ k: Int = 1) -> SubSequence

  func dropLast(_ k: Int = 1) -> SubSequence

  func drop(
    while predicate: (Element) throws -> Bool
  ) rethrows -> SubSequence

  func prefix(_ maxLength: Int) -> SubSequence

  func prefix(
    while predicate: (Element) throws -> Bool
  ) rethrows -> SubSequence

  func suffix(_ maxLength: Int) -> SubSequence

  func prefix(upTo end: Index) -> SubSequence

  func suffix(from start: Index) -> SubSequence

  func prefix(through position: Index) -> SubSequence

  func split(
    maxSplits: Int = Int.max,
    omittingEmptySubsequences: Bool = true,
    whereSeparator isSeparator: (Element) throws -> Bool
  ) rethrows -> [SubSequence]
}

而这些方法的实现都是调用public subscript(bounds: Range<Index>) -> Slice<Self>，相比于直接返回一个包含子序列中所有元素的新集合，这样做的好处是不会造成额外的内存分配：子序列与原集合共享内部存储。但是当原始序列占用内存较大时，为了避免子序列长时间把原始序列保持在内存中，我们可以使用子序列创建一个新的集合。例如：String(substring) 或 Array(arraySlice)。

延迟序列

延迟意味着只有在真正需要的时候才计算出来。这里我们主要看下lazy.filter 和lazy.map 是怎么实现的。

lazy：lazy 属性会把原集合包装成LazyCollection 类型，LazyCollection 遵守LazyCollectionProtocol 协议。

extension Sequence {
  /// Wraps this sequence in a `LazySequence`.
  public var lazy: LazySequence<Self> {
    return LazySequence(_base: self)
  }
}

// `LazyCollection` is an alias for `LazySequence` whose base is a `Collection`.
public typealias LazyCollection<T: Collection> = LazySequence<T>

// Declares the `LazyCollectionProtocol` conformance; no members are shown in this excerpt.
extension LazyCollection: LazyCollectionProtocol { }

filter：lazy 调用filter 方法时，原集合被包装成了LazyFilterSequence 类型。注意这里的下标方法没有做额外处理，直接返回原集合对应下标的元素。

extension LazySequenceProtocol {
  /// Wraps `elements` and the predicate in a `LazyFilterSequence`.
  /// The predicate is stored (hence `@escaping`), not applied here.
  public func filter(
    _ isIncluded: @escaping (Elements.Element) -> Bool
  ) -> LazyFilterSequence<Self.Elements> {
    return LazyFilterSequence(_base: self.elements, isIncluded)
  }
}

/// A wrapper that stores a base sequence together with a filter predicate.
public struct LazyFilterSequence<Base: Sequence> {
  // The wrapped sequence.
  internal var _base: Base
  // Predicate deciding which base elements are included.
  internal let _predicate: (Base.Element) -> Bool

  /// Stores `base` and `isIncluded`; no element is evaluated here.
  public // @testable
  init(_base base: Base, _ isIncluded: @escaping (Base.Element) -> Bool) {
    self._base = base
    self._predicate = isIncluded
  }
}

extension LazyFilterSequence {
  /// Iterator state: the base sequence's iterator plus the filter predicate.
  public struct Iterator {
    /// Read-only view of the underlying base iterator.
    public var base: Base.Iterator { return _base }
    internal var _base: Base.Iterator
    internal let _predicate: (Base.Element) -> Bool

    // Stores the base iterator and the predicate unchanged.
    internal init(_base: Base.Iterator, _ isIncluded: @escaping (Base.Element) -> Bool) {
      self._base = _base
      self._predicate = isIncluded
    }
  }
}

extension LazyFilterSequence.Iterator: IteratorProtocol, Sequence {
  public typealias Element = Base.Element

  /// Pulls elements from the base iterator until one satisfies the
  /// predicate and returns it; returns `nil` when the base is exhausted.
  public mutating func next() -> Element? {
    while let n = _base.next() {
      // The predicate is evaluated here, at iteration time.
      if _predicate(n) {
        return n
      }
    }
    return nil
  }
}

extension LazyFilterSequence: Sequence {
  /// Creates a filtering iterator from the base sequence's iterator and the
  /// stored predicate.
  public __consuming func makeIterator() -> Iterator {
    return Iterator(_base: _base.makeIterator(), _predicate)
  }
}

extension LazyFilterCollection: Collection {

  /// Index of the first base element that satisfies the predicate, or the
  /// base's `endIndex` if none does. Computed by scanning from the base's
  /// `startIndex` on every access.
  public var startIndex: Index {
    var index = _base.startIndex
    while index != _base.endIndex && !_predicate(_base[index]) {
      _base.formIndex(after: &index)
    }
    return index
  }

  /// Same as the base collection's `endIndex`.
  public var endIndex: Index {
    return _base.endIndex
  }

  /// Forwards directly to the base collection's subscript.
  public subscript(position: Index) -> Element {
    return _base[position]
  }
}

map: lazy 调用map 方法时，原集合被包装成了LazyMapSequence 类型。实现原理和 LazyFilterSequence 很相似，主要差别还是在 startIndex、endIndex、Iterator、subscript(position: Base.Index) -> Element 上，大家可以对比一下。

/// A wrapper that stores a base sequence together with a transform closure.
public struct LazyMapSequence<Base: Sequence, Element> {
  public typealias Elements = LazyMapSequence
  // The wrapped sequence.
  internal var _base: Base
  // Closure mapping a base element to an `Element`.
  internal let _transform: (Base.Element) -> Element

  // Stores the base and the transform; no element is transformed here.
  internal init(_base: Base, transform: @escaping (Base.Element) -> Element) {
    self._base = _base
    self._transform = transform
  }
}

extension LazyMapSequence {
  /// Iterator state: the base sequence's iterator plus the transform closure.
  public struct Iterator {
    internal var _base: Base.Iterator
    internal let _transform: (Base.Element) -> Element

    /// Read-only view of the underlying base iterator.
    public var base: Base.Iterator { return _base }

    // Stores the base iterator and the transform unchanged.
    internal init(
      _base: Base.Iterator, 
      _transform: @escaping (Base.Element) -> Element
    ) {
      self._base = _base
      self._transform = _transform
    }
  }
}

extension LazyMapSequence.Iterator: IteratorProtocol, Sequence {
  /// Takes the next base element and applies the transform to it; returns
  /// `nil` when the base iterator returns `nil` (via `Optional.map`).
  public mutating func next() -> Element? {
    return _base.next().map(_transform)
  }
}

extension LazyMapSequence: LazySequenceProtocol {
  /// Creates a mapping iterator from the base sequence's iterator and the
  /// stored transform.
  public func makeIterator() -> Iterator {
    return Iterator(_base: _base.makeIterator(), _transform: _transform)
  }
}

extension LazyMapCollection: Collection {
  // The indices are exactly those of the base collection.
  public var startIndex: Base.Index { return _base.startIndex }
  public var endIndex: Base.Index { return _base.endIndex }

  /// Applies the transform to the base element at `position` on each access.
  public subscript(position: Base.Index) -> Element {
    return _transform(_base[position])
  }
}

补充

reversed 并不会真的把原集合的元素逆序，而是持有原集合，并按相反的方向遍历原集合的索引。

extension BidirectionalCollection {
  /// Returns a `ReversedCollection` that wraps this collection; the
  /// elements themselves are not reordered here.
  public func reversed() -> ReversedCollection<Self> {
    return ReversedCollection(_base: self)
  }
}

extension ReversedCollection.Iterator: IteratorProtocol, Sequence {
  public typealias Element = Base.Element
 
  /// Moves the stored position one step backwards through the base
  /// collection and returns the element there; returns `nil` once the
  /// position has reached the base's `startIndex`.
  public mutating func next() -> Element? {
    // NOTE(review): `_fastPath` is defined outside this excerpt — presumably a branch hint; confirm.
    guard _fastPath(_position != _base.startIndex) else { return nil }
    _base.formIndex(before: &_position)
    return _base[_position]
  }
}

引用

ObjC 中国 - Swift 进阶

Swift-集合类型协议