# 《swift-algorithm-club》——算法/压缩

·  阅读 1188

### 压缩算法(Compression)

#### 行程长度压缩算法(Run-Length Encoding (RLE))

RLE可能是进行压缩的最简单方法。假设数据如下：

``````aaaaabbbcdeeeeeeef...

``````5a3b1c1d7e1f...

``````extension Data {
public func compressRLE() -> Data {
var data = Data()
self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
var ptr = uPtr
let end = ptr + count
while ptr < end {
var count = 0
var byte = ptr.pintee
var next = byte

while next == byte && ptr < end && count < 64 {
ptr = ptr.advanced(by: 1)
next = ptr.pointee
count += 1
}

if count > 1 || byte >= 192 {
var size = 191 + UInt8(count)
data.append(&size, count: 1)
data.append(&size, count: 1)
} else {
data.append(&byte, count: 1)
}
}
}
return data
}
}

``````public func decompressRLE() -> Data {
var data = Data()
self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
var ptr = uPtr
let end = ptr + count

while ptr < end {
// Read the next byte. This is either a single value less than 192,
// or the start of a byte run.
var byte = ptr.pointee
ptr = ptr.advanced(by: 1)

if byte < 192 {
data.append(&byte, count: 1)
} else if ptr < end {
// Read the actual data value.
var value = ptr.pointee
ptr = ptr.advanced(by: 1)

// And write it out repeatedly.
for _ in 0 ..< byte - 191 {
data.append(&value, count: 1)
}
}
}
}
return data
}

#### 霍夫曼编码(Huffman Coding)

``````so much words wow many compression

``````space: 5                   u: 1
0: 5                   h: 1
s: 4                   d: 1
m: 3                   a: 1
w: 3                   y: 1
c: 2                   p: 1
r: 2                   e: 1
n: 2                   i: 1

``````sapce: 5    010         u: 1    11001
0: 5    000         h: 1    10001
s: 4    101         d: 1    11010
m: 3    111         a: 1    11011
w: 3    0010        y: 1    01111
c: 2    0011        p: 1    11000
r: 2    1001        e: 1    01110
n: 2    0110        i: 1    10000

``````101 000 010 111 11001 0011 10001 010 0010 000 1001 11010 101
s   o   _   m   u     c    h     _   w    o   r    d     s

010 0010 000 0010 010 111 11011 0110 01111 010 0011 000 111
_   w    o   w    _   m   a     n    y     _   c    o   m

11000 1001 01110 101 101 10000 000 0110 0
p     r    e     s   s   i     o   n

`NSData`理解的最小数据是字节，但我们处理的是比特，所以我们需要在两者之间进行转换

``````Npublic class BitWriter {
public var data = NSMutableData()
var outByte: UInt8 = 0
var outCount = 0

public func writeBit(bit: Bool) {
if outCount == 8 {
data.append(&outByte, length: 1)
outCount = 0
}
outByte = (outByte << 1) | (bit ? 1 : 0)
outCount += 1
}

public func flush() {
if outCount > 0 {
if outCount < 8 {
let diff = UInt8(8 - outCount)
outByte <<= diff
}
data.append(&outByte, length: 1)
}
}
}

``````public class BitReader {
var ptr: UnsafePointer<UInt8>
var inByte: UInt8 = 0
var inCount = 8

public init(data: NSData) {
ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
}

public func readBit() -> Bool {
if inCount == 8 {
inByte = ptr.pointee    // load the next byte
inCount = 0
ptr = ptr.successor()
}
let bit = inByte & 0x80   // read the next bit
inByte <<= 1
inCount += 1
return bit == 0 ? false : true
}
}

``````class Huffman {
typealias NodeIndex = Int

struct Node {
var count = 0
var index: NodeIndex = -1
var parent: NodeIndex = -1
var left: NodeIndex = -1
var right: NodeIndex = -1
}

var tree = [Node](repeating: Node(), count: 256)

var root: NodeIndex = -1
}

``````fileprivate func countByteFrequency(inData data: NSData) {
var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
for _ in 0..<data.length {
let i = Int(ptr.pointee)
tree[i].count += 1
tree[i].index = i
ptr = ptr.successor()
}
}

``````struct Freq {
var byte: UInt8 = 0
var count = 0
}

func frequencyTable() -> [Freq] {
var a = [Freq]()
for i in 0..<256 where tree[i].count > 0 {
a.append(Freq(byte: UInt8(i), count: tree[i].count))
}
return a
}

``````fileprivate func buildTree() {
var queue = PriortyQueue<Node>(sort: { \$0.count < \$1.count })
for node in tree where node.count > 0 {
queue.enqueue(node)
}

while queue.count > 1 {
let node1 = queue.dequeue()!
let node2 = queue.dequeue()!

var parentNode = Node()
parentNode.count = node1.count + node2.count
parentNode.left = node1.index
parentNode.right = node2.index
parentNode.index = tree.count
tree.append(parentNode)

tree[node1.index].parent = parentNode.index
tree[node2.index].parent = parentNode.index

queue.enqueue(parentNode)
}

let rootNode = queue.dequeue()!
root = rootNode.index
}

``````public func compressData(data: NSData) -> NSData {
countByteFrequency(inData: data)
buildTree()

let writer = BitWriter()
var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
for _ in 0..<data.length {
let c = ptr.pointee
let i = Int(c)
traverseTree(writer: writer, nodeIndex: i, childIndex: -1)
ptr = ptr.successor()
}
writer.flush()
return writer.data
}

``````private func traverseTree(writer: BitWriter, nodeIndex h: Int, childIndex child: Int) {
if tree[h].parent != -1 {
traverseTree(writer: writer, nodeIndex: tree[h].parent, childIndex: h)
}
if child != -1 {
if child == tree[h].left {
writer.writeBit(bit: true)
} else if child == tree[h].right {
writer.writeBit(bit: false)
}
}
}

``````let s1 = "so much words wow many compression"
if let originalData = s1.dataUsingEncoding(NSUTF8StringEncoding) {
let huffman1 = Huffman()
let compressedData = huffman1.compressData(originalData)
print(compressedData.length)
}

``````fileprivate func restoreTree(fromTable frequencyTable: [Freq]) {
for freq in frequencyTable {
let i = Int(freq.byte)
tree[i].count = freq.count
tree[i].index = i
}
buildTree()
}

``````func decompressData(data: NSData, frequencyTable: [Freq]) -> NSData {
restoreTree(fromTable: frequencyTable)

let reader = BitReader(data: data)
let outData = NSMutableData()
let byteCount = tree[root].count

var i = 0
while i < byteCount {
var b = findLeafNode(reader: reader, nodeIndex: root)
outData.append(&b, length: 1)
i += 1
}
return outData
}

``````private func findLeafNode(reader reader: BitReader, nodeIndex: Int) -> UInt8 {
var h = nodeIndex
while tree[h].right != -1 {
if reader.readBit() {
h = tree[h].left
} else {
h = tree[h].right
}
}
return UInt8(h)
}

``````let frequencyTable = huffman1.frequencyTable()

let huffman2 = Huffman()
let decompressedData = huffman2.decompressData(compressedData, frequencyTable: frequencyTable: frequencyTable)

let s2 = String(data: decompressedData, encoding: NSUTF8StringEncoding)!

iOS