【专业课学习】Hash Tables

29 阅读3分钟

Hash functions

将关键字转换为自然数

哈希函数只能接收自然数以进行处理,因此需要将待处理的关键字事先通过合适的方式转换为自然数(有点像数据科学中要构造"特征向量")。

对于字符串,可以考虑将其视作一个"多项式"进行处理。例如:

"ABC" = 65 * (128 ** 0) + 66 * (128 ** 1) + 67 * (128 ** 2)

除法散列法

/**
 * Division-method hash of a string.
 *
 * The string is read as a polynomial in base 128 whose i-th coefficient is the
 * UTF-16 code unit at index i, and that polynomial is reduced modulo 111:
 *
 *     hash(str) = (sum over i of str.charCodeAt(i) * 128 ** i) mod 111
 *
 * @param {string} str - Key to hash.
 * @returns {number} Integer in [0, 110]; 0 for the empty string.
 */
function hash(str) {
    const MODULUS = 111;
    const BASE = 128;
    let result = 0;
    // Holds BASE ** i already reduced modulo MODULUS. Evaluating 128 ** i
    // directly leaves the exactly-representable integer range after a few
    // characters (the running remainder is then rounded away) and eventually
    // becomes Infinity, which turns the result into NaN.
    let power = 1;
    for (let i = 0; i < str.length; ++i) {
        result = (result + str.charCodeAt(i) * power) % MODULUS;
        power = (power * BASE) % MODULUS;
    }
    return result;
}

console.log(hash("Hello"));  // 58
console.log(hash("World"));  // 106
console.log(hash("Introduction"));  // 15
console.log(hash("to"));  // 5
console.log(hash("Algorithm"));  // 67

乘法散列法

链表法

/**
 * Hash table that resolves collisions by chaining.
 *
 * Every bucket stores its entries as `[key, value]` pairs. Keys are compared
 * with `===`, so a `NaN` key can be stored but never matched again.
 */
class HashTable {
    /**
     * @param {number} [n=111] - Number of buckets; must be a positive integer.
     * @throws {RangeError} If `n` is not a positive integer.
     */
    constructor(n = 111) {
        if (!Number.isInteger(n) || n < 1) {
            throw new RangeError("HashTable size must be a positive integer");
        }
        this.size = n;
        // This is only demo code, so a growable JavaScript array stands in
        // for the linked list of each bucket.
        this.table = Array.from({ length: n }, () => []);
    }

    /**
     * Maps a key to a bucket index (division method).
     *
     * Strings are read as a base-128 polynomial over their UTF-16 code units;
     * numbers are truncated to an integer first. Any other type is reported
     * with `console.error` and mapped to bucket 0.
     *
     * @param {string|number} key
     * @returns {number} Integer in [0, size - 1].
     */
    __hash__(key) {
        const type = Object.prototype.toString.call(key).slice(8, -1);
        if (type === 'String') {
            let result = 0;
            // 128 ** i kept reduced modulo size; the unreduced power loses
            // precision after a few characters and later becomes Infinity.
            let power = 1 % this.size;
            for (let i = 0; i < key.length; ++i) {
                result = (result + key.charCodeAt(i) * power) % this.size;
                power = (power * 128) % this.size;
            }
            return result;
        }
        if (type === 'Number') {
            const numeric = Number(key);
            // NaN and +/-Infinity have no usable remainder.
            if (!Number.isFinite(numeric)) {
                return 0;
            }
            // `%` keeps the sign of the dividend, so shift negative
            // remainders back into [0, size - 1].
            return ((Math.trunc(numeric) % this.size) + this.size) % this.size;
        }
        console.error("key with unsupported type!");
        return 0;
    }

    /**
     * @param {string|number} key
     * @returns {*} The stored value, or `undefined` when the key is absent.
     */
    get(key) {
        const bucket = this.table[this.__hash__(key)];
        const entry = bucket.find(([k]) => k === key);
        return entry === undefined ? undefined : entry[1];
    }

    /**
     * Inserts the pair, or overwrites the value if the key is already stored.
     *
     * @param {string|number} key
     * @param {*} value
     */
    set(key, value) {
        const bucket = this.table[this.__hash__(key)];
        const entry = bucket.find(([k]) => k === key);
        if (entry !== undefined) {
            entry[1] = value;
            return;
        }
        bucket.push([key, value]);
    }

    /**
     * @param {string|number} key
     * @returns {boolean} `true` if an entry was removed, `false` if the key was absent.
     */
    remove(key) {
        const bucket = this.table[this.__hash__(key)];
        const index = bucket.findIndex(([k]) => k === key);
        if (index === -1) {
            return false;
        }
        bucket.splice(index, 1);
        return true;
    }
}

// Demo of the chaining table: insert four keys, read them back, then remove one.
const MyTable = new HashTable();
MyTable.set("ABC", 1);
MyTable.set("World", 2);
MyTable.set(2004, 1979);
MyTable.set(126, 2024);  // 126 collides with 'ABC' (same bucket)
console.log(MyTable.get('World'));  // 2
console.log(MyTable.get('ABC'))  // 1
console.log(MyTable.get(2004))  // 1979
console.log(MyTable.get(126))  // 2024
MyTable.remove('ABC');
console.log(MyTable.get('ABC')) // undefined
console.log(MyTable.get(126))  // 2024 (the colliding key is unaffected by the removal)

开放寻址法

线性探查

二次探查

双重散列

/**
 * Hash table using open addressing with double hashing.
 *
 * For a numeric key k, the i-th slot of the probe sequence is
 *
 *     (k mod size + i * (1 + k mod (size - 1))) mod size
 *
 * Entries are stored as `[key, value]` pairs and keys are compared with `===`.
 *
 * NOTE: the probe sequence only visits every slot when the step is coprime to
 * `size` (always true for a prime `size`). With a composite size such as the
 * default 111, `set` can report overflow before the table is actually full.
 */
class HashTable {
    /**
     * @param {number} [n=111] - Number of slots; must be a positive integer.
     * @throws {RangeError} If `n` is not a positive integer.
     */
    constructor(n = 111) {
        if (!Number.isInteger(n) || n < 1) {
            throw new RangeError("HashTable size must be a positive integer");
        }
        this.size = n;
        this.table = Array(n);
        // status 0: the slot has never held an entry (empty)
        // status 1: the slot is occupied
        // status 2: the slot held an entry that was removed (tombstone)
        this.status = Array(n).fill(0);
    }

    /**
     * Returns the i-th slot of the probe sequence for a processed key.
     *
     * @param {number} processedKey - Integer produced by `processKey`.
     * @param {number} i - Probe number, starting at 0.
     * @returns {number} Slot index in [0, size - 1].
     */
    getAddr(processedKey, i) {
        const size = this.size;
        // With a single slot there is nothing to probe (and size - 1 would be
        // a zero modulus).
        if (size === 1) {
            return 0;
        }
        // `%` keeps the sign of the dividend; shift negative remainders back
        // into range so negative keys still land inside the table.
        const floorMod = (value, modulus) => ((value % modulus) + modulus) % modulus;
        const home = floorMod(processedKey, size);
        const step = 1 + floorMod(processedKey, size - 1);
        return (home + i * step) % size;
    }

    /**
     * Converts a raw key into the integer that `getAddr` probes with.
     *
     * Strings are read as a base-128 polynomial over their UTF-16 code units,
     * reduced modulo size * (size - 1): `getAddr` only looks at the key modulo
     * `size` and modulo `size - 1`, and both are determined by that residue.
     * Keeping the value reduced avoids the precision loss (and eventual
     * Infinity) of the unreduced sum. The arithmetic stays exact as long as
     * size * (size - 1) * 65536 is below 2 ** 53.
     *
     * Numbers are truncated to an integer; NaN and +/-Infinity become 0. Any
     * other type is reported with `console.error` and processed as 0.
     *
     * @param {string|number} rawKey
     * @returns {number} Integer key for `getAddr`.
     */
    processKey(rawKey) {
        const type = Object.prototype.toString.call(rawKey).slice(8, -1);
        if (type === 'String') {
            const modulus = Math.max(this.size * (this.size - 1), 1);
            let result = 0;
            let power = 1 % modulus;
            for (let j = 0; j < rawKey.length; ++j) {
                result = (result + rawKey.charCodeAt(j) * power) % modulus;
                power = (power * 128) % modulus;
            }
            return result;
        }
        if (type === 'Number') {
            const numeric = Number(rawKey);
            return Number.isFinite(numeric) ? Math.trunc(numeric) : 0;
        }
        console.error("key with unsupported type!");
        return 0;
    }

    /**
     * @param {string|number} key
     * @returns {*} The stored value, or `undefined` when the key is absent.
     */
    get(key) {
        const processedKey = this.processKey(key);
        for (let i = 0; i < this.size; ++i) {
            const addr = this.getAddr(processedKey, i);
            const stat = this.status[addr];
            // A never-used slot ends the probe sequence: the key cannot be
            // stored any further along it.
            if (stat === 0) {
                return undefined;
            }
            if (stat === 2) {
                console.log('Some element has been deleted here, we ignore it and probe continuely.');
                continue;
            }
            const elem = this.table[addr];
            if (elem[0] === key) {
                return elem[1];
            }
        }
        return undefined;
    }

    /**
     * Inserts the pair, or overwrites the value if the key is already stored.
     * Logs 'Hash table overflow!' when the probe sequence offers no free slot.
     *
     * @param {string|number} key
     * @param {*} value
     */
    set(key, value) {
        const processedKey = this.processKey(key);
        // First reusable slot seen (tombstone or empty). It is only written
        // after the probe shows that the key is not stored further along;
        // inserting into a tombstone right away would duplicate the key.
        let freeAddr = -1;
        for (let i = 0; i < this.size; ++i) {
            const addr = this.getAddr(processedKey, i);
            const stat = this.status[addr];
            if (stat === 1) {
                const elem = this.table[addr];
                // Occupied by the same key: update in place.
                if (elem[0] === key) {
                    elem[1] = value;
                    return;
                }
                // Occupied by another key: keep probing.
                continue;
            }
            if (freeAddr === -1) {
                freeAddr = addr;
            }
            // A never-used slot proves the key is absent.
            if (stat === 0) {
                break;
            }
        }
        if (freeAddr === -1) {
            console.error('Hash table overflow!');
            return;
        }
        this.status[freeAddr] = 1;
        this.table[freeAddr] = [key, value];
    }

    /**
     * Removes the key, leaving a tombstone so that later probes still reach
     * entries stored past this slot.
     *
     * @param {string|number} key
     * @returns {boolean} `true` if an entry was removed, `false` if the key was absent.
     */
    remove(key) {
        const processedKey = this.processKey(key);
        for (let i = 0; i < this.size; ++i) {
            const addr = this.getAddr(processedKey, i);
            const stat = this.status[addr];
            if (stat === 0) {
                return false;
            }
            if (stat === 1 && this.table[addr][0] === key) {
                this.status[addr] = 2;
                this.table[addr] = null;
                return true;
            }
        }
        return false;
    }
}

// Demo of the open-addressing table: insert four keys, read them back, then
// remove one and look up the key that collided with it.
const MyTable = new HashTable();
MyTable.set("ABC", 1);
MyTable.set("World", 2);
MyTable.set(2004, 1979);
MyTable.set(126, 2024);  // 126 collides with 'ABC' (same first probe slot)
console.log(MyTable.get('World'));  // 2
console.log(MyTable.get('ABC'));  // 1
console.log(MyTable.get(2004));  // 1979
console.log(MyTable.get(126));  // 2024
MyTable.remove('ABC');
console.log(MyTable.get(126));  // get() logs its deleted-slot notice for the tombstone, then this prints 2024