字典树(前缀树)Tries的C++实现

823 阅读7分钟

lexicographic search trees: tries

// 我的字典树,根据leetcode 720. 词典中最长的单词建立
#include <cstring>
#include <iostream>
/**
 * Maps a character to a child-slot index in [0, Maxnum_children).
 *
 * The character is widened through unsigned char before the modulo. On
 * platforms where plain char is signed, a byte >= 0x80 is negative and
 * `vchar % Maxnum_children` would be negative too, i.e. an out-of-bounds
 * index into the children array. Results for ASCII characters are unchanged.
 */
template<int Maxnum_children>
class Index
{
    static_assert(Maxnum_children > 0, "Maxnum_children must be positive");
public:
    /** @return the slot for vchar, always within [0, Maxnum_children). */
    int operator[](const char vchar)
    {
        return static_cast<unsigned char>(vchar) % Maxnum_children;
    }
};

/**
 * One trie node with a fixed fan-out of Maxnum_children.
 *
 * A node owns its children: destroying a node destroys the whole subtree
 * below it. Because of that ownership the node is non-copyable; a memberwise
 * copy would make two nodes delete the same children.
 */
template<int Maxnum_children>
struct Trie_node
{
    int num_children; // number of non-null entries in children
    int freq;         // how many times the word ending at this node was inserted
    Trie_node* children[Maxnum_children]; // child pointers, null when absent

    // Initializers follow declaration order; children() value-initializes
    // the array, so every slot starts out null.
    Trie_node() : num_children(0), freq(0), children() {}

    ~Trie_node()
    {
        // delete on a null pointer is a no-op, so empty slots need no test.
        // Resetting the slots afterwards is pointless: the array dies with
        // the node.
        for (int i = 0; i < Maxnum_children; ++i)
        {
            delete children[i];
        }
    }

    Trie_node(const Trie_node&) = delete;
    Trie_node& operator=(const Trie_node&) = delete;
};

template<int Maxnum_children, class Alphabetic_order>
class Trie {
public: // Add method prototypes here.
    typedef Trie_node<Maxnum_children> Node;
    typedef Trie_node<Maxnum_children>* pNode;

    //Trie() :root(new Trie_node<Maxnum_children>()),index(Alphabetic_order()) {}
    Trie() :root(new Trie_node<Maxnum_children>()) {}

    pNode get_root() const
    {
        return root;
    }

    template <typename Iterator>
    void insert(Iterator beg, Iterator end);
    void insert(const char* new_entry);

    template <typename Iterator>
    int get_freq(Iterator beg, Iterator end); // 不能是const函数??
    int get_freq(const char* target);

    template <typename Iterator>
    bool find(Iterator beg, Iterator end);
    bool find(const char* target);

    template <typename Iterator>
    bool erase(Iterator beg, Iterator end);
    bool erase(const char* target);
    template <typename Iterator>
    bool erase_node(Iterator begin, Iterator end, pNode to_delete);


private: // data members
    pNode root;
    Alphabetic_order index; // 字母对应子节点的下标值
};

/*
 * Insert the character sequence [beg, end).
 * Nodes missing along the path are created; the frequency of the node at
 * the end of the path is increased by one.
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
void Trie<Maxnum_children, Alphabetic_order>::insert(Iterator beg, Iterator end)
{
    pNode node = root;
    for (; beg != end; beg++)
    {
        // Slot in the current node that belongs to this character.
        pNode& slot = node->children[index[*beg]];
        if (slot == NULL)
        {
            slot = new Node;
            // -> binds tighter than prefix ++, so this increments the member.
            ++node->num_children;
        }
        node = slot;
    }
    ++node->freq;
}

/* Insert a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
void Trie<Maxnum_children, Alphabetic_order>::insert(const char* new_entry)
{
    const char* const last = new_entry + strlen(new_entry);
    insert(new_entry, last);
}


/*
 * Return how many times the word [beg, end) has been inserted.
 *
 * Returns 0 and writes a diagnostic to std::cerr when the path for the word
 * does not exist. A node that exists only as part of a longer word has
 * freq 0, so the result is 0 for it as well (without a diagnostic).
 *
 * Not a const member: it calls index[...], and the Index functor used with
 * this listing declares a non-const operator[].
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
int Trie<Maxnum_children, Alphabetic_order>::get_freq(Iterator beg, Iterator end)
{
    pNode node = root;
    for (; beg != end; ++beg)
    {
        node = node->children[index[*beg]];
        if (!node) // no child for the current character
        {
            // std::-qualified: an unqualified name inside a template is
            // looked up at definition time and needs a using-directive.
            std::cerr << "No such string to get frequency.\n";
            return 0; // makes find() report false
        }
    }
    // Returning "true" for mere existence would be wrong: the node may only
    // be an inner node of a longer word.
    return node->freq;
}

/* Frequency of a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
int Trie<Maxnum_children, Alphabetic_order>::get_freq(const char* target)
{
    const char* const last = target + strlen(target);
    return get_freq(target, last);
}


/* True when the word [beg, end) is stored, i.e. its frequency is positive. */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
bool Trie<Maxnum_children, Alphabetic_order>::find(Iterator beg, Iterator end)
{
    const int occurrences = get_freq(beg, end);
    return occurrences > 0;
}

/* Look up a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
bool Trie<Maxnum_children, Alphabetic_order>::find(const char* target)
{
    const char* const last = target + strlen(target);
    return find(target, last);
}

/*
 * Remove one occurrence of the word [beg, end), starting at the root.
 *
 * The result is erase_node's verdict for the root node: true only when the
 * removal left the root with freq 0 and no children. It is false when the
 * word is missing, and also when the removal succeeded but other entries
 * remain in the trie.
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
bool Trie<Maxnum_children, Alphabetic_order>::erase(Iterator beg, Iterator end)
{
    const bool root_prunable = erase_node(beg, end, root);
    return root_prunable;
}

/* Erase a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
bool Trie<Maxnum_children, Alphabetic_order>::erase(const char* target)
{
    const char* const last = target + strlen(target);
    return erase(target, last);
}

/*
 * Recursive worker for erase().
 *
 * Walks the path of [beg, end) below to_delete, decrements the frequency of
 * the final node and, on the way back, deletes every child that has become
 * an unused leaf.
 *
 * @param beg, end   remaining characters of the word to erase.
 * @param to_delete  node reached so far; never deleted by this call itself.
 * @return true when to_delete now has freq 0 and no children, so the caller
 *         may delete it; false otherwise, including when the word is not
 *         stored (a diagnostic is then written to std::cerr).
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
bool Trie<Maxnum_children, Alphabetic_order>::erase_node(Iterator beg, Iterator end, pNode to_delete)
{
    if (beg == end) // end of the word: recursion stops here
    {
        if (to_delete->freq <= 0)
        {
            std::cerr << "No such node to erase.\n";
            return false;
        }
        --to_delete->freq;
        // Only a node without children may be removed.
        return to_delete->freq == 0 && to_delete->num_children == 0;
    }

    // Reference to the slot itself so it can be cleared after deletion.
    pNode& child = to_delete->children[index[*beg]];
    if (!child)
    {
        std::cerr << "No such node to erase.\n";
        return false;
    }

    // Advance a copy instead of writing (++beg)-- inside the call: that
    // expression modified beg in one argument while another argument read
    // it, and it required a bidirectional iterator.
    Iterator next = beg;
    ++next;
    if (!erase_node(next, end, child))
    {
        return false;
    }

    // The child became an unused leaf. Clear the slot after deleting it;
    // a stale pointer would be freed again by the parent's destructor and
    // would be dereferenced by a later insert or lookup.
    delete child;
    child = nullptr;
    --to_delete->num_children;
    // If this node is now an unused leaf as well, tell the parent to delete it.
    return to_delete->freq == 0 && to_delete->num_children == 0;
}

/*
 * Small demo: prints a few index values, then exercises insert, erase,
 * find and get_freq on a 26-way trie.
 */
int main()
{
    const int maxnum_children = 26;
    // Automatic storage instead of an unmatched `new`, which leaked the node.
    Trie_node<maxnum_children> trie_node;
    Index<maxnum_children> idx;
    // The raw modulo and the Index functor print the same value.
    std::cout << 'a' % maxnum_children << ", " << idx['a'] << '\n';
    std::cout << 'b' % maxnum_children << ", " << idx['b'] << '\n';
    std::cout << 'B' % maxnum_children << ", " << idx['B'] << '\n';

    Trie<maxnum_children, Index<maxnum_children>> trie;
    trie.get_root();
    trie.insert("free");
    trie.insert("tree");
    trie.insert("tree");
    trie.erase("tree"); // frequency drops from 2 to 1, nothing is pruned
    trie.erase("tea");  // not stored yet: erase reports it on stderr
    trie.insert("tea");
    if (trie.find("tea")) std::cout << "True\n";
    else std::cout << "False\n";
    trie.insert("team");
    trie.insert("teammate");
    trie.insert("teammate");
    trie.insert("teamwork");
    std::cout << trie.get_freq("teammate") << '\n';

    return 0;
}

关于const函数

// leetcode 208
const int Maxnum_children=26;

/**
 * Maps a character to a child-slot index in [0, Maxnum_children).
 *
 * The character is widened through unsigned char first: where plain char is
 * signed, a byte >= 0x80 is negative and the plain modulo would produce a
 * negative, out-of-bounds index. Results for ASCII characters are unchanged.
 */
class Index
{
public:
    // const so that const member functions of Trie (e.g. search) can use it.
    int operator[](const char vchar) const
    {
        return static_cast<unsigned char>(vchar) % Maxnum_children;
    }
};

struct Trie_node
{
    int num_children; //子节点的个数
    int freq; // 重复单词的词频
    Trie_node* children[Maxnum_children]; //指向子节点的指针
    Trie_node() : freq(0), num_children(0)
    {
        memset(children, 0, sizeof(children));
    }
    ~Trie_node()
    {
        for (int i = 0; i < Maxnum_children; ++i)
        {
            delete children[i];
            children[i] = NULL; 
        }
    }
};

class Trie {
public:
    /** Initialize your data structure here. */
    Trie() {
        root=new Trie_node();
    }
    
    /** Inserts a word into the trie. */
    void insert(string word) {
        auto beg=word.begin();
        auto end=word.end();
        auto current=root;
        while(beg!=end)
        {
            if (!current->children[index[*beg]])
            {
                current->children[index[*beg]]=new Trie_node();
                ++current->num_children;
            }
            current = current->children[index[*beg]];
            beg++;
        }
        ++current->freq;
    }
    
    /** Returns if the word is in the trie. */
    bool search(string word) const{
        auto beg=word.begin();
        auto end=word.end();
        auto current=root;
        while(beg!=end)
        {
            if (!current->children[index[*beg]])
            {
                return false;
            }
            current = current->children[index[*beg]];
            beg++;
        }
        return current->freq>0;
    }
    
    /** Returns if there is any word in the trie that starts with the given prefix. */
    bool startsWith(string prefix) {
        auto beg=prefix.begin();
        auto end=prefix.end();
        auto current=root;
        while(beg!=end)
        {
            if (!current->children[index[*beg]])
            {
                return false;
            }
            current = current->children[index[*beg]];
            beg++;
        }
        return true;
    }
private:
    Trie_node* root;
    Index index;
};

/**
 * Your Trie object will be instantiated and called as such:
 * Trie* obj = new Trie();
 * obj->insert(word);
 * bool param_2 = obj->search(word);
 * bool param_3 = obj->startsWith(prefix);
 */

将 Index 类的 operator[] 重载声明为 const 成员函数之后,Trie 类中的 find 函数和 get_freq 函数也就可以声明为 const 成员函数。

#include<cstring>
/**
 * Maps a character to a child-slot index in [0, Maxnum_children).
 *
 * The character is widened through unsigned char before the modulo. On
 * platforms where plain char is signed, a byte >= 0x80 is negative and
 * `vchar % Maxnum_children` would be negative too, i.e. an out-of-bounds
 * index into the children array. Results for ASCII characters are unchanged.
 */
template<int Maxnum_children>
class Index
{
    static_assert(Maxnum_children > 0, "Maxnum_children must be positive");
public:
    /** const, so const member functions of the trie can call it. */
    int operator[](const char vchar) const
    {
        return static_cast<unsigned char>(vchar) % Maxnum_children;
    }
};

/**
 * One trie node with a fixed fan-out of Maxnum_children.
 *
 * A node owns its children: destroying a node destroys the whole subtree
 * below it. Because of that ownership the node is non-copyable; a memberwise
 * copy would make two nodes delete the same children.
 */
template<int Maxnum_children>
struct Trie_node
{
    int num_children; // number of non-null entries in children
    int freq;         // how many times the word ending at this node was inserted
    Trie_node* children[Maxnum_children]; // child pointers, null when absent

    // Initializers follow declaration order; children() value-initializes
    // the array, so every slot starts out null.
    Trie_node() : num_children(0), freq(0), children() {}

    ~Trie_node()
    {
        // delete on a null pointer is a no-op, so empty slots need no test.
        // Resetting the slots afterwards is pointless: the array dies with
        // the node.
        for (int i = 0; i < Maxnum_children; ++i)
        {
            delete children[i];
        }
    }

    Trie_node(const Trie_node&) = delete;
    Trie_node& operator=(const Trie_node&) = delete;
};

template<int Maxnum_children, class Alphabetic_order>
class Trie {
public: // Add method prototypes here.
    typedef Trie_node<Maxnum_children> Node;
    typedef Trie_node<Maxnum_children>* pNode;

    //Trie() :root(new Trie_node<Maxnum_children>()),index(Alphabetic_order()) {}
    Trie() :root(new Trie_node<Maxnum_children>()) {}

    pNode get_root() const
    {
        return root;
    }

    template <typename Iterator>
    void insert(Iterator beg, Iterator end);
    void insert(const char* new_entry);

    template <typename Iterator>
    int get_freq(Iterator beg, Iterator end) const;
    int get_freq(const char* target) const;

    template <typename Iterator>
    bool find(Iterator beg, Iterator end) const;
    bool find(const char* target) const;

    template <typename Iterator>
    bool erase(Iterator beg, Iterator end);
    bool erase(const char* target);
    template <typename Iterator>
    bool erase_node(Iterator begin, Iterator end, pNode to_delete);


private: // data members
    pNode root;
    Alphabetic_order index; // 字母对应子节点的下标值
};

/*
 * Insert the character sequence [beg, end).
 * Nodes missing along the path are created; the frequency of the node at
 * the end of the path is increased by one.
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
void Trie<Maxnum_children, Alphabetic_order>::insert(Iterator beg, Iterator end)
{
    pNode node = root;
    for (; beg != end; beg++)
    {
        // Slot in the current node that belongs to this character.
        pNode& slot = node->children[index[*beg]];
        if (slot == NULL)
        {
            slot = new Node;
            // -> binds tighter than prefix ++, so this increments the member.
            ++node->num_children;
        }
        node = slot;
    }
    ++node->freq;
}

/* Insert a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
void Trie<Maxnum_children, Alphabetic_order>::insert(const char* new_entry)
{
    const char* const last = new_entry + strlen(new_entry);
    insert(new_entry, last);
}


/*
 * Return how many times the word [beg, end) has been inserted.
 *
 * Returns 0 and writes a diagnostic to std::cerr when the path for the word
 * does not exist. A node that exists only as part of a longer word has
 * freq 0, so the result is 0 for it as well (without a diagnostic).
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
int Trie<Maxnum_children, Alphabetic_order>::get_freq(Iterator beg, Iterator end) const
{
    const Node* node = root;
    for (; beg != end; ++beg)
    {
        node = node->children[index[*beg]];
        if (!node) // no child for the current character
        {
            // std::-qualified: an unqualified name inside a template is
            // looked up at definition time and needs a using-directive.
            std::cerr << "No such string to get frequency.\n";
            return 0; // makes find() report false
        }
    }
    // Returning "true" for mere existence would be wrong: the node may only
    // be an inner node of a longer word.
    return node->freq;
}

/* Frequency of a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
int Trie<Maxnum_children, Alphabetic_order>::get_freq(const char* target) const
{
    const char* const last = target + strlen(target);
    return get_freq(target, last);
}


/* True when the word [beg, end) is stored, i.e. its frequency is positive. */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
bool Trie<Maxnum_children, Alphabetic_order>::find(Iterator beg, Iterator end) const
{
    const int occurrences = get_freq(beg, end);
    return occurrences > 0;
}

/* Look up a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
bool Trie<Maxnum_children, Alphabetic_order>::find(const char* target) const
{
    const char* const last = target + strlen(target);
    return find(target, last);
}

/*
 * Remove one occurrence of the word [beg, end), starting at the root.
 *
 * The result is erase_node's verdict for the root node: true only when the
 * removal left the root with freq 0 and no children. It is false when the
 * word is missing, and also when the removal succeeded but other entries
 * remain in the trie.
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
bool Trie<Maxnum_children, Alphabetic_order>::erase(Iterator beg, Iterator end)
{
    const bool root_prunable = erase_node(beg, end, root);
    return root_prunable;
}

/* Erase a NUL-terminated C string; forwards to the iterator overload. */
template<int Maxnum_children, class Alphabetic_order>
bool Trie<Maxnum_children, Alphabetic_order>::erase(const char* target)
{
    const char* const last = target + strlen(target);
    return erase(target, last);
}

/*
 * Recursive worker for erase().
 *
 * Walks the path of [beg, end) below to_delete, decrements the frequency of
 * the final node and, on the way back, deletes every child that has become
 * an unused leaf.
 *
 * @param beg, end   remaining characters of the word to erase.
 * @param to_delete  node reached so far; never deleted by this call itself.
 * @return true when to_delete now has freq 0 and no children, so the caller
 *         may delete it; false otherwise, including when the word is not
 *         stored (a diagnostic is then written to std::cerr).
 */
template<int Maxnum_children, class Alphabetic_order>
template <typename Iterator>
bool Trie<Maxnum_children, Alphabetic_order>::erase_node(Iterator beg, Iterator end, pNode to_delete)
{
    if (beg == end) // end of the word: recursion stops here
    {
        if (to_delete->freq <= 0)
        {
            std::cerr << "No such node to erase.\n";
            return false;
        }
        --to_delete->freq;
        // Only a node without children may be removed.
        return to_delete->freq == 0 && to_delete->num_children == 0;
    }

    // Reference to the slot itself so it can be cleared after deletion.
    pNode& child = to_delete->children[index[*beg]];
    if (!child)
    {
        std::cerr << "No such node to erase.\n";
        return false;
    }

    // Advance a copy instead of writing (++beg)-- inside the call: that
    // expression modified beg in one argument while another argument read
    // it, and it required a bidirectional iterator.
    Iterator next = beg;
    ++next;
    if (!erase_node(next, end, child))
    {
        return false;
    }

    // The child became an unused leaf. Clear the slot after deleting it;
    // a stale pointer would be freed again by the parent's destructor and
    // would be dereferenced by a later insert or lookup.
    delete child;
    child = nullptr;
    --to_delete->num_children;
    // If this node is now an unused leaf as well, tell the parent to delete it.
    return to_delete->freq == 0 && to_delete->num_children == 0;
}