开启掘金成长之旅！这是我参与「掘金日新计划 · 12 月更文挑战」的第16天，点击查看活动详情

前缀树

前缀树实现

前缀树的优点：

为什么我们还需要 Trie 树呢？尽管哈希表可以在 O(1) 时间内寻找键值，却无法高效地完成以下操作：

找到具有同一前缀的全部键值。
按词典序枚举字符串的数据集

Trie 树优于哈希表的另一个理由是，随着哈希表大小增加，会出现大量的冲突，时间复杂度可能增加到 O(n)，其中 n 是插入的键的数量。与哈希表相比，Trie 树在存储多个具有相同前缀的键时可以使用较少的空间。此时 Trie 树只需要 O(m) 的时间复杂度，其中 m 为键长。而在平衡树中查找键值需要 O(mlogn) 时间复杂度。

 class Trie {
 public:
     /** Initialize your data structure here. */
     Trie():root_(new TreeNode) { }
 
     ~Trie() { destroy(root_); }
     
     /** Inserts a word into the trie. */
    
     void insert(const std::string& word) {
         if(word.empty()) return;
 
         TreeNode* curr = root_;
         for(const char& ch : word) { 
             size_t index = ch -'a';
             if(curr->branchs[index] == nullptr) 
             {
                 curr->branchs[index] = new TreeNode;
             }
             curr = curr->branchs[index];
         }
         curr->end = true; 
     }
     
     /** Returns if the word is in the trie. */
     bool search(const std::string& word) {
         if(word.empty()) return true; 
 
         TreeNode* curr = root_;
         for(const char& ch : word) { 
             int index = ch - 'a';
             if(curr->branchs[index] == nullptr) 
             {
                 return false;
             }
 
             curr = curr->branchs[index];
         }
 
         return curr->end;
     }
     
     /** Returns if there is any word in the trie that starts with the given prefix. */
     bool startsWith(const std::string& prefix) {
       if(prefix.empty()) return true;
 
       TreeNode* curr = root_;
       for(const char& ch : prefix) { 
         int index = ch - 'a';
         if(curr->branchs[index] == nullptr) 
         {
             return false;
         }
 
         curr = curr->branchs[index];
       }
 
       return true;
     }
 
 private:
     struct TreeNode { 
       bool end; // 是否是某个字符串的结尾标志
       std::array<TreeNode*, 26> branchs;
 
       TreeNode(bool end=false): end(end)
       { 
           branchs.fill(nullptr);
       }
     };
 
   void destroy(TreeNode* node) {
     if(node ==nullptr) return;
 
     for(TreeNode* entry : node->branchs)
     {
      destroy(entry); 
     } 
 
     delete node;
     node = nullptr;
   }
     
   TreeNode* root_;
 };

最大异或和子数组

 题目：给定一个数组，求子数组的最大异或和。其中，一个数组的异或和定义为数组中所有的数异或起来的结果。

常规解法

涉及子数组的题目，总体思路一样：以i结尾的子数组异或和，再将i从0 < i <= n遍历一遍，求出所有位置的子数组异或和，最大值就在其中。但是如此的时间复杂度分析：

 以i为例：
     i-1~i
     i-2~i
     ...
     0 ~ i

需要求出i个子数组的异或和，每个子数组异或和的时间复杂度为O(n)，因此仅仅求一个位置i的子数组异或和就高达O(n^2)的复杂度。外层再将i从0 < i <= n遍历一遍，总的时间复杂度为O(n^3)。

 int maxXor(int* arr, int length) {
     int outside_xor =0;
     int max_=0;
 
     for(int i=0; i < length; ++i) { 
         outside_xor ^= arr[i];   // 记录以 i 位置结束的 0~i的数组异或和
         max_ = std::max(max_, outside_xor);
         // 求取以i位置结尾的所有子数组异或和
         for(int j=0; j < i; ++j) { 
             int inside_xor = 0;
             for(int k=0; k < j; ++k) { 
                 inside_xor ^= arr[k];                //  0~ j-1 位置的异或和 
                 int x = outside_xor ^ inside_xor;    // j ~ i 位置的异或和，其中 0 <= j <i，这就是以i结尾所有子数组异或和
                 if(max_ < x) max_ = x;
             }
         }
     }
     return max_;
 }

如果要改进、降低算法的复杂度，可以使用一个数组记录下之前重复计算的值。比如这题中所有以j结尾的0~j的子数组异或和都在被重复计算，那么就可以用一个数组把它们存储起来，时间复杂度可以降低为O(n^2)。

 int maxXor(int* arr, int length) {
     int outside_xor =0;
     int max_=0;
     int xorsum[length]; // 以 i结束的 0~i的所有异或和
 
     for(int i=0; i < length; ++i) { 
         outside_xor ^= arr[i]; 
         max_ = std::max(max_, outside_xor);
         xorsum[i] = outside_xor;   // 存储起来
         for(int j=0; j < i; ++j) { 
             int x =  xorsum[j] ^ outside_xor; // 这里以 0~j区间的异或和，之前已经计算好，直接使用
             if(max_ < x) max_ = x;
         }
     }
     return max_;
 }

前缀树

将每个数字变成二进制用一个二叉树来表示，所有的二叉树都是左子节点是0，右子节点是1。整个树根节点不表示，只是串联左右子树。

 0 ~ 1的异或和
 0 ~ 2的异或和
 ...
 0 ~ i-1的异或和

在求0~i区间最大异或和子数组时，前面的各个位置信息都已经具备了。因此可以直接使用：将 0 ~ i 前面的异或和生成前缀树，在求i位置arr[i]的时候，有如下限制：

第一位是符号位，异或结果的该位应该是0，表示正数
除了符号位外，在前缀树中，能选择1就选择1。如果没有1，只能选0。

满足以上两个，这样才能保持最大。说的可能不太好理解。

 比如：
  0 ~ 0的异或和是 0001 
  0 ~ 1的异或和是 0101
  0 ~ 2的异或和是 1011
  0 ~ 3的异或和是 1111
  求以位置3结尾的最大异或和子数组 
  
                         root 
                       /     \
                      0       1
                     / \     /
                    0   1   0
                   /    /    \
                  0    0      1
                  \     \      \
                   1     1      1

以0~3位置的异或和1111来确定以位置3结尾的最大异或和子数组。

第一位符号位是1：期望的是0，上面的前缀树中第二层存在 root - 1
第二位是1，如果要让异或和最大，这位异或后的结果也是1，那么就期待0。前缀树中也存在 root - 1 - 0
同理，第三位也期望是1，需要与0异或，但是此时不存在0，只能选择1
同理，第四位也只能与1异或

最终的异或 1011 ^ 1111 = 0100，即是4。由于 1011 是 0 ~ 2 的异或和，它与 0 ~ 3 的异或和相异或后只剩下位置3这一个元素，也就是最大异或子数组区间：[3,3]。代码及其解释如下：

 public class Solution { 
 
     public static class Node {
         public Node[] nexts = new Node[2]; // 两个节点，一个指向0，一个指向1，
     }
 
     public static class NumTrie {
         public Node head = new Node();
 
         // 用于生成前缀树
         public void add(int num) {
             Node cur = head;
             for (int move = 31; move >= 0; move--) {
                 // 从高位到低位 依次取出每一位的二进制数
                 int path = ((num >> move) & 1);
                 // cur.nexts[path] = cur.nexts[path] == null ? new Node() : cur.nexts[path];
                 if(cur.nexts[path] == null)
                     cur.nexts[path] = new Node();
                     
                 cur = cur.nexts[path];
             }
         }
 
         // 根据前缀树查询异或最大值
         public int maxXor(int num) {
             Node cur = head;
             int res = 0;
             for (int move = 31; move >= 0; move--) {
                 // 取出每位二进制数
                 int path = (num >> move) & 1;
                 // 符号位，需要和符号位保持一致，才能异或为0。非符号位期望是1 
                 // 非符号位置，需要异或的结果是1，因此 path ^1，最低为位肯定是1
                 int best = move == 31 ? path : (path ^ 1);
                 // 这里没有创建结点，就是查询，选择道路
                 best = cur.nexts[best] != null ? best : (best ^ 1); // 实际的值
                 res |= (path ^ best) << move; 
                 cur = cur.nexts[best];
             }
             return res;
         }
 
     }
 
     public static int maxXorSubarray(int[] arr) {
         if (arr == null || arr.length == 0) {
             return 0;
         }
         int max = Integer.MIN_VALUE;
         int eor = 0;
         NumTrie numTrie = new NumTrie();
         numTrie.add(0);
         for (int i = 0; i < arr.length; i++) {
             eor ^= arr[i]; //  0 ~ i 的异或和
             max = Math.max(max, numTrie.maxXor(eor)); // 求取
             numTrie.add(eor);
         }
         return max;
     }
 }

显而易见，时间复杂度是O(N)。