哈希、树、图、堆
哈希函数
- O(1)的时间复杂度,一般选择好的hash函数不太会有冲突
- Java hashMap源码解析 解读
- Java Set 和Map接口提供的方法 和基本使用 java Set 接口中的方法 继承类 HashSet TreeSet
boolean add(E e);
boolean addAll(Collection<? extends E> c);
void clear();
boolean contains(Object o);
boolean containsAll(Collection<?> c);
boolean isEmpty();
int size();
java Map接口中的方法 实现类 HashMap Hashtable TreeMap ConcurrentHashMap
void clear();
boolean containsKey(Object key);
boolean containsValue(Object value);
V get(Object key);
default V getOrDefault(Object key, V defaultValue);
boolean isEmpty();
V put(K key, V value);
V remove(Object key);
default V replace(K key, V value);
-
TreeMap等基本特性
-
java hashMap实现分析
- java8之前,通过数组指针+链表实现,java8之后做了改进,解决了循环引用问题,链表节点数大于8,会改成通过红黑树实现
- hash循环引用 导致cpu 100%问题详见 coolshell.cn/articles/96…
- 具体源码实现分析对比 可参考 (juejin.cn/post/684490…
树
* 二叉搜索树 概念:中序遍历的结果是递增的,空树也是二叉搜索树。二叉搜索树,也称二叉查找树、有序二叉树(Ordered Binary Tree)、排序二叉树(Sorted Binary Tree),是指一棵空树或者具有下列性质的二叉树:
- 左子树上所有结点的值均小于它的根结点的值;
- 右子树上所有结点的值均大于它的根结点的值;
- 以此类推:左、右子树也分别为二叉查找树。 (这就是 重复性!)
树的经典题目
- 中序遍历
递归
/**
 * Collects the values of a binary tree in in-order sequence (left subtree, node, right subtree)
 * using an explicit stack instead of recursion.
 *
 * @param root root of the tree; may be {@code null}
 * @return the node values in in-order sequence; an empty list when {@code root} is {@code null}
 */
public List<Integer> inorderTraversal(TreeNode root) {
    List<Integer> visited = new ArrayList<>();
    Stack<TreeNode> pending = new Stack<>();
    TreeNode current = root;
    while (current != null || !pending.empty()) {
        if (current != null) {
            // Keep descending to the left, remembering every ancestor on the way down.
            pending.push(current);
            current = current.left;
        } else {
            // Nothing further to the left: emit the most recent ancestor, then go right.
            TreeNode node = pending.pop();
            visited.add(node.val);
            current = node.right;
        }
    }
    return visited;
}
- 使用中序遍历 求二叉搜索树中第 K 小的元素
/**
 * Returns the value of the k-th node visited by an in-order walk of the tree, which for a
 * binary search tree is its k-th smallest value (1-based).
 *
 * <p>The walk uses an explicit stack and stops as soon as the k-th node is reached.
 *
 * @param root root of the tree
 * @param k 1-based rank of the wanted value
 * @return the value of the k-th node in in-order sequence
 * @throws IllegalArgumentException if {@code k} is less than 1 or greater than the number of
 *     nodes (previously this surfaced as a bare {@code NullPointerException})
 */
public int kthSmallest(TreeNode root, int k) {
    if (k < 1) {
        throw new IllegalArgumentException("k must be at least 1, got " + k);
    }
    Stack<TreeNode> stack = new Stack<>();
    TreeNode current = root;
    int remaining = k;
    while (current != null || !stack.isEmpty()) {
        while (current != null) {
            stack.push(current);
            current = current.left;
        }
        current = stack.pop();
        if (--remaining == 0) {
            return current.val;
        }
        current = current.right;
    }
    // The whole tree was visited without reaching rank k.
    throw new IllegalArgumentException("k = " + k + " exceeds the number of nodes in the tree");
}
- 使用中序遍历 判断一棵树是否是二叉搜索树
/**
 * Checks whether a binary tree is a binary search tree by walking it in-order with an explicit
 * stack and verifying that each visited value is strictly greater than the previous one.
 *
 * @param root root of the tree; a {@code null} (empty) tree is considered valid
 * @return {@code true} if the in-order sequence is strictly increasing, {@code false} otherwise
 */
public boolean isValidBST(TreeNode root) {
    Stack<TreeNode> ancestors = new Stack<>();
    TreeNode previous = null;
    TreeNode current = root;
    while (current != null || !ancestors.isEmpty()) {
        if (current != null) {
            // Descend left first; the node itself is examined on the way back up.
            ancestors.push(current);
            current = current.left;
            continue;
        }
        current = ancestors.pop();
        // Equal values also violate the ordering, hence <= rather than <.
        if (previous != null && current.val <= previous.val) {
            return false;
        }
        previous = current;
        current = current.right;
    }
    return true;
}
堆
- 堆的实现方式有多种 包括二叉堆、Fibonacci 堆
- 二叉堆 可以用数组实现,插入和删除都是O(logN) 取元素O(1) 二叉堆的性质:
- 是一个完全二叉树:除最底层外每一层都是满的,且最底层的节点从左到右连续排列
- 大顶堆的话 树的任意节点的值 >= 子节点的值 根据二叉堆的性质,一般用数组实现 java中可以直接用PriorityQueue(默认是小顶堆,大顶堆需要传入逆序的 Comparator)
堆可以用数组实现,该数组从逻辑上讲就是一个堆结构,我们用简单的公式来描述一下堆的定义就是:
大顶堆:arr[i] >= arr[2i+1] && arr[i] >= arr[2i+2]
小顶堆:arr[i] <= arr[2i+1] && arr[i] <= arr[2i+2]
-
二叉堆的使用场景 (求取 Top K 问题;通过建立1个大顶堆存储较小的一半元素、1个小顶堆存储较大的一半元素来求中位数)
-
二叉堆的一个实现代码
import java.util.Arrays;
import java.util.NoSuchElementException;
/**
 * Array-backed binary max-heap of {@code int} values.
 *
 * <p>The element at index {@code i} has its parent at {@code (i - 1) / 2} and its children at
 * {@code 2 * i + 1} and {@code 2 * i + 2}. Every parent is greater than or equal to its
 * children, so the maximum is always at index 0.
 */
public class BinaryHeap {
    /** Number of children per node. */
    private static final int ARITY = 2;

    private final int[] heap;
    private int heapSize;

    /**
     * Creates an empty heap.
     *
     * <p>The backing array has {@code capacity + 1} slots, so {@link #isFull()} becomes
     * {@code true} only after {@code capacity + 1} insertions. Unused slots are pre-filled
     * with {@code -1}.
     *
     * @param capacity requested capacity
     */
    public BinaryHeap(int capacity) {
        heapSize = 0;
        heap = new int[capacity + 1];
        Arrays.fill(heap, -1);
    }

    /** Returns {@code true} when the heap holds no elements. */
    public boolean isEmpty() {
        return heapSize == 0;
    }

    /** Returns {@code true} when every slot of the backing array is in use. */
    public boolean isFull() {
        return heapSize == heap.length;
    }

    private int parent(int i) {
        return (i - 1) / ARITY;
    }

    /** Index of the k-th child (1-based) of node {@code i}. */
    private int kthChild(int i, int k) {
        return ARITY * i + k;
    }

    /**
     * Inserts a new element into the heap.
     * Complexity: O(log N), since in the worst case the element travels up to the root.
     *
     * @param x value to insert
     * @throws NoSuchElementException if the heap is full
     */
    public void insert(int x) {
        if (isFull()) {
            throw new NoSuchElementException("Heap is full, No space to insert new element");
        }
        heap[heapSize] = x;
        heapSize++;
        heapifyUp(heapSize - 1);
    }

    /**
     * Deletes the element stored at index {@code x} of the heap array and returns it.
     * Complexity: O(log N).
     *
     * <p>The last element is moved into the freed slot. It may be larger than the slot's parent
     * (when it comes from a different subtree) or smaller than the slot's children, so it is
     * sifted up or down as required.
     *
     * @param x index (not value) of the element to delete; {@code 0} deletes the maximum
     * @return the deleted value
     * @throws NoSuchElementException if the heap is empty
     * @throws ArrayIndexOutOfBoundsException if {@code x} is not in {@code [0, size)}
     */
    public int delete(int x) {
        if (isEmpty()) {
            throw new NoSuchElementException("Heap is empty, No element to delete");
        }
        if (x < 0 || x >= heapSize) {
            throw new ArrayIndexOutOfBoundsException(
                "Index " + x + " is outside the heap of size " + heapSize);
        }
        int removed = heap[x];
        heapSize--;
        if (x == heapSize) {
            // The last element itself was removed; nothing has to move.
            return removed;
        }
        heap[x] = heap[heapSize];
        if (x > 0 && heap[x] > heap[parent(x)]) {
            heapifyUp(x);
        } else {
            heapifyDown(x);
        }
        return removed;
    }

    /**
     * Moves the element at index {@code i} up until its parent is not smaller than it.
     */
    private void heapifyUp(int i) {
        int value = heap[i];
        while (i > 0 && value > heap[parent(i)]) {
            heap[i] = heap[parent(i)];
            i = parent(i);
        }
        heap[i] = value;
    }

    /**
     * Moves the element at index {@code i} down until neither child is larger than it.
     */
    private void heapifyDown(int i) {
        int value = heap[i];
        while (kthChild(i, 1) < heapSize) {
            int child = maxChild(i);
            if (value >= heap[child]) {
                break;
            }
            heap[i] = heap[child];
            i = child;
        }
        heap[i] = value;
    }

    /**
     * Returns the index of the larger child of node {@code i}. The caller guarantees that the
     * left child exists; the right child is only considered when it lies inside the heap.
     */
    private int maxChild(int i) {
        int leftChild = kthChild(i, 1);
        int rightChild = kthChild(i, 2);
        if (rightChild >= heapSize) {
            return leftChild;
        }
        return heap[leftChild] > heap[rightChild] ? leftChild : rightChild;
    }

    /**
     * Prints all elements of the heap in array order.
     */
    public void printHeap() {
        System.out.print("\nHeap = ");
        for (int i = 0; i < heapSize; i++) {
            System.out.print(heap[i] + " ");
        }
        System.out.println();
    }

    /**
     * Returns the max element of the heap without removing it.
     * Complexity: O(1).
     *
     * @throws NoSuchElementException if the heap is empty
     */
    public int findMax() {
        if (isEmpty()) {
            throw new NoSuchElementException("Heap is empty.");
        }
        return heap[0];
    }

    public static void main(String[] args) {
        BinaryHeap maxHeap = new BinaryHeap(10);
        maxHeap.insert(10);
        maxHeap.insert(4);
        maxHeap.insert(9);
        maxHeap.insert(1);
        maxHeap.insert(7);
        maxHeap.insert(5);
        maxHeap.insert(3);
        maxHeap.printHeap();
        maxHeap.delete(5);
        maxHeap.printHeap();
        maxHeap.delete(2);
        maxHeap.printHeap();
    }
}
堆排序Heap Sort 堆排序(Heapsort)是指利用堆这种数据结构所设计的一种排序算法。堆积是一个近似完全二叉树的结构,并同时满足堆积的性质:即子结点的键值或索引总是小于(或者大于)它的父节点。堆排序可以说是一种利用堆的概念来排序的选择排序。分为两种方法: 大顶堆:每个节点的值都大于或等于其子节点的值,在堆排序算法中用于升序排列; 小顶堆:每个节点的值都小于或等于其子节点的值,在堆排序算法中用于降序排列; 堆排序的平均时间复杂度为 Ο(nlogn)。
算法步骤
- 创建一个堆 H[0……n-1];
- 把堆首(最大值)和堆尾互换;
- 把堆的尺寸缩小 1,并调用 shift_down(0),目的是把新的数组顶端数据调整到相应位置;
- 重复步骤 2,直到堆的尺寸为 1
java 代码实现
// Java program for implementation of Heap Sort
/**
 * In-place heap sort for {@code int} arrays, producing ascending order.
 */
public class HeapSort {

    /**
     * Sorts {@code arr} in place in ascending order.
     *
     * <p>The array is first rearranged into a max-heap; then the current maximum is repeatedly
     * swapped to the end of the shrinking heap region and the heap is repaired.
     *
     * @param arr array to sort
     */
    public void sort(int arr[]) {
        final int length = arr.length;

        // Phase 1: build the max-heap, sifting down every internal node from the last one up.
        int node = length / 2 - 1;
        while (node >= 0) {
            heapify(arr, length, node);
            node--;
        }

        // Phase 2: move the root (maximum) behind the heap, shrink the heap, repair the root.
        for (int end = length - 1; end > 0; end--) {
            int max = arr[0];
            arr[0] = arr[end];
            arr[end] = max;
            heapify(arr, end, 0);
        }
    }

    /**
     * Restores the max-heap property for the subtree rooted at index {@code i}, assuming the
     * subtrees below it are already heaps.
     *
     * @param arr array holding the heap
     * @param n number of leading elements of {@code arr} that belong to the heap
     * @param i index of the subtree root to sift down
     */
    void heapify(int arr[], int n, int i) {
        int root = i;
        while (true) {
            int left = 2 * root + 1;
            int right = 2 * root + 2;
            int largest = root;
            if (left < n && arr[left] > arr[largest]) {
                largest = left;
            }
            if (right < n && arr[right] > arr[largest]) {
                largest = right;
            }
            if (largest == root) {
                return;
            }
            int displaced = arr[root];
            arr[root] = arr[largest];
            arr[largest] = displaced;
            // Continue with the child that received the smaller value.
            root = largest;
        }
    }

    /** Prints the elements of {@code arr} separated by spaces, followed by a newline. */
    static void printArray(int arr[]) {
        for (int value : arr) {
            System.out.print(value + " ");
        }
        System.out.println();
    }

    /** Demo: sorts a small sample array and prints the result. */
    public static void main(String args[]) {
        int[] sample = {12, 11, 13, 5, 6, 7};
        HeapSort sorter = new HeapSort();
        sorter.sort(sample);
        System.out.println("Sorted array is");
        printArray(sample);
    }
}
图
图的表示法 Graph(V,E) Vertex edge 节点和边 V - Vertex 点 1.点入度和出度 2.点与点是否连通
E - Edge 边 1.有向 无向 连接线 2.边长 权重
图的表示
- 邻接矩阵
- 邻接链表
图的算法,伪代码 dfs
visited = set()  # the biggest difference from DFS on a tree: graphs need a visited set


def dfs(node, visited):
    """Depth-first traversal template for a graph.

    Args:
        node: current node; must be hashable and expose ``children()`` returning
            its neighbouring nodes.
        visited: set of nodes already reached; it is updated in place.
    """
    if node in visited:  # terminator
        # already visited
        return
    visited.add(node)
    # process current node here
    ...
    for next_node in node.children():
        if next_node not in visited:
            dfs(next_node, visited)
bfs
def bfs(graph, start, end):
    """Breadth-first traversal template for a graph.

    Nodes are handed to ``process`` in breadth-first order starting from
    ``start``; neighbours come from ``generate_related_nodes``. Both are
    placeholders to be supplied by the concrete problem.

    Args:
        graph: the graph being searched (not consulted by the template itself;
            neighbour lookup is delegated to ``generate_related_nodes``).
        start: node to start from; nodes must be hashable.
        end: target node (not consulted by the template; stop conditions belong
            in the problem-specific code).
    """
    from collections import deque

    visited = {start}  # the biggest difference from BFS on a tree
    queue = deque([start])
    while queue:
        node = queue.popleft()  # FIFO order is what makes this breadth-first
        process(node)
        for next_node in generate_related_nodes(node):
            # Mark on enqueue so a node is never queued twice, even in cyclic graphs.
            if next_node not in visited:
                visited.add(next_node)
                queue.append(next_node)
Python Dijkstra算法
import heapq
# Sample weighted graph as an adjacency dict: graph[u][v] is the weight of edge u -> v.
# Every edge is listed in both directions with the same weight.
graph = {
    "A": {"B": 5, "C": 1},
    "B": {"A": 5, "C": 2, "D": 1},
    "C": {"A": 1, "B": 2, "D": 4, "E": 8},
    "D": {"B": 1, "C": 4, "E": 3, "F": 6},
    "E": {"C": 8, "D": 3},
    "F": {"D": 6},
}
class Dijkstra:
    """Single-source shortest paths over an adjacency-dict graph, using a binary heap."""

    def init_distance(self, graph, start):
        """Return the initial distance table: 0 for ``start``, infinity for every other vertex."""
        distance = {start: 0}
        for key in graph.keys():
            if key != start:
                distance[key] = float('inf')
        return distance

    def dijkstra(self, graph, start):
        """Compute shortest distances from ``start`` to every vertex of ``graph``.

        Args:
            graph: mapping ``vertex -> {neighbour: edge_weight}``.
            start: source vertex. Falsy labels such as ``0`` are accepted as
                long as they are vertices of ``graph``.

        Returns:
            ``(distance, parent)`` where ``distance[v]`` is the shortest known
            distance from ``start`` (``inf`` if unreachable) and ``parent[v]``
            is the predecessor of ``v`` on that path (``None`` for ``start``).
            Returns ``None`` when ``graph`` is empty or ``start`` is a falsy
            value that is not a vertex.
        """
        # A falsy label (0, "") is a legitimate vertex; only reject it when it
        # is not actually in the graph.
        if not graph or (not start and start not in graph):
            return None
        distance = self.init_distance(graph, start)
        parent = {start: None}
        seen = set()
        pqueue = [(0, start)]
        while pqueue:
            cur_distance, cur_node = heapq.heappop(pqueue)
            if cur_node in seen:
                # Stale entry: this vertex was already settled with a shorter distance.
                continue
            seen.add(cur_node)
            for node, dist in graph[cur_node].items():
                if node in seen:
                    continue
                candidate = cur_distance + dist
                if distance[node] > candidate:
                    distance[node] = candidate
                    parent[node] = cur_node
                    heapq.heappush(pqueue, (candidate, node))
        return distance, parent
if __name__ == '__main__':
    # Demo: shortest distances and predecessor map from vertex "A" of the sample graph.
    solver = Dijkstra()
    distances, predecessors = solver.dijkstra(graph, "A")
    print(distances)
    print(predecessors)
有向图邻接表实现
import heapq
import math
class GraphError(Exception):
    """Error raised for invalid graph operations."""
# Directed weighted graph (network)
class GraphDI:
    """Directed weighted graph stored as an adjacency dict: ``graph[start][end] = weight``."""

    def __init__(self, graph=None):
        """Wrap ``graph`` (used by reference, not copied) or start with an empty graph.

        Args:
            graph: optional mapping ``vertex -> {neighbour: weight}``.
        """
        # A fresh dict per instance: a ``{}`` default would be one dict shared by
        # every GraphDI created without an argument.
        self._graph = {} if graph is None else graph
        self._vnum = len(self._graph)

    def _invalid(self, vertex):
        """Return True when ``vertex`` IS present in the graph (despite the name)."""
        return vertex in self._graph

    def add_vertex(self, vertex):
        """Add an isolated vertex.

        Raises:
            GraphError: if the vertex already exists.
        """
        if self._invalid(vertex):
            raise GraphError("添加顶点失败,已经有该顶点。")
        self._graph[vertex] = {}
        self._vnum += 1

    def add_edge(self, start, end, val):
        """Add (or overwrite) the edge ``start -> end`` with weight ``val``.

        Raises:
            GraphError: if either endpoint is not a vertex of the graph.
        """
        if not self._invalid(start) or not self._invalid(end):
            # format() keeps the message intact for non-string vertices as well.
            raise GraphError("不存在{}或者{}这样的顶点".format(start, end))
        self._graph[start][end] = val

    def get_edge(self, start, end):
        """Return the weight of the edge ``start -> end``.

        Raises:
            GraphError: if either endpoint is not a vertex of the graph.
            KeyError: if both vertices exist but the edge does not.
        """
        if not self._invalid(start) or not self._invalid(end):
            raise GraphError("不存在{}或者{}这样的顶点".format(start, end))
        return self._graph[start][end]

    def get_vertexNum(self):
        """Return the number of vertices currently in the graph."""
        return len(self._graph)

    def get_outEdge(self, vertex):
        """Return the ``{neighbour: weight}`` dict of edges leaving ``vertex``.

        Raises:
            GraphError: if ``vertex`` is not in the graph.
        """
        if not self._invalid(vertex):
            raise GraphError("不存在{}这样的顶点".format(vertex))
        return self._graph[vertex]

    def get_inEdge(self, vertex):
        """Return ``{predecessor: weight}`` for edges entering ``vertex``.

        Edges whose weight is ``None`` are ignored.

        Raises:
            GraphError: if ``vertex`` is not in the graph.
        """
        if not self._invalid(vertex):
            raise GraphError("不存在{}这样的顶点".format(vertex))
        result = {}
        for start, edges in self._graph.items():
            if vertex in edges and edges[vertex] is not None:
                result[start] = edges[vertex]
        return result

    # Topological sort
    def topological_sort(self):
        """Return the vertices in a topological order, or ``False`` if the graph has a cycle.

        Vertices whose in-degree has dropped to zero are kept on a stack, so the
        most recently freed vertex is emitted first.
        """
        indegree = {}  # in-degree table
        zerov = []     # stack of vertices with in-degree 0 that are not yet processed
        for vetx in self._graph:
            indegree[vetx] = len(self.get_inEdge(vetx))
            if indegree[vetx] == 0:
                zerov.append(vetx)
        topo = []      # resulting order
        while zerov:
            vi = zerov.pop()
            topo.append(vi)
            # Emitting vi removes its outgoing edges; successors that reach
            # in-degree 0 become ready for processing.
            for vj in self.get_outEdge(vi).keys():
                indegree[vj] -= 1
                if indegree[vj] == 0:
                    zerov.append(vj)
        if len(topo) < self.get_vertexNum():  # some vertices never became free: cycle
            return False
        return topo

    # Critical path
    def criticalPath(self, delay=0):
        """Compute event times and critical activities of the graph seen as an AOE network.

        Args:
            delay: slack added to the latest time of every event.

        Returns:
            ``(result, cp)`` where ``result[v] = (earliest, latest)`` event time
            of vertex ``v`` and ``cp`` is the list of ``(start, end)`` edges
            whose earliest and latest start times coincide. An empty graph
            yields ``({}, [])``.

        Raises:
            GraphError: if the graph contains a directed cycle.
        """
        topo = self.topological_sort()
        # Only ``False`` signals a cycle; an empty list is just an empty graph.
        if topo is False:
            raise GraphError("存在有向环!")
        count = len(topo)
        position = {vertex: i for i, vertex in enumerate(topo)}  # O(1) index lookup

        # Earliest event times: longest path from the sources, in topological order.
        ve = [0] * count
        for i, start in enumerate(topo):
            for node, w in self.get_outEdge(start).items():
                j = position[node]
                if ve[j] < ve[i] + w:
                    ve[j] = ve[i] + w

        # Latest event times: seeded with the largest earliest time (the project
        # finish), then tightened in reverse topological order.
        finish = (max(ve) if ve else 0) + delay
        vl = [finish] * count
        for i in reversed(range(count)):
            for node, w in self.get_inEdge(topo[i]).items():
                j = position[node]
                if vl[j] > vl[i] - w:
                    vl[j] = vl[i] - w

        # An activity is critical when its earliest and latest start times are equal.
        cp = []
        for i, start in enumerate(topo):
            for node, w in self.get_outEdge(start).items():
                j = position[node]
                e = ve[i]                   # earliest start of activity <start, node>
                l = vl[j] - w - delay       # latest start of activity <start, node>
                if e == l:
                    cp.append((start, node))

        result = {vertex: (ve[i], vl[i]) for i, vertex in enumerate(topo)}
        return result, cp

    def __str__(self):
        """Return one ``(vertex, out_edges)`` tuple per line."""
        return "".join(str(kv) + "\n" for kv in self._graph.items())