公众号:尤而小屋
编辑:Peter
作者:Peter
大家好,我是Peter~
今天给大家分享一个网络关系的绘图神器:Networkx
官网学习地址:networkx.org/
NetworkX is a Python package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks
网络图
网络关系图是一种用于描述多个节点之间相互关系的图表,通常用于分析和展示复杂的网络数据。
网络关系图在数据分析中扮演着重要的角色,尤其是在需要展现实体间复杂联系的场景下。
本文介绍NetworkX的基础知识和如何绘制网络关系图。
注:运行结果具有一定随机性,每次运行结果可能稍有差异~
NetworkX 安装
指定版本号直接安装:
pip install networkx==2.8 # 普通安装
pip install networkx==2.8 -i http://pypi.douban.com/simple --trusted-host pypi.douban.com # 快速安装
导入库
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
基本知识
4种图形
可以利用networkx创建四种图:
- Graph:无多重边无向图
- DiGraph:无多重边有向图
- MultiGraph:有多重边无向图
- MultiDiGraph:有多重边有向图
import networkx as nx
# The four NetworkX graph classes. Each assignment rebinds G, so only the
# last object (a MultiDiGraph) is kept after this snippet runs.
G = nx.Graph()  # undirected, no parallel edges
G = nx.DiGraph()  # directed, no parallel edges
G = nx.MultiGraph()  # undirected, parallel edges allowed
G = nx.MultiDiGraph()  # directed, parallel edges allowed
添加节点和边
import networkx as nx
import matplotlib.pyplot as plt
# Build a small directed graph, then draw it.
G = nx.DiGraph()

# One node with attributes attached.
G.add_node("Z", name="point_x", weight=1)

# Several nodes at once from a list.
numbered_nodes = [1, 2, 3, 4]
G.add_nodes_from(numbered_nodes)

# A single directed edge from "x" to "y".
G.add_edge("x", "y")

# Several directed edges at once, given as (source, target) pairs.
directed_pairs = [(1, 2), (1, 3), (2, 3), (4, 1)]
G.add_edges_from(directed_pairs)

nx.draw_networkx(G, with_labels=True)
plt.show()
下面详细介绍顶点(nodes)和边(edges)的各种操作:
顶点操作
添加顶点
# Start from an empty directed graph and add four nodes, each carrying a
# "name" and a "weight" attribute.
G = nx.DiGraph()
for node_id, label, node_weight in [
    (11, "n1", 10),
    (22, "n2", 20),
    (33, "n3", 30),
    (44, "n4", 40),
]:
    G.add_node(node_id, name=label, weight=node_weight)
G.nodes  # node view
NodeView((11, 22, 33, 44))
查看顶点属性
查看顶点的具体信息:
G.nodes(data=True) # 方式1
NodeDataView({11: {'name': 'n1', 'weight': 10}, 22: {'name': 'n2', 'weight': 20}, 33: {'name': 'n3', 'weight': 30}, 44: {'name': 'n4', 'weight': 40}})
G.nodes.data() # 方式2
NodeDataView({11: {'name': 'n1', 'weight': 10}, 22: {'name': 'n2', 'weight': 20}, 33: {'name': 'n3', 'weight': 30}, 44: {'name': 'n4', 'weight': 40}})
G._node # 方式3
{11: {'name': 'n1', 'weight': 10},
22: {'name': 'n2', 'weight': 20},
33: {'name': 'n3', 'weight': 30},
44: {'name': 'n4', 'weight': 40}}
list(G.nodes)
[11, 22, 33, 44]
G._node[11] # 查看顶点11的信息
{'name': 'n1', 'weight': 10}
G._node[11]["name"] # 查看顶点11的名称
'n1'
更新顶点信息
通过赋值的方式进行更新:
G._node[11]["name"] = "n11" # 更新顶点名称,n1--->n11
再次查看顶点信息,第一个顶点的名称已经改变:
G._node
{11: {'name': 'n11', 'weight': 10},
22: {'name': 'n2', 'weight': 20},
33: {'name': 'n3', 'weight': 30},
44: {'name': 'n4', 'weight': 40}}
通过update函数进行更新:
G._node[11].update({"name":"n1"}) # 更新顶点名称,n11--->n1
G._node
{11: {'name': 'n1', 'weight': 10},
22: {'name': 'n2', 'weight': 20},
33: {'name': 'n3', 'weight': 30},
44: {'name': 'n4', 'weight': 40}}
顶点个数
G.number_of_nodes()
4
删除顶点
G.remove_node(11) # 删除某个顶点
G._node # 删除后的顶点信息
{22: {'name': 'n2', 'weight': 20},
33: {'name': 'n3', 'weight': 30},
44: {'name': 'n4', 'weight': 40}}
以列表的形式删除多个顶点:
G.remove_nodes_from([22,33]) # 删除多个顶点
G._node # 再次删除后的顶点信息
{44: {'name': 'n4', 'weight': 40}}
删除顶点属性
del G._node[44]["name"] # 删除name属性
G._node
{44: {'weight': 40}}
检查顶点是否存在
G.has_node(22)
False
G.has_node(44)
True
边的操作
图的边用来表示两个顶点之间的关系。
添加边
# 1-添加单条边
G.add_edge(2,3)
G.edges()
OutEdgeView([(2, 3)])
# 2-从列表中添加多条边
G.add_edges_from([(1,2),(1,3),(2,5)])
G.edges()
OutEdgeView([(2, 3), (2, 5), (1, 2), (1, 3)])
# 3-元组解包方式添加
e = (3,7)
G.add_edge(*e) # 元组解包
G.edges()
OutEdgeView([(2, 3), (2, 5), (3, 7), (1, 2), (1, 3)])
添加边的属性
G.add_edge(1,2,weight=5,relationship="renew") # 添加边的属性、关系等
基于三元组的形式添加边的权重:
G.add_weighted_edges_from([(1,2,0.25),(3,4,0.75),(2,4,0.5),(3,4,1)])
# 从列表中添加多条边,同时添加属性
G.add_edges_from([(1,2,{"weight":0.5}),(1,3,{"color":"blue"}),(2,5,{"weight":0.75})])
查看边的属性
G.edges(data=True) # 方式1
OutEdgeDataView([(2, 3, {}), (2, 5, {'weight': 0.75}), (2, 4, {'weight': 0.5}), (3, 7, {}), (3, 4, {'weight': 1}), (1, 2, {'weight': 0.5, 'relationship': 'renew'}), (1, 3, {'color': 'blue'})])
G.edges.data() # 方式2
OutEdgeDataView([(2, 3, {}), (2, 5, {'weight': 0.75}), (2, 4, {'weight': 0.5}), (3, 7, {}), (3, 4, {'weight': 1}), (1, 2, {'weight': 0.5, 'relationship': 'renew'}), (1, 3, {'color': 'blue'})])
list(G.edges(data=True)) # 方式3
[(2, 3, {}), (2, 5, {'weight': 0.75}), (2, 4, {'weight': 0.5}), (3, 7, {}), (3, 4, {'weight': 1}), (1, 2, {'weight': 0.5, 'relationship': 'renew'}), (1, 3, {'color': 'blue'})]
因为边是由两个顶点组成的,所以可以通过这两个顶点来查看某条边的信息:
G[1][2]
{'weight': 0.5, 'relationship': 'renew'}
G[1][3]
{'color': 'blue'}
边的个数
G.number_of_edges()
7
删除边
G.remove_edge(1,3)
此时(1,3)顶点组成的边已经被删除:
list(G.edges(data=True))
[(2, 3, {}), (2, 5, {'weight': 0.75}), (2, 4, {'weight': 0.5}), (3, 7, {}), (3, 4, {'weight': 1}), (1, 2, {'weight': 0.5, 'relationship': 'renew'})]
通过列表的形式删除多条边:
G.remove_edges_from([(2,3),(3,4)])
list(G.edges(data=True))
[(2, 5, {'weight': 0.75}), (2, 4, {'weight': 0.5}), (3, 7, {}), (1, 2, {'weight': 0.5, 'relationship': 'renew'})]
更新边的属性
G[2][5]["weight"] = 0.66 # 方式1
# G.edges[2, 5]["weight"] = 0.66 # 方式2(NetworkX 2.x 已移除 G.edge,需使用 G.edges)
list(G.edges(data=True)) # 查看结果
[(2, 5, {'weight': 0.66}), (2, 4, {'weight': 0.5}), (3, 7, {}), (1, 2, {'weight': 0.5, 'relationship': 'renew'})]
# G[1][2].update({"weight":111})
G.edges[1,2].update({"weight":111})
list(G.edges(data=True)) # 查看结果
[(2, 5, {'weight': 0.66}), (2, 4, {'weight': 0.5}), (3, 7, {}), (1, 2, {'weight': 111, 'relationship': 'renew'})]
删除边的属性
del G[1][2]["relationship"]
检查边是否存在
G.has_edge(1,2)
True
G.has_edge(2,3)
False
内置图形
# Draw the built-in Petersen graph twice, side by side: once with the default
# layout and once with a two-shell layout.
G = nx.petersen_graph()

plt.subplot(121)
nx.draw_networkx(G, with_labels=True, font_weight="bold")

plt.subplot(122)
# The Petersen graph has nodes 0-9. Nodes 5-9 form the inner shell and nodes
# 0-4 the outer shell. Listing range(11) for the second shell would place
# nodes 5-9 again on the outer shell (and name a non-existent node 10), so
# the inner shell would never appear.
shells = [range(5, 10), range(5)]
nx.draw_networkx(
    G,
    pos=nx.shell_layout(G, nlist=shells),
    with_labels=True,
    font_weight="bold",
)
plt.show()
### 保存图形的命令
# plt.savefig("result.png")
# Drawing options shared by the four layout comparisons below.
options = dict(
    node_color="yellow",
    node_size=100,
    width=5,
    linewidths=1,
    edge_color="black",
    style="-",
    font_size=10,
)

# 2x2 grid, one subplot per layout algorithm.
subax1 = plt.subplot(221)
random_pos = nx.random_layout(G)
nx.draw_networkx(G, pos=random_pos, **options)

subax2 = plt.subplot(222)
circular_pos = nx.circular_layout(G)
nx.draw_networkx(G, pos=circular_pos, **options)

subax3 = plt.subplot(223)
spectral_pos = nx.spectral_layout(G)
nx.draw_networkx(G, pos=spectral_pos, **options)

subax4 = plt.subplot(224)
shell_pos = nx.shell_layout(G)
nx.draw_networkx(G, pos=shell_pos, **options)
手动添加数据绘制网络图
# Build an undirected graph by hand: four nodes and four edges.
G = nx.Graph()

for node in (1, 2, 3, 4):
    G.add_node(node)

for start, end in ((1, 2), (2, 3), (3, 1), (3, 4)):
    G.add_edge(start, end)

# Draw it with the Fruchterman-Reingold layout.
layout = nx.fruchterman_reingold_layout(G)
nx.draw_networkx(
    G,
    with_labels=True,
    node_size=1500,
    node_color="skyblue",
    pos=layout,
)
plt.title("Basic Network Graph")
plt.show()
基于pandas的DataFrame生成网络图
模拟数据
# Simulated edge list: each row is one (parent, children) relationship.
parent_nodes = ["A", "B", "C", "D", "A", "B"]
child_nodes = ["D", "C", "E", "B", "C", "E"]
df = pd.DataFrame({"parent": parent_nodes, "children": child_nodes})
df
| parent | children | |
|---|---|---|
| 0 | A | D |
| 1 | B | C |
| 2 | C | E |
| 3 | D | B |
| 4 | A | C |
| 5 | B | E |
绘制基础图形
G = nx.from_pandas_edgelist(df,"parent","children") # 创建无向图实例对象
G
<networkx.classes.graph.Graph at 0x2ca23164050>
每次运行的图形可能不同:
# Draw the graph with node labels; no layout is passed, so draw_networkx
# chooses the positions itself.
show_labels = True
nx.draw_networkx(G, with_labels=show_labels)
plt.title("Network Graph with Networkx")
plt.show()
自定义Networkx图形外观
仍然是使用上面的数据,对图形的线条、字体等进行自定义
# Same data as above, drawn with customised edges, fonts and nodes.
G = nx.from_pandas_edgelist(df, "parent", "children")

appearance = {
    "with_labels": True,  # show node labels
    "width": 10,  # edge line width
    "edge_color": "skyblue",  # edge colour
    "style": "solid",  # edge line style
    "font_size": 20,  # label font size
    "font_color": "yellow",  # label font colour
    "font_weight": "bold",  # label font weight
    "node_size": 1500,  # node size
    "node_color": "skyblue",  # node fill colour
    "node_shape": "s",  # node marker shape (square)
    "alpha": 0.8,  # transparency
    "linewidths": 4,  # width of the node border line
}
nx.draw_networkx(G, **appearance)
plt.show()
设置背景图
设置图形的背景颜色:
# Draw on an explicit figure so its background colour can be changed.
background = "#00000F"
fig = plt.figure()
nx.draw_networkx(G, with_labels=True, node_color="skyblue", node_size=1200)
fig.set_facecolor(background)
plt.title("Change Background Color of Network Graph")
plt.show()
# To keep the background colour when saving the image:
# plt.savefig("name.png",facecolor=fig.get_facecolor(),dpi=300)
网络布局
Networkx开发出了多种算法来计算每个节点的最佳位置。
- fruchterman_reingold_layout:基于Fruchterman-Reingold力导向算法的布局(与spring_layout是同一算法)
- shell_layout:顶点在同心圆上分布
- circular_layout:顶点在一个圆环上均匀分布
- random_layout:顶点随机分布
- spectral_layout:根据图的Laplace特征向量排列顶点
- spring_layout:用Fruchterman-Reingold算法排列顶点
- kamada_kawai_layout:基于Kamada-Kawai路径长度代价函数的力导向布局算法
- planar_layout:边互不交叉的平面布局算法(仅适用于可平面图)
fruchterman_reingold_layout力导向布局
# Fruchterman-Reingold layout.
G = nx.from_pandas_edgelist(df, "parent", "children")
fr_pos = nx.fruchterman_reingold_layout(G)
nx.draw_networkx(
    G,
    with_labels=True,  # show node labels
    pos=fr_pos,
    node_size=1500,
    node_color="skyblue",
    node_shape="s",  # square markers
    alpha=0.6,  # transparency
    linewidths=10,  # width of the node border line
)
plt.show()
shell_layout 布局
# Shell layout.
G = nx.from_pandas_edgelist(df, "parent", "children")
shell_style = dict(
    with_labels=True,  # show node labels
    pos=nx.shell_layout(G),
    node_size=1500,
    node_color="skyblue",
    node_shape="s",  # square markers
    alpha=0.6,  # transparency
    linewidths=10,  # width of the node border line
)
nx.draw_networkx(G, **shell_style)
plt.show()
circular_layout环形布局
# Circular layout.
G = nx.from_pandas_edgelist(df, "parent", "children")
ring_pos = nx.circular_layout(G)
nx.draw_networkx(
    G,
    with_labels=True,  # show node labels
    pos=ring_pos,
    node_size=1500,
    node_color="skyblue",
    node_shape="s",  # square markers
    alpha=0.6,  # transparency
    linewidths=10,  # width of the node border line
)
plt.show()
random_layout随机布局
# Random layout.
G = nx.from_pandas_edgelist(df, "parent", "children")
scatter_pos = nx.random_layout(G)
nx.draw_networkx(
    G,
    with_labels=True,
    pos=scatter_pos,
    node_size=1500,
    node_color="skyblue",
)
plt.title("Random Layout")
plt.show()
spectral_layout光谱式布局
# Spectral layout.
G = nx.from_pandas_edgelist(df, "parent", "children")
spectral_pos = nx.spectral_layout(G)
nx.draw_networkx(
    G,
    with_labels=True,
    pos=spectral_pos,
    node_size=1500,
    node_color="skyblue",
)
plt.title("Spectral_Layout")
plt.show()
spring_layout弹簧布局(力导向布局)
# Spring (force-directed) layout.
G = nx.from_pandas_edgelist(df, "parent", "children")
nx.draw_networkx(
    G,
    with_labels=True,
    pos=nx.spring_layout(G),
    node_size=1500,
    node_color="skyblue",
)
# The title names the layout actually used here; the original reused the
# spectral-layout title by mistake.
plt.title("Spring Layout")
plt.show()
生成有向图
上面的图形Networkx都是生成无向图,也可以生成有向图DIRECTED GRAPH
先看使用nx.Graph()并设置arrows=True的情况:需要注意,nx.Graph()创建的仍然是无向图,arrows=True只影响边的绘制方式,并不会把图变成有向图:
# Build the graph with nx.Graph(), so G is an undirected graph.
undirected = nx.Graph()
G = nx.from_pandas_edgelist(df, "parent", "children", create_using=undirected)

# arrows=True only affects how the edges are rendered; G itself is still an
# undirected nx.Graph.
nx.draw_networkx(G, with_labels=True, arrows=True)
plt.title("Network Graph with Networkx")
plt.show()
第二种方法是使用nx.DiGraph(),直接生成有向图:
# Build a directed graph: each row becomes an edge from "parent" to
# "children".
directed = nx.DiGraph()
G = nx.from_pandas_edgelist(df, "parent", "children", create_using=directed)

nx.draw_networkx(G, with_labels=True)
plt.title("Network Graph with Networkx")
plt.show()
网络节点配色
简单地说,就是给网络节点配置不同的颜色:
df # 父子节点的数据
| parent | children | |
|---|---|---|
| 0 | A | D |
| 1 | B | C |
| 2 | C | E |
| 3 | D | B |
| 4 | A | C |
| 5 | B | E |
基于数值型数据配置节点颜色
再生成一份数据,表示所有节点的颜色:
# One numeric value per node; it is used further down to colour the nodes.
node_names = ["A", "B", "C", "D", "E"]
node_values = [20, 30, 80, 50, 5]
df1 = pd.DataFrame({"point": node_names, "value": node_values})
df1
| point | value | |
|---|---|---|
| 0 | A | 20 |
| 1 | B | 30 |
| 2 | C | 80 |
| 3 | D | 50 |
| 4 | E | 5 |
G = nx.from_pandas_edgelist(df,"parent","children",
create_using=nx.Graph() # 生成无向图
)
查看节点的顺序:
G.nodes() # 查看节点的顺序
NodeView(('A', 'D', 'B', 'C', 'E'))
df1 = df1.set_index("point") # 设置行索引
df1
| value | |
|---|---|
| point | |
| A | 20 |
| B | 30 |
| C | 80 |
| D | 50 |
| E | 5 |
改变行索引顺序:
df1 = df1.reindex(G.nodes()) # 改变行索引顺序
df1
| value | |
|---|---|
| point | |
| A | 20 |
| D | 50 |
| B | 30 |
| C | 80 |
| E | 5 |
# Colour nodes by their numeric value. df1 was reindexed to G.nodes() order,
# so the i-th colour value belongs to the i-th node.
node_colors = df1["value"].tolist()
# Alternative noted by the author: pass a float array instead, e.g.
# np.array(df1["value"].values, dtype="float32").
nx.draw_networkx(
    G,
    with_labels=True,
    node_color=node_colors,
    cmap=plt.cm.Blues,
)
plt.title("Network Graph with Networkx")
plt.show()
基于分类型数据配置节点颜色
再生成一份数据
# One categorical label per node.
node_names = ["A", "B", "C", "D", "E"]
node_groups = ["G1", "G2", "G2", "G3", "G1"]
df2 = pd.DataFrame({"point": node_names, "value": node_groups})
df2
| point | value | |
|---|---|---|
| 0 | A | G1 |
| 1 | B | G2 |
| 2 | C | G2 |
| 3 | D | G3 |
| 4 | E | G1 |
G = nx.from_pandas_edgelist(df,"parent","children",
create_using=nx.Graph() # 生成无向图
)
G.nodes() # 节点顺序
NodeView(('A', 'D', 'B', 'C', 'E'))
设置数据df2的行索引:
df2 = df2.set_index("point")
df2 = df2.reindex(G.nodes())
df2
| value | |
|---|---|
| point | |
| A | G1 |
| D | G3 |
| B | G2 |
| C | G2 |
| E | G1 |
df2["value"] = pd.Categorical(df2["value"]) # 生成分类型数据
df2["value"].cat.codes # 分类型数据的codes属性
point
A 0
D 2
B 1
C 1
E 0
dtype: int8
# Colour nodes by category: the integer category codes are mapped through the
# Set1 colormap, so nodes sharing a label share a colour.
category_codes = df2["value"].cat.codes
nx.draw_networkx(
    G,
    with_labels=True,
    node_color=category_codes,
    cmap=plt.cm.Set1,
    node_size=1200,
)
plt.title("Network Graph with Networkx")
plt.show()
可以发现:相同value值的节点颜色是相同的
网络边配置颜色
给网络图的边配置颜色
基于数值型数据配置边的颜色
# Work on a copy of the edge list and attach one numeric value per edge row.
edge_values = [1, 7, 3, 5, 4, 5]
df3 = df.copy()
df3["value"] = edge_values
df3
| parent | children | value | |
|---|---|---|---|
| 0 | A | D | 1 |
| 1 | B | C | 7 |
| 2 | C | E | 3 |
| 3 | D | B | 5 |
| 4 | A | C | 4 |
| 5 | B | E | 5 |
# Colour each edge by its numeric "value".
# Keep the value as an edge attribute so the colours can be read back in
# G.edges() order. Edges are drawn in that order, which is not the row order
# of df3, so passing df3["value"] directly would colour the wrong edges.
G = nx.from_pandas_edgelist(df3, "parent", "children", edge_attr="value")
edge_values = [attrs["value"] for _, _, attrs in G.edges(data=True)]

nx.draw_networkx(
    G,
    with_labels=True,
    node_color="skyblue",
    edge_color=edge_values,  # one number per edge, mapped through edge_cmap
    width=4.0,
    edge_cmap=plt.cm.Blues,
)
plt.title("Network Graph with Networkx")
plt.show()
基于分类型数据配置边的颜色
# Attach a group label to every edge row, then convert the column to a
# categorical dtype.
group_labels = ["G1", "G2", "G1", "G1", "G2", "G1"]
df3["group"] = group_labels
df3["group"] = pd.Categorical(df3["group"])
df3
| parent | children | value | group | |
|---|---|---|---|---|
| 0 | A | D | 1 | G1 |
| 1 | B | C | 7 | G2 |
| 2 | C | E | 3 | G1 |
| 3 | D | B | 5 | G1 |
| 4 | A | C | 4 | G2 |
| 5 | B | E | 5 | G1 |
df3["group"].cat.codes # 分类标签属性
0 0
1 1
2 0
3 0
4 1
5 0
dtype: int8
# Colour each edge by its categorical "group".
# Keep the group as an edge attribute and derive the integer codes in
# G.edges() order. Edges are drawn in that order, which is not the row order
# of df3, so passing df3["group"].cat.codes directly would colour the wrong
# edges.
G = nx.from_pandas_edgelist(df3, "parent", "children", edge_attr="group")
group_categories = list(df3["group"].cat.categories)
edge_codes = [
    group_categories.index(attrs["group"]) for _, _, attrs in G.edges(data=True)
]

nx.draw_networkx(
    G,
    with_labels=True,
    node_color="skyblue",
    edge_color=edge_codes,  # one category code per edge, mapped through edge_cmap
    width=4.0,
    edge_cmap=plt.cm.Blues,
)
plt.title("Network Graph with Networkx")
plt.show()
网络图形边上添加值
有的时候我们想在网络图形的边上把具体的数值显示出来,如何实现?
自定义数据实现
# Build a directed graph whose edges carry a "weight" attribute.
G = nx.DiGraph()
for tail, head, edge_weight in [
    ("A", "B", 1),
    ("B", "C", 2),
    ("C", "D", 3),
    ("C", "E", 4),
    ("B", "E", 5),
]:
    G.add_edge(tail, head, weight=edge_weight)

# Compute the layout once and reuse it for the graph and for the edge labels.
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos, with_labels=True, node_color='skyblue', edge_color='gray')

# Collect the weight of every edge, keyed by (source, target). The author
# notes that nx.get_edge_attributes(G, 'weight') is an equivalent way to
# write this.
edge_labels = {(u, v): d['weight'] for u, v, d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

plt.show()
基于DataFrame数据实现
df3
| parent | children | value | group | |
|---|---|---|---|---|
| 0 | A | D | 1 | G1 |
| 1 | B | C | 7 | G2 |
| 2 | C | E | 3 | G1 |
| 3 | D | B | 5 | G1 |
| 4 | A | C | 4 | G2 |
| 5 | B | E | 5 | G1 |
# Build the graph from df3, keeping the "value" column as an edge attribute.
G = nx.from_pandas_edgelist(df3, "parent", "children", ["value"])

# Compute the spring layout once and share it between the graph and the edge
# labels. Calling spring_layout a second time returns different coordinates,
# which would draw the labels away from their edges.
pos = nx.spring_layout(G)
nx.draw_networkx(
    G,
    with_labels=True,
    pos=pos,
    node_size=1500,
    edge_color="gray",
    node_color="skyblue",
)

# Put each edge's value on the edge as a label.
edge_labels = {(u, v): d['value'] for u, v, d in G.edges(data=True)}
# Equivalent: edge_labels = nx.get_edge_attributes(G, 'value')
nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edge_labels)
plt.show()