持续创作,加速成长!这是我参与「掘金日新计划 · 10 月更文挑战」的第18天,点击查看活动详情
前言
昨天的文章中,使用python程序进行了数据库的连接,以及基本操作,并且分别测试了通过python脚本进行数据库的连接、节点的创建、关系的创建、节点的删除、修改等,并且完成了一个小的工具类,而在今天的文章中,将利用之前采集的漏洞数据构建知识图谱。
数据处理
之前采集的数据格式如下:
{
'content': 'a null pointer dereference issue was found in kvm when releasing a vcpu with dirty ring support enabled. this flaw allows an unprivileged local attacker on the host to issue specific ioctl calls, causing a kernel oops condition that results in a denial of service.',
'company': [
'amazon_2',
'ubuntu_22.04'
],
'product': [
'kernel',
'linux'
],
'version': [
'*',
'*'
],
'influence': [
'5.15.43',
'20.123.amzn2',
'5.15.0',
'35.36'
],
'type': '系统',
'cve_number': 'CVE-2022-1263',
'title': '空标题',
'href': 'https://avd.aliyun.com/detail?id=AVD-2022-1263'
}
其中需要构建的节点包括公司【company】、产品【product】、版本【version】、影响版本【influence】、CVE编号【cve_number】、漏洞类型【type】。
本着先构建节点再连线建关系的原则,首先要做的就是处理数据,将节点剥离出来。
查看之前的数据格式,发现产品、应用、与版本对应不上,因此重新改变了一下数据格式,如下:
{
'content': '暂无',
'data': [
[
'系统',
'amazon_2',
'kernel',
'*',
'Up',
'to',
'(excluding)',
'5.15.50-23.125.amzn2'
]
],
'solution': '建议您更新当前系统或软件至最新版,完成漏洞的修复。',
'cve_number': 'CVE-2022-3449',
'title': 'Browsing',
'href': 'https://avd.aliyun.com/detail?id=AVD-2022-3449'
}
数据处理代码以及插入代码如下:
import codecs
from connectUtils import connect2Neo4J
def cleanData(tempList):
    """Normalise raw vulnerability rows into dictionaries.

    Each row is a sequence laid out as
    ``[type, company, product, version, *influence_tokens]``.  The influence
    tokens are joined, the qualifier words "From", "(including)",
    "(excluding)" and "Up to" are removed, and the remaining fragments
    (hyphens count as separators too) are re-joined with a single "-".

    Args:
        tempList: Iterable of rows in the layout described above.

    Returns:
        A list with one dict per row, keyed by "type", "company",
        "product", "version" and "influence".  "influence" is an empty
        string when the row carries no version fragments.

    Raises:
        IndexError: If a row has fewer than four elements.
    """
    # Qualifier words that carry no version information, removed in this order.
    noiseWords = ("From", "(including)", "(excluding)", "Up to")
    returnList = []
    for row in tempList:
        influenceText = ' '.join(row[4:])
        for word in noiseWords:
            influenceText = influenceText.replace(word, " ")
        # Hyphens are treated as separators; split() already discards empty
        # fragments, so no additional filtering is required.
        influence = '-'.join(influenceText.replace("-", " ").split())
        returnList.append({
            "type": row[0],
            "company": row[1],
            "product": row[2],
            "version": row[3],
            "influence": influence,
        })
    return returnList
def loadData():
    """Load scraped records from aliyunSpider.txt and store them in Neo4j.

    Every line of the file is evaluated into a dict.  Its "data" rows are
    normalised with cleanData() and written as company / product / CVE /
    version / type nodes together with the relationships linking them.
    Uses the module-level ``connect`` object bound in the ``__main__`` guard.
    """
    # NOTE(review): the handle returned by codecs.open() is never closed.
    lines = codecs.open("aliyunSpider.txt",'r','utf-8').readlines()
    for line in lines:
        try:
            # NOTE(review): eval() executes whatever the line contains; only
            # run this against a file you produced yourself.
            json_line = eval(line.strip())
            print(json_line["solution"])
            print(cleanData(json_line["data"]))
            cve_number = (json_line["cve_number"])
            print(json_line["href"])
            for item in cleanData(json_line["data"]):
                type = item["type"]
                company = item["company"]
                product = item["product"]
                version = item["version"]
                influence = item["influence"]
                print(company)
                # Per the write-up that follows this listing, this lookup
                # returned None in practice, so the else branch below ran and
                # duplicate company nodes were created.
                if (connect.conn.nodes.match("company",company).first()):
                    companyNode = connect.conn.nodes.match("company",company).first()
                    # Reuse the product node when the lookup finds one,
                    # otherwise create it; either way link company -> product.
                    if (connect.conn.nodes.match("product",product).first()):
                        productNode = connect.conn.nodes.match("product",product).first()
                        connect.creatRelationship(companyNode, productNode, "拥有")
                    else:
                        productNode = connect.creatNode("product", product)
                        connect.creatRelationship(companyNode, productNode, "拥有")
                    # Same reuse-or-create step for the CVE node.  The version
                    # and type nodes are created unconditionally in both arms.
                    if (connect.conn.nodes.match("cvenumberNode",cve_number).first()):
                        cvenumberNode = connect.conn.nodes.match("cvenumberNode",cve_number).first()
                        connect.creatRelationship(productNode,cvenumberNode,"暴露漏洞")
                        versionNode = connect.creatNode("versionNode", influence)
                        typeNode = connect.creatNode("type", type)
                        connect.creatRelationship(cvenumberNode, typeNode, "漏洞类型")
                        connect.creatRelationship(cvenumberNode, versionNode, "影响版本")
                    else:
                        cvenumberNode = connect.creatNode("cvenumberNode", cve_number)
                        connect.creatRelationship(productNode,cvenumberNode,"暴露漏洞")
                        versionNode = connect.creatNode("versionNode", influence)
                        typeNode = connect.creatNode("type", type)
                        connect.creatRelationship(cvenumberNode, typeNode, "漏洞类型")
                        connect.creatRelationship(cvenumberNode, versionNode, "影响版本")
                else:
                    # Company lookup found nothing: all five nodes are created
                    # without checking whether product or CVE already exist.
                    companyNode = connect.creatNode("company",company)
                    productNode = connect.creatNode("product", product)
                    cvenumberNode = connect.creatNode("cvenumberNode", cve_number)
                    versionNode = connect.creatNode("versionNode", influence)
                    typeNode = connect.creatNode("type",type)
                    connect.creatRelationship(companyNode,productNode,"拥有")
                    connect.creatRelationship(productNode,cvenumberNode,"暴露漏洞")
                    connect.creatRelationship(cvenumberNode, typeNode, "漏洞类型")
                    connect.creatRelationship(cvenumberNode, versionNode, "影响版本")
                print(type)
                print(product)
                print(version)
                print(influence)
        except Exception as e:
            # Any failure while handling a line is printed and the loop moves
            # on to the next line.
            print(e)
if __name__ == "__main__":
    # loadData() reads the module-level name `connect`, so the connection
    # wrapper has to be bound before loadData() is called.
    connect = connect2Neo4J()
    loadData()
connectUtils.py neo4J数据库连接的公共类如下:
from py2neo import Graph, Node, Relationship, NodeMatcher, NodeMatch,RelationshipMatcher
class connect2Neo4J():
    """Small convenience wrapper around a py2neo ``Graph`` connection.

    Attributes are kept public (``userName``, ``password``, ``neo4jUrl``,
    ``conn``) because callers use ``conn`` directly for ad-hoc queries.
    """

    def __init__(self, userName="neo4j", password="", neo4jUrl='http://127.0.0.1:17474'):
        """Open the connection.

        Args:
            userName: Neo4j account name.
            password: Password for ``userName``.  The default is the empty
                placeholder used in the original listing; pass the real
                credential instead of editing this file.
            neo4jUrl: URL of the Neo4j server.
        """
        self.userName = userName
        self.password = password
        self.neo4jUrl = neo4jUrl
        self.conn = Graph(self.neo4jUrl, auth=(self.userName, self.password))

    def creatNode(self, labels, properties):
        """Create a node and return it.

        Args:
            labels: Label given to the new node.
            properties: Value stored in the node's ``name`` property.

        Returns:
            The ``Node`` that was passed to ``Graph.create``.
        """
        # No existence check is made here: every call issues a create.
        insertNode = Node(labels, name=properties)
        self.conn.create(insertNode)
        return insertNode

    def creatRelationship(self, nodeA, nodeB, properties):
        """Create a relationship ``nodeA -[properties]-> nodeB``.

        Args:
            nodeA: Start node.
            nodeB: End node.
            properties: Relationship type name.
        """
        relation = Relationship(nodeA, properties, nodeB)
        self.conn.create(relation)

    def searchNode(self, labels, limit=None):
        """Return a lazy match over the nodes carrying ``labels``.

        Args:
            labels: Label to match.
            limit: Maximum number of nodes; ``None`` (or any falsy value)
                means no limit.

        Returns:
            A py2neo node match object that can be iterated.
        """
        # Both paths go through NodeMatcher instead of building a NodeMatch
        # by hand for the limited case.
        data = NodeMatcher(self.conn).match('{}'.format(labels))
        if limit:
            data = data.limit(limit)
        return data

    def getRelations(self, nodes=None, r_type=None, limit=None):
        """Return a lazy match over relationships.

        Args:
            nodes: Optional sequence or set of nodes the relationship must
                connect, passed straight to ``RelationshipMatcher.match``.
            r_type: Relationship type name, or ``None`` for any type.
            limit: Maximum number of relationships, or ``None`` for all.

        Returns:
            A py2neo relationship match object that can be iterated.
        """
        # Formatting None would yield the literal string 'None' and restrict
        # the match to a relationship type of that name, so None is passed
        # through untouched.
        relationType = None if r_type is None else '{}'.format(r_type)
        relation = RelationshipMatcher(self.conn)
        relationData = relation.match(nodes=nodes, r_type=relationType).limit(limit)
        return relationData
其中connect = connect2Neo4J()为初始化公共类。程序加载我们抓取的数据,使用eval()方法将每一行数据转为dict格式,再根据每一条样本,分别提取其中的公司【company】、产品【product】、版本【version】、影响版本【influence】、CVE编号【cve_number】、漏洞类型【type】等信息,并存储于neo4j数据库。大致思路为:首先判断公司节点是否存在于数据库内,如果存在,便加载该节点,生成公司与软件之间的关系、软件与CVE编号的关系、CVE编号与软件版本及漏洞类型的关系,最终以不同公司为核心构建关系网络。但是实际使用时发现,如果节点已存在,程序并没有在原有节点基础上建立新的联系,而是创建了新的节点,如下图所示:
我们发现同一个公司有两个节点,这个不符合我们的预期
因此清空数据库重建:
// Delete every node; DETACH also removes the relationships attached to it.
match (n) detach delete n
程序中打断点发现:
# NOTE(review): `company` is passed positionally next to the "company" label.
# py2neo's match() presumably treats positional arguments as labels and only
# keyword arguments as property filters, which would explain the None result
# reported here - confirm against the py2neo documentation.
connect.conn.nodes.match("company",company).first()
这个好像没有生效,都是None,因此改用NodeMatcher,代码如下:
# Build a matcher on the same graph connection used by the loader.
node_matcher = NodeMatcher(connect.conn)
# Match nodes labelled "company" and filter them with name=company.
data = node_matcher.match("company").where(name=company)
# Materialise the match so the result can be inspected while debugging.
print(list(data))
修改后的代码如下:
import codecs
from py2neo import NodeMatcher
from connectUtils import connect2Neo4J
def cleanData(tempList):
    """Normalise raw vulnerability rows into dictionaries.

    Each row is a sequence laid out as
    ``[type, company, product, version, *influence_tokens]``.  The influence
    tokens are joined, the qualifier words "From", "(including)",
    "(excluding)" and "Up to" are removed, and the remaining fragments
    (hyphens count as separators too) are re-joined with a single "-".

    Args:
        tempList: Iterable of rows in the layout described above.

    Returns:
        A list with one dict per row, keyed by "type", "company",
        "product", "version" and "influence".  "influence" is an empty
        string when the row carries no version fragments.

    Raises:
        IndexError: If a row has fewer than four elements.
    """
    # Qualifier words that carry no version information, removed in this order.
    noiseWords = ("From", "(including)", "(excluding)", "Up to")
    returnList = []
    for row in tempList:
        influenceText = ' '.join(row[4:])
        for word in noiseWords:
            influenceText = influenceText.replace(word, " ")
        # Hyphens are treated as separators; split() already discards empty
        # fragments, so no additional filtering is required.
        influence = '-'.join(influenceText.replace("-", " ").split())
        returnList.append({
            "type": row[0],
            "company": row[1],
            "product": row[2],
            "version": row[3],
            "influence": influence,
        })
    return returnList
def _getOrCreateNode(nodeMatcher, label, name):
    """Return the first node with ``label`` and ``name``, creating it if absent."""
    node = nodeMatcher.match(label).where(name=name).first()
    if node is None:
        node = connect.creatNode(label, name)
    return node


def loadData(dataPath="aliyunSpider.txt"):
    """Load scraped vulnerability records and store them in Neo4j.

    Each non-blank line of the file is evaluated into a dict.  Its "data"
    rows are normalised with cleanData() and written as
    company -> product -> CVE -> (type, version) chains.  Company, product
    and CVE nodes are looked up by name and reused when present.  Type and
    version nodes, and all relationships, are still created on every row,
    so some redundancy remains.

    Uses the module-level ``connect`` object bound in the ``__main__`` guard.

    Args:
        dataPath: Path of the UTF-8 text file holding one record per line.
    """
    # The context manager closes the file even if reading fails.
    with codecs.open(dataPath, 'r', 'utf-8') as dataFile:
        lines = dataFile.readlines()

    node_matcher = NodeMatcher(connect.conn)
    for lineNo, line in enumerate(lines, 1):
        line = line.strip()
        if not line:
            # A blank line is not a record; skip it and do not feed it to eval().
            continue
        try:
            # SECURITY: eval() executes arbitrary code found in the file.
            # Only run this on data you produced yourself; consider
            # ast.literal_eval once the records are confirmed to be plain
            # literals.
            json_line = eval(line)
            print(json_line["solution"])
            records = cleanData(json_line["data"])
            print(records)
            cve_number = json_line["cve_number"]
            print(json_line["href"])
            for record in records:
                vulnType = record["type"]
                company = record["company"]
                product = record["product"]
                version = record["version"]
                # An empty influence string gets the placeholder "未知".
                influence = record["influence"] or "未知"
                print(company)

                # One query per lookup; product and CVE are looked up even
                # when the company node is new, so existing nodes are reused.
                companyNode = _getOrCreateNode(node_matcher, "company", company)
                productNode = _getOrCreateNode(node_matcher, "product", product)
                cvenumberNode = _getOrCreateNode(node_matcher, "cvenumberNode", cve_number)
                versionNode = connect.creatNode("versionNode", influence)
                typeNode = connect.creatNode("type", vulnType)

                connect.creatRelationship(companyNode, productNode, "拥有")
                connect.creatRelationship(productNode, cvenumberNode, "暴露漏洞")
                connect.creatRelationship(cvenumberNode, typeNode, "漏洞类型")
                connect.creatRelationship(cvenumberNode, versionNode, "影响版本")

                print(vulnType)
                print(product)
                print(version)
                print(influence)
        except Exception as e:
            # Best effort: report the failing line and continue with the next.
            print("line {}: {}".format(lineNo, e))
if __name__ == "__main__":
    # loadData() reads the module-level name `connect`, so the connection
    # wrapper has to be bound before loadData() is called.
    connect = connect2Neo4J()
    loadData()
主要就是修改了查询节点的代码,修改之后看效果还可以,但是还是有冗余节点,之后再进行修改
Thanks♪(・ω・)ノ