json2triple
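'''This module is used to export (entity, relationship, entity) triples.'''
'''The main attribute of an entity is name, which is used when querying entities and relationships.'''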
import json
# Maps a raw predicate identifier from the source JSON to the node label used
# for the object of that predicate. Keys and values are runtime data and must
# stay exactly as they are.
class_dict = {
    "ns0__description": "描述",
    "edukg_prop_history__main-P43": "时间",
    "edukg_prop_history__main-P33": "时间",
    "rdfs__label": "描述",
    "edukg_prop_history__main-P272": "时间",
    "edukg_prop_chinese__main-P25": "描述",
    "edukg_prop_history__main-P163": "地址",
    "edukg_prop_chinese__main-P24": "职业",
    "edukg_prop_history__main-P144": "描述",
    "edukg_prop_chinese__main-P17": "时间",
    "edukg_prop_history__main-P399": "描述",
    "edukg_prop_chinese__main-P19": "时间",
    "edukg_prop_common__main-P3": "描述",
    "edukg_prop_chinese__main-P5": "时间",
    "edukg_prop_chinese__main-P7": "描述",
    "edukg_prop_chinese__main-P9": "描述",
    "edukg_prop_chinese__main-R2": "书名",
    "edukg_prop_history__main-P65": "地址",
    "edukg_prop_history__main-P1": "地址",
    "edukg_prop_history__main-P283": "时期",
    "edukg_prop_history__main-P400": "描述",
    "edukg_prop_history__main-P227": "时代",
    "edukg_prop_common__main-P4": "描述",
    "edukg_prop_history__main-P509": "风格",
    "edukg_prop_history__main-P70": "评价",
    "edukg_prop_common__main-P1": "别名",
    "edukg_prop_common__main-P6": "风格",
    # NOTE(review): this key lacks the leading "e" and looks like a misspelled
    # twin of the next entry; kept as-is so lookups behave exactly as before.
    "dukg_prop_history__main-P35": "评价",
    "edukg_prop_history__main-P35": "评价",
    "edukg_prop_history__main-P270": "地址",
    "edukg_prop_history__main-P254": "评价",
    "edukg_prop_history__main-P216": "描述",
    "edukg_prop_history__main-P416": "主要作品",
    "edukg_prop_history__main-P62": "代表作",
    "edukg_prop_common__main-P21": "评价",
}
def read_json(json_file_name):
    """Read a JSON file and return its parsed content.

    Parameters:
        json_file_name (str): path of the JSON file to read.

    Return:
        The parsed JSON document (a dict for the files this module is
        written against).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file content is not valid JSON.
    """
    # errors='replace' substitutes undecodable bytes instead of raising, so a
    # file with a few broken UTF-8 sequences can still be parsed.
    with open(json_file_name, 'r', encoding='utf-8', errors='replace') as file:
        return json.load(file)
def json2triple(data):
    """Convert one parsed record into a list of triples.

    Parameters:
        data (dict | None): the record to convert. Three sections are read
            when present: ``data["instanceInfo"]["property"]``,
            ``data["instanceInfo"]["relation"]`` and
            ``data["bookList"]["data"]``. Anything that is not a dict
            (including ``None``) yields an empty result.

    Return:
        list: triples of the form
            [{start_node_label: start_name}, relationship, {end_node_label: end_name}]
        i.e. a one-entry dict for the start node, the relationship name, and
        a one-entry dict for the end node.

    Raises:
        KeyError: if an item lacks one of the keys it is expected to carry,
            or if a property's ``predicate`` has no entry in ``class_dict``.
    """
    final_result = []
    if not isinstance(data, dict):
        # e.g. a JSON null under "data": nothing to export.
        return final_result

    instance_info = data.get("instanceInfo")
    if isinstance(instance_info, dict):
        # ``or []`` tolerates a section that is present but null/empty.
        for item in instance_info.get("property") or []:
            # The end node's label is looked up from the raw predicate id.
            end_label = class_dict[item["predicate"]]
            final_result.append(
                [{"作者": item["subject"]}, item["predicateLabel"], {end_label: item["object"]}]
            )
        for item in instance_info.get("relation") or []:
            final_result.append(
                [{"标题": item["subject"]}, item["predicateLabel"], {"作者": item["object"]}]
            )

    book_list = data.get("bookList")
    if isinstance(book_list, dict):
        for item in book_list.get("data") or []:
            # Each book contributes three triples: subject, edition time, edition.
            book_name = item["bookName"]
            final_result.extend([
                [{"几年级上下册": book_name}, "学科属于", {"学科": item["subject"]}],
                [{"几年级上下册": book_name}, "出版时间", {"时间": item["editionTime"]}],
                [{"几年级上下册": book_name}, "版本属于", {"版本": item["edition"]}],
            ])
    return final_result
if __name__ == "__main__":
    # Manual smoke run: load one sample file and print the triples built from
    # its "data" section.
    sample = read_json('data//author//json//ywl.json')
    print(json2triple(sample["data"]))
Education_Graph
from py2neo import Graph, Node, Relationship,NodeMatcher, RelationshipMatcher
from json2triple import json2triple,read_json
class EducationGraph:
    """Small helper around a py2neo ``Graph`` for building the education graph.

    Nodes are looked up by their label together with their ``name`` property.
    """

    def __init__(self, host, port, username, password):
        """Open the graph connection with the given settings."""
        self.neo4j_connection(host, port, username, password)

    def neo4j_connection(self, host, port, username, password):
        """Create the py2neo ``Graph`` and store it on ``self.driver``."""
        # The attribute keeps its historical name ``driver`` although it holds
        # a Graph, so existing code reading ``self.driver`` keeps working.
        self.driver = Graph(
            host=host,
            port=port,
            user=username,
            password=password)

    def create_node(self, label, attrs):
        """Return the node with this label and ``attrs["name"]``, creating it if absent.

        Parameters:
            label (str): node label.
            attrs (dict): node properties; must contain ``"name"``.

        Return:
            The already existing node, or the newly created one.
        """
        existing = self.match_node(label, attrs)
        if existing is not None:
            return existing
        node = Node(label, **attrs)
        self.driver.create(node)
        return node

    def create_relationship(self, label1, attrs1, label2, attrs2, r_name):
        """Create ``(node1)-[r_name]->(node2)`` between two existing nodes.

        Both nodes are looked up by label and ``name``.

        Return:
            bool: False when either node does not exist (nothing is created),
            True once the relationship has been created.
        """
        # match_node is a bound method: it takes (label, attrs) only.
        value1 = self.match_node(label1, attrs1)
        value2 = self.match_node(label2, attrs2)
        if value1 is None or value2 is None:
            return False
        self.driver.create(Relationship(value1, r_name, value2))
        return True

    def create_relationship_by_node(self, node1, rel, node2):
        """Create ``(node1)-[rel]->(node2)`` unless that relationship already exists."""
        if self.has_relationship(node1, node2, rel):
            return
        self.driver.create(Relationship(node1, rel, node2))

    def match_node(self, label, attrs):
        """Return the first node with this label whose ``name`` equals ``attrs["name"]``.

        Return:
            The matching node, or None when there is none.

        Raises:
            KeyError: if ``attrs`` has no ``"name"`` entry.
        """
        # Match on the property value instead of concatenating it into a
        # where-clause string: names containing quotes (or non-string names)
        # no longer break or alter the query.
        matcher = NodeMatcher(self.driver)
        return matcher.match(label, name=attrs["name"]).first()

    def update_node(self, label, attrs, new_attrs):
        """Update the properties of the node identified by label and ``name``.

        Return:
            bool: False when no such node exists, True after the update has
            been pushed to the graph.
        """
        node = self.match_node(label, attrs)
        if node is None:
            return False
        node.update(new_attrs)
        self.driver.push(node)
        return True

    def search_nodes(self, label, attrs):
        """Return the match of all nodes with this label and these properties."""
        matcher = NodeMatcher(self.driver)
        return matcher.match(label, **attrs)

    def search_relationships(self, r_type):
        """Return the match of all relationships of type ``r_type``."""
        matcher = RelationshipMatcher(self.driver)
        return matcher.match(r_type=r_type)

    def has_relationship(self, firstNode, finalNode, rel):
        """Tell whether ``(firstNode)-[rel]->(finalNode)`` already exists.

        Parameters:
            firstNode: start node (as returned by create_node / match_node).
            finalNode: end node.
            rel (str): relationship type.

        Return:
            bool: True when at least one such relationship exists.
        """
        # Let the matcher build the query from the node objects and the type
        # rather than pasting their text representation into Cypher.
        matcher = RelationshipMatcher(self.driver)
        found = matcher.match(nodes=(firstNode, finalNode), r_type=rel).first()
        if found is None:
            return False
        print('关系已经存在')
        return True
if __name__ == '__main__':
    # Import every triple of one sample file into the graph.
    data = json2triple(read_json('data//author//json//鲍圭埃特.json')["data"])
    # NOTE(review): the four argument values below are placeholder names
    # (own host / port number / username / password) that are not defined in
    # the visible source; replace them with real settings before running.
    education_graph = EducationGraph(host=自己的host, port=端口号, username=用户名, password=密码)
    for triple in data:
        start_spec = None
        end_spec = None
        # Each side is a one-entry dict {label: name}; turn it into
        # (label, {"name": name}) as expected by create_node.
        for label, name in triple[0].items():
            start_spec = (label, {"name": name})
        for label, name in triple[2].items():
            end_spec = (label, {"name": name})
        start_node = education_graph.create_node(start_spec[0], start_spec[1])
        relationship = triple[1]
        end_node = education_graph.create_node(end_spec[0], end_spec[1])
        education_graph.create_relationship_by_node(start_node, relationship, end_node)