关于python操作neo4j

15 阅读5分钟

json2triple


'''该模块用于导出实体与关系的三元组'''
'''实体的主要属性为name,用于实体与关系的查询'''

# 目前的实体有 书名 作品补充 作者 几年级上下册 出版社  推荐 时期  标题  正文  科目 课文序列号
# 目前的关系有 主要作品 作者 全文  出版  出版社 属于  推荐书籍 选自
import json


# Maps a property's predicate identifier to the node label used for the
# object (end node) of the triple built from that property.
class_dict = {
    "ns0__description": "描述",
    "edukg_prop_history__main-P43": "时间",
    "edukg_prop_history__main-P33": "时间",
    "rdfs__label": "描述",
    "edukg_prop_history__main-P272": "时间",
    "edukg_prop_chinese__main-P25": "描述",
    "edukg_prop_history__main-P163": "地址",
    "edukg_prop_chinese__main-P24": "职业",
    "edukg_prop_history__main-P144": "描述",
    "edukg_prop_chinese__main-P17": "时间",
    "edukg_prop_history__main-P399": "描述",
    "edukg_prop_chinese__main-P19": "时间",
    "edukg_prop_common__main-P3": "描述",
    "edukg_prop_chinese__main-P5": "时间",
    "edukg_prop_chinese__main-P7": "描述",
    "edukg_prop_chinese__main-P9": "描述",
    "edukg_prop_chinese__main-R2": "书名",
    "edukg_prop_history__main-P65": "地址",
    "edukg_prop_history__main-P1": "地址",
    "edukg_prop_history__main-P283": "时期",
    "edukg_prop_history__main-P400": "描述",
    "edukg_prop_history__main-P227": "时代",
    "edukg_prop_common__main-P4": "描述",
    "edukg_prop_history__main-P509": "风格",
    "edukg_prop_history__main-P70": "评价",
    "edukg_prop_common__main-P1": "别名",
    "edukg_prop_common__main-P6": "风格",
    # NOTE(review): the next key lacks the leading "e" and looks like a typo
    # of the entry right after it; kept so existing lookups are unchanged.
    "dukg_prop_history__main-P35": "评价",
    "edukg_prop_history__main-P35": "评价",
    "edukg_prop_history__main-P270": "地址",
    "edukg_prop_history__main-P254": "评价",
    "edukg_prop_history__main-P216": "描述",
    "edukg_prop_history__main-P416": "主要作品",
    "edukg_prop_history__main-P62": "代表作",
    "edukg_prop_common__main-P21": "评价",
}
def read_json(json_file_name):
    """Read a JSON file and return its parsed content.

    Parameters:
        json_file_name (str): Path of the file to read.

    Return:
        The object produced by ``json.load`` for the file (the callers in
        this module index it like a dict).
    """
    # The file is decoded as UTF-8; errors='replace' substitutes undecodable
    # bytes instead of raising.
    with open(json_file_name, mode='r', encoding='utf-8', errors='replace') as handle:
        return json.load(handle)


def json2triple(data):
    """Convert one parsed record into a list of triples.

    Parameters:
        data (dict): Parsed record. The optional keys read here are
            "instanceInfo" (with optional "property" and "relation" lists)
            and "bookList" (with an optional "data" list).

    Return:
        list: Triples of the form
            [{start_label: start_value}, relationship, {end_label: end_value}]
        i.e. a one-entry dict for the start node, the relationship name, and
        a one-entry dict for the end node. Triples are emitted in this order:
        properties, relations, then three triples per book.

    Raises:
        KeyError: If an item lacks one of the fields read below, or if a
            property's "predicate" is not a key of ``class_dict``.
    """
    triples = []

    instance_info = data["instanceInfo"] if "instanceInfo" in data else None
    if instance_info:
        if "property" in instance_info:
            # The end-node label is looked up from the predicate identifier.
            triples.extend(
                [
                    {"作者": prop['subject']},
                    prop["predicateLabel"],
                    {class_dict[prop["predicate"]]: prop["object"]},
                ]
                for prop in instance_info["property"]
            )

        if "relation" in instance_info:
            triples.extend(
                [{"标题": rel['subject']}, rel["predicateLabel"], {"作者": rel["object"]}]
                for rel in instance_info["relation"]
            )

    book_list = data["bookList"] if "bookList" in data else None
    if book_list and "data" in book_list:
        for book in book_list["data"]:
            book_name = book["bookName"]
            # Every book yields subject, edition-time and edition triples.
            subject_triple = [{"几年级上下册": book_name}, "学科属于", {"学科": book["subject"]}]
            time_triple = [{"几年级上下册": book_name}, "出版时间", {"时间": book["editionTime"]}]
            edition_triple = [{"几年级上下册": book_name}, "版本属于", {"版本": book["edition"]}]
            triples += [subject_triple, time_triple, edition_triple]

    return triples

if __name__ == "__main__":
    # Demo run: load one sample file and print the triples built from its
    # "data" entry.
    sample = read_json('data//author//json//ywl.json')
    print(json2triple(sample["data"]))

Education_Graph

from py2neo import Graph, Node, Relationship,NodeMatcher, RelationshipMatcher
from json2triple import json2triple,read_json
class EducationGraph:
    """Small helper around a py2neo ``Graph`` for creating and querying data.

    Nodes are looked up by their label together with their ``name`` property,
    so every ``attrs`` dict passed to the create/match/update methods must
    contain a "name" key.
    """

    def __init__(self, host, port, username, password):
        """Connect to the database; see ``neo4j_connection`` for the arguments."""
        self.neo4j_connection(host, port, username, password)

    def neo4j_connection(self, host, port, username, password):
        """Create the py2neo ``Graph`` and store it on ``self.driver``.

        Parameters:
            host: Address of the server running neo4j (e.g. "127.0.0.1").
            port: Port the neo4j server listens on.
            username: Database user name.
            password: Password of that user.
        """
        self.driver = Graph(
            host=host,
            port=port,
            user=username,
            password=password,
        )

    def create_node(self, label, attrs):
        """Return the node with this label and name, creating it if missing.

        Parameters:
            label: Node label.
            attrs (dict): Node properties; must contain "name".

        Return:
            The already existing node (left unmodified) or the newly created one.
        """
        existing = self.match_node(label, attrs)
        if existing is not None:
            # An existing node is returned as-is; its properties are not updated.
            return existing
        node = Node(label, **attrs)
        self.driver.create(node)
        return node

    def create_relationship(self, label1, attrs1, label2, attrs2, r_name):
        """Create a relationship between two existing nodes found by label and name.

        Parameters:
            label1, attrs1: Label and properties (with "name") of the start node.
            label2, attrs2: Label and properties (with "name") of the end node.
            r_name: Relationship type.

        Return:
            bool: False when either node does not exist, True after the
            relationship has been created.
        """
        # match_node is an instance method, so the graph must not be passed
        # explicitly (doing so raised TypeError on every call).
        start = self.match_node(label1, attrs1)
        end = self.match_node(label2, attrs2)
        if start is None or end is None:
            return False
        self.driver.create(Relationship(start, r_name, end))
        return True

    def create_relationship_by_node(self, node1, rel, node2):
        """Create ``node1 -[rel]-> node2`` unless that relationship already exists."""
        if self.has_relationship(node1, node2, rel):
            return
        self.driver.create(Relationship(node1, rel, node2))

    def match_node(self, label, attrs):
        """Return the first node with ``label`` whose name equals ``attrs["name"]``.

        Return:
            The matching node, or None when there is none.
        """
        # Matching on the property value avoids building a query fragment by
        # string concatenation, which broke on names containing a double
        # quote and on non-string names.
        matcher = NodeMatcher(self.driver)
        return matcher.match(label, name=attrs["name"]).first()

    def update_node(self, label, attrs, new_attrs):
        """Update the properties of the node identified by ``label`` and name.

        Parameters:
            label: Node label.
            attrs (dict): Properties identifying the node; must contain "name".
            new_attrs (dict): Properties to set on the node.

        Return:
            bool: False when no such node exists, True after the update.
        """
        node = self.match_node(label, attrs)
        if node is None:
            return False
        node.update(new_attrs)
        # Without the push the change stays local and the stored node is
        # not updated.
        self.driver.push(node)
        return True

    def search_nodes(self, label, attrs):
        """Return the py2neo match of nodes with ``label`` and all of ``attrs``."""
        matcher = NodeMatcher(self.driver)
        return matcher.match(label, **attrs)

    def search_relationships(self, r_type):
        """Return the py2neo match of relationships whose type is ``r_type``."""
        matcher = RelationshipMatcher(self.driver)
        return matcher.match(r_type=r_type)

    def has_relationship(self, firstNode, finalNode, rel):
        """Tell whether a ``rel`` relationship from ``firstNode`` to ``finalNode`` exists.

        Parameters:
            firstNode: Start node (as returned by ``create_node``/``match_node``).
            finalNode: End node.
            rel: Relationship type.

        Return:
            bool: True (after printing a notice) when it exists, else False.
        """
        # Use the matcher instead of interpolating node representations and
        # the relationship type into a raw Cypher string.
        matcher = RelationshipMatcher(self.driver)
        found = matcher.match((firstNode, finalNode), r_type=rel).first()
        if found is not None:
            print('关系已经存在')
            return True
        return False
        






    
 
if __name__ == '__main__':
    data = json2triple(read_json('data//author//json//鲍圭埃特.json')["data"])

    # Connect to neo4j. The four argument values below are placeholders and
    # must be replaced with your own host, port, user name and password.
    education_graph = EducationGraph(host=自己的host,port=端口号,username=用户名,password=密码)

    for triple in data:
        # Turn each side of the triple ({label: name}) into
        # [label, {"name": name}]; with several entries the last one wins.
        endpoints = []
        for side in (triple[0], triple[2]):
            pair = None
            for label, name in side.items():
                pair = [label, {"name": name}]
            endpoints.append(pair)
        start_value, end_value = endpoints

        # Create (or fetch) both nodes, then link them.
        start_node = education_graph.create_node(start_value[0], start_value[1])
        relationship = triple[1]
        end_node = education_graph.create_node(end_value[0], end_value[1])
        education_graph.create_relationship_by_node(start_node, relationship, end_node)