Gremlin创建schema(包括实体和关系)

1、构建图谱schema,流程包括图创建、实体构建以及关系构建。

      创建图时需要指定图库名称以及主键字段。

       实体构建时需要指定主键字段,每个属性需要指定数据类型、是否非空以及默认值。关系构建时需要包括关系名称、指向头实体的标签、指向尾实体的标签等字段。

Java代码展示:

import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
/**
 * Creates a graph and its schema: three vertex schemas and one edge schema.
 *
 * <p>The whole definition is assembled into a single Gremlin script and sent
 * to the server in one request.
 */
public class AddSchema {
    /**
     * Connects to the server, builds the schema-creation script and submits it.
     *
     * @param args unused
     * @throws Exception if connecting to the server or submitting the script fails
     */
    public static void main(String[] args) throws Exception {
        String address = "localhost";
        int port = 9999;
        String username = "";
        String password = "";
        String database = "实体1";
        Client client = null;
        try {
            GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);
            client = cluster.connect(true);

            StringBuilder sb = new StringBuilder();
            // Create the graph and declare 'id' as its primary key.
            sb.append(String.format("graph=GridGraphFactory.createGraph('%s');graph.createPrimaryKey('id');", database));

            // Vertex schemas. NOTE(review): the two boolean flags and the trailing null passed to
            // createProperty presumably cover nullability and the default value - confirm against
            // the GridGraph API documentation.
            sb.append("equipSchema = graph.createSchema('实体1', SchemaType.VERTEX);");
            sb.append("equipSchema.createProperty('属性1', GridDataType.STRING, false, false, null);");
            sb.append("equipSchema.createProperty('属性2', GridDataType.STRING, false, false, null);");

            sb.append("manuSchema = graph.createSchema('实体2', SchemaType.VERTEX);");
            sb.append("manuSchema.createProperty('属性1', GridDataType.STRING, false, false, null);");
            sb.append("manuSchema.createProperty('属性2', GridDataType.STRING, false, false, null);");

            sb.append("subSchema = graph.createSchema('实体3', SchemaType.VERTEX);");
            sb.append("subSchema.createProperty('属性1', GridDataType.STRING, false, false, null);");
            sb.append("subSchema.createProperty('属性2', GridDataType.STRING, false, false, null);");

            // Edge schema: head label, tail label and relation name are stored as edge properties.
            sb.append("relSchema = graph.createSchema('关系', SchemaType.EDGE);");
            sb.append("relSchema.createProperty('h_table', GridDataType.STRING, false, false, null);");
            sb.append("relSchema.createProperty('t_table', GridDataType.STRING, false, false, null);");
            sb.append("relSchema.createProperty('r', GridDataType.STRING, false, false, null);");

            client.submit(sb.toString());
        } catch (GridGraphAuthenticationException e) {
            throw new RuntimeException(e);
        } finally {
            // Release the connection even when building or submitting the script fails.
            if (client != null) {
                client.close();
            }
        }
    }
}

Python代码展示:

#encoding=utf8
from gremlin_python.driver import client

if __name__ == '__main__':
    # Use a dedicated name for the connection so the imported `client` module is not shadowed.
    gremlin_client = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    database = "实体1"
    # Create the graph and declare 'id' as its primary key.
    prefix = f"graph=GridGraphFactory.createGraph('{database}');graph.createPrimaryKey('id');"
    # Three vertex schemas followed by one edge schema whose properties hold the
    # head label, tail label and relation name.
    script = prefix + '''
    equipSchema = graph.createSchema('实体1', SchemaType.VERTEX);
    equipSchema.createProperty('属性1', GridDataType.STRING, false, false, null);
    equipSchema.createProperty('属性2', GridDataType.STRING, false, false, null);

    manuSchema = graph.createSchema('实体2', SchemaType.VERTEX);
    manuSchema.createProperty('属性1', GridDataType.STRING, false, false, null);
    manuSchema.createProperty('属性2', GridDataType.STRING, false, false, null);

    subSchema = graph.createSchema('实体3', SchemaType.VERTEX);
    subSchema.createProperty('属性1', GridDataType.STRING, false, false, null);
    subSchema.createProperty('属性2', GridDataType.STRING, false, false, null);
    
    relSchema = graph.createSchema('关系', SchemaType.EDGE)
    relSchema.createProperty('h_table', GridDataType.STRING, false, false, null)
    relSchema.createProperty('t_table', GridDataType.STRING, false, false, null)
    relSchema.createProperty('r', GridDataType.STRING, false, false, null)
    '''
    try:
        gremlin_client.submit(script)
    finally:
        # Always release the connection, even if the submission fails.
        gremlin_client.close()

2、填入数据。添加实体时,需要指定实体主键、属性以及对应的属性值;添加关系时,需要指定头实体、尾实体以及关系名,最终形成知识图谱。

Java代码展示:

import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
public class AddData {
    public static void main(String[] args) throws Exception {
        try{
            String address = "localhost";
            int port = 9999;
            String username = "";
            String password = "";
            String database = "实体1";
            GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);
            Client client = cluster.connect(true);
            StringBuilder sb = new StringBuilder();
            sb.append(String.format("graph=GridGraphFactory.openGraph('%s');", database));
            sb.append("e1 = graph.addVertex(T.label, '实体1', 'id', '1', '属性1', '1', '属性2', '2');");
            sb.append("e2 = graph.addVertex(T.label, '实体2', 'id', '2', '属性1', '1', '属性2', '2');");
            sb.append("e3 = graph.addVertex(T.label, '实体3', 'id', '3', '属性1', '1', '属性2', '2');");
            sb.append("e1.addEdge('关系', e2, 'h_table', '实体1', 't_table', '实体3', 'r', 'r1');");
            sb.append("e1.addEdge('关系', e3, 'h_table', '实体1', 't_table', '实体3', 'r', 'r2');");
            sb.append("graph.tx().commit();");
            client.submit(sb.toString());
            client.close();

        } catch (GridGraphAuthenticationException e) {
            throw new RuntimeException(e);
        }
    }

Python代码展示:

#encoding=utf8
from gremlin_python.driver import client
if __name__ == '__main__':
    # Use a dedicated name for the connection so the imported `client` module is not shadowed.
    gremlin_client = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    database = "实体1"
    prefix = f"graph=GridGraphFactory.openGraph('{database}');g=graph.traversal();"
    # Three vertices (label, primary key, property/value pairs) and two edges.
    # t_table must name the label of the edge's tail vertex: e2 is '实体2', e3 is '实体3'.
    script = prefix + '''
             e1 = graph.addVertex(T.label, '实体1', 'id', '1', '属性1', '1', '属性2', '2')         
             e2 = graph.addVertex(T.label, '实体2', 'id', '2', '属性1', '1', '属性2', '2')
             e3 = graph.addVertex(T.label, '实体3', 'id', '3', '属性1', '1', '属性2', '2')
             e1.addEdge('关系', e2, 'h_table', '实体1', 't_table', '实体2', 'r', 'r1')
             e1.addEdge('关系', e3, 'h_table', '实体1', 't_table', '实体3', 'r', 'r2')
             graph.tx().commit();
        '''
    try:
        gremlin_client.submit(script)
    finally:
        # Always release the connection, even if the submission fails.
        gremlin_client.close()

3、打印schema结构

对于每一个实体,遍历图数据库中所有的schema,同时遍历每一个schema中的每一个属性,生成“实体类型(属性1,属性2,属性3)”的结构;

对于每一个关系,可以遍历所有关系数据中的 头标签、关系名、尾标签,对其进行去重,生成“头标签--[r:关系名]-->尾标签”的结构。

Java代码展示:

import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
import org.apache.tinkerpop.gremlin.driver.Result;
import org.apache.tinkerpop.gremlin.driver.ResultSet;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Prints a textual summary of the graph schema.
 *
 * <p>Vertex schemas are rendered as {@code name(prop1,prop2,...)} and relations as
 * {@code head--[r:name]-->tail}, built from the distinct
 * {@code h_table}/{@code r}/{@code t_table} values found on the stored edges.
 */
public class SchemaPrompt {
    /**
     * Connects to the server, inspects every schema and prints the entity and relation summaries.
     *
     * @param args unused
     * @throws Exception if connecting to the server or running a query fails
     */
    public static void main(String[] args) throws Exception {
        String address = "localhost";
        int port = 9999;
        String username = "";
        String password = "";
        String database = "实体1";
        Client client = null;
        try {
            GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);
            client = cluster.connect(true);
            // Bind `graph` and `g` for the following requests; the trailing `return;` makes the
            // script return nothing instead of the value of its last assignment.
            client.submit(String.format("graph=GridGraphFactory.openGraph('%s');g=graph.traversal();return;", database));

            List<String> tables = queryStrings(client, "return graph.schemas().stream().map{s -> s.getName()};");
            List<String> entities = new ArrayList<>();
            List<String> rels = new ArrayList<>();
            for (String table : tables) {
                List<String> typeList = queryStrings(client,
                        String.format("return graph.getSchema('%s').getType().toString();", table));
                if (typeList.isEmpty()) {
                    // No type reported for this schema: nothing to print for it.
                    continue;
                }
                String type = typeList.get(0);
                if ("VERTEX".equals(type)) { // entity (vertex) schema
                    List<String> properties = queryStrings(client,
                            String.format("return graph.getSchema('%s').getProperties().stream().map{s -> s.getName()};", table));
                    entities.add(formatEntity(table, properties));
                } else if ("EDGE".equals(type)) { // relation (edge) schema
                    ResultSet relationSet = client.submit(
                            String.format("return g.E().hasLabel('%s').valueMap('h_table', 'r', 't_table').dedup();", table));
                    for (Result rel : relationSet.all().get()) {
                        Map<?, ?> triple = rel.get(Map.class);
                        rels.add(String.format("%s--[r:%s]-->%s",
                                scalar(triple.get("h_table")),
                                scalar(triple.get("r")),
                                scalar(triple.get("t_table"))));
                    }
                }
            }
            System.out.println("实体有:" + String.join(",", entities));
            System.out.println("关系有:" + String.join(",", rels));
        } catch (GridGraphAuthenticationException e) {
            throw new RuntimeException(e);
        } finally {
            // Release the connection even when a query fails.
            if (client != null) {
                client.close();
            }
        }
    }

    /** Runs a script and returns every result item converted to a string. */
    private static List<String> queryStrings(Client client, String script) throws Exception {
        return client.submit(script).all().get().stream()
                .map(Result::getString)
                .collect(Collectors.toList());
    }

    /** Renders {@code table(prop1,prop2,...)}, leaving out the internal {@code _id} property. */
    private static String formatEntity(String table, List<String> properties) {
        // Filter before joining so a skipped "_id" can never leave a dangling comma behind.
        String joined = properties.stream()
                .filter(property -> !"_id".equals(property))
                .collect(Collectors.joining(","));
        return table + "(" + joined + ")";
    }

    /**
     * Converts one valueMap entry to text.
     *
     * <p>NOTE(review): standard TinkerPop {@code valueMap()} wraps each value in a list; it is not
     * visible here whether this server does the same, so a single-element list is unwrapped and
     * anything else is converted as-is.
     */
    private static String scalar(Object value) {
        if (value instanceof List) {
            List<?> values = (List<?>) value;
            if (values.size() == 1) {
                return String.valueOf(values.get(0));
            }
        }
        return String.valueOf(value);
    }
}

Python代码展示:

#encoding=utf8
from gremlin_python.driver import client
if __name__ == '__main__':
    # Use a dedicated name for the connection so the imported `client` module is not shadowed.
    gremlin_client = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    database = "实体1"
    # Every request re-opens the graph and rebinds `graph` / `g` before running its query.
    prefix = f'graph=GridGraphFactory.openGraph("{database}");g=graph.traversal();'
    try:
        tables = gremlin_client.submit(prefix + "graph.schemas().stream().map{s -> s.getName()};").all().result()
        entities, rels = [], []
        for table in tables:
            tp = gremlin_client.submit(prefix + f"graph.getSchema('{table}').getType().toString();").one()[0]
            if tp == 'VERTEX':  # entity (vertex) schema
                pros = gremlin_client.submit(prefix + f"graph.getSchema('{table}')" + ".getProperties().stream().map{s -> s.getName()};").all().result()
                # Leave out the internal '_id' property.
                pros = [pro for pro in pros if pro != '_id']
                entities.append(f"{table}({','.join(pros)})")
            elif tp == 'EDGE':  # relation (edge) schema
                rs = gremlin_client.submit(prefix + f"g.E().hasLabel('{table}').valueMap('h_table', 'r', 't_table').dedup();").all().result()
                for mp in rs:
                    rels.append(f"{mp['h_table']}--[r:{mp['r']}]-->{mp['t_table']}")
        # Alternative: derive the triples from the edge endpoints instead of the stored properties:
        #   g.E().hasLabel('关系').toList().stream().map{e -> e.outVertex().schema().getName()+"--[r:"+e.values('r').next()+ "]-->"  +  e.inVertex().schema().getName()}.distinct()
        print("实体有:" + ",".join(entities))
        print("关系有:" + ",".join(rels))
    finally:
        # Always release the connection, even if a query fails.
        gremlin_client.close()

posted on 2025-06-11 17:08  sw-lab  阅读(15)  评论(0)    收藏  举报  来源

导航