Gremlin创建schema(包括实体和关系)
1、构建图谱schema,流程包括图创建、实体构建以及关系构建。
创建图时需要指定图库名称以及主键字段。
实体构建时需要指定主键字段,每个属性需要指定数据类型、是否非空以及默认值。关系构建时需要包括关系名称、指向头实体的标签、指向尾实体的标签等字段。
Java代码展示:
import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
/**
 * Creates the example graph and its schema: three vertex (entity) schemas and one edge
 * (relation) schema, all sent to the server as a single Gremlin script.
 */
public class AddSchema {

    /**
     * Connects to the graph server, submits the schema-creation script and closes the client.
     *
     * @param args unused
     * @throws Exception if connecting or submitting fails; a
     *     {@code GridGraphAuthenticationException} is rethrown wrapped in a {@link RuntimeException}
     */
    public static void main(String[] args) throws Exception {
        String address = "localhost";
        int port = 9999;
        String username = "";
        String password = "";
        String database = "实体1";

        Client client = null;
        try {
            GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);
            // NOTE(review): the meaning of the boolean argument is defined by the GridGraph
            // driver (presumably "use a session") - confirm against its documentation.
            client = cluster.connect(true);

            StringBuilder script = new StringBuilder();
            // Create the graph and declare 'id' as its primary key.
            script.append(String.format("graph=GridGraphFactory.createGraph('%s');graph.createPrimaryKey('id');", database));

            // Vertex (entity) schemas, each with two STRING properties.
            // NOTE(review): the trailing createProperty arguments (false, false, null) presumably
            // cover nullability and the default value - confirm against the GridGraph API.
            script.append("equipSchema = graph.createSchema('实体1', SchemaType.VERTEX);");
            script.append("equipSchema.createProperty('属性1', GridDataType.STRING, false, false, null);");
            script.append("equipSchema.createProperty('属性2', GridDataType.STRING, false, false, null);");
            script.append("manuSchema = graph.createSchema('实体2', SchemaType.VERTEX);");
            script.append("manuSchema.createProperty('属性1', GridDataType.STRING, false, false, null);");
            script.append("manuSchema.createProperty('属性2', GridDataType.STRING, false, false, null);");
            script.append("subSchema = graph.createSchema('实体3', SchemaType.VERTEX);");
            script.append("subSchema.createProperty('属性1', GridDataType.STRING, false, false, null);");
            script.append("subSchema.createProperty('属性2', GridDataType.STRING, false, false, null);");

            // Edge (relation) schema: head label, tail label and relation name.
            script.append("relSchema = graph.createSchema('关系', SchemaType.EDGE);");
            script.append("relSchema.createProperty('h_table', GridDataType.STRING, false, false, null);");
            script.append("relSchema.createProperty('t_table', GridDataType.STRING, false, false, null);");
            script.append("relSchema.createProperty('r', GridDataType.STRING, false, false, null);");

            // NOTE(review): the returned ResultSet is not consumed, so a server-side script
            // error may go unnoticed here - verify whether the result should be awaited.
            client.submit(script.toString());
        } catch (GridGraphAuthenticationException e) {
            throw new RuntimeException(e);
        } finally {
            // Close the client even when connecting or submitting failed part-way.
            if (client != null) {
                client.close();
            }
        }
    }
}
Python代码展示:
#encoding=utf8
from gremlin_python.driver import client
if __name__ == '__main__':
    # Creates the example graph plus three vertex schemas and one edge schema by
    # sending a single Gremlin script to the server.
    # The instance gets its own name so the imported `client` module is not shadowed.
    gremlin_client = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    database = "实体1"
    # Create the graph and declare 'id' as its primary key.
    prefix = f"graph=GridGraphFactory.createGraph('{database}');graph.createPrimaryKey('id');"
    # NOTE(review): the trailing createProperty arguments (false, false, null) presumably
    # cover nullability and the default value - confirm against the GridGraph API.
    schema_script = (
        "\n"
        "equipSchema = graph.createSchema('实体1', SchemaType.VERTEX);\n"
        "equipSchema.createProperty('属性1', GridDataType.STRING, false, false, null);\n"
        "equipSchema.createProperty('属性2', GridDataType.STRING, false, false, null);\n"
        "manuSchema = graph.createSchema('实体2', SchemaType.VERTEX);\n"
        "manuSchema.createProperty('属性1', GridDataType.STRING, false, false, null);\n"
        "manuSchema.createProperty('属性2', GridDataType.STRING, false, false, null);\n"
        "subSchema = graph.createSchema('实体3', SchemaType.VERTEX);\n"
        "subSchema.createProperty('属性1', GridDataType.STRING, false, false, null);\n"
        "subSchema.createProperty('属性2', GridDataType.STRING, false, false, null);\n"
        "relSchema = graph.createSchema('关系', SchemaType.EDGE)\n"
        "relSchema.createProperty('h_table', GridDataType.STRING, false, false, null)\n"
        "relSchema.createProperty('t_table', GridDataType.STRING, false, false, null)\n"
        "relSchema.createProperty('r', GridDataType.STRING, false, false, null)\n"
    )
    try:
        gremlin_client.submit(prefix + schema_script)
    finally:
        # Close the connection even if the submission raised.
        gremlin_client.close()
2、填入数据。添加实体时,需要指定实体主键、属性以及对应的属性值;添加关系时,需要指定头实体、尾实体以及关系名。实体与关系写入后,最终形成知识图谱。
Java代码展示:
import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
public class AddData {
public static void main(String[] args) throws Exception {
try{
String address = "localhost";
int port = 9999;
String username = "";
String password = "";
String database = "实体1";
GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);
Client client = cluster.connect(true);
StringBuilder sb = new StringBuilder();
sb.append(String.format("graph=GridGraphFactory.openGraph('%s');", database));
sb.append("e1 = graph.addVertex(T.label, '实体1', 'id', '1', '属性1', '1', '属性2', '2');");
sb.append("e2 = graph.addVertex(T.label, '实体2', 'id', '2', '属性1', '1', '属性2', '2');");
sb.append("e3 = graph.addVertex(T.label, '实体3', 'id', '3', '属性1', '1', '属性2', '2');");
sb.append("e1.addEdge('关系', e2, 'h_table', '实体1', 't_table', '实体3', 'r', 'r1');");
sb.append("e1.addEdge('关系', e3, 'h_table', '实体1', 't_table', '实体3', 'r', 'r2');");
sb.append("graph.tx().commit();");
client.submit(sb.toString());
client.close();
} catch (GridGraphAuthenticationException e) {
throw new RuntimeException(e);
}
}
Python代码展示:
#encoding=utf8
from gremlin_python.driver import client
if __name__ == '__main__':
    # Inserts three vertices (one per entity schema) and two '关系' edges, then commits.
    # The instance gets its own name so the imported `client` module is not shadowed.
    gremlin_client = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    database = "实体1"
    prefix = f"graph=GridGraphFactory.openGraph('{database}');g=graph.traversal();"
    # Vertices: label, primary key 'id', then property/value pairs.
    # Edges: head label (h_table), tail label (t_table) and relation name (r).
    # t_table must name the label of the actual target vertex: e2 is a '实体2' vertex.
    data_script = (
        "\n"
        "e1 = graph.addVertex(T.label, '实体1', 'id', '1', '属性1', '1', '属性2', '2')\n"
        "e2 = graph.addVertex(T.label, '实体2', 'id', '2', '属性1', '1', '属性2', '2')\n"
        "e3 = graph.addVertex(T.label, '实体3', 'id', '3', '属性1', '1', '属性2', '2')\n"
        "e1.addEdge('关系', e2, 'h_table', '实体1', 't_table', '实体2', 'r', 'r1')\n"
        "e1.addEdge('关系', e3, 'h_table', '实体1', 't_table', '实体3', 'r', 'r2')\n"
        "graph.tx().commit();\n"
    )
    try:
        gremlin_client.submit(prefix + data_script)
    finally:
        # Close the connection even if the submission raised.
        gremlin_client.close()
3、打印schema结构
对于每一个实体,遍历图数据库中所有的schema,同时遍历每一个schema中的每一个属性,生成“实体类型(属性1,属性2,属性3)”的结构;
对于每一个关系,可以遍历所有关系数据中的 头标签、关系名、尾标签,对其进行去重,生成“头标签--[r:关系名]-->尾标签”的结构。
Java代码展示:
import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
import org.apache.tinkerpop.gremlin.driver.Result;
import org.apache.tinkerpop.gremlin.driver.ResultSet;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Prints a textual summary of the graph schema: every entity as
 * {@code name(prop1,prop2,...)} and every distinct relation as
 * {@code head--[r:name]-->tail}.
 */
public class SchemaPrompt {

    /**
     * Opens the graph, classifies each schema as vertex or edge, and prints the two summaries.
     *
     * @param args unused
     * @throws Exception if connecting or any query fails; a
     *     {@code GridGraphAuthenticationException} is rethrown wrapped in a {@link RuntimeException}
     */
    public static void main(String[] args) throws Exception {
        String address = "localhost";
        int port = 9999;
        String username = "";
        String password = "";
        String database = "实体1";

        Client client = null;
        try {
            GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);
            // NOTE(review): later queries reference `graph` and `g` defined by the next submit,
            // so connect(true) presumably opens a session that keeps bindings - confirm.
            client = cluster.connect(true);
            client.submit(String.format("graph=GridGraphFactory.openGraph('%s');g=graph.traversal();return;", database));

            List<String> schemaNames = queryStrings(client, "return graph.schemas().stream().map{s -> s.getName()};");
            List<String> entities = new ArrayList<>();
            List<String> rels = new ArrayList<>();
            for (String table : schemaNames) {
                String schemaType = queryStrings(client,
                        String.format("return graph.getSchema('%s').getType().toString();", table)).get(0);
                if ("VERTEX".equals(schemaType)) { // entity schema
                    entities.add(describeEntity(client, table));
                } else if ("EDGE".equals(schemaType)) { // relation schema
                    rels.addAll(describeRelations(client, table));
                }
            }

            System.out.println("实体有:" + String.join(",", entities));
            System.out.println("关系有:" + String.join(",", rels));
        } catch (GridGraphAuthenticationException e) {
            throw new RuntimeException(e);
        } finally {
            // Close the client even when connecting or a query failed part-way.
            if (client != null) {
                client.close();
            }
        }
    }

    /** Runs a Gremlin script and returns every result converted to a string. */
    private static List<String> queryStrings(Client client, String gremlin)
            throws InterruptedException, java.util.concurrent.ExecutionException {
        ResultSet resultSet = client.submit(gremlin);
        return resultSet.all().get().stream().map(Result::getString).collect(Collectors.toList());
    }

    /** Builds {@code table(prop1,prop2,...)} for a vertex schema, leaving out the '_id' property. */
    private static String describeEntity(Client client, String table)
            throws InterruptedException, java.util.concurrent.ExecutionException {
        List<String> properties = queryStrings(client,
                String.format("return graph.getSchema('%s').getProperties().stream().map{s -> s.getName()};", table));
        // Filter before joining so a skipped '_id' can never leave a dangling comma behind.
        String joined = properties.stream()
                .filter(name -> !"_id".equals(name))
                .collect(Collectors.joining(","));
        return table + "(" + joined + ")";
    }

    /** Builds one {@code head--[r:name]-->tail} entry per distinct relation stored under an edge label. */
    private static List<String> describeRelations(Client client, String table)
            throws InterruptedException, java.util.concurrent.ExecutionException {
        ResultSet relationSet = client.submit(
                String.format("return g.E().hasLabel('%s').valueMap('h_table', 'r', 't_table').dedup();", table));
        List<String> described = new ArrayList<>();
        for (Result relation : relationSet.all().get()) {
            // Values are formatted as-is rather than cast to String, so the code does not
            // depend on the exact value type the server returns for valueMap entries.
            Map<?, ?> fields = relation.get(Map.class);
            described.add(String.format("%s--[r:%s]-->%s",
                    fields.get("h_table"), fields.get("r"), fields.get("t_table")));
        }
        return described;
    }
}
Python代码展示:
#encoding=utf8
from gremlin_python.driver import client
if __name__ == '__main__':
    # Prints every entity as "name(prop1,prop2,...)" and every distinct relation as
    # "head--[r:name]-->tail".
    # The instance gets its own name so the imported `client` module is not shadowed.
    gremlin_client = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    database = "实体1"
    # Every request is prefixed with the statements that open the graph and define `g`.
    prefix = f'graph=GridGraphFactory.openGraph("{database}");g=graph.traversal();'
    try:
        tables = gremlin_client.submit(prefix + "graph.schemas().stream().map{s -> s.getName()};").all().result()
        entities, rels = [], []
        for table in tables:
            tp = gremlin_client.submit(prefix + f"graph.getSchema('{table}').getType().toString();").one()[0]
            if tp == 'VERTEX':  # entity schema
                pros = gremlin_client.submit(
                    prefix + f"graph.getSchema('{table}')" + ".getProperties().stream().map{s -> s.getName()};"
                ).all().result()
                # '_id' is left out of the printed property list.
                pros = [pro for pro in pros if pro != '_id']
                entities.append(f"{table}({','.join(pros)})")
            elif tp == 'EDGE':  # relation schema
                rs = gremlin_client.submit(
                    prefix + f"g.E().hasLabel('{table}').valueMap('h_table', 'r', 't_table').dedup();"
                ).all().result()
                for mp in rs:
                    rels.append(f"{mp['h_table']}--[r:{mp['r']}]-->{mp['t_table']}")
                # Alternative query that derives the labels from the edge's end vertices:
                # g.E().hasLabel('关系').toList().stream().map{e -> e.outVertex().schema().getName()+"--[r:"+e.values('r').next()+ "]-->" + e.inVertex().schema().getName()}.distinct()
        print("实体有:" + ",".join(entities))
        print("关系有:" + ",".join(rels))
    finally:
        # Close the connection even if a query raised.
        gremlin_client.close()
浙公网安备 33010602011771号