# Start a neo4j container detached (-d) with an interactive TTY allocated (-it),
# publishing ports 7474 and 7687 and bind-mounting the host data and import
# directories into the container.
docker run --interactive --tty --detach \
  --publish 7474:7474 \
  --publish 7687:7687 \
  --volume /home/abc/neo4jdata:/data \
  --volume /home/abc/neo4jimport:/var/lib/neo4j/import \
  neo4j
#neo4j
// Create one :UserPerson node per row of user.txt.
LOAD CSV WITH HEADERS FROM "file:///user.txt" AS row
CREATE (:UserPerson {id: row.id, user_id: row.user_id, user_name: row.user_name});

// Index UserPerson.id before loading relationships so the two lookups in the
// MATCH below do not scan every node for every CSV row.
// The legacy form `CREATE INDEX ON :UserPerson(id)` was removed in Neo4j 5,
// which is what the untagged `neo4j` image pulls; the FOR ... ON form works on 4.x and 5.x.
CREATE INDEX user_person_id IF NOT EXISTS FOR (n:UserPerson) ON (n.id);

// Create a :gx relationship from each row's father_id node to its id node.
LOAD CSV WITH HEADERS FROM "file:///gx.txt" AS row
MATCH (entity1:UserPerson {id: row.father_id}), (entity2:UserPerson {id: row.id})
CREATE (entity1)-[:gx]->(entity2);
// Paths that go at most three hops down from 张莱柠.
MATCH p = (n:UserPerson)-[*..3]->()
WHERE n.user_name = '张莱柠'
RETURN p;

// Number of matches within three hops of 张莱柠.
MATCH (n:UserPerson)-[*..3]->(result)
WHERE n.user_name = '张莱柠'
RETURN count(result);

// Count over all hops, including the start node itself (zero-length match).
MATCH (n:UserPerson)-[*0..]->(result)
WHERE n.user_name = '张莱柠'
RETURN count(result);

// Count over all hops, excluding the start node (at least one hop).
MATCH (n:UserPerson)-[*1..]->(result)
WHERE n.user_name = '张莱柠'
RETURN count(result);

// Longest :gx path from 张莱柠 down to a node with no outgoing :gx relationship.
MATCH path = (person:UserPerson {user_name: '张莱柠'})-[:gx*]->(x)
WHERE NOT EXISTS((x)-[:gx]->())
WITH person, path
ORDER BY length(path) DESC
LIMIT 1
RETURN person, [n IN nodes(path) | n.user_name] AS ids, length(path) AS depth;

// Count of everything reachable from 张莱柠 over :gx relationships.
MATCH path = (person:UserPerson {user_name: '张莱柠'})-[:gx*]->(x)
RETURN count(x);

// Shortest path (at most five hops) from 张莱柠 to 张三, using shortestPath().
MATCH p = shortestPath(
  (:UserPerson {user_name: "张莱柠"})-[*..5]->(:UserPerson {user_name: "张三"})
)
RETURN p;
# 更新数据
// For every :node node that has at least one outgoing relationship,
// store the number of those relationships in n.data.
MATCH (n:node)-->()
WITH n, count(*) AS c
SET n.data = c;
添加节点
// Insert one :Person node per element of `data`, committing every 500 rows so a
// single transaction never holds the whole data set.
// NOTE(review): the credentials are hard-coded; load them from configuration or
// the environment before using this outside a local test setup.
// The URI needs the double slash ("bolt://host:port"); "bolt:/..." has no host part.
await using var driver = GraphDatabase.Driver("bolt://127.0.0.1:7687", AuthTokens.Basic("neo4j", "admin1234567"));
var session = driver.AsyncSession();
try
{
    var tx = await session.BeginTransactionAsync();
    var idx = 0;
    foreach (tree tree in data)
    {
        // Both properties belong inside one map literal; the previous text closed
        // the map after node_id, which is a Cypher syntax error.
        await tx.RunAsync("CREATE (:Person {node_id: $node_id, address: $address})", new
        {
            node_id = tree.node_id,
            address = tree.address
        });

        // Count after the statement ran so each batch is exactly 500 rows.
        idx++;
        if (idx % 500 == 0)
        {
            await tx.CommitAsync();
            tx = await session.BeginTransactionAsync();
        }
    }
    // Commit the final, possibly partial, batch.
    await tx.CommitAsync();
}
finally
{
    // Always release the session, including when a statement throws.
    await session.CloseAsync();
}
添加关系
// Create a (parent)-[:F]->(child) relationship for every element of `data`,
// committing every 500 statements.
// NOTE(review): the credentials are hard-coded; load them from configuration or
// the environment before using this outside a local test setup.
// The URI needs the double slash ("bolt://host:port"); "bolt:/..." has no host part.
await using var driver = GraphDatabase.Driver("bolt://127.0.0.1:7687", AuthTokens.Basic("neo4j", "admin1234567"));
var session = driver.AsyncSession();
try
{
    var tx = await session.BeginTransactionAsync();
    var idx = 0;
    foreach (tree tree in data)
    {
        // A row that names itself as parent would create a self-loop; skip it.
        if (tree.node_id == tree.parent_id)
        {
            continue;
        }

        // Match on the node_id property written by the node-insert snippet.
        // ID(a) is Neo4j's internal identifier and is unrelated to node_id.
        // NOTE(review): an index on :Person(node_id) keeps these lookups fast - confirm one exists.
        await tx.RunAsync("MATCH (a:Person {node_id: $startNodeId}), (b:Person {node_id: $endNodeId}) CREATE (a)-[:F]->(b)", new
        {
            startNodeId = tree.parent_id,
            endNodeId = tree.node_id
        });

        // Only statements that actually ran count toward the batch size.
        idx++;
        if (idx % 500 == 0)
        {
            await tx.CommitAsync();
            tx = await session.BeginTransactionAsync();
        }
    }
    // Commit the final, possibly partial, batch.
    await tx.CommitAsync();
}
finally
{
    // Always release the session, including when a statement throws.
    await session.CloseAsync();
}
查询id 50 下最大路径
// Longest :F path from the Person with node_id 50 down to a node that has no
// outgoing :F relationship; returns the start node, the node_ids along the
// path, and the path length.
MATCH path = (person:Person {node_id: 50})-[:F*]->(x)
WHERE NOT EXISTS((x)-[:F]->())
WITH person, path
ORDER BY length(path) DESC
LIMIT 1
RETURN person, [n IN nodes(path) | n.node_id] AS ids, length(path) AS depth;
更新所有节点深度数据
// Store on every :Person node the length of its longest downward :F path to a
// node that has no outgoing :F relationship.
// *0.. also matches the zero-length path, so such end nodes get depth 0
// instead of being left without the property.
// max() collapses all paths of one person into a single row; without it SET ran
// once per path and the stored value was whichever row happened to be written last.
MATCH path = (person:Person)-[:F*0..]->(x)
WHERE NOT EXISTS((x)-[:F]->())
WITH person, max(length(path)) AS depth
SET person.depth = depth;
python 版本
import pyarrow
import pandas as pd
import glob
import time
import duckdb
import networkx as nx
#df = pd.read_csv('*.csv')
#-------------------
# Open the DuckDB file and pull the whole `rs` table into a DataFrame.
# The commented-out statements record how the table was apparently prepared.
conn = duckdb.connect("depth.duckdb")
#conn.execute("CREATE TABLE rs AS FROM '*.csv';")
df = conn.execute("select * from rs").df()
display(df.head())  # presumably IPython's display(); this looks like a notebook paste
#conn.execute("ALTER TABLE rs ADD COLUMN depth INTEGER;")
#-------------------

# Start from an empty directed graph.
graph = nx.DiGraph()

# First pass: one node per row, keyed by 'Id', carrying every column as an attribute.
# attributes = row.drop(['Id', 'Parentid']).to_dict()
graph.add_nodes_from(
    (record['Id'], record.to_dict())
    for _, record in df.iterrows()
)

# Second pass: one parent -> child edge per row, skipping rows whose Parentid
# is 0 and rows that name themselves as parent.
graph.add_edges_from(
    (record['Parentid'], record['Id'])
    for _, record in df.iterrows()
    if record['Parentid'] != 0 and record['Id'] != record['Parentid']
)
def compute_max_depth(node, memo=None):
    """Return the number of edges on the longest downward path from ``node``.

    Successors are read from the module-level ``graph``. A node without
    successors has depth 0; any other node has 1 + the largest depth among
    its successors.

    The traversal is iterative, so very deep chains do not hit Python's
    recursion limit, and every node is resolved once per call even when it is
    reachable through several parents.

    node: the node whose depth is wanted.
    memo: optional dict mapping already-resolved nodes to their depth. It is
        filled in place, so a caller that evaluates many nodes of the same
        unchanged graph can pass one shared dict to avoid repeated work.

    Raises ValueError if a cycle is reachable from ``node`` (the depth would
    be unbounded).
    """
    if memo is None:
        memo = {}

    in_progress = set()  # nodes expanded but not yet resolved: the current DFS path
    stack = [node]
    while stack:
        current = stack[-1]
        if current in memo:
            # Resolved earlier (possibly reached through another parent).
            stack.pop()
            continue

        unresolved = [child for child in graph.successors(current) if child not in memo]
        if unresolved:
            # An unresolved child that is still being expanded is an ancestor.
            if any(child in in_progress for child in unresolved):
                raise ValueError(f"cycle detected while computing depth of {node!r}")
            in_progress.add(current)
            stack.extend(unresolved)
        else:
            # Every successor is known (or there are none): resolve this node.
            memo[current] = max(
                (memo[child] + 1 for child in graph.successors(current)),
                default=0,
            )
            in_progress.discard(current)
            stack.pop()

    return memo[node]
# Attach the computed maximum depth to every node as its 'depth' attribute.
for vertex in graph.nodes():
    graph.nodes[vertex]['depth'] = compute_max_depth(vertex)

# Print a sample of nodes with their attributes; stop after the first node
# whose id is greater than 5 has been printed.
for node, attributes in graph.nodes(data=True):
    print(f"节点 {node} 的属性:{attributes}")
    if node > 5:
        break

# df = pd.DataFrame()
# Collect each node's attribute dict (now including 'depth') and rebuild the
# DataFrame from them.
#     attributes['id'] = node  # add an 'id' column
#     df = df.append(attributes, ignore_index=True)
node_attributes = [attrs for _, attrs in graph.nodes(data=True)]
df = pd.DataFrame(node_attributes)
display(df.head())

# The DuckDB connection is no longer needed.
conn.close()
# Node whose subtree is listed.
start_node = 1

# nx.descendants() returns every node reachable from start_node.
all_nodes = nx.descendants(graph, start_node)

# Print each reachable node on its own line.
print(f"节点 {start_node} 下的所有节点:")
for descendant in all_nodes:
    print(descendant)

# Write the DataFrame to table 'table_name' in database.db, replacing the
# table if it already exists.
import sqlite3

conn3 = sqlite3.connect('database.db')
df.to_sql('table_name', conn3, if_exists='replace', index=False)
conn3.close()
matplotlib 修复
import matplotlib
print(matplotlib.get_cachedir())
rm -rf /Users/xxx/.matplotlib
# Copy C:\Windows\Fonts\simhei.ttf to C:\Users\admin\mambaforge\Lib\site-packages\matplotlib\mpl-data\fonts\ttf\simhei.ttf
画图
import matplotlib.pyplot as plt
import random
def hierarchy_pos(G, root=None, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5):
    '''
    Compute (x, y) positions that lay a tree out level by level.

    From Joel's answer at https://stackoverflow.com/a/29597209/2966723.
    Licensed under Creative Commons Attribution-Share Alike

    G: the graph; a TypeError is raised unless nx.is_tree(G) holds.

    root: the node placed at (xcenter, vert_loc).
        - directed tree, root omitted: the first node of a topological sort is used
        - directed tree, root given: only that node and its descendants are placed
        - undirected tree, root omitted: a random node is chosen

    width: horizontal space given to this branch; it is divided equally among
        the children so sibling branches do not overlap

    vert_gap: vertical distance between consecutive levels

    vert_loc: y coordinate of the root

    xcenter: x coordinate of the root

    Returns a dict mapping each placed node to its (x, y) tuple.
    '''
    if not nx.is_tree(G):
        raise TypeError('cannot use hierarchy_pos on a graph that is not a tree')

    directed = isinstance(G, nx.DiGraph)
    if root is None:
        if directed:
            # First node of the topological order (works with nx 1.11 as well).
            root = next(iter(nx.topological_sort(G)))
        else:
            root = random.choice(list(G.nodes))

    layout = {}

    def _place(node, span, y, x, came_from):
        '''
        Record the position of node, then place its children one level lower.

        span: horizontal space available to this branch
        came_from: the node we arrived from; only matters for undirected
            graphs, where it shows up among the neighbours and must be skipped
        '''
        layout[node] = (x, y)
        branches = list(G.neighbors(node))
        if came_from is not None and not directed:
            branches.remove(came_from)
        if not branches:
            return
        slot = span/len(branches)
        # Start half a slot left of the branch's left edge; each child then
        # advances by one slot and so lands on the centre of its own slot.
        cursor = x - span/2 - slot/2
        for branch in branches:
            cursor += slot
            _place(branch, slot, y - vert_gap, cursor, node)

    _place(root, width, vert_loc, xcenter, None)
    return layout
# Restrict the graph to start_node and everything reachable from it.
start_node = 256
subgraph_nodes = nx.descendants(graph, start_node)
subgraph = graph.subgraph(subgraph_nodes | {start_node})

# Drawing options shared by both figures.
draw_options = dict(
    with_labels=True,
    node_color='lightblue',
    node_size=200,
    font_size=12,
    arrows=True,
)

# Figure 1: Fruchterman-Reingold layout.
pos = nx.fruchterman_reingold_layout(subgraph)
nx.draw_networkx(subgraph, pos, **draw_options)
plt.title(f"节点 {start_node} 下的组织架构图")
plt.axis('off')
plt.show()

# Figure 2: hierarchical layout on a wider canvas.
plt.figure(figsize=(20, 8))
pos = hierarchy_pos(subgraph)
# plt.gca().invert_yaxis()
nx.draw_networkx(subgraph, pos, **draw_options)
plt.title(f"节点 {start_node} 下的组织架构图")
plt.axis('off')
plt.show()