使用 Python 迁移 Milvus 数据

环境:

Python: 3.11

 

from pymilvus import connections, Collection, utility

# ------------------ Configuration ------------------
COLLECTION_NAME = "my_collection"
BATCH_SIZE = 20   # rows fetched from A (and inserted into B) per iteration

# Server A: migration source
A_HOST, A_PORT, A_DBNAME = "192.168.1.134", "19530", "db_test"
A_USER, A_PASS = "root", "Milvus"

# Server B: migration target
B_HOST, B_PORT, B_DBNAME = "192.168.1.135", "19530", "db_test"
B_USER, B_PASS = "root", "Milvus"
# ---------------------------------------------------

# Register one named connection per server; every later call selects the
# server through `using=<alias>`.
connections.connect(
    alias="A",
    host=A_HOST,
    port=A_PORT,
    user=A_USER,
    password=A_PASS,
    db_name=A_DBNAME,
)
connections.connect(
    alias="B",
    host=B_HOST,
    port=B_PORT,
    user=B_USER,
    password=B_PASS,
    db_name=B_DBNAME,
)

try:
    # Open the source collection and reuse its schema for the target.
    collection_A = Collection(COLLECTION_NAME, using="A")
    schema = collection_A.schema

    # Create the collection on B with the same schema if it does not exist yet;
    # otherwise open the existing one.
    if utility.has_collection(COLLECTION_NAME, using="B"):
        collection_B = Collection(COLLECTION_NAME, using="B")
    else:
        collection_B = Collection(
            name=COLLECTION_NAME,
            schema=schema,
            using="B",
        )

    # An auto-generated primary key must not be supplied on insert (pymilvus
    # rejects the extra column), so that field is left out of the copy.
    # NOTE(review): as a consequence B assigns fresh IDs for auto_id
    # collections; confirm nothing downstream depends on A's primary keys.
    insert_fields = [
        field.name
        for field in schema.fields
        if not (schema.auto_id and field.is_primary)
    ]

    # Empty expression means "no filter": iterate over the full data set.
    iterator = collection_A.query_iterator(
        expr="",
        output_fields=insert_fields,
        batch_size=BATCH_SIZE,
    )

    count = 0
    try:
        # next() returns an empty list once the source is exhausted.
        while batch := iterator.next():
            # Pivot row dicts into column-based lists, ordered like the schema.
            entities = [[row[name] for row in batch] for name in insert_fields]
            collection_B.insert(entities)
            count += len(batch)
            print(f"已迁移 {count} 条数据...")
    finally:
        # Always release the iterator, even when an insert fails.
        iterator.close()

    # Seal the inserted data so all migrated rows are persisted and counted
    # on B before the script reports success.
    collection_B.flush()

    # NOTE: B has no index and is not loaded at this point. Before searching,
    # create an index on the vector field and load the collection, e.g.:
    #   collection_B.create_index("vector_field_name", {...})
    #   collection_B.load()

    print("数据迁移完成")
finally:
    # Close both connections whether the migration succeeded or not.
    connections.disconnect("A")
    connections.disconnect("B")

 

posted @ 2025-08-18 17:05  slnngk  阅读(29)  评论(0)    收藏  举报