Help me write a JSON string containing two arrays, each array holding multiple objects.
Targeting PostgreSQL 12, here is the stored-procedure implementation. PostgreSQL's JSON handling syntax differs somewhat from MySQL's.
1. Create the two tables
-- Create the users table
CREATE TABLE users (
id INT PRIMARY KEY,
name VARCHAR(100) NOT NULL,
email VARCHAR(150) UNIQUE NOT NULL,
isActive BOOLEAN DEFAULT true,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create the products table
CREATE TABLE products (
productId VARCHAR(20) PRIMARY KEY,
productName VARCHAR(200) NOT NULL,
price DECIMAL(10,2) NOT NULL,
category VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
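A note before going further: the UPSERT variants later in this thread reference an updated_at column on both tables that the DDL above does not define. If you plan to run those versions, add the column up front (this is my assumption about how you want to track update times; adjust to taste):
ALTER TABLE users ADD COLUMN updated_at TIMESTAMP;
ALTER TABLE products ADD COLUMN updated_at TIMESTAMP;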
2. Create the stored procedure (PostgreSQL version)
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
user_record JSON;
product_record JSON;
BEGIN
-- Inner block: errors below roll this block back (subtransaction)
BEGIN
-- Clear existing data (remove these two lines if you need to keep history)
DELETE FROM products;
DELETE FROM users;
-- Insert user data
FOR user_record IN
SELECT * FROM json_array_elements(p_json_data::json->'users')
LOOP
INSERT INTO users (id, name, email, isActive)
VALUES (
(user_record->>'id')::INTEGER,
user_record->>'name',
user_record->>'email',
(user_record->>'isActive')::BOOLEAN
);
END LOOP;
-- Insert product data
FOR product_record IN
SELECT * FROM json_array_elements(p_json_data::json->'products')
LOOP
INSERT INTO products (productId, productName, price, category)
VALUES (
product_record->>'productId',
product_record->>'productName',
(product_record->>'price')::DECIMAL,
product_record->>'category'
);
END LOOP;
RETURN 1; -- return 1 on success
EXCEPTION
WHEN OTHERS THEN
-- The EXCEPTION clause has already rolled back this block's changes
RAISE NOTICE 'Error: %', SQLERRM;
RETURN 0; -- return 0 on failure
END;
END;
$$;
3. A more efficient bulk-insert version (recommended)
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Inner block: errors below roll this block back (subtransaction)
BEGIN
-- Clear existing data (remove these two lines if you need to keep history)
DELETE FROM products;
DELETE FROM users;
-- Bulk-insert user data (set-based, more efficient)
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM json_array_elements(p_json_data::json->'users');
-- Bulk-insert product data (set-based, more efficient)
INSERT INTO products (productId, productName, price, category)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category'
FROM json_array_elements(p_json_data::json->'products');
RETURN 1; -- return 1 on success
EXCEPTION
WHEN OTHERS THEN
-- The EXCEPTION clause automatically rolls back this block's changes
RAISE NOTICE 'Error while inserting data: %', SQLERRM;
RETURN 0; -- return 0 on failure
END;
END;
$$;
4. Usage example
-- Call the stored function
DO $$
DECLARE
result INTEGER;
json_data TEXT;
BEGIN
json_data := '{
"users": [
{
"id": 1,
"name": "张三",
"email": "zhangsan@example.com",
"isActive": true
},
{
"id": 2,
"name": "李四",
"email": "lisi@example.com",
"isActive": false
},
{
"id": 3,
"name": "王五",
"email": "wangwu@example.com",
"isActive": true
}
],
"products": [
{
"productId": "P001",
"productName": "笔记本电脑",
"price": 5999.99,
"category": "电子产品"
},
{
"productId": "P002",
"productName": "办公椅",
"price": 899.5,
"category": "家具"
}
]
}';
result := InsertFromJSON(json_data);
RAISE NOTICE 'Result: %', result;
END
$$;
-- Or call it directly
SELECT InsertFromJSON('{"users":[...],"products":[...]}');
5. Verify the inserted data
-- Inspect the users table
SELECT * FROM users;
-- Inspect the products table
SELECT * FROM products;
Notes on this PostgreSQL version:
Function rather than procedure: in PostgreSQL a function is the usual way to return a value
JSON syntax: cast with ::json, extract text values with ->>
json_array_elements: expands a JSON array into a row set
Error handling: errors are caught with an EXCEPTION block
Bulk insert: the recommended version uses a set-based insert, which is far more efficient
This function processes the JSON payload and returns 1 on success or 0 on failure.
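As a quick, standalone illustration of ::json, ->> and json_array_elements (runnable as-is in psql):
SELECT (value->>'id')::INTEGER AS id,
       value->>'name' AS name
FROM json_array_elements('[{"id": 1, "name": "张三"}, {"id": 2, "name": "李四"}]'::json);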
If json_data is a value supplied by the calling program, how do I call the InsertFromJSON stored procedure?
In PostgreSQL an application can call the stored procedure (function) in several ways. Here are the details:
1. Calling directly in SQL
-- Option 1: pass the JSON string directly
SELECT InsertFromJSON('{
"users": [
{
"id": 1,
"name": "张三",
"email": "zhangsan@example.com",
"isActive": true
}
],
"products": [
{
"productId": "P001",
"productName": "笔记本电脑",
"price": 5999.99,
"category": "电子产品"
}
]
}');
-- Option 2: use a variable
DO $$
DECLARE
result INTEGER;
json_data TEXT;
BEGIN
json_data := '{"users": [{"id": 1, "name": "张三", "email": "zhangsan@example.com", "isActive": true}], "products": [{"productId": "P001", "productName": "笔记本电脑", "price": 5999.99, "category": "电子产品"}]}';
result := InsertFromJSON(json_data);
RAISE NOTICE 'Result: %', result;
END
$$;
2. Calling from application code (examples in several languages)
Python (using psycopg2)
import psycopg2
import json

def call_insert_from_json(json_data):
    conn = None
    cursor = None
    try:
        # Connect to the database
        conn = psycopg2.connect(
            host="localhost",
            database="your_database",
            user="your_username",
            password="your_password"
        )
        cursor = conn.cursor()
        # Call the stored function
        cursor.execute("SELECT InsertFromJSON(%s)", (json.dumps(json_data),))
        result = cursor.fetchone()[0]
        conn.commit()
        print(f"Result: {result}")
        return result
    except Exception as e:
        print(f"Error: {e}")
        return 0
    finally:
        # conn/cursor may still be None if the connection itself failed
        if cursor:
            cursor.close()
        if conn:
            conn.close()
Usage example
data = {
"users": [
{
"id": 1,
"name": "张三",
"email": "zhangsan@example.com",
"isActive": True
}
],
"products": [
{
"productId": "P001",
"productName": "笔记本电脑",
"price": 5999.99,
"category": "电子产品"
}
]
}
result = call_insert_from_json(data)
Java (using JDBC)
import java.sql.*;
import org.json.JSONObject;
public class PostgresJSONExample {
public static int callInsertFromJSON(String jsonData) {
String url = "jdbc:postgresql://localhost:5432/your_database";
String user = "your_username";
String password = "your_password";
try (Connection conn = DriverManager.getConnection(url, user, password)) {
String sql = "SELECT InsertFromJSON(?)";
try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
pstmt.setString(1, jsonData);
try (ResultSet rs = pstmt.executeQuery()) {
if (rs.next()) {
int result = rs.getInt(1);
System.out.println("执行结果: " + result);
return result;
}
}
}
} catch (SQLException e) {
e.printStackTrace();
}
return 0;
}
public static void main(String[] args) {
JSONObject data = new JSONObject();
// Build the JSON payload...
callInsertFromJSON(data.toString());
}
}
Node.js (using the pg library)
const { Client } = require('pg');
async function callInsertFromJSON(jsonData) {
const client = new Client({
host: 'localhost',
database: 'your_database',
user: 'your_username',
password: 'your_password'
});
try {
await client.connect();
const result = await client.query(
'SELECT InsertFromJSON($1)',
[JSON.stringify(jsonData)]
);
console.log('Result:', result.rows[0].insertfromjson);
return result.rows[0].insertfromjson;
} catch (error) {
console.error('Error:', error);
return 0;
} finally {
await client.end();
}
}
// Usage example
const data = {
users: [
{
id: 1,
name: "张三",
email: "zhangsan@example.com",
isActive: true
}
],
products: [
{
productId: "P001",
productName: "笔记本电脑",
price: 5999.99,
"category": "电子产品"
}
]
};
callInsertFromJSON(data);
PHP (using PDO)
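(The original left this section empty; here is a minimal sketch in the same spirit, assuming the pdo_pgsql extension and the same placeholder credentials as the other examples:)
<?php
function callInsertFromJSON(string $jsonData): int {
    // Connect via PDO using the pgsql driver
    $pdo = new PDO('pgsql:host=localhost;dbname=your_database',
                   'your_username', 'your_password');
    // Call the stored function with a bound parameter
    $stmt = $pdo->prepare('SELECT InsertFromJSON(:data)');
    $stmt->execute([':data' => $jsonData]);
    return (int) $stmt->fetchColumn(); // 1 = success, 0 = failure
}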
3. Important notes
JSON escaping: make sure the JSON string is escaped correctly, especially when it contains quotes
Character encoding: use UTF-8 throughout to avoid garbled Chinese text
Transactions: the function handles its own transaction scope, so the application needs no extra transaction control
Error handling: check the return value - 1 means success, 0 means failure
4. Simplified call
If the JSON payload is already built as an object in your program, all you need is:
A minimal Python example
import json
data = {"users": [...], "products": [...]}
result = call_insert_from_json(data)
With that, your application can call the PostgreSQL stored function to process JSON data.
If the value passed to p_json_data TEXT is very large, what impact does that have?
Passing a very large JSON payload through the p_json_data TEXT parameter does have real implications to consider:
1. Performance impact
Memory usage
-- A large JSON payload consumes more database memory
-- To parse a large JSON, PostgreSQL has to:
-- 1. allocate memory for the whole JSON string
-- 2. parse the JSON structure
-- 3. build temporary data structures
Processing time
JSON parsing: parse time grows linearly with payload size
Database work: a large number of INSERTs simply takes longer
2. Improvement: process in batches
Option 1: a function that processes in batches
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 100 -- rows per batch
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
users_count INTEGER;
products_count INTEGER;
i INTEGER := 0;
BEGIN
-- Get the user and product counts
SELECT COUNT(*) INTO users_count
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO products_count
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Total: % users, % products', users_count, products_count;
-- Process user data in batches
FOR i IN 0..((users_count - 1) / p_batch_size) LOOP
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM json_array_elements(p_json_data::json->'users')
LIMIT p_batch_size OFFSET i * p_batch_size;
RAISE NOTICE 'User batch % done', i + 1;
END LOOP;
-- Process product data in batches (same pattern)
-- ...
RETURN 1;
END;
$$;
Option 2: stream the JSON with intermediate commits (a procedure)
CREATE OR REPLACE PROCEDURE InsertFromJSONStream(p_json_data TEXT)
LANGUAGE plpgsql
AS $$
DECLARE
user_item JSON;
product_item JSON;
processed_count INTEGER := 0;
batch_size INTEGER := 500; -- commit after this many rows
BEGIN
-- Note: intermediate COMMIT is only legal in a PROCEDURE (PostgreSQL 11+),
-- never in a function, and not inside a block that has an EXCEPTION clause,
-- so this version is a procedure with no exception handler: on error,
-- batches that were already committed stay committed. COMMIT inside the
-- loop works because the loop query is read-only.
-- Process user data
FOR user_item IN
SELECT * FROM json_array_elements(p_json_data::json->'users')
LOOP
INSERT INTO users (id, name, email, isActive)
VALUES (
(user_item->>'id')::INTEGER,
user_item->>'name',
user_item->>'email',
(user_item->>'isActive')::BOOLEAN
);
processed_count := processed_count + 1;
-- Commit every batch_size rows
IF processed_count % batch_size = 0 THEN
COMMIT;
RAISE NOTICE '% user rows committed', processed_count;
END IF;
END LOOP;
-- Reset the counter and process product data
processed_count := 0;
FOR product_item IN
SELECT * FROM json_array_elements(p_json_data::json->'products')
LOOP
INSERT INTO products (productId, productName, price, category)
VALUES (
product_item->>'productId',
product_item->>'productName',
(product_item->>'price')::DECIMAL,
product_item->>'category'
);
processed_count := processed_count + 1;
IF processed_count % batch_size = 0 THEN
COMMIT;
RAISE NOTICE '% product rows committed', processed_count;
END IF;
END LOOP;
COMMIT;
END;
$$;
-- Invoke with CALL, not SELECT:
-- CALL InsertFromJSONStream('{"users": [...], "products": [...]}');
3. Parameter size limits
PostgreSQL limits
-- Check related settings
SHOW shared_buffers; -- shared buffer size
-- PostgreSQL has no equivalent of MySQL's max_allowed_packet setting.
-- A TEXT value can theoretically hold up to 1GB, but in practice you are bounded by:
-- 1. available server memory
-- 2. client/driver message-size limits (when sent over the network)
-- 3. client/server configuration
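To get a feel for how large a given payload actually is before worrying about these limits, you can measure a generated sample (a standalone sketch; the 30,000 figure is just an example):
SELECT pg_size_pretty(octet_length(
    (SELECT json_agg(json_build_object(
        'id', g,
        'name', '用户' || g,
        'email', 'user' || g || '@example.com',
        'isActive', true))::text
     FROM generate_series(1, 30000) g)
)::bigint);
-- 30k user objects come out to a few MB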
4. Alternatives: external files or streaming
Option 1: read from a file
CREATE OR REPLACE FUNCTION InsertFromJSONFile(file_path TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
file_content TEXT;
BEGIN
-- pg_read_file reads a file on the database server; it requires superuser
-- (or pg_read_server_files) privileges
file_content := pg_read_file(file_path);
RETURN InsertFromJSON(file_content);
END;
$$;
Option 2: use COPY (recommended for very large volumes)
-- Convert the JSON to CSV files first, then load them with COPY
COPY users FROM '/path/to/users.csv' WITH CSV;
COPY products FROM '/path/to/products.csv' WITH CSV;
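Note that COPY reads files on the database server itself; from a client machine, psql's \copy does the same job over the connection:
\copy users FROM 'users.csv' WITH CSV
\copy products FROM 'products.csv' WITH CSV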
5. Application-side optimization
Python example: send in batches
import json
import psycopg2
def insert_large_json_batch(data, batch_size=1000):
    """Process a large JSON payload in batches"""
    conn = psycopg2.connect("your_connection_string")
    cursor = conn.cursor()
    users = data.get('users', [])
    products = data.get('products', [])
    # Send user data in batches
    for i in range(0, len(users), batch_size):
        batch = users[i:i + batch_size]
        batch_data = {'users': batch, 'products': []}
        cursor.execute("SELECT InsertFromJSON(%s)", (json.dumps(batch_data),))
        conn.commit()
        print(f"User batch {i//batch_size + 1} done")
    # Send product data in batches (same pattern)
    for i in range(0, len(products), batch_size):
        batch = products[i:i + batch_size]
        batch_data = {'users': [], 'products': batch}
        cursor.execute("SELECT InsertFromJSON(%s)", (json.dumps(batch_data),))
        conn.commit()
        print(f"Product batch {i//batch_size + 1} done")
    cursor.close()
    conn.close()
6. Suggested best practices
Assess the data volume first:
< 10MB: pass it directly
10MB - 100MB: consider batching
> 100MB: use a file-based or streaming approach
Metrics to watch:
-- Monitor active sessions
SELECT * FROM pg_stat_activity WHERE state = 'active';
-- Monitor lock waits
SELECT * FROM pg_locks WHERE granted = false;
Configuration tuning:
-- work_mem can be raised per session (SET work_mem = '100MB')
-- In postgresql.conf you can set:
-- work_mem = 100MB
-- shared_buffers = 1GB (changing shared_buffers requires a restart)
Bottom line: for large JSON payloads, batch the work or load from files rather than shipping and parsing one oversized payload in a single call.
If the JSON string I pass has 20,000-30,000 objects, does that count as large?
20,000-30,000 objects is a fairly large payload. Let me break it down:
1. Estimating the data size
-- Estimating the JSON size
-- Assume ~100 bytes per user object and ~80 bytes per product object
-- 20,000 users: 20,000 × 100 bytes ≈ 2MB
-- 30,000 users: 30,000 × 100 bytes ≈ 3MB
-- With product data on top, the total is likely around 3-6MB
2. Potential problems
Performance impact
-- Issues with inserting 20-30k rows in one call:
-- 1. Long transaction: can run for seconds to tens of seconds
-- 2. Lock contention: holding table locks blocks other work
-- 3. Memory pressure: parsing a large JSON uses significant memory
-- 4. WAL volume: generates a lot of write-ahead log
A test you can run
-- Simulate 30k records
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();
-- Generate the test data (30k rows)
SELECT json_build_object(
'users', (SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', true
)) FROM generate_series(1, 30000) g)
) INTO big_json;
result := InsertFromJSON(big_json);
end_time := clock_timestamp();
RAISE NOTICE 'Processing 30k records took: %', end_time - start_time;
RAISE NOTICE 'Result: %', result;
END
$$;
3. Improvements (for 20-30k rows)
Option 1: optimize the existing function (recommended)
CREATE OR REPLACE FUNCTION InsertFromJSONLarge(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
batch_size CONSTANT INTEGER := 1000; -- 1000 rows per batch
total_users INTEGER;
total_products INTEGER;
i INTEGER;
BEGIN
-- Get the total record counts
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO total_products
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Starting: % users, % products', total_users, total_products;
-- Clear both tables (TRUNCATE is much cheaper than DELETE here)
TRUNCATE users, products RESTART IDENTITY;
-- Insert user data in batches
FOR i IN 0..((total_users - 1) / batch_size) LOOP
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'users')
) t
WHERE rn BETWEEN (i * batch_size + 1) AND ((i + 1) * batch_size);
IF i % 10 = 0 THEN -- report progress every 10 batches
RAISE NOTICE 'User progress: %/%', LEAST((i + 1) * batch_size, total_users), total_users;
END IF;
END LOOP;
-- Insert product data in batches (same pattern)
FOR i IN 0..((total_products - 1) / batch_size) LOOP
INSERT INTO products (productId, productName, price, category)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category'
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'products')
) t
WHERE rn BETWEEN (i * batch_size + 1) AND ((i + 1) * batch_size);
IF i % 10 = 0 THEN
RAISE NOTICE 'Product progress: %/%', LEAST((i + 1) * batch_size, total_products), total_products;
END IF;
END LOOP;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Processing failed: %', SQLERRM;
RETURN 0;
END;
$$;
Option 2: stage the rows through a temp table
-- Materialize the JSON into a temp table first, then do one set-based insert
CREATE OR REPLACE FUNCTION InsertFromJSONCopy(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Temp table holding the parsed user rows
CREATE TEMP TABLE temp_users AS
SELECT
(value->>'id')::INTEGER as id,
value->>'name' as name,
value->>'email' as email,
(value->>'isActive')::BOOLEAN as is_active
FROM json_array_elements(p_json_data::json->'users');
-- One set-based insert from the staging table
INSERT INTO users (id, name, email, isActive)
SELECT id, name, email, is_active FROM temp_users;
-- Handle product data the same way...
DROP TABLE temp_users;
RETURN 1;
END;
$$;
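One small refinement, if you prefer the temp table to clean itself up at the end of the transaction instead of the explicit DROP (only the CREATE line changes):
CREATE TEMP TABLE temp_users ON COMMIT DROP AS
SELECT ...;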
4. Application-side suggestions
Python batch-processing example
def insert_large_data_safely(json_data, batch_size=2000):
    """Safely process 20-30k records"""
    users = json_data.get('users', [])
    products = json_data.get('products', [])
    print(f"Total: {len(users)} users, {len(products)} products")
    # Process user data in batches
    for i in range(0, len(users), batch_size):
        batch = users[i:i + batch_size]
        batch_data = {'users': batch, 'products': []}
        # Each call handles a small batch (call_insert_function wraps the DB call)
        result = call_insert_function(json.dumps(batch_data))
        if result == 0:
            print(f"Batch {i//batch_size + 1} failed")
            return 0
        print(f"User progress: {min(i + batch_size, len(users))}/{len(users)}")
    # Process product data the same way...
    return 1
5. Performance-related settings
-- Temporarily raise limits for a big load
-- (set at the start of the transaction; requires the appropriate privileges)
SET LOCAL work_mem = '100MB';
SET LOCAL maintenance_work_mem = '256MB';
SET LOCAL max_parallel_workers_per_gather = 4;
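SET LOCAL only lasts until the end of the current transaction, so wrap it together with the call (a sketch; the JSON literal is a placeholder):
BEGIN;
SET LOCAL work_mem = '100MB';
SELECT InsertFromJSON('{"users": [...], "products": [...]}');
COMMIT;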
6. Monitoring
-- Check before and after the run
SELECT
now() as check_time,
pg_size_pretty(pg_database_size(current_database())) as db_size,
(SELECT count(*) FROM users) as user_count,
(SELECT count(*) FROM products) as product_count;
Summary
For 20,000-30,000 objects:
Batch the work: 1,000-2,000 records per batch
Show progress: report progress as it runs
Consider staging/COPY: the most efficient bulk-load path
Batch on the application side: avoid shipping one oversized JSON payload
Plan for timeouts: set sensible statement timeouts
PostgreSQL handles this volume fine, but it deserves a deliberate strategy.
In your example:
-- Simulate 30k records
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();
-- Generate the test data (30k rows)
SELECT json_build_object(
'users', (SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', true
)) FROM generate_series(1, 30000) g)
) INTO big_json;
result := InsertFromJSON(big_json);
end_time := clock_timestamp();
RAISE NOTICE 'Processing 30k records took: %', end_time - start_time;
RAISE NOTICE 'Result: %', result;
END
$$;
Why does it only insert into the users table and never into products? What controls that?
Good catch! The test example did indeed only populate the users table. Which tables get written is controlled by the JSON structure:
1. The problem in the test code
SELECT json_build_object(
'users', (SELECT json_agg(...) FROM generate_series(1, 30000) g)
-- the 'products' key is missing here!
) INTO big_json;
2. The corrected, complete version
-- Full test (populates both users and products)
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();
-- Generate test data (30k users + 10k products)
SELECT json_build_object(
'users', (
SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', CASE WHEN g % 10 != 0 THEN true ELSE false END
)) FROM generate_series(1, 30000) g
),
'products', (
SELECT json_agg(json_build_object(
'productId', 'P' || lpad(g::text, 5, '0'),
'productName', '产品' || g,
'price', (random() * 10000)::numeric(10,2),
'category', CASE
WHEN g % 4 = 0 THEN '电子产品'
WHEN g % 4 = 1 THEN '家具'
WHEN g % 4 = 2 THEN '服装'
ELSE '食品'
END
)) FROM generate_series(1, 10000) g
)
) INTO big_json;
result := InsertFromJSON(big_json);
end_time := clock_timestamp();
RAISE NOTICE 'Processing 30k users + 10k products took: %', end_time - start_time;
RAISE NOTICE 'Result: %', result;
-- Verify the inserts
RAISE NOTICE 'users row count: %', (SELECT count(*) FROM users);
RAISE NOTICE 'products row count: %', (SELECT count(*) FROM products);
END
$$;
3. What controls which table gets written
Which table receives rows is decided by the JSON-parsing logic inside the stored function:
-- Inside InsertFromJSON:
-- this part writes to users
INSERT INTO users (id, name, email, isActive)
SELECT ... FROM json_array_elements(p_json_data::json->'users');
-- this part writes to products
INSERT INTO products (productId, productName, price, category)
SELECT ... FROM json_array_elements(p_json_data::json->'products');
4. More flexible control
If you want finer control over which tables are written, extend the function:
Option 1: add control parameters
CREATE OR REPLACE FUNCTION InsertFromJSON(
p_json_data TEXT,
p_insert_users BOOLEAN DEFAULT true,
p_insert_products BOOLEAN DEFAULT true
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Users: only when requested
IF p_insert_users THEN
INSERT INTO users (id, name, email, isActive)
SELECT ... FROM json_array_elements(p_json_data::json->'users');
END IF;
-- Products: only when requested
IF p_insert_products THEN
INSERT INTO products (productId, productName, price, category)
SELECT ... FROM json_array_elements(p_json_data::json->'products');
END IF;
RETURN 1;
END;
$$;
-- Example: insert users but skip products
SELECT InsertFromJSON(
'{"users": [...], "products": [...]}',
true, -- insert users
false -- skip products
);
Option 2: choose the table dynamically
CREATE OR REPLACE FUNCTION InsertFromJSONFlexible(
p_json_data TEXT,
p_table_name TEXT -- 'users', 'products', or 'both'
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
IF p_table_name IN ('users', 'both') THEN
INSERT INTO users (id, name, email, isActive)
SELECT ... FROM json_array_elements(p_json_data::json->'users');
END IF;
IF p_table_name IN ('products', 'both') THEN
INSERT INTO products (productId, productName, price, category)
SELECT ... FROM json_array_elements(p_json_data::json->'products');
END IF;
RETURN 1;
END;
$$;
5. Testing the different scenarios
-- Test 1: insert users only
DO $$
DECLARE
test_json TEXT := '{"users": [{"id": 1, "name": "测试用户", "email": "ceshi@example.com", "isActive": true}], "products": []}';
BEGIN
PERFORM InsertFromJSON(test_json, true, false);
RAISE NOTICE 'Inserted user data only';
END
$$;
-- Test 2: insert products only
DO $$
DECLARE
test_json TEXT := '{"users": [], "products": [{"productId": "P001", "productName": "测试产品", "price": 99.00, "category": "测试"}]}';
BEGIN
PERFORM InsertFromJSON(test_json, false, true);
RAISE NOTICE 'Inserted product data only';
END
$$;
Summary: which table gets written is controlled jointly by the JSON keys ('users' / 'products') and the parsing logic in the stored function. My original example was missing the 'products' key, which is why only the users table was populated.
The requirements changed: the users table stays INSERT, but the products table should now be updated by its primary key productId.
Sure! Here is the stored function revised so that users are inserted and products are updated by primary key.
1. The revised stored function
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
BEGIN
-- Users: keep the plain INSERT logic
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM json_array_elements(p_json_data::json->'users');
-- Products: UPSERT keyed on productId (update or insert)
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'products')
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
RETURN 1; -- return 1 on success
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Operation failed: %', SQLERRM;
RETURN 0; -- return 0 on failure
END;
END;
$$;
2. Batched version (for large volumes)
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
BEGIN
-- Get the total record counts
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO total_products
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Starting: % users, % products', total_users, total_products;
-- Insert user data in batches
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'users')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
IF i % 10 = 0 THEN
RAISE NOTICE 'User progress: %/%', LEAST((i + 1) * p_batch_size, total_users), total_users;
END IF;
END LOOP;
-- UPSERT product data in batches
FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'products')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
IF i % 10 = 0 THEN
RAISE NOTICE 'Product progress: %/%', LEAST((i + 1) * p_batch_size, total_products), total_products;
END IF;
END LOOP;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Processing failed: %', SQLERRM;
RETURN 0;
END;
$$;
3. Testing the update logic
-- Seed some test rows first
INSERT INTO products (productId, productName, price, category) VALUES
('P001', '旧产品名称', 100.00, '旧分类'),
('P002', '另一个产品', 200.00, '电子产品');
-- Exercise the update path
DO $$
DECLARE
test_json TEXT;
result INTEGER;
rec RECORD;
BEGIN
test_json := '{
"users": [
{"id": 1, "name": "用户1", "email": "user1@test.com", "isActive": true}
],
"products": [
{"productId": "P001", "productName": "新产品名称", "price": 150.00, "category": "新分类"},
{"productId": "P003", "productName": "新增产品", "price": 300.00, "category": "新增分类"}
]
}';
result := InsertFromJSON(test_json);
RAISE NOTICE 'Result: %', result;
-- Show the outcome
RAISE NOTICE 'products rows:';
FOR rec IN SELECT * FROM products ORDER BY productId LOOP
RAISE NOTICE ' %: % - price: %, category: %, updated: %',
rec.productid, rec.productname, rec.price, rec.category, rec.updated_at;
END LOOP;
END
$$;
4. A more detailed UPSERT version (with operation counts)
CREATE OR REPLACE FUNCTION InsertFromJSONDetailed(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
user_insert_count INTEGER := 0;
product_insert_count INTEGER := 0;
product_update_count INTEGER := 0;
BEGIN
-- Insert user data
WITH user_inserts AS (
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM json_array_elements(p_json_data::json->'users')
ON CONFLICT (id) DO NOTHING -- skip rows whose user id already exists
RETURNING 1
)
SELECT COUNT(*) INTO user_insert_count FROM user_inserts;
-- UPSERT product data and count the operation types
WITH product_upserts AS (
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'products')
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP
RETURNING
(xmax = 0) AS is_insert -- xmax = 0 means the row was inserted, otherwise updated
)
SELECT
COUNT(*) FILTER (WHERE is_insert) AS inserts,
COUNT(*) FILTER (WHERE NOT is_insert) AS updates
INTO product_insert_count, product_update_count
FROM product_upserts;
RAISE NOTICE 'users inserted: %, products inserted: %, products updated: %',
user_insert_count, product_insert_count, product_update_count;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Operation failed: %', SQLERRM;
RETURN 0;
END;
$$;
5. Usage example
-- Test updates at volume
DO $$
DECLARE
big_json TEXT;
result INTEGER;
BEGIN
-- Generate test data that mixes inserts and updates.
-- Note: a single INSERT ... ON CONFLICT must not contain the same key twice
-- ("ON CONFLICT DO UPDATE command cannot affect row a second time"),
-- so the generated productIds are distinct; run this block twice to
-- exercise the update path.
SELECT json_build_object(
'users', (
SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', true
)) FROM generate_series(1, 1000) g
),
'products', (
SELECT json_agg(json_build_object(
'productId', 'P' || lpad(g::text, 3, '0'), -- 100 distinct products
'productName', '产品' || g,
'price', (random() * 1000)::numeric(10,2),
'category', CASE g % 4
WHEN 0 THEN '电子产品'
WHEN 1 THEN '家具'
ELSE '其他'
END
)) FROM generate_series(1, 100) g
)
) INTO big_json;
result := InsertFromJSON(big_json);
RAISE NOTICE 'Result: %', result;
END
$$;
6. Verify the results
-- Operation statistics
SELECT
(SELECT COUNT(*) FROM users) as total_users,
(SELECT COUNT(*) FROM products) as total_products,
(SELECT COUNT(DISTINCT productId) FROM products) as distinct_products;
-- Distribution of product update times
SELECT
COUNT(*) as product_count,
DATE(updated_at) as update_date
FROM products
GROUP BY DATE(updated_at)
ORDER BY update_date DESC;
Key changes:
Users: keep the INSERT logic (use ON CONFLICT DO NOTHING if ids may collide)
Products: ON CONFLICT DO UPDATE implements the UPSERT
Update timestamps: recorded automatically
Batching: large volumes can be UPSERTed in batches
This implements insert-only users and update-by-primary-key products.
For the users table too: when the primary key already exists, update instead; when it doesn't, insert - i.e. MERGE INTO.
Got it! Here is the function revised to UPSERT the users table as well. PostgreSQL's counterpart to MERGE INTO is INSERT ... ON CONFLICT.
1. The revised stored function (UPSERT both tables)
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
BEGIN
-- Users: UPSERT keyed on id (update or insert)
INSERT INTO users (id, name, email, isActive, updated_at)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN,
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'users')
ON CONFLICT (id)
DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = CURRENT_TIMESTAMP;
-- Products: UPSERT keyed on productId
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'products')
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
RETURN 1; -- return 1 on success
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Operation failed: %', SQLERRM;
RETURN 0; -- return 0 on failure
END;
END;
$$;
2. Batched version
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
batch_inserts INTEGER;
batch_updates INTEGER;
user_inserts INTEGER := 0;
user_updates INTEGER := 0;
product_inserts INTEGER := 0;
product_updates INTEGER := 0;
BEGIN
-- Get the total record counts
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO total_products
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Starting: % users, % products', total_users, total_products;
-- UPSERT user data in batches
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
WITH user_batch AS (
INSERT INTO users (id, name, email, isActive, updated_at)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN,
CURRENT_TIMESTAMP
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'users')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
ON CONFLICT (id)
DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = CURRENT_TIMESTAMP
RETURNING (xmax = 0) as is_insert
)
SELECT
COUNT(*) FILTER (WHERE is_insert),
COUNT(*) FILTER (WHERE NOT is_insert)
INTO batch_inserts, batch_updates
FROM user_batch;
user_inserts := user_inserts + batch_inserts;
user_updates := user_updates + batch_updates;
IF i % 10 = 0 THEN
RAISE NOTICE 'User progress: %/% (inserted: %, updated: %)',
LEAST((i + 1) * p_batch_size, total_users), total_users, user_inserts, user_updates;
END IF;
END LOOP;
-- UPSERT product data in batches
FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
WITH product_batch AS (
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'products')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP
RETURNING (xmax = 0) as is_insert
)
SELECT
COUNT(*) FILTER (WHERE is_insert),
COUNT(*) FILTER (WHERE NOT is_insert)
INTO batch_inserts, batch_updates
FROM product_batch;
product_inserts := product_inserts + batch_inserts;
product_updates := product_updates + batch_updates;
IF i % 10 = 0 THEN
RAISE NOTICE 'Product progress: %/% (inserted: %, updated: %)',
LEAST((i + 1) * p_batch_size, total_products), total_products, product_inserts, product_updates;
END IF;
END LOOP;
RAISE NOTICE 'Done: users (inserted: %, updated: %), products (inserted: %, updated: %)',
user_inserts, user_updates, product_inserts, product_updates;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Processing failed: %', SQLERRM;
RETURN 0;
END;
$$;
3. Testing the UPSERT logic
-- Seed some test rows first
INSERT INTO users (id, name, email, isActive) VALUES
(1, '旧用户1', 'old1@test.com', true),
(2, '旧用户2', 'old2@test.com', false);
INSERT INTO products (productId, productName, price, category) VALUES
('P001', '旧产品1', 100.00, '旧分类'),
('P002', '旧产品2', 200.00, '电子产品');
-- Exercise the UPSERT logic
DO $$
DECLARE
test_json TEXT;
result INTEGER;
rec RECORD;
BEGIN
test_json := '{
"users": [
{"id": 1, "name": "新用户1", "email": "new1@test.com", "isActive": false},
{"id": 3, "name": "新增用户", "email": "new3@test.com", "isActive": true}
],
"products": [
{"productId": "P001", "productName": "新产品1", "price": 150.00, "category": "新分类"},
{"productId": "P003", "productName": "新增产品", "price": 300.00, "category": "新增分类"}
]
}';
result := InsertFromJSON(test_json);
RAISE NOTICE 'Result: %', result;
-- Show the users table
RAISE NOTICE 'users rows:';
FOR rec IN SELECT * FROM users ORDER BY id LOOP
RAISE NOTICE ' %: % - email: %, active: %, updated: %',
rec.id, rec.name, rec.email, rec.isactive, rec.updated_at;
END LOOP;
-- Show the products table
RAISE NOTICE 'products rows:';
FOR rec IN SELECT * FROM products ORDER BY productid LOOP
RAISE NOTICE ' %: % - price: %, category: %, updated: %',
rec.productid, rec.productname, rec.price, rec.category, rec.updated_at;
END LOOP;
END
$$;
4. A version with finer-grained strategy control
CREATE OR REPLACE FUNCTION InsertFromJSONAdvanced(
p_json_data TEXT,
p_user_strategy TEXT DEFAULT 'upsert', -- 'upsert', 'insert', 'update'
p_product_strategy TEXT DEFAULT 'upsert'
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- users handling
IF p_user_strategy = 'upsert' THEN
INSERT INTO users (id, name, email, isActive, updated_at)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN,
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'users')
ON CONFLICT (id)
DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = CURRENT_TIMESTAMP;
ELSIF p_user_strategy = 'insert' THEN
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM json_array_elements(p_json_data::json->'users')
ON CONFLICT (id) DO NOTHING;
ELSIF p_user_strategy = 'update' THEN
UPDATE users SET
name = updates.name,
email = updates.email,
isActive = updates.isActive,
updated_at = CURRENT_TIMESTAMP
FROM (
SELECT
(value->>'id')::INTEGER as id,
value->>'name' as name,
value->>'email' as email,
(value->>'isActive')::BOOLEAN as isActive
FROM json_array_elements(p_json_data::json->'users')
) AS updates
WHERE users.id = updates.id;
END IF;
-- products handling (same pattern)
IF p_product_strategy = 'upsert' THEN
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'products')
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
-- ... the other strategies follow the same pattern
END IF;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '操作失败: %', SQLERRM;
RETURN 0;
END;
$$;
5. Usage examples
-- Try the different strategies
SELECT InsertFromJSONAdvanced(
'{"users": [{"id": 1, "name": "测试用户", "email": "test@test.com", "isActive": true}]}',
'upsert', -- users: UPSERT
'upsert' -- products: UPSERT
);
-- Insert new users only; leave existing ones untouched
SELECT InsertFromJSONAdvanced(
'{"users": [{"id": 1, "name": "测试用户", "email": "test@test.com", "isActive": true}]}',
'insert', -- insert only
'upsert'
);
6. Verifying the UPSERT results
-- Detailed UPSERT statistics
SELECT
'users' as table_name,
COUNT(*) as total_count,
COUNT(updated_at) as updated_count
FROM users
UNION ALL
SELECT
'products' as table_name,
COUNT(*) as total_count,
COUNT(updated_at) as updated_count
FROM products;
-- Recently updated rows (id cast to text so the UNION types match)
SELECT 'users' as table_name, id::text as key, name, updated_at
FROM users
WHERE updated_at >= CURRENT_DATE
UNION ALL
SELECT 'products' as table_name, productId as key, productName, updated_at
FROM products
WHERE updated_at >= CURRENT_DATE
ORDER BY table_name, key;
Key changes:
Users: ON CONFLICT DO UPDATE implements the UPSERT
Products: the same UPSERT logic
Update timestamps: recorded automatically
Batching: large volumes can be UPSERTed in batches
Operation counts: the number of inserts and updates is reported
Both tables now UPSERT: an existing primary key updates the row, a new key inserts it.
Are the stored procedures and SQL above compatible with PostgreSQL 9.2.4?
No! PostgreSQL 9.2.4 has serious incompatibilities with the code above. In detail:
1. The main incompatibilities
JSON functions (the most serious)
-- PostgreSQL 9.2.4 lacks these JSON functions:
- json_array_elements() -- introduced in 9.3
- json_extract_path() -- 9.3+
- json_build_object() -- 9.4+
- the jsonb type -- introduced in 9.4
No UPSERT
-- ON CONFLICT DO UPDATE arrived in 9.5
INSERT ... ON CONFLICT DO UPDATE -- 9.5+ only
Other syntax gaps
- the FILTER clause -- 9.4+
- psql's \gexec -- 9.6+
- various window-function improvements -- 9.2 is limited here
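A quick way to confirm what a given server actually is (works on all of these versions):
SELECT version();
SELECT current_setting('server_version_num'); -- e.g. 90204 for 9.2.4, 120000 for 12.0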
2. A PostgreSQL 9.2.4-compatible version
A compatible stored function
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER AS $$
DECLARE
users_json TEXT;
products_json TEXT;
user_rec RECORD;
product_rec RECORD;
BEGIN
-- On 9.2.4 the JSON has to be parsed by hand
-- Extract the users and products arrays
users_json := substring(p_json_data from '"users":\s*\[(.*?)\]');
products_json := substring(p_json_data from '"products":\s*\[(.*?)\]');
-- Parse the user objects by hand
FOR user_rec IN
SELECT * FROM regexp_split_to_table(users_json, '\},\{') AS user_obj
LOOP
-- Extract each field by hand (simplified; real parsing is more involved)
INSERT INTO users (id, name, email, isActive)
VALUES (
(substring(user_rec.user_obj from '"id":\s*(\d+)'))::INTEGER,
substring(user_rec.user_obj from '"name":"([^"]+)"'),
substring(user_rec.user_obj from '"email":"([^"]+)"'),
(substring(user_rec.user_obj from '"isActive":\s*(true|false)'))::BOOLEAN
);
END LOOP;
-- Parse the product objects the same way...
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Operation failed: %', SQLERRM;
RETURN 0;
END;
$$ LANGUAGE plpgsql;
A more practical option: PL/Python (if available)
-- With PL/Python installed, the parsing is much more robust
CREATE OR REPLACE FUNCTION InsertFromJSONPython(p_json_data TEXT)
RETURNS INTEGER AS $$
import json
import plpy
try:
    data = json.loads(p_json_data)
    # Users
    for user in data.get('users', []):
        plan = plpy.prepare("""
            INSERT INTO users (id, name, email, isActive)
            VALUES ($1, $2, $3, $4)
        """, ["integer", "text", "text", "boolean"])
        plpy.execute(plan, [
            user['id'],
            user['name'],
            user['email'],
            user['isActive']
        ])
    # Products (UPSERT has to be done by hand on 9.2)
    for product in data.get('products', []):
        # Does the row already exist?
        check_plan = plpy.prepare(
            "SELECT 1 FROM products WHERE productId = $1",
            ["text"]
        )
        exists = plpy.execute(check_plan, [product['productId']])
        if exists.nrows() > 0:
            # Update
            update_plan = plpy.prepare("""
                UPDATE products
                SET productName = $1, price = $2, category = $3
                WHERE productId = $4
            """, ["text", "numeric", "text", "text"])
            plpy.execute(update_plan, [
                product['productName'],
                float(product['price']),
                product['category'],
                product['productId']
            ])
        else:
            # Insert
            insert_plan = plpy.prepare("""
                INSERT INTO products (productId, productName, price, category)
                VALUES ($1, $2, $3, $4)
            """, ["text", "text", "numeric", "text"])
            plpy.execute(insert_plan, [
                product['productId'],
                product['productName'],
                float(product['price']),
                product['category']
            ])
    return 1
except Exception as e:
    plpy.error("Operation failed: " + str(e))
$$ LANGUAGE plpythonu;
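PL/Python has to be installed in the database first; on a 9.2-era server that means the (untrusted) plpythonu language, assuming the OS package is present:
CREATE EXTENSION plpythonu;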
3. The most compatible option: parse JSON in the application
The safest route is to handle the JSON parsing in application code:
Python example
import psycopg2
import json

def insert_from_json_92(json_data, connection_string):
    conn = psycopg2.connect(connection_string)
    cursor = conn.cursor()
    try:
        # Users: manual upsert (update first, insert if absent;
        # note this is not concurrency-safe without extra locking)
        for user in json_data.get('users', []):
            cursor.execute("""
                UPDATE users SET name = %s, email = %s, isActive = %s
                WHERE id = %s;
                INSERT INTO users (id, name, email, isActive)
                SELECT %s, %s, %s, %s
                WHERE NOT EXISTS (SELECT 1 FROM users WHERE id = %s);
            """, (
                user['name'], user['email'], user['isActive'], user['id'],
                user['id'], user['name'], user['email'], user['isActive'], user['id']
            ))
        # Products: the same manual upsert
        for product in json_data.get('products', []):
            cursor.execute("""
                UPDATE products SET productName = %s, price = %s, category = %s
                WHERE productId = %s;
                INSERT INTO products (productId, productName, price, category)
                SELECT %s, %s, %s, %s
                WHERE NOT EXISTS (SELECT 1 FROM products WHERE productId = %s);
            """, (
                product['productName'], product['price'], product['category'], product['productId'],
                product['productId'], product['productName'], product['price'], product['category'], product['productId']
            ))
        conn.commit()
        return 1
    except Exception as e:
        conn.rollback()
        print(f"Error: {e}")
        return 0
    finally:
        cursor.close()
        conn.close()
4. Upgrade advice
I strongly recommend upgrading PostgreSQL, because:
Security: 9.2.4 went out of support in 2017 and has known vulnerabilities
Features: it lacks modern JSON processing
Performance: old releases are considerably slower
PostgreSQL version status:
9.2: end of life
9.6: extended support has ended
12+: recommended
14+: latest stable at the time of writing
5. A stopgap if you must stay on 9.2.4
-- The simplest row-by-row approach (poor performance)
CREATE OR REPLACE FUNCTION SimpleInsertFromJSON(p_json_data TEXT)
RETURNS INTEGER AS $$
DECLARE
user_text TEXT;
product_text TEXT;
user_parts TEXT[];
product_parts TEXT[];
i INTEGER;
BEGIN
-- Very basic JSON parsing (assumes a fixed format)
user_text := substring(p_json_data from '"users":\s*\[(.*?)\]');
product_text := substring(p_json_data from '"products":\s*\[(.*?)\]');
-- Split the user objects (naive implementation)
user_parts := regexp_split_to_array(user_text, '\},\{');
FOR i IN 1..array_length(user_parts, 1) LOOP
-- Extract each field by hand (extremely tedious and error-prone)
-- ...complex string parsing goes here...
END LOOP;
RETURN 1;
EXCEPTION WHEN OTHERS THEN
RETURN 0;
END;
$$ LANGUAGE plpgsql;
Bottom line: on PostgreSQL 9.2.4, either upgrade the database or parse the JSON in the application - don't try to do it inside a database function.
In openGauss, are the following functions available?
-- PostgreSQL 9.2.4 lacks these JSON functions:
- json_array_elements() -- introduced in 9.3
- json_extract_path() -- 9.3+
- json_build_object() -- 9.4+
- the jsonb type -- introduced in 9.4
OpenGauss is built on PostgreSQL, but its JSON support differs in places. Here is the breakdown:
1. JSON function support in OpenGauss
OpenGauss 3.0+ generally supports the following JSON functions:
✅ Supported:
-- These are normally all available in OpenGauss
- json_array_elements() -- ✅ supported
- json_extract_path() -- ✅ supported (or an equivalent)
- json_build_object() -- ✅ supported
- the jsonb type -- ✅ supported (OpenGauss has jsonb)
Other JSON functions you will also find there (standard PostgreSQL functions that OpenGauss inherits):
- json_array_length() -- length of a JSON array
- json_each() -- expand a JSON object into key/value pairs
- json_object_keys() -- keys of a JSON object
- json_populate_record() -- JSON to record
- json_to_record() -- JSON to record
2. An OpenGauss-compatible stored function
-- OpenGauss-compatible stored function
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
user_item JSON;
product_item JSON;
user_ops INTEGER := 0;
product_ops INTEGER := 0;
BEGIN
-- OpenGauss supports json_array_elements
FOR user_item IN
SELECT * FROM json_array_elements(p_json_data::json->'users')
LOOP
-- OpenGauss supports ON CONFLICT (verify on your version)
INSERT INTO users (id, name, email, isActive, updated_at)
VALUES (
(user_item->>'id')::INTEGER,
user_item->>'name',
user_item->>'email',
(user_item->>'isActive')::BOOLEAN,
clock_timestamp()
) ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = clock_timestamp();
-- Count the operations (FOUND is true after any successful insert or update)
IF FOUND THEN
user_ops := user_ops + 1;
END IF;
END LOOP;
-- Products, the same way
FOR product_item IN
SELECT * FROM json_array_elements(p_json_data::json->'products')
LOOP
INSERT INTO products (productId, productName, price, category, updated_at)
VALUES (
product_item->>'productId',
product_item->>'productName',
(product_item->>'price')::NUMERIC,
product_item->>'category',
clock_timestamp()
) ON CONFLICT (productId) DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = clock_timestamp();
IF FOUND THEN
product_ops := product_ops + 1;
END IF;
END LOOP;
RAISE NOTICE 'Done: % user operations, % product operations', user_ops, product_ops;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Operation failed: %', SQLERRM;
RETURN 0;
END;
$$;
3. Checking the OpenGauss version
-- Check the OpenGauss version and JSON support
SELECT
version() as db_version,
(SELECT setting FROM pg_settings WHERE name = 'server_version') as server_version,
EXISTS(
SELECT 1 FROM pg_proc WHERE proname = 'json_array_elements'
) as has_json_array_elements,
EXISTS(
SELECT 1 FROM pg_type WHERE typname = 'jsonb'
) as has_jsonb_type;
4. A batch-optimized version for OpenGauss
-- Set-based version better suited to OpenGauss
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Set-based user upsert
WITH user_data AS (
SELECT
(value->>'id')::INTEGER as id,
value->>'name' as name,
value->>'email' as email,
(value->>'isActive')::BOOLEAN as isActive
FROM json_array_elements(p_json_data::json->'users')
)
INSERT INTO users (id, name, email, isActive)
SELECT id, name, email, isActive FROM user_data
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = clock_timestamp();
-- Set-based product upsert
WITH product_data AS (
SELECT
value->>'productId' as productId,
value->>'productName' as productName,
(value->>'price')::NUMERIC as price,
value->>'category' as category
FROM json_array_elements(p_json_data::json->'products')
)
INSERT INTO products (productId, productName, price, category)
SELECT productId, productName, price, category FROM product_data
ON CONFLICT (productId) DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = clock_timestamp();
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Batch processing failed: %', SQLERRM;
RETURN 0;
END;
$$;
5. Putting the JSON functions together
-- Combined JSON processing
CREATE OR REPLACE FUNCTION AdvancedJSONProcessing(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
user_count INTEGER;
product_count INTEGER;
BEGIN
-- Use the JSON functions
SELECT COUNT(*) INTO user_count
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO product_count
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'JSON payload: % users, % products', user_count, product_count;
-- Hand off to the batch version (jsonb can help for very large payloads)
PERFORM InsertFromJSONBatch(p_json_data);
RETURN 1;
END;
$$;
6. Compatibility check script
-- Run this in OpenGauss to check JSON support
DO $$
BEGIN
-- Check the key JSON functions
IF EXISTS(SELECT 1 FROM pg_proc WHERE proname = 'json_array_elements') THEN
RAISE NOTICE '✅ json_array_elements is available';
ELSE
RAISE NOTICE '❌ json_array_elements is not available';
END IF;
IF EXISTS(SELECT 1 FROM pg_type WHERE typname = 'jsonb') THEN
RAISE NOTICE '✅ jsonb type is available';
ELSE
RAISE NOTICE '❌ jsonb type is not available';
END IF;
-- Test the ON CONFLICT syntax
BEGIN
CREATE TEMP TABLE test_conflict (id INT PRIMARY KEY, name TEXT);
INSERT INTO test_conflict VALUES (1, 'test')
ON CONFLICT (id) DO UPDATE SET name = 'updated';
RAISE NOTICE '✅ ON CONFLICT syntax supported';
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE '❌ ON CONFLICT syntax not supported';
END;
END
$$;
Summary
OpenGauss generally supports these JSON functions, with some variation by version:
OpenGauss 3.0+: the JSON functions above are essentially all present
JSONB: OpenGauss has full jsonb support
Syntax: PostgreSQL constructs such as ON CONFLICT are usually supported
Recommendation: run the compatibility check script in your OpenGauss environment first, and only rely on the functions it confirms.
I'm using your stored procedure on PG12:
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
BEGIN
-- Get the total record counts
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO total_products
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Starting: % users, % products', total_users, total_products;
-- Insert user data in batches
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'users')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
IF i % 10 = 0 THEN
RAISE NOTICE 'User progress: %/%',
LEAST((i + 1) * p_batch_size, total_users), total_users;
END IF;
END LOOP;
-- UPSERT product data in batches
FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'products')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
IF i % 10 = 0 THEN
RAISE NOTICE 'Product progress: %/%',
LEAST((i + 1) * p_batch_size, total_products), total_products;
END IF;
END LOOP;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Processing failed: %', SQLERRM;
RETURN 0;
END;
$$;
Then I run:
-- Full test (users and products)
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();
-- Generate test data (30k users + 10k products)
SELECT json_build_object(
'users', (
SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', CASE WHEN g % 10 != 0 THEN true ELSE false END
)) FROM generate_series(1, 30000) g
),
'products', (
SELECT json_agg(json_build_object(
'productId', 'P' || lpad(g::text, 5, '0'),
'productName', '产品' || g,
'price', (random() * 10000)::numeric(10,2),
'category', CASE
WHEN g % 4 = 0 THEN '电子产品'
WHEN g % 4 = 1 THEN '家具'
WHEN g % 4 = 2 THEN '服装'
ELSE '食品'
END
)) FROM generate_series(1, 10000) g
)
) INTO big_json;
result := InsertFromJSON(big_json);
end_time := clock_timestamp();
RAISE NOTICE 'Processing 30k users + 10k products took: %', end_time - start_time;
RAISE NOTICE 'Result: %', result;
-- Verify the inserts
RAISE NOTICE 'users row count: %', (SELECT count(*) FROM users);
RAISE NOTICE 'products row count: %', (SELECT count(*) FROM products);
END
$$;
-- Inspect the users table
SELECT * FROM users;
-- Inspect the products table
SELECT * FROM products;
Both tables come back empty.
The problem is a function-name mismatch! Your test calls InsertFromJSON(big_json), but the function you created is named InsertFromJSONBatch.
Solutions:
Option 1: rename the function so the names match
-- Rename the function to InsertFromJSON
-- (if an older one-argument InsertFromJSON(TEXT) already exists, drop it first with
--  DROP FUNCTION InsertFromJSON(TEXT); otherwise one-argument calls become ambiguous)
CREATE OR REPLACE FUNCTION InsertFromJSON(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
BEGIN
-- Get the total record counts
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO total_products
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Starting: % users, % products', total_users, total_products;
-- Clear existing data (if needed)
-- DELETE FROM products;
-- DELETE FROM users;
-- Insert user data in batches
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'users')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
IF i % 10 = 0 THEN
RAISE NOTICE 'User progress: %/%',
LEAST((i + 1) * p_batch_size, total_users), total_users;
END IF;
END LOOP;
-- UPSERT product data in batches
FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'products')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
ON CONFLICT (productId)
DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
IF i % 10 = 0 THEN
RAISE NOTICE 'Product progress: %/%',
LEAST((i + 1) * p_batch_size, total_products), total_products;
END IF;
END LOOP;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Processing failed: %', SQLERRM;
RETURN 0;
END;
$$;
Option 2: fix the test to call the function by its real name
-- Call the correct function name from the test
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();
-- Generate test data (30k users + 10k products)
SELECT json_build_object(
'users', (
SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', CASE WHEN g % 10 != 0 THEN true ELSE false END
)) FROM generate_series(1, 30000) g
),
'products', (
SELECT json_agg(json_build_object(
'productId', 'P' || lpad(g::text, 5, '0'),
'productName', '产品' || g,
'price', (random() * 10000)::numeric(10,2),
'category', CASE
WHEN g % 4 = 0 THEN '电子产品'
WHEN g % 4 = 1 THEN '家具'
WHEN g % 4 = 2 THEN '服装'
ELSE '食品'
END
)) FROM generate_series(1, 10000) g
)
) INTO big_json;
-- Changed here: call InsertFromJSONBatch
result := InsertFromJSONBatch(big_json, 1000); -- pass the batch size explicitly
end_time := clock_timestamp();
RAISE NOTICE 'Processing 30k users + 10k products took: %', end_time - start_time;
RAISE NOTICE 'Result: %', result;
-- Verify the inserts
RAISE NOTICE 'users row count: %', (SELECT count(*) FROM users);
RAISE NOTICE 'products row count: %', (SELECT count(*) FROM products);
END
$$;
Debugging steps:
1. First, check that the function exists
-- Check whether the function exists
SELECT proname, proargtypes, prosrc
FROM pg_proc
WHERE proname LIKE '%insertfromjson%';
-- Or list matching functions in psql
\df insertfromjson
2. Simplify the test (try a small payload first)
-- Validate the basics with a tiny payload
DO $$
DECLARE
small_json TEXT;
result INTEGER;
BEGIN
small_json := '{
"users": [
{"id": 1, "name": "测试用户1", "email": "test1@example.com", "isActive": true},
{"id": 2, "name": "测试用户2", "email": "test2@example.com", "isActive": false}
],
"products": [
{"productId": "P00001", "productName": "测试产品1", "price": 100.50, "category": "电子产品"},
{"productId": "P00002", "productName": "测试产品2", "price": 200.00, "category": "家具"}
]
}';
result := InsertFromJSONBatch(small_json, 1000);
RAISE NOTICE 'Small-payload test result: %', result;
-- Check the data right away
RAISE NOTICE 'users: %', (SELECT count(*) FROM users);
RAISE NOTICE 'products: %', (SELECT count(*) FROM products);
END
$$;
3. Add verbose debug output
-- A version with detailed debugging
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
rows_affected INTEGER;
BEGIN
RAISE NOTICE 'Stored function starting';
-- Peek at the JSON payload
RAISE NOTICE 'First 500 chars of the JSON: %', substring(p_json_data from 1 for 500);
-- Is the users key present?
IF (p_json_data::json->'users') IS NULL THEN
RAISE NOTICE 'Error: JSON has no users key';
RETURN 0;
END IF;
-- Is the products key present?
IF (p_json_data::json->'products') IS NULL THEN
RAISE NOTICE 'Error: JSON has no products key';
RETURN 0;
END IF;
-- Get the total record counts
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
SELECT COUNT(*) INTO total_products
FROM json_array_elements(p_json_data::json->'products');
RAISE NOTICE 'Parsed: % users, % products', total_users, total_products;
-- Nothing to do?
IF total_users = 0 AND total_products = 0 THEN
RAISE NOTICE 'Warning: no data found to process';
RETURN 1;
END IF;
-- Process user data in batches
IF total_users > 0 THEN
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
INSERT INTO users (id, name, email, isActive)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN
FROM (
SELECT value, row_number() OVER () as rn
FROM json_array_elements(p_json_data::json->'users')
) t
WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
GET DIAGNOSTICS rows_affected = ROW_COUNT;
RAISE NOTICE 'User batch % done, rows affected: %', i + 1, rows_affected;
END LOOP;
END IF;
-- Process product data the same way...
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Processing failed: %, SQLSTATE: %', SQLERRM, SQLSTATE;
RETURN 0;
END;
$$;
4. Step-by-step checks
-- Test each stage separately
DO $$
DECLARE
test_json TEXT;
user_count INTEGER;
product_count INTEGER;
BEGIN
test_json := '{
"users": [{"id": 1, "name": "test", "email": "test@test.com", "isActive": true}],
"products": [{"productId": "P001", "productName": "test", "price": 100, "category": "test"}]
}';
-- Test JSON parsing
SELECT COUNT(*) INTO user_count
FROM json_array_elements(test_json::json->'users');
SELECT COUNT(*) INTO product_count
FROM json_array_elements(test_json::json->'products');
RAISE NOTICE 'Parse test: % users, % products', user_count, product_count;
-- Test a single-row insert
INSERT INTO users (id, name, email, isActive)
VALUES (999, '测试', 'test@test.com', true);
RAISE NOTICE 'Single-row insert test done';
END
$$;
Start with Option 2 (call the function by its actual name from the test); if it still fails, run the debugging steps to pinpoint the problem.
Generally speaking, do I need to control the insert batch size inside the stored procedure?
Good question! In general, batch control in the stored procedure is worth having, but whether it is truly necessary depends on several factors.
1. When batch control is needed
Large volumes (batching recommended)
-- Rules of thumb for when a single call is too big:
- more than ~10,000 records ✅ batch it
- a single JSON payload over 10MB ✅ batch it
- complex computation or joins involved ✅ batch it
Long-transaction risk
-- Batching also helps when:
- running during business peak hours
- system resources are tight
- lock wait time must be minimized
- a long-running operation must be interruptible
2. Simple cases that don't need it
Small volumes
-- A single-shot insert is fine when:
- a few hundred to a few thousand records
- the JSON payload is under 1MB
- running outside critical business hours
- the system has plenty of headroom
Simple version (no batch control)
CREATE OR REPLACE FUNCTION InsertFromJSONSimple(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Users: UPSERT
INSERT INTO users (id, name, email, isActive, updated_at)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN,
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'users')
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = CURRENT_TIMESTAMP;
-- Products: UPSERT
INSERT INTO products (productId, productName, price, category, updated_at)
SELECT
value->>'productId',
value->>'productName',
(value->>'price')::DECIMAL,
value->>'category',
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'products')
ON CONFLICT (productId) DO UPDATE SET
productName = EXCLUDED.productName,
price = EXCLUDED.price,
category = EXCLUDED.category,
updated_at = CURRENT_TIMESTAMP;
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Operation failed: %', SQLERRM;
RETURN 0;
END;
$$;
3. Rule-of-thumb batch sizes

| Data volume | Suggested batch size | Typical scenario |
| --- | --- | --- |
| under 1,000 rows | single shot | small volumes, simple operations |
| 1,000 - 10,000 rows | 500 - 1,000 per batch | ordinary business data |
| 10,000 - 100,000 rows | 1,000 - 2,000 per batch | large data syncs |
| over 100,000 rows | 2,000 - 5,000 per batch | data migration, ETL |

4. A smart batching wrapper
CREATE OR REPLACE FUNCTION InsertFromJSONSmart(
p_json_data TEXT,
p_auto_batch BOOLEAN DEFAULT true -- automatically decide whether to batch
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_records INTEGER;
recommended_batch_size INTEGER;
json_size INTEGER;
BEGIN
-- Estimate the total record count and JSON size
SELECT
COALESCE(json_array_length(p_json_data::json->'users'), 0) +
COALESCE(json_array_length(p_json_data::json->'products'), 0)
INTO total_records;
json_size := octet_length(p_json_data);
-- Decide on batching
IF NOT p_auto_batch THEN
-- No batching: process directly
RETURN InsertFromJSONSimple(p_json_data);
END IF;
-- Pick a batch size based on the volume
IF total_records <= 1000 OR json_size <= 1048576 THEN -- 1MB
-- Small payload: process directly
RETURN InsertFromJSONSimple(p_json_data);
ELSIF total_records <= 10000 THEN
recommended_batch_size := 1000;
ELSIF total_records <= 50000 THEN
recommended_batch_size := 2000;
ELSE
recommended_batch_size := 5000;
END IF;
RAISE NOTICE 'Smart batching: % records, JSON size %, batch size %',
total_records, pg_size_pretty(json_size::bigint), recommended_batch_size;
-- Delegate to the batched version
RETURN InsertFromJSONBatch(p_json_data, recommended_batch_size);
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'Smart path failed, falling back to direct processing: %', SQLERRM;
RETURN InsertFromJSONSimple(p_json_data);
END;
$$;
5. What I'd recommend in a real project
Recommended: parameterized batch control
-- The most practical shape: take a batch-size parameter, default to auto
-- (note: drop any earlier one-argument InsertFromJSON first, e.g.
--  DROP FUNCTION InsertFromJSON(TEXT); otherwise one-argument calls are ambiguous)
CREATE OR REPLACE FUNCTION InsertFromJSON(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT NULL -- NULL means decide automatically
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
actual_batch_size INTEGER;
total_users INTEGER;
BEGIN
-- Work out the effective batch size
IF p_batch_size IS NOT NULL AND p_batch_size > 0 THEN
actual_batch_size := p_batch_size;
ELSE
-- Auto mode: small payloads go straight through, large ones are batched
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');
IF total_users <= 5000 THEN
-- Small payload: direct processing is more efficient
RETURN InsertFromJSONSimple(p_json_data);
ELSE
actual_batch_size := 1000; -- default batch size
END IF;
END IF;
-- Batched path
RETURN InsertFromJSONBatch(p_json_data, actual_batch_size);
END;
$$;
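Called like this (the JSON literals are placeholders for your payload):
SELECT InsertFromJSON('{"users": [...], "products": [...]}');        -- auto mode
SELECT InsertFromJSON('{"users": [...], "products": [...]}', 2000);  -- explicit batch size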
6. Performance trade-offs
-- Batch control vs. one-shot insert:
-- ✅ Advantages of batching:
-- 1. shorter lock waits from long transactions
-- 2. steadier memory use
-- 3. progress can be reported
-- 4. less is lost when something fails
-- ❌ Disadvantages of batching:
-- 1. more complex code
-- 2. slightly slower for small payloads
-- 3. more transaction management
7. My final recommendation
For your scenario (20-30k records) I recommend the smart batching approach:
-- Keep three layers of functions:
-- 1. a simple version (small payloads)
-- 2. a batched version (large payloads)
-- 3. a smart entry point (chooses automatically)
-- This keeps both the performance and the code manageable
Bottom line: for 20-30k records, batching at 1,000-2,000 rows per batch is the safe choice, while keeping the direct single-shot version available as a fallback.