帮我写个json字符串 包含两个数组 每个数组包含多个对象

针对PostgreSQL 12版本,我来修改存储过程的实现。PostgreSQL的JSON处理语法与MySQL有所不同。

  1. 创建两张表
    -- User accounts; rows come from the JSON payload's "users" array.
    -- NOTE: unquoted identifiers fold to lower case, so isActive == isactive.
    CREATE TABLE users (
        id         INT PRIMARY KEY,
        name       VARCHAR(100) NOT NULL,
        email      VARCHAR(150) UNIQUE NOT NULL,
        isActive   BOOLEAN DEFAULT true,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );

-- Product catalogue; rows come from the JSON payload's "products" array.
CREATE TABLE products (
    productId   VARCHAR(20) PRIMARY KEY,
    productName VARCHAR(200) NOT NULL,
    price       DECIMAL(10,2) NOT NULL,    -- exact decimal: never FLOAT for money
    category    VARCHAR(100),
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
2. 创建存储过程(PostgreSQL版本)
-- Load users and products from a JSON document of the form
-- {"users": [...], "products": [...]}. Returns 1 on success, 0 on error.
-- A PL/pgSQL FUNCTION always runs inside the caller's transaction and cannot
-- COMMIT/ROLLBACK itself; on an exception, the implicit sub-transaction of
-- the EXCEPTION block undoes this function's work automatically.
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
    user_record JSON;
    product_record JSON;
BEGIN
    -- Wipe existing rows; products first in case of FK references.
    -- (Remove these two statements to keep historical data.)
    DELETE FROM products;
    DELETE FROM users;

    -- Row-by-row insert of users.
    FOR user_record IN
        SELECT * FROM json_array_elements(p_json_data::json->'users')
    LOOP
        INSERT INTO users (id, name, email, isActive)
        VALUES (
            (user_record->>'id')::INTEGER,
            user_record->>'name',
            user_record->>'email',
            (user_record->>'isActive')::BOOLEAN
        );
    END LOOP;

    -- Row-by-row insert of products.
    FOR product_record IN
        SELECT * FROM json_array_elements(p_json_data::json->'products')
    LOOP
        INSERT INTO products (productId, productName, price, category)
        VALUES (
            product_record->>'productId',
            product_record->>'productName',
            (product_record->>'price')::DECIMAL,
            product_record->>'category'
        );
    END LOOP;

    RETURN 1;  -- success
EXCEPTION
    WHEN OTHERS THEN
        -- Reaching the handler already rolled this block's work back.
        RAISE NOTICE '错误: %', SQLERRM;
        RETURN 0;  -- failure
END;
$$;
3. 更高效的批量插入版本(推荐)
-- Set-based variant of InsertFromJSON: one INSERT ... SELECT per table
-- instead of per-row loops (noticeably faster for large arrays).
-- Returns 1 on success, 0 on error. Runs in the caller's transaction; the
-- EXCEPTION block's implicit sub-transaction undoes the work on failure.
CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
    -- Wipe existing rows (remove these two lines to keep history).
    DELETE FROM products;
    DELETE FROM users;

    -- Bulk-insert users straight from the JSON array.
    INSERT INTO users (id, name, email, isActive)
    SELECT
        (value->>'id')::INTEGER,
        value->>'name',
        value->>'email',
        (value->>'isActive')::BOOLEAN
    FROM json_array_elements(p_json_data::json->'users');

    -- Bulk-insert products.
    INSERT INTO products (productId, productName, price, category)
    SELECT
        value->>'productId',
        value->>'productName',
        (value->>'price')::DECIMAL,
        value->>'category'
    FROM json_array_elements(p_json_data::json->'products');

    RETURN 1;  -- success
EXCEPTION
    WHEN OTHERS THEN
        RAISE NOTICE '插入数据时发生错误: %', SQLERRM;
        RETURN 0;  -- failure
END;
$$;
4. 使用示例
-- Call the loader from an anonymous DO block and report its result.
DO $$
DECLARE
result INTEGER;  -- 1 = success, 0 = failure (see InsertFromJSON)
json_data TEXT;  -- raw JSON payload, passed as TEXT
BEGIN
json_data := '{
"users": [
{
"id": 1,
"name": "张三",
"email": "zhangsan@example.com",
"isActive": true
},
{
"id": 2,
"name": "李四",
"email": "lisi@example.com",
"isActive": false
},
{
"id": 3,
"name": "王五",
"email": "wangwu@example.com",
"isActive": true
}
],
"products": [
{
"productId": "P001",
"productName": "笔记本电脑",
"price": 5999.99,
"category": "电子产品"
},
{
"productId": "P002",
"productName": "办公椅",
"price": 899.5,
"category": "家具"
}
]
}';

result := InsertFromJSON(json_data);
RAISE NOTICE '执行结果: %', result;

END
$$;

-- 或者直接调用
SELECT InsertFromJSON('{"users":[...],"products":[...]}');
5. 验证插入结果
-- 查看用户表数据
SELECT * FROM users;

-- 查看产品表数据
SELECT * FROM products;
PostgreSQL版本特点:
使用函数而非存储过程:PostgreSQL中通常使用函数来返回值
JSON处理语法:使用 ::json转换,->>提取文本值
json_array_elements函数:用于展开JSON数组
异常处理:使用EXCEPTION块捕获错误
批量插入:推荐版本使用更高效的批量插入方式
这个PostgreSQL版本的函数能够正确处理JSON数据,并返回1(成功)或0(失败)。
如果 json_data 是程序调用时传入的值,该如何调用 InsertFromJSON 存储过程?
在PostgreSQL中,程序调用存储过程(函数)有几种方式。以下是详细的调用方法:

  1. 在SQL中直接调用
    -- 方式1:直接传递JSON字符串
    SELECT InsertFromJSON('{
    "users": [
    {
    "id": 1,
    "name": "张三",
    "email": "zhangsan@example.com",
    "isActive": true
    }
    ],
    "products": [
    {
    "productId": "P001",
    "productName": "笔记本电脑",
    "price": 5999.99,
    "category": "电子产品"
    }
    ]
    }');

-- 方式2: invoke the function through a variable inside an anonymous block.
DO $$
DECLARE
    rc INTEGER;
    payload TEXT;
BEGIN
    payload := '{"users": [{"id": 1, "name": "张三", "email": "zhangsan@example.com", "isActive": true}], "products": [{"productId": "P001", "productName": "笔记本电脑", "price": 5999.99, "category": "电子产品"}]}';
    rc := InsertFromJSON(payload);
    RAISE NOTICE '执行结果: %', rc;
END
$$;
2. 在应用程序中调用(不同语言示例)
Python (使用psycopg2)
import psycopg2
import json

def call_insert_from_json(json_data):
    """Call the InsertFromJSON() Postgres function with a dict payload.

    Returns the function's result (1 = success, 0 = failure); also returns 0
    on any client-side error.
    """
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(
            host="localhost",
            database="your_database",
            user="your_username",
            password="your_password",
        )
        cursor = conn.cursor()

        # The function parameter is TEXT, so serialise the dict first.
        cursor.execute("SELECT InsertFromJSON(%s)", (json.dumps(json_data),))
        result = cursor.fetchone()[0]

        conn.commit()
        print(f"执行结果: {result}")
        return result
    except Exception as e:
        print(f"错误: {e}")
        return 0
    finally:
        # conn/cursor stay None if connect() itself failed — guard both
        # (the original referenced an unbound name here on connect failure).
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


# 使用示例 (usage example)
data = {
    "users": [
        {
            "id": 1,
            "name": "张三",
            "email": "zhangsan@example.com",
            "isActive": True
        }
    ],
    "products": [
        {
            "productId": "P001",
            "productName": "笔记本电脑",
            "price": 5999.99,
            "category": "电子产品"
        }
    ]
}

result = call_insert_from_json(data)
Java (使用JDBC)
import java.sql.*;
import org.json.JSONObject;

// Example JDBC client for the InsertFromJSON() Postgres function.
public class PostgresJSONExample {
// Calls InsertFromJSON with a JSON string; returns the function's result
// (1 = success, 0 = failure), or 0 on a client-side SQLException.
public static int callInsertFromJSON(String jsonData) {
String url = "jdbc:postgresql://localhost:5432/your_database";
String user = "your_username";
String password = "your_password";

    // try-with-resources closes connection, statement and result set.
    try (Connection conn = DriverManager.getConnection(url, user, password)) {
        String sql = "SELECT InsertFromJSON(?)";
        try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
            pstmt.setString(1, jsonData);
            try (ResultSet rs = pstmt.executeQuery()) {
                if (rs.next()) {
                    int result = rs.getInt(1);
                    System.out.println("执行结果: " + result);
                    return result;
                }
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
    return 0;
}

// Demo entry point.
// NOTE(review): the JSONObject is empty here — populate "users"/"products"
// before a real run.
public static void main(String[] args) {
    JSONObject data = new JSONObject();
    // 构建JSON数据... -> build the JSON payload here
    callInsertFromJSON(data.toString());
}

}
Node.js (使用pg库)
const { Client } = require('pg');

/**
 * Invoke the InsertFromJSON() Postgres function with a plain JS object.
 * Resolves to 1 on success, 0 on any failure.
 */
async function callInsertFromJSON(jsonData) {
    const db = new Client({
        host: 'localhost',
        database: 'your_database',
        user: 'your_username',
        password: 'your_password'
    });

    try {
        await db.connect();

        // Parameterized call; pg lower-cases the result column name.
        const res = await db.query(
            'SELECT InsertFromJSON($1)',
            [JSON.stringify(jsonData)]
        );

        const rc = res.rows[0].insertfromjson;
        console.log('执行结果:', rc);
        return rc;
    } catch (error) {
        console.error('错误:', error);
        return 0;
    } finally {
        await db.end();
    }
}

// 使用示例
const data = {
    users: [
        {
            id: 1,
            name: "张三",
            email: "zhangsan@example.com",
            isActive: true
        }
    ],
    products: [
        {
            productId: "P001",
            productName: "笔记本电脑",
            price: 5999.99,
            "category": "电子产品"
        }
    ]
};

callInsertFromJSON(data);
PHP (使用PDO)

<?php
// Call the InsertFromJSON() Postgres function via PDO.
// NOTE(review): the transcript truncated the opening tag and the PDO
// connection setup of this snippet; reconstructed here — verify the DSN
// and credentials against your environment.
function callInsertFromJSON($jsonData) {
    try {
        $pdo = new PDO("pgsql:host=localhost;dbname=your_database",
                       "your_username", "your_password");
        $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        $stmt = $pdo->prepare("SELECT InsertFromJSON(?)");
        $stmt->execute([json_encode($jsonData)]);
        $result = $stmt->fetchColumn();
        echo "执行结果: " . $result . "\n";
        return $result;
    } catch (PDOException $e) {
        echo "错误: " . $e->getMessage() . "\n";
        return 0;
    }
}

// 使用示例
$data = [
    'users' => [
        [
            'id' => 1,
            'name' => '张三',
            'email' => 'zhangsan@example.com',
            'isActive' => true
        ]
    ],
    'products' => [
        [
            'productId' => 'P001',
            'productName' => '笔记本电脑',
            'price' => 5999.99,
            'category' => '电子产品'
        ]
    ]
];

callInsertFromJSON($data);
?>
  1. 重要注意事项
    JSON字符串转义:确保JSON字符串正确转义,特别是包含引号时
    字符编码:确保使用UTF-8编码以避免中文乱码
    事务处理:函数内部已经包含事务,应用程序中不需要额外的事务控制
    错误处理:检查返回值,1表示成功,0表示失败
  2. 简化调用示例
    如果JSON数据已经在程序中构建好对象,只需要:

Python简化示例

import json

data = {"users": [...], "products": [...]}
result = call_insert_from_json(data)
这样,您的程序就可以方便地调用PostgreSQL存储函数来处理JSON数据了。
如果 p_json_data TEXT 参数传入的值很大,会有什么影响吗?
当 p_json_data TEXT参数传递很大的JSON数据时,确实会有一些影响和考虑因素:

  1. 性能影响
    内存使用
    -- 大JSON会占用更多数据库内存
    -- 解析大JSON时,PostgreSQL需要:
    -- 1. 分配内存存储整个JSON字符串
    -- 2. 解析JSON结构
    -- 3. 创建临时数据结构
    处理时间
    JSON解析时间:大JSON的解析时间会线性增长
    数据库操作:大量INSERT操作需要更多时间

  2. 改进方案:分批处理
    方案1:修改函数支持分批处理
    -- Batched loader: inserts users and products in slices of p_batch_size
    -- rows. Returns 1 on success.
    -- NOTE: everything still runs in ONE transaction; batching only bounds
    -- the size of each INSERT, it does not shorten lock time.
    CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
        p_json_data TEXT,
        p_batch_size INTEGER DEFAULT 100  -- rows per batch
    )
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
        users_count INTEGER;
        products_count INTEGER;
        i INTEGER := 0;
    BEGIN
        SELECT COUNT(*) INTO users_count
        FROM json_array_elements(p_json_data::json->'users');

        SELECT COUNT(*) INTO products_count
        FROM json_array_elements(p_json_data::json->'products');

        RAISE NOTICE '总数据量: 用户 %, 产品 %', users_count, products_count;

        -- (count-1)/batch avoids a trailing empty batch on exact division;
        -- WITH ORDINALITY gives each element a stable 1-based position, so
        -- slicing cannot skip or duplicate rows (unlike bare LIMIT/OFFSET).
        IF users_count > 0 THEN
            FOR i IN 0..((users_count - 1) / p_batch_size) LOOP
                INSERT INTO users (id, name, email, isActive)
                SELECT
                    (u.value->>'id')::INTEGER,
                    u.value->>'name',
                    u.value->>'email',
                    (u.value->>'isActive')::BOOLEAN
                FROM json_array_elements(p_json_data::json->'users')
                     WITH ORDINALITY AS u(value, rn)
                WHERE u.rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);

                RAISE NOTICE '已处理用户批次 %', i+1;
            END LOOP;
        END IF;

        -- Products are batched the same way (the original left this branch
        -- unimplemented).
        IF products_count > 0 THEN
            FOR i IN 0..((products_count - 1) / p_batch_size) LOOP
                INSERT INTO products (productId, productName, price, category)
                SELECT
                    p.value->>'productId',
                    p.value->>'productName',
                    (p.value->>'price')::DECIMAL,
                    p.value->>'category'
                FROM json_array_elements(p_json_data::json->'products')
                     WITH ORDINALITY AS p(value, rn)
                WHERE p.rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
            END LOOP;
        END IF;

        RETURN 1;
    END;
    $$;
    方案2:使用游标处理大JSON
    -- Row-by-row loader with periodic progress NOTICEs.
    -- FIX: the original issued COMMIT/ROLLBACK inside the loops, which is
    -- illegal in a PL/pgSQL FUNCTION ("invalid transaction termination").
    -- To genuinely commit per batch, convert this to a PROCEDURE and CALL it.
    CREATE OR REPLACE FUNCTION InsertFromJSONStream(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
        user_item JSON;
        product_item JSON;
        processed_count INTEGER := 0;
        batch_size INTEGER := 500;  -- progress-report interval
    BEGIN
        -- Stream the users array.
        FOR user_item IN
            SELECT * FROM json_array_elements(p_json_data::json->'users')
        LOOP
            INSERT INTO users (id, name, email, isActive)
            VALUES (
                (user_item->>'id')::INTEGER,
                user_item->>'name',
                user_item->>'email',
                (user_item->>'isActive')::BOOLEAN
            );

            processed_count := processed_count + 1;
            IF processed_count % batch_size = 0 THEN
                RAISE NOTICE '已处理 % 条用户记录', processed_count;
            END IF;
        END LOOP;

        -- Reset the counter and stream the products array the same way.
        processed_count := 0;
        FOR product_item IN
            SELECT * FROM json_array_elements(p_json_data::json->'products')
        LOOP
            INSERT INTO products (productId, productName, price, category)
            VALUES (
                product_item->>'productId',
                product_item->>'productName',
                (product_item->>'price')::DECIMAL,
                product_item->>'category'
            );

            processed_count := processed_count + 1;
            IF processed_count % batch_size = 0 THEN
                RAISE NOTICE '已处理 % 条产品记录', processed_count;
            END IF;
        END LOOP;

        RETURN 1;
    EXCEPTION
        WHEN OTHERS THEN
            -- Entering the handler already rolled this block's work back.
            RAISE NOTICE '处理失败: %', SQLERRM;
            RETURN 0;
    END;
    $$;

  3. 参数大小限制
    PostgreSQL参数限制
    -- 查看当前设置
    -- 注意:max_allowed_packet 是 MySQL 的参数,PostgreSQL 中并不存在
    SHOW shared_buffers; -- 共享缓冲区大小
    SHOW work_mem;       -- 单次排序/哈希操作可用内存

-- PostgreSQL实际限制:
-- TEXT类型理论上最大1GB,但实际受以下限制:
-- 1. max_allowed_packet(如果通过网络传输)
-- 2. 内存大小
-- 3. 客户端/服务器配置
4. 替代方案:外部文件或流式处理
方案1:从文件读取
-- Read a JSON file from the server filesystem and feed it to InsertFromJSON().
-- NOTE: pg_read_file() needs superuser (or the pg_read_server_files role),
-- and relative paths are resolved against the data directory.
CREATE OR REPLACE FUNCTION InsertFromJSONFile(file_path TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
file_content TEXT;
BEGIN
-- Read the whole file into memory (requires superuser privileges),
-- then reuse the existing loader.
file_content := pg_read_file(file_path);
RETURN InsertFromJSON(file_content);
END;
$$;
方案2:使用COPY命令(推荐用于大数据量)
-- 先将JSON保存为文件,然后使用COPY
COPY users FROM '/path/to/users.csv' WITH CSV;
COPY products FROM '/path/to/products.csv' WITH CSV;
5. 应用程序端优化
Python示例:分批发送
import json
import psycopg2

def insert_large_json_batch(data, batch_size=1000):
    """Split a large {'users': [...], 'products': [...]} payload into
    batch_size-sized chunks and call InsertFromJSON once per chunk.

    Each chunk is committed separately, so a failure only affects the
    current chunk. The connection is always closed, even on error.
    """
    conn = psycopg2.connect("your_connection_string")
    try:
        cursor = conn.cursor()
        users = data.get('users', [])
        products = data.get('products', [])

        # Users, one chunk at a time.
        for i in range(0, len(users), batch_size):
            batch_data = {'users': users[i:i + batch_size], 'products': []}
            cursor.execute("SELECT InsertFromJSON(%s)", (json.dumps(batch_data),))
            conn.commit()
            print(f"已处理用户批次 {i//batch_size + 1}")

        # Products, same pattern.
        for i in range(0, len(products), batch_size):
            batch_data = {'users': [], 'products': products[i:i + batch_size]}
            cursor.execute("SELECT InsertFromJSON(%s)", (json.dumps(batch_data),))
            conn.commit()
            print(f"已处理产品批次 {i//batch_size + 1}")

        cursor.close()
    finally:
        # Original leaked the connection if any execute/commit raised.
        conn.close()
  1. 建议的最佳实践
    数据量评估:
    < 10MB:可以直接传递
    10MB - 100MB:考虑分批处理
    > 100MB:建议使用文件或流式处理
    监控指标:
    -- 监控内存使用
    SELECT * FROM pg_stat_activity WHERE state = 'active';

-- 监控锁等待
SELECT * FROM pg_locks WHERE granted = false;
配置优化:
-- 增加work_mem(需要重启)
-- 在postgresql.conf中设置:
-- work_mem = 100MB
-- shared_buffers = 1GB
总结:对于大JSON数据,建议使用分批处理或文件导入的方式,避免单次传输和处理过大的数据量。
如果传的json串 有2~3万个对象 这样算大吗?
2~3万个对象的JSON数据算是比较大的数据量了。让我从几个方面来分析:

  1. 数据量估算
    -- 估算JSON大小
    -- 假设每个用户对象约100字节,每个产品对象约80字节
    -- 2万个用户: 20,000 × 100字节 ≈ 2MB
    -- 3万个用户: 30,000 × 100字节 ≈ 3MB
    -- 加上产品数据,总大小可能在3-6MB左右

  2. 潜在问题
    性能影响
    -- 单次插入2-3万条记录的问题:
    -- 1. 长事务:执行时间可能达到数秒到数十秒
    -- 2. 锁竞争:长时间锁定表影响其他操作
    -- 3. 内存压力:解析大JSON占用较多内存
    -- 4. WAL日志:产生大量写前日志
    实际测试示例
    -- Benchmark: time InsertFromJSON() on 30,000 generated rows.
    DO $$
    DECLARE
    start_time TIMESTAMP;
    end_time TIMESTAMP;
    result INTEGER;
    big_json TEXT;
    BEGIN
    start_time := clock_timestamp();

    -- Generate test data (30,000 users).
    -- NOTE: no 'products' key is built here, so json->'products' is NULL
    -- downstream and only the users table is populated by the loader.
    SELECT json_build_object(
    'users', (SELECT json_agg(json_build_object(
    'id', g,
    'name', '用户' || g,
    'email', 'user' || g || '@example.com',
    'isActive', true
    )) FROM generate_series(1, 30000) g)
    ) INTO big_json;

    result := InsertFromJSON(big_json);
    end_time := clock_timestamp();

    RAISE NOTICE '处理3万条记录耗时: %', end_time - start_time;
    RAISE NOTICE '执行结果: %', result;
    END
    $$;

  3. 改进方案(针对2-3万数据量)
    方案1:优化现有函数(推荐)
    -- Large-payload loader: truncates both tables, then inserts users and
    -- products in fixed-size batches with progress NOTICEs.
    -- Returns 1 on success, 0 on failure.
    CREATE OR REPLACE FUNCTION InsertFromJSONLarge(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
        batch_size CONSTANT INTEGER := 1000;  -- rows per batch
        total_users INTEGER;
        total_products INTEGER;
        i INTEGER;
    BEGIN
        SELECT COUNT(*) INTO total_users
        FROM json_array_elements(p_json_data::json->'users');

        SELECT COUNT(*) INTO total_products
        FROM json_array_elements(p_json_data::json->'products');

        RAISE NOTICE '开始处理: 用户%, 产品%', total_users, total_products;

        -- NOTE: still runs inside this function's single transaction; other
        -- sessions only see the truncation after commit.
        TRUNCATE users, products RESTART IDENTITY;

        -- WITH ORDINALITY gives each array element a stable 1-based position;
        -- the original's row_number() OVER () has no guaranteed order, so
        -- batches could in principle skip or duplicate rows.
        FOR i IN 0..((total_users - 1) / batch_size) LOOP
            INSERT INTO users (id, name, email, isActive)
            SELECT
                (t.value->>'id')::INTEGER,
                t.value->>'name',
                t.value->>'email',
                (t.value->>'isActive')::BOOLEAN
            FROM json_array_elements(p_json_data::json->'users')
                 WITH ORDINALITY AS t(value, rn)
            WHERE t.rn BETWEEN (i * batch_size + 1) AND ((i + 1) * batch_size);

            IF i % 10 = 0 THEN  -- report every 10th batch
                RAISE NOTICE '用户数据进度: %/%',
                    LEAST((i + 1) * batch_size, total_users), total_users;
            END IF;
        END LOOP;

        -- Products, batched identically.
        FOR i IN 0..((total_products - 1) / batch_size) LOOP
            INSERT INTO products (productId, productName, price, category)
            SELECT
                t.value->>'productId',
                t.value->>'productName',
                (t.value->>'price')::DECIMAL,
                t.value->>'category'
            FROM json_array_elements(p_json_data::json->'products')
                 WITH ORDINALITY AS t(value, rn)
            WHERE t.rn BETWEEN (i * batch_size + 1) AND ((i + 1) * batch_size);

            IF i % 10 = 0 THEN
                RAISE NOTICE '产品数据进度: %/%',
                    LEAST((i + 1) * batch_size, total_products), total_products;
            END IF;
        END LOOP;

        RETURN 1;
    EXCEPTION
        WHEN OTHERS THEN
            RAISE NOTICE '处理失败: %', SQLERRM;
            RETURN 0;
    END;
    $$;
    方案2:使用COPY命令(最高效)
    -- 先将JSON转换为临时表,再用COPY
    -- Stage the JSON into temp tables, then do set-based inserts.
    -- NOTE(review): despite the name, COPY cannot consume JSON directly
    -- inside a function; temp-table staging + INSERT ... SELECT is the
    -- closest server-side equivalent.
    CREATE OR REPLACE FUNCTION InsertFromJSONCopy(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    BEGIN
        -- ON COMMIT DROP makes the temp tables self-cleaning even when an
        -- error aborts the function before an explicit DROP.
        CREATE TEMP TABLE temp_users ON COMMIT DROP AS
        SELECT
            (value->>'id')::INTEGER as id,
            value->>'name' as name,
            value->>'email' as email,
            (value->>'isActive')::BOOLEAN as is_active
        FROM json_array_elements(p_json_data::json->'users');

        INSERT INTO users (id, name, email, isActive)
        SELECT id, name, email, is_active FROM temp_users;

        -- Stage and load products the same way (the original left this
        -- branch unimplemented).
        CREATE TEMP TABLE temp_products ON COMMIT DROP AS
        SELECT
            value->>'productId' as product_id,
            value->>'productName' as product_name,
            (value->>'price')::DECIMAL as price,
            value->>'category' as category
        FROM json_array_elements(p_json_data::json->'products');

        INSERT INTO products (productId, productName, price, category)
        SELECT product_id, product_name, price, category FROM temp_products;

        RETURN 1;
    END;
    $$;

  4. 应用程序端优化建议
    Python分批处理示例
    def insert_large_data_safely(json_data, batch_size=2000):
        """Safely load 20k-30k objects by sending them in small batches.

        Returns 1 on success, 0 as soon as any batch fails.
        (Reconstructed: the transcript contained bare non-comment text lines
        and was missing the products loop.)
        """
        users = json_data.get('users', [])
        products = json_data.get('products', [])

        print(f"总数据量: {len(users)}用户, {len(products)}产品")

        # Users, batch by batch; abort on the first failed batch.
        for i in range(0, len(users), batch_size):
            batch = users[i:i + batch_size]
            batch_data = {'users': batch, 'products': []}

            result = call_insert_function(json.dumps(batch_data))
            if result == 0:
                print(f"第{i//batch_size + 1}批处理失败")
                return 0

            print(f"用户数据进度: {min(i + batch_size, len(users))}/{len(users)}")

        # Products, same pattern.
        for i in range(0, len(products), batch_size):
            batch = products[i:i + batch_size]
            batch_data = {'users': [], 'products': batch}

            result = call_insert_function(json.dumps(batch_data))
            if result == 0:
                print(f"第{i//batch_size + 1}批处理失败")
                return 0

            print(f"产品数据进度: {min(i + batch_size, len(products))}/{len(products)}")

        return 1

  5. 性能优化配置
    -- 临时调整配置提升大数据量插入性能
    -- 在函数开始时设置(需要权限)
    SET LOCAL work_mem = '100MB';
    SET LOCAL maintenance_work_mem = '256MB';
    SET LOCAL max_parallel_workers_per_gather = 4;

  6. 监控建议
    -- 在执行前后监控(count(*) 的星号在转录中丢失,此处已修复)
    SELECT
    now() as check_time,
    pg_size_pretty(pg_database_size(current_database())) as db_size,
    (SELECT count(*) FROM users) as user_count,
    (SELECT count(*) FROM products) as product_count;
    总结建议
    对于2-3万个对象的数据量:
    使用分批处理:每批1000-2000条记录
    添加进度监控:实时显示处理进度
    考虑使用COPY命令:最高效的批量插入方式
    应用程序分批调用:避免单次传输过大JSON
    做好超时处理:设置合理的超时时间
    这样的数据量在PostgreSQL中是可以处理的,但需要合理的优化策略。
    你的实例
    -- 假设处理3万条记录
    DO $$
    DECLARE
    start_time TIMESTAMP;
    end_time TIMESTAMP;
    result INTEGER;
    big_json TEXT;
    BEGIN
    start_time := clock_timestamp();

    -- 生成测试数据(3万条)
    SELECT json_build_object(
    'users', (SELECT json_agg(json_build_object(
    'id', g,
    'name', '用户' || g,
    'email', 'user' || g || '@example.com',
    'isActive', true
    )) FROM generate_series(1, 30000) g)
    ) INTO big_json;

    result := InsertFromJSON(big_json);
    end_time := clock_timestamp();

    RAISE NOTICE '处理3万条记录耗时: %', end_time - start_time;
    RAISE NOTICE '执行结果: %', result;
    END
    $$;

为什么只插入到user表 没有插入到product是怎么控制的?
您观察得很仔细!在我的测试示例中确实只插入了用户表,没有插入产品表。这是通过JSON结构控制的:

  1. 当前代码的问题
    SELECT json_build_object(
    'users', (SELECT json_agg(...) FROM generate_series(1, 30000) g)
    -- 这里缺少了'products'字段!
    ) INTO big_json;

  2. 修正后的完整版本
    -- Full benchmark: generate 30k users AND 10k products, then time the load.
    -- FIX: the transcript's count() calls had lost their '*' (extraction
    -- garbling); restored to count(*).
    DO $$
    DECLARE
        start_time TIMESTAMP;
        end_time TIMESTAMP;
        result INTEGER;
        big_json TEXT;
    BEGIN
        start_time := clock_timestamp();

        SELECT json_build_object(
            'users', (
                SELECT json_agg(json_build_object(
                    'id', g,
                    'name', '用户' || g,
                    'email', 'user' || g || '@example.com',
                    'isActive', CASE WHEN g % 10 != 0 THEN true ELSE false END
                )) FROM generate_series(1, 30000) g
            ),
            'products', (
                SELECT json_agg(json_build_object(
                    'productId', 'P' || lpad(g::text, 5, '0'),
                    'productName', '产品' || g,
                    'price', (random() * 10000)::numeric(10,2),
                    'category', CASE
                        WHEN g % 4 = 0 THEN '电子产品'
                        WHEN g % 4 = 1 THEN '家具'
                        WHEN g % 4 = 2 THEN '服装'
                        ELSE '食品'
                    END
                )) FROM generate_series(1, 10000) g
            )
        ) INTO big_json;

        result := InsertFromJSON(big_json);
        end_time := clock_timestamp();

        RAISE NOTICE '处理3万用户 + 1万产品记录耗时: %', end_time - start_time;
        RAISE NOTICE '执行结果: %', result;

        -- Verify the load.
        RAISE NOTICE '用户表记录数: %', (SELECT count(*) FROM users);
        RAISE NOTICE '产品表记录数: %', (SELECT count(*) FROM products);
    END
    $$;

  3. 控制插入的逻辑
    插入到哪张表是由存储函数中的JSON解析逻辑控制的:
    -- 在InsertFromJSON函数中:
    -- 这部分控制插入users表
    INSERT INTO users (id, name, email, isActive)
    SELECT ... FROM json_array_elements(p_json_data::json->'users');

-- 这部分控制插入products表
INSERT INTO products (productId, productName, price, category)
SELECT ... FROM json_array_elements(p_json_data::json->'products');
4. 更灵活的控制方式
如果您想要更灵活地控制插入哪些表,可以修改函数:
方案1:添加控制参数
-- Loader with per-table switches; defaults keep the original one-argument
-- call working. FIX: the original body contained literal `SELECT ...`
-- pseudo-code — filled in with the real column extraction so the example
-- is executable.
CREATE OR REPLACE FUNCTION InsertFromJSON(
p_json_data TEXT,
p_insert_users BOOLEAN DEFAULT true,
p_insert_products BOOLEAN DEFAULT true
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Load users only when requested.
IF p_insert_users THEN
    INSERT INTO users (id, name, email, isActive)
    SELECT
        (value->>'id')::INTEGER,
        value->>'name',
        value->>'email',
        (value->>'isActive')::BOOLEAN
    FROM json_array_elements(p_json_data::json->'users');
END IF;

-- Load products only when requested.
IF p_insert_products THEN
    INSERT INTO products (productId, productName, price, category)
    SELECT
        value->>'productId',
        value->>'productName',
        (value->>'price')::DECIMAL,
        value->>'category'
    FROM json_array_elements(p_json_data::json->'products');
END IF;

RETURN 1;

END;
$$;

-- 使用示例:只插入用户数据,不插入产品数据
SELECT InsertFromJSON(
'{"users": [...], "products": [...]}',
true, -- 插入用户
false -- 不插入产品
);
方案2:动态表名控制
-- Variant selecting target table(s) by name: 'users', 'products' or 'both'.
-- FIX: the original body contained literal `SELECT ...` pseudo-code —
-- filled in so the example is executable.
CREATE OR REPLACE FUNCTION InsertFromJSONFlexible(
p_json_data TEXT,
p_table_name TEXT -- 'users' | 'products' | 'both'
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
IF p_table_name IN ('users', 'both') THEN
    INSERT INTO users (id, name, email, isActive)
    SELECT
        (value->>'id')::INTEGER,
        value->>'name',
        value->>'email',
        (value->>'isActive')::BOOLEAN
    FROM json_array_elements(p_json_data::json->'users');
END IF;

IF p_table_name IN ('products', 'both') THEN
    INSERT INTO products (productId, productName, price, category)
    SELECT
        value->>'productId',
        value->>'productName',
        (value->>'price')::DECIMAL,
        value->>'category'
    FROM json_array_elements(p_json_data::json->'products');
END IF;

RETURN 1;

END;
$$;
5. 测试不同场景
-- 测试1:只插入用户数据
DO $$
DECLARE
test_json TEXT := '{"users": [{"id": 1, "name": "测试用户"}], "products": []}';
BEGIN
PERFORM InsertFromJSON(test_json, true, false);
RAISE NOTICE '只插入了用户数据';
END
$$;

-- 测试2:只插入产品数据
DO $$
DECLARE
test_json TEXT = '{"users": [], "products": [{"productId": "P001", "productName": "测试产品"}]}';
BEGIN
PERFORM InsertFromJSON(test_json, false, true);
RAISE NOTICE '只插入了产品数据';
END
$$;
总结:插入到哪张表是由JSON中的键名('users'或'products')和存储函数中的解析逻辑共同控制的。我的原始示例缺少了'products'键,所以只插入了用户表。
现在做了更改 user表是insert product表根据主键productId进行更新
好的!我来修改存储函数,实现用户表插入、产品表根据主键更新的逻辑。

  1. 修改后的存储函数
    -- Users: plain INSERT. Products: UPSERT keyed on productId.
    -- NOTE(review): writes products.updated_at, which is not in the earlier
    -- CREATE TABLE — add the column (TIMESTAMP) before using this version.
    -- The redundant inner BEGIN block was removed; EXCEPTION on the function
    -- block is equivalent, and a function cannot COMMIT/ROLLBACK anyway.
    CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    BEGIN
        -- Users keep the original INSERT-only semantics.
        INSERT INTO users (id, name, email, isActive)
        SELECT
            (value->>'id')::INTEGER,
            value->>'name',
            value->>'email',
            (value->>'isActive')::BOOLEAN
        FROM json_array_elements(p_json_data::json->'users');

        -- Products: insert new ids, update existing ones.
        INSERT INTO products (productId, productName, price, category, updated_at)
        SELECT
            value->>'productId',
            value->>'productName',
            (value->>'price')::DECIMAL,
            value->>'category',
            CURRENT_TIMESTAMP
        FROM json_array_elements(p_json_data::json->'products')
        ON CONFLICT (productId)
        DO UPDATE SET
            productName = EXCLUDED.productName,
            price = EXCLUDED.price,
            category = EXCLUDED.category,
            updated_at = CURRENT_TIMESTAMP;

        RETURN 1;  -- success
    EXCEPTION
        WHEN OTHERS THEN
            RAISE NOTICE '操作失败: %', SQLERRM;
            RETURN 0;  -- failure
    END;
    $$;

  2. 分批处理版本(针对大数据量)
    CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
    p_json_data TEXT,
    p_batch_size INTEGER DEFAULT 1000
    )
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
    total_users INTEGER;
    total_products INTEGER;
    i INTEGER;
    BEGIN
    -- 获取总记录数
    SELECT COUNT(*) INTO total_users
    FROM json_array_elements(p_json_data::json->'users');

    SELECT COUNT(*) INTO total_products
    FROM json_array_elements(p_json_data::json->'products');

    RAISE NOTICE '开始处理: 用户%, 产品%', total_users, total_products;

    -- 分批插入用户数据
    FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
    INSERT INTO users (id, name, email, isActive)
    SELECT
    (value->>'id')::INTEGER,
    value->>'name',
    value->>'email',
    (value->>'isActive')::BOOLEAN
    FROM (
    SELECT value, row_number() OVER () as rn
    FROM json_array_elements(p_json_data::json->'users')
    ) t
    WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);

     IF i % 10 = 0 THEN
         RAISE NOTICE '用户数据进度: %/%', 
             LEAST((i + 1) * p_batch_size, total_users), total_users;
     END IF;
    

    END LOOP;

    -- 分批UPSERT产品数据
    FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
    INSERT INTO products (productId, productName, price, category, updated_at)
    SELECT
    value->>'productId',
    value->>'productName',
    (value->>'price')::DECIMAL,
    value->>'category',
    CURRENT_TIMESTAMP
    FROM (
    SELECT value, row_number() OVER () as rn
    FROM json_array_elements(p_json_data::json->'products')
    ) t
    WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
    ON CONFLICT (productId)
    DO UPDATE SET
    productName = EXCLUDED.productName,
    price = EXCLUDED.price,
    category = EXCLUDED.category,
    updated_at = CURRENT_TIMESTAMP;

     IF i % 10 = 0 THEN
         RAISE NOTICE '产品数据进度: %/%', 
             LEAST((i + 1) * p_batch_size, total_products), total_products;
     END IF;
    

    END LOOP;

    RETURN 1;
    EXCEPTION
    WHEN OTHERS THEN
    RAISE NOTICE '处理失败: %', SQLERRM;
    RETURN 0;
    END;
    $$;

  3. 测试更新逻辑
    -- 先插入一些测试数据
    INSERT INTO products (productId, productName, price, category) VALUES
    ('P001', '旧产品名称', 100.00, '旧分类'),
    ('P002', '另一个产品', 200.00, '电子产品');

-- Exercise the UPSERT path: P001 should be updated, P003 inserted.
-- FIX: the FOR loop used `rec` without declaring it; plpgsql requires the
-- loop record variable to be declared in DECLARE.
DO $$
DECLARE
    test_json TEXT;
    result INTEGER;
    rec RECORD;  -- loop variable for the verification query
BEGIN
    test_json := '{
"users": [
{"id": 1, "name": "用户1", "email": "user1@test.com", "isActive": true}
],
"products": [
{"productId": "P001", "productName": "新产品名称", "price": 150.00, "category": "新分类"},
{"productId": "P003", "productName": "新增产品", "price": 300.00, "category": "新增分类"}
]
}';

    result := InsertFromJSON(test_json);
    RAISE NOTICE '执行结果: %', result;

    -- Show the resulting product rows.
    RAISE NOTICE '产品表数据:';
    FOR rec IN SELECT * FROM products ORDER BY productId LOOP
        RAISE NOTICE '  %: % - 价格: %, 分类: %, 更新时间: %',
            rec.productid, rec.productname, rec.price, rec.category, rec.updated_at;
    END LOOP;

END
$$;
4. 更详细的UPSERT版本(包含更多控制)
-- UPSERT loader that also reports how many rows were inserted vs updated.
-- FIX: the original reused one variable (`inserted_count`) for both the
-- user-insert count and the product-insert count, so the final NOTICE
-- printed the product figure in the user slot.
CREATE OR REPLACE FUNCTION InsertFromJSONDetailed(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
    user_inserted INTEGER := 0;
    product_inserted INTEGER := 0;
    product_updated INTEGER := 0;
BEGIN
    -- Users: insert only; duplicate ids are silently skipped.
    WITH user_inserts AS (
        INSERT INTO users (id, name, email, isActive)
        SELECT
            (value->>'id')::INTEGER,
            value->>'name',
            value->>'email',
            (value->>'isActive')::BOOLEAN
        FROM json_array_elements(p_json_data::json->'users')
        ON CONFLICT (id) DO NOTHING
        RETURNING 1
    )
    SELECT COUNT(*) INTO user_inserted FROM user_inserts;

    -- Products: UPSERT; xmax = 0 on the returned row marks a fresh insert.
    WITH product_upserts AS (
        INSERT INTO products (productId, productName, price, category, updated_at)
        SELECT
            value->>'productId',
            value->>'productName',
            (value->>'price')::DECIMAL,
            value->>'category',
            CURRENT_TIMESTAMP
        FROM json_array_elements(p_json_data::json->'products')
        ON CONFLICT (productId)
        DO UPDATE SET
            productName = EXCLUDED.productName,
            price = EXCLUDED.price,
            category = EXCLUDED.category,
            updated_at = CURRENT_TIMESTAMP
        RETURNING (xmax = 0) AS is_insert
    )
    SELECT
        COUNT(*) FILTER (WHERE is_insert),
        COUNT(*) FILTER (WHERE NOT is_insert)
    INTO product_inserted, product_updated
    FROM product_upserts;

    RAISE NOTICE '用户插入: % 条, 产品插入: % 条, 产品更新: % 条',
        user_inserted, product_inserted, product_updated;

    RETURN 1;

EXCEPTION
    WHEN OTHERS THEN
        RAISE NOTICE '操作失败: %', SQLERRM;
        RETURN 0;
END;
$$;
5. 使用示例
-- 测试大数据量更新
DO $$
DECLARE
big_json TEXT;
result INTEGER;
BEGIN
-- 生成包含更新和新增的测试数据
SELECT json_build_object(
'users', (
SELECT json_agg(json_build_object(
'id', g,
'name', '用户' || g,
'email', 'user' || g || '@example.com',
'isActive', true
)) FROM generate_series(1, 1000) g
),
'products', (
SELECT json_agg(json_build_object(
'productId', 'P' || lpad((g % 100)::text, 3, '0'), -- 只有100个不同产品,测试更新
'productName', '产品' || g,
'price', (random() * 1000)::numeric(10,2),
'category', CASE g % 4
WHEN 0 THEN '电子产品'
WHEN 1 THEN '家具'
ELSE '其他'
END
)) FROM generate_series(1, 1000) g
)
) INTO big_json;

result := InsertFromJSON(big_json);
RAISE NOTICE '执行结果: %', result;

END
$$;
6. 验证结果
-- 查看操作统计(count(*) 的星号在转录中丢失,此处已修复)
SELECT
(SELECT COUNT(*) FROM users) as total_users,
(SELECT COUNT(*) FROM products) as total_products,
(SELECT COUNT(DISTINCT productId) FROM products) as distinct_products;

-- 查看产品的更新时间分布
SELECT
COUNT(*) as product_count,
DATE(updated_at) as update_date
FROM products
GROUP BY DATE(updated_at)
ORDER BY update_date DESC;
主要变更点:
用户表:保持INSERT逻辑(如果ID冲突可以使用ON CONFLICT DO NOTHING)
产品表:使用ON CONFLICT DO UPDATE实现UPSERT
更新时间:自动记录更新时间戳
分批处理:支持大数据量的分批UPSERT
这样就能实现用户表插入、产品表根据主键更新的需求了。
user表如果主键重复的改为更新 没有重复的使用insert merge into
好的!我来修改存储函数,使用MERGE INTO(在PostgreSQL中对应的是INSERT ... ON CONFLICT)来实现用户表的UPSERT操作。

  1. 修改后的存储函数(使用UPSERT)
    -- Full UPSERT for BOTH tables: users keyed on id, products on productId.
    -- NOTE(review): writes updated_at on both tables — the earlier DDL does
    -- not define that column; add it (TIMESTAMP) before deploying.
    -- The redundant inner BEGIN block and the misleading "transaction"
    -- comments were removed: a function cannot COMMIT/ROLLBACK; the
    -- EXCEPTION block's sub-transaction undoes the work on failure.
    CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    BEGIN
        -- Users: insert new ids, update existing ones.
        INSERT INTO users (id, name, email, isActive, updated_at)
        SELECT
            (value->>'id')::INTEGER,
            value->>'name',
            value->>'email',
            (value->>'isActive')::BOOLEAN,
            CURRENT_TIMESTAMP
        FROM json_array_elements(p_json_data::json->'users')
        ON CONFLICT (id)
        DO UPDATE SET
            name = EXCLUDED.name,
            email = EXCLUDED.email,
            isActive = EXCLUDED.isActive,
            updated_at = CURRENT_TIMESTAMP;

        -- Products: insert new ids, update existing ones.
        INSERT INTO products (productId, productName, price, category, updated_at)
        SELECT
            value->>'productId',
            value->>'productName',
            (value->>'price')::DECIMAL,
            value->>'category',
            CURRENT_TIMESTAMP
        FROM json_array_elements(p_json_data::json->'products')
        ON CONFLICT (productId)
        DO UPDATE SET
            productName = EXCLUDED.productName,
            price = EXCLUDED.price,
            category = EXCLUDED.category,
            updated_at = CURRENT_TIMESTAMP;

        RETURN 1;  -- success
    EXCEPTION
        WHEN OTHERS THEN
            RAISE NOTICE '操作失败: %', SQLERRM;
            RETURN 0;  -- failure
    END;
    $$;

  2. 分批处理版本
    -- Batched UPSERT loader for both tables. Returns 1 on success.
    -- Fixes vs. the transcript version: COUNT(*) restored (the '*' was lost
    -- to extraction garbling); stable batch slicing via WITH ORDINALITY
    -- instead of row_number() OVER (); insert/update counters ACCUMULATE
    -- across batches instead of being overwritten by each batch.
    -- NOTE(review): writes updated_at on both tables — add that column to
    -- the DDL before deploying.
    CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
        p_json_data TEXT,
        p_batch_size INTEGER DEFAULT 1000
    )
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
        total_users INTEGER;
        total_products INTEGER;
        i INTEGER;
        batch_ins INTEGER;
        batch_upd INTEGER;
        user_inserts INTEGER := 0;
        user_updates INTEGER := 0;
        product_inserts INTEGER := 0;
        product_updates INTEGER := 0;
    BEGIN
        SELECT COUNT(*) INTO total_users
        FROM json_array_elements(p_json_data::json->'users');

        SELECT COUNT(*) INTO total_products
        FROM json_array_elements(p_json_data::json->'products');

        RAISE NOTICE '开始处理: 用户%, 产品%', total_users, total_products;

        -- Users, one UPSERT statement per batch.
        FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
            WITH user_batch AS (
                INSERT INTO users (id, name, email, isActive, updated_at)
                SELECT
                    (t.value->>'id')::INTEGER,
                    t.value->>'name',
                    t.value->>'email',
                    (t.value->>'isActive')::BOOLEAN,
                    CURRENT_TIMESTAMP
                FROM json_array_elements(p_json_data::json->'users')
                     WITH ORDINALITY AS t(value, rn)
                WHERE t.rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
                ON CONFLICT (id)
                DO UPDATE SET
                    name = EXCLUDED.name,
                    email = EXCLUDED.email,
                    isActive = EXCLUDED.isActive,
                    updated_at = CURRENT_TIMESTAMP
                -- xmax = 0 on the returned row marks a fresh insert.
                RETURNING (xmax = 0) as is_insert
            )
            SELECT
                COUNT(*) FILTER (WHERE is_insert),
                COUNT(*) FILTER (WHERE NOT is_insert)
            INTO batch_ins, batch_upd
            FROM user_batch;

            user_inserts := user_inserts + batch_ins;
            user_updates := user_updates + batch_upd;

            IF i % 10 = 0 THEN
                RAISE NOTICE '用户数据进度: %/% (插入:%, 更新:%)',
                    LEAST((i + 1) * p_batch_size, total_users), total_users,
                    user_inserts, user_updates;
            END IF;
        END LOOP;

        -- Products, same pattern.
        FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
            WITH product_batch AS (
                INSERT INTO products (productId, productName, price, category, updated_at)
                SELECT
                    t.value->>'productId',
                    t.value->>'productName',
                    (t.value->>'price')::DECIMAL,
                    t.value->>'category',
                    CURRENT_TIMESTAMP
                FROM json_array_elements(p_json_data::json->'products')
                     WITH ORDINALITY AS t(value, rn)
                WHERE t.rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
                ON CONFLICT (productId)
                DO UPDATE SET
                    productName = EXCLUDED.productName,
                    price = EXCLUDED.price,
                    category = EXCLUDED.category,
                    updated_at = CURRENT_TIMESTAMP
                RETURNING (xmax = 0) as is_insert
            )
            SELECT
                COUNT(*) FILTER (WHERE is_insert),
                COUNT(*) FILTER (WHERE NOT is_insert)
            INTO batch_ins, batch_upd
            FROM product_batch;

            product_inserts := product_inserts + batch_ins;
            product_updates := product_updates + batch_upd;

            IF i % 10 = 0 THEN
                RAISE NOTICE '产品数据进度: %/% (插入:%, 更新:%)',
                    LEAST((i + 1) * p_batch_size, total_products), total_products,
                    product_inserts, product_updates;
            END IF;
        END LOOP;

        RAISE NOTICE '处理完成: 用户(插入:%, 更新:%), 产品(插入:%, 更新:%)',
            user_inserts, user_updates, product_inserts, product_updates;

        RETURN 1;
    EXCEPTION
        WHEN OTHERS THEN
            RAISE NOTICE '处理失败: %', SQLERRM;
            RETURN 0;
    END;
    $$;

  3. 测试UPSERT逻辑
    -- Seed both tables so the UPSERT test below has existing rows to update.
    INSERT INTO users (id, name, email, isActive) VALUES
    (1, '旧用户1', 'old1@test.com', true),
    (2, '旧用户2', 'old2@test.com', false);

-- Existing products: P001/P002 should be updated by the UPSERT, new ids inserted.
INSERT INTO products (productId, productName, price, category) VALUES
('P001', '旧产品1', 100.00, '旧分类'),
('P002', '旧产品2', 200.00, '电子产品');

-- Exercise the UPSERT logic: ids 1 / P001 already exist (must be updated),
-- ids 3 / P003 are new (must be inserted).
DO $$
DECLARE
test_json TEXT;
result INTEGER;
rec RECORD;  -- BUGFIX: a query FOR loop requires its loop variable to be declared
BEGIN
test_json := '{
"users": [
{"id": 1, "name": "新用户1", "email": "new1@test.com", "isActive": false},
{"id": 3, "name": "新增用户", "email": "new3@test.com", "isActive": true}
],
"products": [
{"productId": "P001", "productName": "新产品1", "price": 150.00, "category": "新分类"},
{"productId": "P003", "productName": "新增产品", "price": 300.00, "category": "新增分类"}
]
}';

result := InsertFromJSON(test_json);
RAISE NOTICE '执行结果: %', result;

-- Dump the users table for visual verification.
RAISE NOTICE '用户表数据:';
FOR rec IN SELECT * FROM users ORDER BY id LOOP
    RAISE NOTICE '  %: % - 邮箱: %, 状态: %, 更新时间: %', 
        rec.id, rec.name, rec.email, rec.isactive, rec.updated_at;
END LOOP;

-- Dump the products table for visual verification.
RAISE NOTICE '产品表数据:';
FOR rec IN SELECT * FROM products ORDER BY productid LOOP
    RAISE NOTICE '  %: % - 价格: %, 分类: %, 更新时间: %', 
        rec.productid, rec.productname, rec.price, rec.category, rec.updated_at;
END LOOP;

END
$$;
4. 更精细控制的版本
-- Strategy-controlled loader: each table can independently use
-- 'upsert' (insert-or-update), 'insert' (insert only, ignore existing ids)
-- or 'update' (update only, ignore unknown ids).
-- Returns 1 on success, 0 on failure.
CREATE OR REPLACE FUNCTION InsertFromJSONAdvanced(
p_json_data TEXT,
p_user_strategy TEXT DEFAULT 'upsert', -- 'upsert', 'insert', 'update'
p_product_strategy TEXT DEFAULT 'upsert'
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Users table.
IF p_user_strategy = 'upsert' THEN
INSERT INTO users (id, name, email, isActive, updated_at)
SELECT
(value->>'id')::INTEGER,
value->>'name',
value->>'email',
(value->>'isActive')::BOOLEAN,
CURRENT_TIMESTAMP
FROM json_array_elements(p_json_data::json->'users')
ON CONFLICT (id)
DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = CURRENT_TIMESTAMP;

-- Insert-only: existing ids are silently skipped via DO NOTHING.
ELSIF p_user_strategy = 'insert' THEN
    INSERT INTO users (id, name, email, isActive)
    SELECT 
        (value->>'id')::INTEGER,
        value->>'name',
        value->>'email',
        (value->>'isActive')::BOOLEAN
    FROM json_array_elements(p_json_data::json->'users')
    ON CONFLICT (id) DO NOTHING;
    
-- Update-only: rows whose id is absent from the table are ignored.
ELSIF p_user_strategy = 'update' THEN
    UPDATE users SET 
        name = updates.name,
        email = updates.email,
        isActive = updates.isActive,
        updated_at = CURRENT_TIMESTAMP
    FROM (
        SELECT 
            (value->>'id')::INTEGER as id,
            value->>'name' as name,
            value->>'email' as email,
            (value->>'isActive')::BOOLEAN as isActive
        FROM json_array_elements(p_json_data::json->'users')
    ) AS updates
    WHERE users.id = updates.id;
END IF;

-- Products table (same strategy dispatch; only 'upsert' is implemented here).
IF p_product_strategy = 'upsert' THEN
    INSERT INTO products (productId, productName, price, category, updated_at)
    SELECT 
        value->>'productId',
        value->>'productName',
        (value->>'price')::DECIMAL,
        value->>'category',
        CURRENT_TIMESTAMP
    FROM json_array_elements(p_json_data::json->'products')
    ON CONFLICT (productId) 
    DO UPDATE SET
        productName = EXCLUDED.productName,
        price = EXCLUDED.price,
        category = EXCLUDED.category,
        updated_at = CURRENT_TIMESTAMP
-- ... the remaining product strategies follow the users pattern above
END IF;

RETURN 1;

EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '操作失败: %', SQLERRM;
RETURN 0;
END;
$$;
5. 使用示例
-- Try the different strategies against a one-user payload.
SELECT InsertFromJSONAdvanced(
'{"users": [{"id": 1, "name": "测试用户", "email": "test@test.com", "isActive": true}]}',
'upsert', -- users: insert-or-update
'upsert' -- products: insert-or-update
);

-- Insert-only for users: an existing id 1 is left untouched (DO NOTHING).
SELECT InsertFromJSONAdvanced(
'{"users": [{"id": 1, "name": "测试用户", "email": "test@test.com", "isActive": true}]}',
'insert', -- insert only
'upsert'
);
6. 验证UPSERT结果
-- Per-table UPSERT statistics.
-- BUGFIX: COUNT() with no argument is a syntax error; row counts need
-- COUNT(*). COUNT(updated_at) counts only rows with a non-NULL updated_at.
SELECT
    'users' AS table_name,
    COUNT(*) AS total_count,
    COUNT(updated_at) AS updated_count
FROM users
UNION ALL
SELECT
    'products' AS table_name,
    COUNT(*) AS total_count,
    COUNT(updated_at) AS updated_count
FROM products;

-- Rows touched since midnight.
-- BUGFIX: users.id is INTEGER while products.productId is VARCHAR; UNION
-- cannot match those types, so the integer key is cast to text explicitly.
SELECT 'users' AS table_name, id::text AS key, name, updated_at
FROM users
WHERE updated_at >= CURRENT_DATE
UNION ALL
SELECT 'products' AS table_name, productId AS key, productName, updated_at
FROM products
WHERE updated_at >= CURRENT_DATE
ORDER BY table_name, key;
主要变更点:
用户表:使用ON CONFLICT DO UPDATE实现UPSERT
产品表:同样使用UPSERT逻辑
更新时间:自动记录更新时间戳
分批处理:支持大数据量的分批UPSERT
操作统计:返回插入和更新的记录数量
现在两个表都使用UPSERT逻辑,主键重复的更新,不重复的插入。
上面的存储过程和sql语句在在PostgreSQL 9.2.4 兼容吗?
不兼容!PostgreSQL 9.2.4 与上面代码有严重的不兼容问题。让我详细说明:

  1. 主要不兼容点
    JSON 功能不兼容(最严重)
    -- PostgreSQL 9.2.4 没有这些JSON函数:
  • json_array_elements() -- 9.3+ 才引入
  • json_extract_path() -- 9.3+
  • json_build_object() -- 9.4+
  • jsonb 类型 -- 9.4+ 才引入
    UPSERT 功能不兼容
    -- ON CONFLICT DO UPDATE 是 9.5+ 的功能
    INSERT ... ON CONFLICT DO UPDATE -- 9.5+ 才支持
    其他语法不兼容
  • FILTER 子句 -- 9.4+ 才支持
  • psql 的 \gexec -- 9.6+ 才支持
  • 一些窗口函数优化 -- 9.2 功能有限
  1. PostgreSQL 9.2.4 兼容版本
    兼容的存储函数
    -- PostgreSQL 9.2-compatible loader: 9.2 has no json_array_elements(), so
    -- the JSON document is pulled apart with regular expressions.
    -- NOTE(review): regex-based JSON parsing is fragile (breaks on nested
    -- objects, escaped quotes, reordered keys); prefer parsing in the
    -- application layer on 9.2.
    CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
    RETURNS INTEGER AS $$
    DECLARE
        users_json TEXT;
        products_json TEXT;
        user_rec RECORD;
        product_rec RECORD;
    BEGIN
        -- Extract the raw contents of the two arrays.
        -- BUGFIX: '[' / ']' are regex metacharacters and must be escaped, and
        -- the lazy quantifier is '.*?' (the '*' had been lost in formatting).
        users_json := substring(p_json_data from '"users":\[(.*?)\]');
        products_json := substring(p_json_data from '"products":\[(.*?)\]');

        -- Split the users array on the object separator and parse each field
        -- with a regex (simplified; real data needs a proper parser).
        FOR user_rec IN
            SELECT * FROM regexp_split_to_table(users_json, '},{') AS user_obj
        LOOP
            INSERT INTO users (id, name, email, isActive)
            VALUES (
                -- BUGFIX: the pattern had been corrupted to '"id"😦\d+)';
                -- the intended regex is '"id":(\d+)'.
                (substring(user_rec.user_obj from '"id":(\d+)'))::INTEGER,
                substring(user_rec.user_obj from '"name":"([^"]+)"'),
                substring(user_rec.user_obj from '"email":"([^"]+)"'),
                (substring(user_rec.user_obj from '"isActive":(true|false)'))::BOOLEAN
            );
        END LOOP;

        -- Products would be parsed the same way...

        RETURN 1;
    EXCEPTION
        WHEN OTHERS THEN
            RAISE NOTICE '操作失败: %', SQLERRM;
            RETURN 0;
    END;
    $$ LANGUAGE plpgsql;
    更实用的方案:使用PL/Python(如果可用)
    -- 如果安装了PL/Python,可以这样处理
    -- Requires the plpythonu extension (PostgreSQL 9.2 era, Python 2).
    -- Parses the JSON in Python and performs a check-then-write upsert for
    -- products, since 9.2 has no ON CONFLICT.
    CREATE OR REPLACE FUNCTION InsertFromJSONPython(p_json_data TEXT)
    RETURNS INTEGER AS $$
# BUGFIX: the function body must be indented consistently; the original mixed
# indented import lines with column-0 statements, a Python syntax error.
import json

try:
    data = json.loads(p_json_data)

    # Insert users (no upsert: duplicate ids raise and are caught below).
    for user in data.get('users', []):
        plan = plpy.prepare("""
            INSERT INTO users (id, name, email, isActive) 
            VALUES ($1, $2, $3, $4)
        """, ["integer", "text", "text", "boolean"])
        plpy.execute(plan, [
            user['id'], 
            user['name'], 
            user['email'], 
            user['isActive']
        ])

    # Manual upsert for products: probe for the key, then UPDATE or INSERT.
    # NOTE(review): check-then-write is race-prone under concurrent writers.
    for product in data.get('products', []):
        check_plan = plpy.prepare(
            "SELECT 1 FROM products WHERE productId = $1", 
            ["text"]
        )
        exists = plpy.execute(check_plan, [product['productId']])

        if exists:
            update_plan = plpy.prepare("""
                UPDATE products 
                SET productName = $1, price = $2, category = $3 
                WHERE productId = $4
            """, ["text", "numeric", "text", "text"])
            plpy.execute(update_plan, [
                product['productName'],
                float(product['price']),
                product['category'],
                product['productId']
            ])
        else:
            insert_plan = plpy.prepare("""
                INSERT INTO products (productId, productName, price, category) 
                VALUES ($1, $2, $3, $4)
            """, ["text", "text", "numeric", "text"])
            plpy.execute(insert_plan, [
                product['productId'],
                product['productName'],
                float(product['price']),
                product['category']
            ])

    return 1

except Exception as e:
    # plpy.error raises an exception that aborts the function, so the
    # original unreachable "return 0" after it has been removed.
    plpy.error("操作失败: " + str(e))
$$ LANGUAGE plpythonu;
3. 最兼容的方案:应用程序端解析
建议在应用程序中处理JSON解析:

Python示例

import psycopg2
import json

def insert_from_json_92(json_data, connection_string):
    """Load users/products into PostgreSQL 9.2 from the application side.

    json_data: dict with optional 'users' and 'products' lists.
    Returns 1 on success, 0 on failure (transaction rolled back).

    BUGFIX: the original body lines were not indented under the ``def``
    (markdown stripped the indentation), which is a Python syntax error.
    NOTE(review): the UPDATE-then-INSERT upsert is race-prone under
    concurrent writers; 9.2 offers no atomic ON CONFLICT alternative.
    """
    conn = psycopg2.connect(connection_string)
    cursor = conn.cursor()

    try:
        # Users: update first, then insert only if the id is still absent.
        for user in json_data.get('users', []):
            cursor.execute("""
                UPDATE users SET name = %s, email = %s, isActive = %s 
                WHERE id = %s;
                INSERT INTO users (id, name, email, isActive)
                SELECT %s, %s, %s, %s
                WHERE NOT EXISTS (SELECT 1 FROM users WHERE id = %s);
            """, (
                user['name'], user['email'], user['isActive'], user['id'],
                user['id'], user['name'], user['email'], user['isActive'], user['id']
            ))

        # Products: same manual upsert pattern.
        for product in json_data.get('products', []):
            cursor.execute("""
                UPDATE products SET productName = %s, price = %s, category = %s 
                WHERE productId = %s;
                INSERT INTO products (productId, productName, price, category)
                SELECT %s, %s, %s, %s
                WHERE NOT EXISTS (SELECT 1 FROM products WHERE productId = %s);
            """, (
                product['productName'], product['price'], product['category'], product['productId'],
                product['productId'], product['productName'], product['price'], product['category'], product['productId']
            ))

        conn.commit()
        return 1

    except Exception as e:
        conn.rollback()
        print(f"错误: {e}")
        return 0
    finally:
        cursor.close()
        conn.close()
  1. 升级建议
    强烈建议升级PostgreSQL版本,因为:
    安全风险:9.2.4 已于2017年停止支持,存在安全漏洞
    功能限制:缺少现代JSON处理功能
    性能问题:老版本性能较差
    当前支持的PostgreSQL版本:
    9.2:已停止支持
    9.6:延长支持结束
    12+:推荐使用
    14+:最新稳定版

  2. 如果必须使用9.2.4的临时方案
    -- 最简单的逐条处理(性能较差)
    -- Minimal 9.2-era loader skeleton: extracts the two arrays by regex and
    -- splits them into per-object strings. Field extraction is deliberately
    -- left unimplemented; regex JSON parsing is fragile and slow.
    CREATE OR REPLACE FUNCTION SimpleInsertFromJSON(p_json_data TEXT)
    RETURNS INTEGER AS $$
    DECLARE
        user_text TEXT;
        product_text TEXT;
        user_parts TEXT[];
        i INTEGER;
    BEGIN
        -- BUGFIX: '[' / ']' must be escaped in the regex and the lazy
        -- quantifier is '.*?' (the '*' had been lost in formatting).
        user_text := substring(p_json_data from '"users":\[(.*?)\]');
        product_text := substring(p_json_data from '"products":\[(.*?)\]');

        -- Naive split on the object separator.
        user_parts := regexp_split_to_array(user_text, '},{');

        -- BUGFIX: a FOR range with a NULL bound errors out, so guard the
        -- empty-array case; a plpgsql loop body also cannot be empty, hence
        -- the placeholder NULL statement.
        FOR i IN 1..COALESCE(array_length(user_parts, 1), 0) LOOP
            -- Field-by-field string parsing would go here (tedious and
            -- error-prone; see the regex-based example above).
            NULL;
        END LOOP;

        RETURN 1;
    EXCEPTION WHEN OTHERS THEN
        RETURN 0;
    END;
    $$ LANGUAGE plpgsql;
    总结:对于PostgreSQL 9.2.4,建议要么升级数据库版本,要么在应用程序中处理JSON解析,而不是在数据库函数中处理。
    如果是在openguass中有下面这些函数吗
    -- PostgreSQL 9.2.4 没有这些JSON函数:

  • json_array_elements() -- 9.3+ 才引入
  • json_extract_path() -- 9.3+
  • json_build_object() -- 9.4+
  • jsonb 类型 -- 9.4+ 才引入
    OpenGauss是基于PostgreSQL开发的,但JSON功能的支持情况有所不同。让我为您分析OpenGauss中的JSON函数支持情况:
  1. OpenGauss中的JSON函数支持
    OpenGauss 3.0+ 版本支持以下JSON函数:
    ✅ 支持的函数:
    -- 这些函数在OpenGauss中通常都支持
  • json_array_elements() -- ✅ 支持
  • json_extract_path() -- ✅ 支持(或类似功能)
  • json_build_object() -- ✅ 支持
  • jsonb 类型 -- ✅ 支持(OpenGauss有jsonb类型)
    OpenGauss特有的JSON函数:
    -- OpenGauss扩展的JSON函数
  • json_array_length() -- 获取JSON数组长度
  • json_each() -- 展开JSON对象为键值对
  • json_object_keys() -- 获取JSON对象的所有键
  • json_populate_record() -- 将JSON转换为记录
  • json_to_record() -- JSON转记录
  1. OpenGauss兼容的存储函数示例
    -- OpenGauss-compatible loader: row-by-row UPSERT of both arrays.
    -- Returns 1 on success, 0 on failure.
    CREATE OR REPLACE FUNCTION InsertFromJSON(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
    user_item JSON;
    product_item JSON;
    user_inserts INTEGER := 0;
    user_updates INTEGER := 0;
    product_inserts INTEGER := 0;
    product_updates INTEGER := 0;
    BEGIN
    -- OpenGauss supports json_array_elements.
    FOR user_item IN
    SELECT * FROM json_array_elements(p_json_data::json->'users')
    LOOP
    -- OpenGauss supports ON CONFLICT (verify for your specific version).
    INSERT INTO users (id, name, email, isActive, updated_at)
    VALUES (
    (user_item->>'id')::INTEGER,
    user_item->>'name',
    user_item->>'email',
    (user_item->>'isActive')::BOOLEAN,
    clock_timestamp()
    ) ON CONFLICT (id) DO UPDATE SET
    name = EXCLUDED.name,
    email = EXCLUDED.email,
    isActive = EXCLUDED.isActive,
    updated_at = clock_timestamp();

     -- NOTE(review): FOUND is true after INSERT ... ON CONFLICT DO UPDATE
     -- whether the row was inserted or updated, so this counts every
     -- processed row, not only genuine inserts; user_updates stays 0.
     IF FOUND THEN
         user_inserts := user_inserts + 1;
     END IF;


    END LOOP;

    -- Products: same pattern as users.
    FOR product_item IN
    SELECT * FROM json_array_elements(p_json_data::json->'products')
    LOOP
    INSERT INTO products (productId, productName, price, category, updated_at)
    VALUES (
    product_item->>'productId',
    product_item->>'productName',
    (product_item->>'price')::NUMERIC,
    product_item->>'category',
    clock_timestamp()
    ) ON CONFLICT (productId) DO UPDATE SET
    productName = EXCLUDED.productName,
    price = EXCLUDED.price,
    category = EXCLUDED.category,
    updated_at = clock_timestamp();

     -- Same caveat as above: counts all processed product rows.
     IF FOUND THEN
         product_inserts := product_inserts + 1;
     END IF;


    END LOOP;

    RAISE NOTICE '处理完成: 用户操作%, 产品操作%', user_inserts, product_inserts;
    RETURN 1;

EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '操作失败: %', SQLERRM;
RETURN 0;
END;
$$;
3. OpenGauss版本检查
-- Probe the server version and JSON feature availability (works on both
-- PostgreSQL and OpenGauss): pg_proc for json_array_elements, pg_type for jsonb.
SELECT
version() as db_version,
(SELECT setting FROM pg_settings WHERE name = 'server_version') as server_version,
EXISTS(
SELECT 1 FROM pg_proc WHERE proname = 'json_array_elements'
) as has_json_array_elements,
EXISTS(
SELECT 1 FROM pg_type WHERE typname = 'jsonb'
) as has_jsonb_type;
4. OpenGauss批量处理优化版本
-- Set-based bulk UPSERT variant for OpenGauss: one INSERT ... ON CONFLICT
-- per table instead of a row-by-row loop. Returns 1 on success, 0 on failure.
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
BEGIN
-- Bulk upsert of users in a single statement.
WITH user_data AS (
SELECT
(value->>'id')::INTEGER as id,
value->>'name' as name,
value->>'email' as email,
(value->>'isActive')::BOOLEAN as isActive
FROM json_array_elements(p_json_data::json->'users')
)
INSERT INTO users (id, name, email, isActive)
SELECT id, name, email, isActive FROM user_data
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email,
isActive = EXCLUDED.isActive,
updated_at = clock_timestamp();

-- Bulk upsert of products, same pattern.
WITH product_data AS (
    SELECT 
        value->>'productId' as productId,
        value->>'productName' as productName,
        (value->>'price')::NUMERIC as price,
        value->>'category' as category
    FROM json_array_elements(p_json_data::json->'products')
)
INSERT INTO products (productId, productName, price, category)
SELECT productId, productName, price, category FROM product_data
ON CONFLICT (productId) DO UPDATE SET
    productName = EXCLUDED.productName,
    price = EXCLUDED.price,
    category = EXCLUDED.category,
    updated_at = clock_timestamp();

RETURN 1;

EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '批量处理失败: %', SQLERRM;
RETURN 0;
END;
$$;
5. OpenGauss特有的JSON功能示例
-- Counts the array elements for logging, then delegates the actual load to
-- InsertFromJSONBatch. Always returns 1 (no exception handler here).
CREATE OR REPLACE FUNCTION AdvancedJSONProcessing(p_json_data TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
user_count INTEGER;
product_count INTEGER;
BEGIN
-- Count the elements of each array for the progress notice.
SELECT COUNT(*) INTO user_count
FROM json_array_elements(p_json_data::json->'users');

SELECT COUNT(*) INTO product_count 
FROM json_array_elements(p_json_data::json->'products');

RAISE NOTICE 'JSON数据统计: 用户%, 产品%', user_count, product_count;

-- Delegate to the bulk loader; PERFORM discards its return value.
PERFORM InsertFromJSONBatch(p_json_data);

RETURN 1;

END;
$$;
6. 兼容性检查脚本
-- Run this on OpenGauss to report JSON/ON CONFLICT support via NOTICEs.
DO $$
BEGIN
-- Key JSON function availability.
IF EXISTS(SELECT 1 FROM pg_proc WHERE proname = 'json_array_elements') THEN
RAISE NOTICE '✅ json_array_elements 函数可用';
ELSE
RAISE NOTICE '❌ json_array_elements 函数不可用';
END IF;

IF EXISTS(SELECT 1 FROM pg_type WHERE typname = 'jsonb') THEN
    RAISE NOTICE '✅ jsonb 数据类型可用';
ELSE
    RAISE NOTICE '❌ jsonb 数据类型不可用';
END IF;

-- Probe ON CONFLICT support by actually executing it on a temp table;
-- an unsupported syntax lands in the EXCEPTION branch.
BEGIN
    CREATE TEMP TABLE test_conflict (id INT PRIMARY KEY, name TEXT);
    INSERT INTO test_conflict VALUES (1, 'test') 
    ON CONFLICT (id) DO UPDATE SET name = 'updated';
    RAISE NOTICE '✅ ON CONFLICT 语法支持';
EXCEPTION WHEN OTHERS THEN
    RAISE NOTICE '❌ ON CONFLICT 语法不支持';
END;

END
$$;
总结
OpenGauss通常支持这些JSON函数,但具体版本可能有差异:
OpenGauss 3.0+:基本都支持上述JSON函数
JSONB支持:OpenGauss有完整的jsonb类型支持
语法兼容:ON CONFLICT等PostgreSQL语法通常都支持
建议:在您的OpenGauss环境中先运行兼容性检查脚本,确认具体支持情况后再使用相应的函数。
我在pg12 使用你的存储过程
-- PG12 batch loader (as quoted by the user): plain batched INSERT for users,
-- batched UPSERT for products. Returns 1 on success, 0 on failure.
-- NOTE(review): the users path has no ON CONFLICT, so a duplicate id aborts
-- into the EXCEPTION handler and the function returns 0.
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
BEGIN
-- Total element counts drive the batch loops.
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');

SELECT COUNT(*) INTO total_products 
FROM json_array_elements(p_json_data::json->'products');

RAISE NOTICE '开始处理: 用户%, 产品%', total_users, total_products;

-- Batched INSERT of user rows.
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
    INSERT INTO users (id, name, email, isActive)
    SELECT 
        (value->>'id')::INTEGER,
        value->>'name',
        value->>'email',
        (value->>'isActive')::BOOLEAN
    FROM (
        SELECT value, row_number() OVER () as rn
        FROM json_array_elements(p_json_data::json->'users')
    ) t
    WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
    
    IF i % 10 = 0 THEN
        RAISE NOTICE '用户数据进度: %/%', 
            LEAST((i + 1) * p_batch_size, total_users), total_users;
    END IF;
END LOOP;

-- Batched UPSERT of product rows.
FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
    INSERT INTO products (productId, productName, price, category, updated_at)
    SELECT 
        value->>'productId',
        value->>'productName',
        (value->>'price')::DECIMAL,
        value->>'category',
        CURRENT_TIMESTAMP
    FROM (
        SELECT value, row_number() OVER () as rn
        FROM json_array_elements(p_json_data::json->'products')
    ) t
    WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
    ON CONFLICT (productId) 
    DO UPDATE SET
        productName = EXCLUDED.productName,
        price = EXCLUDED.price,
        category = EXCLUDED.category,
        updated_at = CURRENT_TIMESTAMP;
    
    IF i % 10 = 0 THEN
        RAISE NOTICE '产品数据进度: %/%', 
            LEAST((i + 1) * p_batch_size, total_products), total_products;
    END IF;
END LOOP;

RETURN 1;

EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '处理失败: %', SQLERRM;
RETURN 0;
END;
$$;

执行
-- Full timing test: generates 30k users + 10k products and times the load.
-- NOTE(review): this calls InsertFromJSON, but the function defined above is
-- InsertFromJSONBatch — the name mismatch discussed just below.
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();

-- Generate test data (30,000 users + 10,000 products).
SELECT json_build_object(
    'users', (
        SELECT json_agg(json_build_object(
            'id', g, 
            'name', '用户' || g,
            'email', 'user' || g || '@example.com',
            'isActive', CASE WHEN g % 10 != 0 THEN true ELSE false END
        )) FROM generate_series(1, 30000) g
    ),
    'products', (
        SELECT json_agg(json_build_object(
            'productId', 'P' || lpad(g::text, 5, '0'),
            'productName', '产品' || g,
            'price', (random() * 10000)::numeric(10,2),
            'category', CASE 
                WHEN g % 4 = 0 THEN '电子产品'
                WHEN g % 4 = 1 THEN '家具'
                WHEN g % 4 = 2 THEN '服装'
                ELSE '食品'
            END
        )) FROM generate_series(1, 10000) g
    )
) INTO big_json;

result := InsertFromJSON(big_json);
end_time := clock_timestamp();

RAISE NOTICE '处理3万用户 + 1万产品记录耗时: %', end_time - start_time;
RAISE NOTICE '执行结果: %', result;

-- Verify the load by row counts.
RAISE NOTICE '用户表记录数: %', (SELECT count(*) FROM users);
RAISE NOTICE '产品表记录数: %', (SELECT count(*) FROM products);

END
$$;

-- Inspect the users table.
SELECT * FROM users;

-- Inspect the products table.
SELECT * FROM products;
都为空

问题在于存储函数名不匹配!您在测试中调用的是 InsertFromJSON(big_json),但创建的函数名是 InsertFromJSONBatch。
解决方案:
方案1:修改函数名为一致
-- Fix option 1: rename the function to InsertFromJSON so the existing test
-- code finds it. Body is identical to InsertFromJSONBatch above.
CREATE OR REPLACE FUNCTION InsertFromJSON(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
total_users INTEGER;
total_products INTEGER;
i INTEGER;
BEGIN
-- Total element counts drive the batch loops.
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');

SELECT COUNT(*) INTO total_products 
FROM json_array_elements(p_json_data::json->'products');

RAISE NOTICE '开始处理: 用户%, 产品%', total_users, total_products;

-- Optionally truncate both tables first (uncomment if needed).
-- DELETE FROM products;
-- DELETE FROM users;

-- Batched INSERT of user rows (no ON CONFLICT: duplicates abort).
FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
    INSERT INTO users (id, name, email, isActive)
    SELECT 
        (value->>'id')::INTEGER,
        value->>'name',
        value->>'email',
        (value->>'isActive')::BOOLEAN
    FROM (
        SELECT value, row_number() OVER () as rn
        FROM json_array_elements(p_json_data::json->'users')
    ) t
    WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);
    
    IF i % 10 = 0 THEN
        RAISE NOTICE '用户数据进度: %/%', 
            LEAST((i + 1) * p_batch_size, total_users), total_users;
    END IF;
END LOOP;

-- Batched UPSERT of product rows.
FOR i IN 0..((total_products - 1) / p_batch_size) LOOP
    INSERT INTO products (productId, productName, price, category, updated_at)
    SELECT 
        value->>'productId',
        value->>'productName',
        (value->>'price')::DECIMAL,
        value->>'category',
        CURRENT_TIMESTAMP
    FROM (
        SELECT value, row_number() OVER () as rn
        FROM json_array_elements(p_json_data::json->'products')
    ) t
    WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size)
    ON CONFLICT (productId) 
    DO UPDATE SET
        productName = EXCLUDED.productName,
        price = EXCLUDED.price,
        category = EXCLUDED.category,
        updated_at = CURRENT_TIMESTAMP;
    
    IF i % 10 = 0 THEN
        RAISE NOTICE '产品数据进度: %/%', 
            LEAST((i + 1) * p_batch_size, total_products), total_products;
    END IF;
END LOOP;

RETURN 1;

EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '处理失败: %', SQLERRM;
RETURN 0;
END;
$$;
方案2:修改测试代码调用正确的函数名
-- Fix option 2: keep the function name and call InsertFromJSONBatch from the
-- test instead.
DO $$
DECLARE
start_time TIMESTAMP;
end_time TIMESTAMP;
result INTEGER;
big_json TEXT;
BEGIN
start_time := clock_timestamp();

-- Generate test data (30,000 users + 10,000 products).
SELECT json_build_object(
    'users', (
        SELECT json_agg(json_build_object(
            'id', g, 
            'name', '用户' || g,
            'email', 'user' || g || '@example.com',
            'isActive', CASE WHEN g % 10 != 0 THEN true ELSE false END
        )) FROM generate_series(1, 30000) g
    ),
    'products', (
        SELECT json_agg(json_build_object(
            'productId', 'P' || lpad(g::text, 5, '0'),
            'productName', '产品' || g,
            'price', (random() * 10000)::numeric(10,2),
            'category', CASE 
                WHEN g % 4 = 0 THEN '电子产品'
                WHEN g % 4 = 1 THEN '家具'
                WHEN g % 4 = 2 THEN '服装'
                ELSE '食品'
            END
        )) FROM generate_series(1, 10000) g
    )
) INTO big_json;

-- Call the function that actually exists, with an explicit batch size.
result := InsertFromJSONBatch(big_json, 1000);
end_time := clock_timestamp();

RAISE NOTICE '处理3万用户 + 1万产品记录耗时: %', end_time - start_time;
RAISE NOTICE '执行结果: %', result;

-- Verify the load by row counts.
RAISE NOTICE '用户表记录数: %', (SELECT count(*) FROM users);
RAISE NOTICE '产品表记录数: %', (SELECT count(*) FROM products);

END
$$;
调试步骤:

  1. 首先检查函数是否存在
    -- Check whether the function exists (identifiers fold to lowercase).
    SELECT proname, proargtypes, prosrc
    FROM pg_proc
    WHERE proname LIKE '%insertfromjson%';

-- Or list the matching functions via the psql meta-command.
\df insertfromjson
2. 简化测试(先测试小数据量)
-- Smoke test with a tiny payload first, to confirm the basics work.
DO $$
DECLARE
small_json TEXT;
result INTEGER;
BEGIN
small_json := '{
"users": [
{"id": 1, "name": "测试用户1", "email": "test1@example.com", "isActive": true},
{"id": 2, "name": "测试用户2", "email": "test2@example.com", "isActive": false}
],
"products": [
{"productId": "P00001", "productName": "测试产品1", "price": 100.50, "category": "电子产品"},
{"productId": "P00002", "productName": "测试产品2", "price": 200.00, "category": "家具"}
]
}';

result := InsertFromJSONBatch(small_json, 1000);
RAISE NOTICE '小数据测试结果: %', result;

-- Check the row counts immediately.
RAISE NOTICE '用户表: %', (SELECT count(*) FROM users);
RAISE NOTICE '产品表: %', (SELECT count(*) FROM products);

END
$$;
3. 添加详细的调试信息
-- Debug-instrumented batch loader: validates the JSON structure, logs
-- progress, and reports per-batch affected-row counts.
-- BUGFIX: GET DIAGNOSTICS is a statement, not an expression, so it cannot
-- appear inside RAISE; ROW_COUNT is captured into a variable first.
CREATE OR REPLACE FUNCTION InsertFromJSONBatch(
    p_json_data TEXT,
    p_batch_size INTEGER DEFAULT 1000
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
    total_users INTEGER;
    total_products INTEGER;
    i INTEGER;
    rows_affected INTEGER;  -- per-batch row count from GET DIAGNOSTICS
BEGIN
    RAISE NOTICE '开始执行存储过程';

    -- Show the start of the payload for troubleshooting.
    RAISE NOTICE 'JSON数据前500字符: %', substring(p_json_data from 1 for 500);

    -- Validate that both top-level keys exist before parsing.
    IF (p_json_data::json->'users') IS NULL THEN
        RAISE NOTICE '错误: JSON中缺少users字段';
        RETURN 0;
    END IF;

    IF (p_json_data::json->'products') IS NULL THEN
        RAISE NOTICE '错误: JSON中缺少products字段';
        RETURN 0;
    END IF;

    -- Element counts drive the batch loops.
    SELECT COUNT(*) INTO total_users 
    FROM json_array_elements(p_json_data::json->'users');

    SELECT COUNT(*) INTO total_products 
    FROM json_array_elements(p_json_data::json->'products');

    RAISE NOTICE '解析结果: 用户%, 产品%', total_users, total_products;

    -- Nothing to do: report and succeed.
    IF total_users = 0 AND total_products = 0 THEN
        RAISE NOTICE '警告: 没有找到要处理的数据';
        RETURN 1;
    END IF;

    -- Batched insert of user rows with per-batch diagnostics.
    IF total_users > 0 THEN
        FOR i IN 0..((total_users - 1) / p_batch_size) LOOP
            INSERT INTO users (id, name, email, isActive)
            SELECT 
                (value->>'id')::INTEGER,
                value->>'name',
                value->>'email',
                (value->>'isActive')::BOOLEAN
            FROM (
                SELECT value, row_number() OVER () as rn
                FROM json_array_elements(p_json_data::json->'users')
            ) t
            WHERE rn BETWEEN (i * p_batch_size + 1) AND ((i + 1) * p_batch_size);

            -- Capture the affected-row count of the INSERT just executed.
            GET DIAGNOSTICS rows_affected = ROW_COUNT;
            RAISE NOTICE '用户批次 % 完成, 影响行数: %', i + 1, rows_affected;
        END LOOP;
    END IF;

    -- Products would be processed the same way...

    RETURN 1;

EXCEPTION
    WHEN OTHERS THEN
        RAISE NOTICE '处理失败: %, SQLSTATE: %', SQLERRM, SQLSTATE;
        RETURN 0;
END;
$$;
4. 逐步执行排查
-- Test each stage in isolation: JSON parsing, then a bare single-row insert.
DO $$
DECLARE
test_json TEXT;
user_count INTEGER;
product_count INTEGER;
BEGIN
test_json := '{
"users": [{"id": 1, "name": "test", "email": "test@test.com", "isActive": true}],
"products": [{"productId": "P001", "productName": "test", "price": 100, "category": "test"}]
}';

-- Stage 1: JSON parsing.
SELECT COUNT(*) INTO user_count 
FROM json_array_elements(test_json::json->'users');

SELECT COUNT(*) INTO product_count 
FROM json_array_elements(test_json::json->'products');

RAISE NOTICE '解析测试: 用户%, 产品%', user_count, product_count;

-- Stage 2: a single-row insert outside any function.
INSERT INTO users (id, name, email, isActive) 
VALUES (999, '测试', 'test@test.com', true);

RAISE NOTICE '单条插入测试完成';

END
$$;
请先尝试方案2(修改测试代码调用正确的函数名),如果还有问题,运行调试步骤来定位具体问题。
一般情况下 需要在存储过程中控制每次插入数据量的批次吗
这是一个很好的问题!一般情况下,确实需要在存储过程中控制批次插入,但具体是否必要取决于多个因素。

  1. 需要批次控制的情况
    大数据量场景(推荐批次控制)
    -- 当单次处理数据量较大时(经验值):
  • 1万条记录以上 ✅ 推荐分批
  • 单个JSON超过10MB ✅ 推荐分批
  • 涉及复杂计算或关联 ✅ 推荐分批
    长事务风险场景
    -- 以下情况需要批次控制:
  • 业务高峰期执行
  • 系统资源紧张时
  • 需要最小化锁等待时间
  • 要求可中断的长时间操作
  1. 不需要批次控制的简单场景
    小数据量场景
    -- 以下情况可以直接单次插入:
  • 几百到几千条记录

  • JSON数据小于1MB

  • 非关键业务时段

  • 系统资源充足
    简单版本(无批次控制)
    -- Single-shot loader (no batching): one UPSERT statement per table.
    -- Suitable for small payloads. Returns 1 on success, 0 on failure.
    CREATE OR REPLACE FUNCTION InsertFromJSONSimple(p_json_data TEXT)
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    BEGIN
    -- Users: single-statement UPSERT.
    INSERT INTO users (id, name, email, isActive, updated_at)
    SELECT
    (value->>'id')::INTEGER,
    value->>'name',
    value->>'email',
    (value->>'isActive')::BOOLEAN,
    CURRENT_TIMESTAMP
    FROM json_array_elements(p_json_data::json->'users')
    ON CONFLICT (id) DO UPDATE SET
    name = EXCLUDED.name,
    email = EXCLUDED.email,
    isActive = EXCLUDED.isActive,
    updated_at = CURRENT_TIMESTAMP;

    -- Products: single-statement UPSERT.
    INSERT INTO products (productId, productName, price, category, updated_at)
    SELECT
    value->>'productId',
    value->>'productName',
    (value->>'price')::DECIMAL,
    value->>'category',
    CURRENT_TIMESTAMP
    FROM json_array_elements(p_json_data::json->'products')
    ON CONFLICT (productId) DO UPDATE SET
    productName = EXCLUDED.productName,
    price = EXCLUDED.price,
    category = EXCLUDED.category,
    updated_at = CURRENT_TIMESTAMP;

    RETURN 1;
    EXCEPTION
    WHEN OTHERS THEN
    RAISE NOTICE '操作失败: %', SQLERRM;
    RETURN 0;
    END;
    $$;

  1. 批次大小的经验值
    数据量
    推荐批次大小
    适用场景
    1,000条以下
    一次性处理
    小数据量,简单操作
    1,000-10,000条
    500-1,000条/批
    一般业务数据
    10,000-100,000条
    1,000-2,000条/批
    大数据量同步
    100,000条以上
    2,000-5,000条/批
    数据迁移、ETL

  2. 智能批次控制方案
    -- Adaptive entry point: measures record count and payload size, then
    -- dispatches to the simple loader (small data) or the batch loader with
    -- a size-appropriate batch. Falls back to the simple loader on error.
    CREATE OR REPLACE FUNCTION InsertFromJSONSmart(
    p_json_data TEXT,
    p_auto_batch BOOLEAN DEFAULT true -- automatically decide whether to batch
    )
    RETURNS INTEGER
    LANGUAGE plpgsql
    AS $$
    DECLARE
    total_records INTEGER;
    recommended_batch_size INTEGER;
    json_size INTEGER;
    BEGIN
    -- Estimate total record count and payload size.
    SELECT
    COALESCE(json_array_length(p_json_data::json->'users'), 0) +
    COALESCE(json_array_length(p_json_data::json->'products'), 0)
    INTO total_records;

    json_size := octet_length(p_json_data);

    -- Batching disabled by caller: process in one shot.
    IF NOT p_auto_batch THEN
    -- No batching; direct single-shot processing.
    RETURN InsertFromJSONSimple(p_json_data);
    END IF;

    -- Pick a batch size from the data volume.
    IF total_records <= 1000 OR json_size <= 1048576 THEN -- 1MB
    -- Small payload: single-shot is more efficient.
    RETURN InsertFromJSONSimple(p_json_data);
    ELSIF total_records <= 10000 THEN
    recommended_batch_size := 1000;
    ELSIF total_records <= 50000 THEN
    recommended_batch_size := 2000;
    ELSE
    recommended_batch_size := 5000;
    END IF;

    RAISE NOTICE '智能批次控制: 总记录%, JSON大小%, 推荐批次%',
    total_records, pg_size_pretty(json_size::bigint), recommended_batch_size;

    -- Delegate to the batched loader.
    RETURN InsertFromJSONBatch(p_json_data, recommended_batch_size);

EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE '智能处理失败,尝试直接处理: %', SQLERRM;
-- On failure, fall back to the simple single-shot loader.
RETURN InsertFromJSONSimple(p_json_data);
END;
$$;
5. 实际项目中的建议
推荐方案:参数化批次控制
-- Recommended entry point: optional batch size, auto-decided when NULL.
-- Small payloads (<= 5000 users) go through the single-shot loader;
-- larger ones use the batch loader with the given or default batch size.
CREATE OR REPLACE FUNCTION InsertFromJSON(
p_json_data TEXT,
p_batch_size INTEGER DEFAULT NULL -- NULL means decide automatically
)
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
actual_batch_size INTEGER;
total_users INTEGER;
BEGIN
-- Determine the effective batch size.
IF p_batch_size IS NOT NULL AND p_batch_size > 0 THEN
actual_batch_size := p_batch_size;
ELSE
-- Auto mode: small data is processed directly, large data in batches.
SELECT COUNT(*) INTO total_users
FROM json_array_elements(p_json_data::json->'users');

    IF total_users <= 5000 THEN
        -- Small payload: single-shot is more efficient.
        RETURN InsertFromJSONSimple(p_json_data);
    ELSE
        actual_batch_size := 1000;  -- default batch size
    END IF;
END IF;

-- Delegate to the batched loader.
RETURN InsertFromJSONBatch(p_json_data, actual_batch_size);

END;
$$;
6. 性能对比考虑
-- 批次控制 vs 单次插入的性能权衡:

-- ✅ 批次控制的优点:
-- 1. 减少长事务锁等待
-- 2. 内存使用更平稳
-- 3. 可以显示进度
-- 4. 错误时损失更小

-- ❌ 批次控制的缺点:
-- 1. 代码更复杂
-- 2. 小数据量时性能稍差
-- 3. 需要更多的事务控制
7. 我的最终建议
对于您的场景(2-3万条记录),我推荐使用智能批次控制:
-- 创建三个层次的函数:
-- 1. 简单版本(小数据量)
-- 2. 分批版本(大数据量)
-- 3. 智能入口函数(自动选择)

-- 这样既保证性能,又保持代码简洁
总结:对于2-3万条记录,使用批次控制(1000-2000条/批)是比较稳妥的选择,但也要提供直接处理的简单版本作为备选。

posted @ 2025-12-04 21:35  camryriverstar  阅读(0)  评论(0)    收藏  举报