Numpy随机数据生成

基本随机数生成

1. 均匀分布随机数

import numpy as np

# Draw floats uniformly from the half-open interval [0, 1)
print(np.random.random())        # single float, e.g. 0.374
print(np.random.random(5))       # 1-D array of 5 floats, e.g. [0.95  0.73  0.59  0.156 0.058]
print(np.random.random((2,3)))   # 2-D array with shape (2, 3)

# Draw floats uniformly from a caller-chosen range
print(np.random.uniform(1, 10))     # single float in [1, 10), e.g. 7.2
print(np.random.uniform(1, 10, 5))  # 5 floats in [1, 10)

2. 整数随机数

import numpy as np

# randint(low, high, size): integers from [low, high); `high` is exclusive
print(np.random.randint(1, 11))      # single integer in [1, 11), i.e. 1..10, e.g. 7
print(np.random.randint(1, 11, 5))   # 5 random integers
print(np.random.randint(1, 11, (2,3))) # array of integers with shape (2, 3)

常见分布

1. 正态分布（高斯分布）

import numpy as np

# normal(loc, scale, size): loc is the mean, scale is the standard deviation
print(np.random.normal())           # standard normal N(0, 1)
print(np.random.normal(5, 2))       # mean 5, standard deviation 2
print(np.random.normal(0, 1, 100))  # 100 standard-normal samples


# randn draws standard-normal samples (mean 0, standard deviation 1); the
# dimensions are passed as separate positional arguments, not as a tuple
print(np.random.randn())        # single float, e.g. -0.234
print(np.random.randn(5))       # 1-D array, e.g. [ 0.123 -1.456  0.789 -0.234  1.567]
print(np.random.randn(2, 3))    # array with shape (2, 3)
print(np.random.randn(2, 2, 3)) # array with shape (2, 2, 3)

2. 其他分布

import numpy as np

# Exponential distribution
# NOTE: the first argument is the scale (the mean, 1/λ), not the rate λ,
# so scale=2.0 corresponds to a rate of λ=0.5.
print(np.random.exponential(2.0, 5))  # 5 samples with scale=2.0 (λ=0.5)

# Poisson distribution
print(np.random.poisson(3, 5))       # 5 samples with lam=3 (expected event count)

# Binomial distribution
print(np.random.binomial(10, 0.5, 5)) # 5 samples with n=10 trials, success probability p=0.5

数组操作

1. 随机选择

import numpy as np

# Pick elements at random from an array
arr = np.array([1, 2, 3, 4, 5])
print(np.random.choice(arr))         # one element, e.g. 3
print(np.random.choice(arr, 3))      # 3 elements with replacement (repeats possible)
print(np.random.choice(arr, 3, replace=False))  # 3 distinct elements (no replacement)

# Weighted selection: weights[i] is the probability of drawing arr[i],
# so the weights must sum to 1
weights = [0.1, 0.2, 0.3, 0.2, 0.2]
print(np.random.choice(arr, 3, p=weights))

2. 随机排列

import numpy as np

# Shuffle an array
arr = np.array([1, 2, 3, 4, 5])
np.random.shuffle(arr)    # shuffles in place and returns None
print(arr)                # e.g. [3 1 5 2 4]

# Get a shuffled copy instead
arr = np.array([1, 2, 3, 4, 5])
shuffled = np.random.permutation(arr)
print(shuffled)           # e.g. [2 4 1 5 3]
print(arr)               # the original array is unchanged: [1 2 3 4 5]

设置随机种子

import numpy as np

# Seed the global generator so the results are reproducible
np.random.seed(42)
print(np.random.random(3))  # same values on every run

# Recommended newer API: a Generator object that carries its own seeded state
rng = np.random.default_rng(42)
print(rng.random(3))        # drawn from the generator rather than the global state

实际应用示例

1. 模拟数据生成

import numpy as np
import matplotlib.pyplot as plt

# Build a synthetic data set
np.random.seed(42)
n_samples = 1000

# Heights: normally distributed
heights = np.random.normal(170, 10, n_samples)  # mean 170 cm, standard deviation 10 cm

# Ages: uniformly distributed integers
ages = np.random.randint(18, 65, n_samples)  # upper bound is exclusive, so ages are 18..64

# Incomes: log-normally distributed
# NOTE: 10 and 0.5 are the mean and standard deviation of the underlying
# normal distribution (of log(income)), not of the incomes themselves.
incomes = np.random.lognormal(10, 0.5, n_samples)

print(f"平均身高: {heights.mean():.1f}cm")
print(f"平均年龄: {ages.mean():.1f}岁") 
print(f"平均收入: {incomes.mean():.0f}")

2. 随机采样

import numpy as np

# Draw a random sample from a large data set
data = np.arange(10000)  # stand-in for a large data set
# Passing an int to choice samples from range(len(data)); replace=False
# gives 100 distinct indices.
sample_indices = np.random.choice(len(data), 100, replace=False)
sample = data[sample_indices]
print(f"采样了{len(sample)}个数据点")

3. 蒙特卡洛模拟

import numpy as np

# 估算π值
def estimate_pi(n_points=1000000, rng=None):
    """Estimate pi by Monte Carlo sampling.

    Points are drawn uniformly from the square [-1, 1) x [-1, 1), which has
    side 2 and area 4. The fraction of points that land on or inside the unit
    circle (area pi) approximates pi / 4, so four times that fraction
    approximates pi.

    Args:
        n_points: Number of random points to draw; must be at least 1.
        rng: Optional ``numpy.random.Generator`` to draw from. When ``None``
            (the default) the global ``np.random`` state is used, so
            ``np.random.seed`` still controls the result.

    Returns:
        The estimate of pi.

    Raises:
        ValueError: If ``n_points`` is less than 1.
    """
    if n_points < 1:
        # Zero points would evaluate 0 / 0 and silently return NaN.
        raise ValueError("n_points must be a positive integer")

    uniform = np.random.uniform if rng is None else rng.uniform

    # x is drawn before y; a fixed draw order keeps seeded runs reproducible.
    x = uniform(-1, 1, n_points)
    y = uniform(-1, 1, n_points)

    # Boolean mask of the points on or inside the unit circle.
    inside_circle = (x**2 + y**2) <= 1
    pi_estimate = 4 * np.sum(inside_circle) / n_points

    return pi_estimate

# Seed the global generator so the printed estimate is the same on every run
np.random.seed(42)
pi_est = estimate_pi(1000000)
print(f"π的估计值: {pi_est:.4f}")
print(f"实际π值: {np.pi:.4f}")
print(f"误差: {abs(pi_est - np.pi):.4f}")

4. 数据增强

import numpy as np

# 为机器学习添加噪声
def add_noise(data, noise_level=0.1, rng=None):
    """Return ``data`` with zero-mean Gaussian noise added element-wise.

    Args:
        data: Numeric array or array-like (for example a list or tuple).
        noise_level: Standard deviation of the noise; must be non-negative.
        rng: Optional ``numpy.random.Generator`` to draw from. When ``None``
            (the default) the global ``np.random`` state is used, so
            ``np.random.seed`` still controls the result.

    Returns:
        A new array with the same shape as ``data``; the input is not
        modified.
    """
    # Convert up front so plain lists and tuples have a ``.shape`` too.
    data = np.asarray(data)
    normal = np.random.normal if rng is None else rng.normal
    noise = normal(0, noise_level, data.shape)
    return data + noise

# Original data
original_data = np.array([1, 2, 3, 4, 5])
noisy_data = add_noise(original_data, 0.1)
print("原始数据:", original_data)
print("加噪声后:", noisy_data)

常用函数总结

import numpy as np

# Quick reference of call signatures. size, low, high, mean, std, array and
# seed are placeholders that are not defined in this snippet.

# Uniform distributions
np.random.random(size)          # floats, uniform on [0, 1)
np.random.uniform(low, high)    # floats, uniform on [low, high)
np.random.randint(low, high)    # integers, uniform on [low, high)

# Normal distributions
np.random.normal(mean, std)     # normal distribution with the given mean and standard deviation
np.random.standard_normal()     # standard normal distribution

# Selection and permutation
np.random.choice(array)         # pick at random
np.random.shuffle(array)        # shuffle in place
np.random.permutation(array)    # return a shuffled copy

# Seeding
np.random.seed(seed)           # seed the global generator
rng = np.random.default_rng()  # create a Generator (recommended)

posted @ 2025-09-21 10:26 栗悟饭与龟功気波 阅读(33) 评论(0) 收藏 举报

刷新页面返回顶部

Numpy随机数据生成

Numpy随机数据生成

基本随机数生成

1. 均匀分布随机数

2. 整数随机数

常见分布

1. 正态分布（高斯分布）

2. 其他分布

数组操作

1. 随机选择

2. 随机排列

设置随机种子

实际应用示例

1. 模拟数据生成

2. 随机采样

3. 蒙特卡洛模拟

4. 数据增强

常用函数总结

公告