点击查看代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio
点击查看代码
def systematic_train_test_split(X, y, test_size=0.2, random_state=None):
"""
按 y 排序后系统抽样划分训练集和测试集
参数:
X (np.ndarray): 特征矩阵
y (np.ndarray): 目标值
test_size (float): 测试集比例(默认0.2)
random_seed (int): 随机种子(用于起始点随机化)
返回:
X_train, X_test, y_train, y_test: 划分后的数据
"""
# 按 y 值排序并获取索引
sorted_indices = np.argsort(y)
X_sorted = X[sorted_indices]
y_sorted = y[sorted_indices]
# 计算系统抽样间隔和起始点
n_samples = len(y)
n_test = int(n_samples * test_size)
k = n_samples // n_test # 抽样间隔
# 随机选择起始点(若需可重复性,设置 random_seed)
if random_state is not None:
np.random.seed(random_state)
start = np.random.randint(0, k)
# 生成测试集索引
test_indices = np.arange(start, n_samples, k)
# 确保测试集数量不超过 n_test(避免浮点数计算误差)
test_indices = test_indices[:n_test]
# 生成训练集索引(非测试集的样本)
mask = np.ones(n_samples, dtype=bool)
mask[test_indices] = False
train_indices = np.where(mask)[0]
# 划分数据
X_train, X_test = X_sorted[train_indices], X_sorted[test_indices]
y_train, y_test = y_sorted[train_indices], y_sorted[test_indices]
return X_train, X_test, y_train, y_test
def evaluation(y_true, y_pred): # 输出R2, R, RMSE, Bias
from sklearn.metrics import mean_squared_error
# 计算 R²
def calculate_r2(y_true, y_pred):
ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
ss_residual = np.sum((y_true - y_pred) ** 2)
r2 = 1 - (ss_residual / ss_total)
return r2
# 计算 Pearson 相关系数 R
def calculate_r(y_true, y_pred):
correlation_matrix = np.corrcoef(y_true, y_pred)
return correlation_matrix[0, 1]
# 计算 RMSE
def calculate_rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))
# 计算 Bias
def calculate_bias(y_true, y_pred):
return np.mean(y_pred - y_true)
return calculate_r2(y_true, y_pred), calculate_r(y_true, y_pred), calculate_rmse(y_true, y_pred), calculate_bias(y_true, y_pred)
#-------------------------------------------------机器学习模型训练部分-------------------------------------------------------#
sldf_full = pd.read_csv(sldf_path)
rho_list_full = sldf_full[:, 4:11].values.astype("float32")
X_input = np.column_stack(rho_list_full)
print("X的形状为:", X_input.shape)
X_train, X_test, y_train, y_test = systematic_train_test_split(X_input, theta_full, test_size=0.2, random_state=42)
# 线性回归模型训练
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)
R2_train, R_train, RMSE_train, Bias_train = evaluation(y_train, y_train_pred)
R2_test, R_test, RMSE_test, Bias_test = evaluation(y_test, y_test_pred)
# 绘图评估
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_test_pred, color='red', label='Test',
edgecolors='black', alpha=0.6, s=60)# 绘制测试集
plt.scatter(y_train, y_train_pred, color='black', label='Train',
edgecolors='black', alpha=0.5, s=40)# 绘制训练集
all_values = np.concatenate([y_train, y_test, y_train_pred, y_test_pred]) # 绘制理想拟合线 (y=x)
min_val = all_values.min()
max_val = all_values.max()
plt.plot([min_val, max_val], [min_val, max_val],
color='black', linestyle='--', linewidth=2,
label='Ideal fit')
plt.xlabel('Measured', fontsize=12)
plt.ylabel('Predicted', fontsize=12)
plt.title('Model Performance', fontsize=14)
plt.legend(fontsize=10)
plt.grid(True, alpha=0.3)
textstr = f'R²_test = {R2_test:.3f}\nRMSE_test = {RMSE_test:.3f}\nR²_train = {R2_train:.3f}\nRMSE_train = {RMSE_train:.3f}'
props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
plt.text(0.05, 0.95, textstr, transform=plt.gca().transAxes,
fontsize=10, verticalalignment='top', bbox=props)
plt.tight_layout()
plt.show()
# plt.savefig(os.path.join(output_dir, f'title.png'), bbox_inches='tight')
#-------------------------------------------------遥感图像应用部分-----------------------------------------------------------#
def imageshow(image, title):
plt.figure(figsize=(15, 8))
plt.imshow(image, cmap='viridis')
plt.colorbar()
plt.title(title)
plt.show()
sr_o_image = "七波段卫星图像"
sr_e_image = "掩膜文件"
output_srre_image_dir = "输出"
output_smc_image_dir = "输出"
with rasterio.open(sr_e_image, 'r') as ds:
sre_data = ds.read(1)
mask = np.isfinite(sre_data)
with rasterio.open(sr_o_image, 'r') as ds:
bands = ds.read(range(1,8)).astype(np.float32) # (7, 4110, 11661)
height, width = ds.height, ds.width
bands = np.clip(bands, 0, 1.0)
bands_masked = np.where(mask, bands, 0.0)
X_2d = bands_masked.reshape(7,-1)
X_2d = X_2d.T # 转置后形状: (n_samples, n_features)
pred = model.predict(X_2d)
pred_back = pred.T.reshape(1, height, width)
pred_back = np.clip(pred_back, 0, 0.5) #显示值范围
print(f"转回来的形状: {pred_back.shape}") # (7, 4110, 11661)
imageshow(X_back[0], "title")