X_global = df_diff_cleaned[feature_cols].values
    y_global = df_diff_cleaned[target_col].values
    # Global linear baseline: fitted on every row of the cleaned data, with
    # no intercept term. (A commented-out `try:` previously opened here.)
    model_global = LinearRegression(fit_intercept=False).fit(X_global, y_global)
    # Search space for the neural-network model: the lists and distributions
    # below are what the randomized search samples from.
    param_dist = {
        'hidden_layer_sizes': [
            (8, 4), (12, 8), (16, 5),
            (16,), (8,)  # two-layer layouts of varying width, plus two single-layer ones
        ],
        'activation': ['tanh'],
        'learning_rate_init': loguniform(5e-4, 5e-2),  # initial learning rate, log-uniform over [5e-4, 5e-2]
        'alpha': loguniform(1e-4, 5e-1),  # regularization strength, log-uniform; upper bound deliberately large
        'max_iter': [300, 500, 800],  # candidate iteration caps (kept low on purpose)
        # NOTE(review): per the scikit-learn docs, validation_fraction is only
        # used when early_stopping=True; early_stopping is not set on the
        # estimator below nor searched here, so this entry presumably has no
        # effect -- confirm intent.
        'validation_fraction': [0.2],
    }
    model_nn = MLPRegressor(
        verbose=False,
        random_state=42,
        # NOTE(review): the original comment said the adaptive learning rate
        # is turned off for small data (to avoid instability), but no
        # learning-rate schedule argument is passed here, so the estimator's
        # default applies -- confirm.
    )
    # Leave-one-out splitter used as the cross-validation scheme of the search.
    loo = LeaveOneOut()
    # Run the randomized search exactly once.
    random_search = RandomizedSearchCV(
        estimator=model_nn,
        param_distributions=param_dist,
        n_iter=2,  # number of sampled parameter settings; could be reduced if needed
        cv=loo,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
        verbose=1,
        random_state=42
    )

    # The hyperparameter search is fitted on all of the data (no separate
    # hold-out split is made in this section).
    random_search.fit(X_global, y_global)
    # NOTE(review): best_model is not passed to the call below; presumably it
    # is used further down, outside this section -- verify.
    best_model = random_search.best_estimator_
    coef_df = create_coefficient_csv_with_ratios(
        base_models1, feature_cols, output_path, df,
        base_recipe_to_group, base_high_overlap,model_global,model_scaler  # newly added overlap-data arguments
    )