# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import PolynomialFeatures
# from sklearn.linear_model import LinearRegression
# import matplotlib.pyplot as plt
# # Read the data from the Excel file
# data = pd.read_excel(r'E:\\孙晓宇\\测试钞A1~E7共62开\\测试钞_磁扫特征统计结果\\combine.xlsx')
# # print(data.iloc[0])
# # Define the dependent variable
# y= data['平均值']
# # Define the independent variables
# X = data[['油墨类型','BLOCK_width', 'width_ratio', 'BLOCK_depth','BLOCK_carval','BLOCK_angle','类型']]
# # generate polynomial features of degree 2
# poly = PolynomialFeatures(degree=2, include_bias=True)
# X_poly = poly.fit_transform(X)
# # fit a linear regression model on the polynomial features
# model = LinearRegression().fit(X_poly, y)
# # use the model to make predictions for the new set of independent variables
# predictions = model.predict(X_poly)
# # print the predicted values
# print(predictions)
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Fit a polynomial regression of the '标准差' column on seven feature columns
# and plot how well the in-sample predictions match the actual values.

# Load the measurement table from the Excel workbook.
data = pd.read_excel(r'E:\\孙晓宇\\测试钞A1~E7共62开\\测试钞_磁扫特征统计结果\\combine.xlsx')

# Dependent variable: the '标准差' ("standard deviation") column.
y = data['标准差']

# Independent variables: the seven feature columns.
# ('油墨类型' reads as "ink type", '类型' as "type".)
X = data[['油墨类型', 'BLOCK_width', 'width_ratio', 'BLOCK_depth', 'BLOCK_carval', 'BLOCK_angle', '类型']]

# Expand the features into every polynomial term up to degree 10,
# including the constant (bias) column.
# NOTE(review): degree 10 on 7 inputs yields C(17, 10) = 19,448 terms. That
# will likely exceed the number of rows, letting the model reproduce the
# training data almost exactly -- confirm that 10 (rather than 2) is intended.
poly = PolynomialFeatures(degree=10, include_bias=True)
X_poly = poly.fit_transform(X)

# Ordinary least-squares fit on the expanded feature matrix.
model = LinearRegression().fit(X_poly, y)

# In-sample predictions: the model is evaluated on the same rows it was
# fitted on, so these say nothing about performance on unseen data.
predictions = model.predict(X_poly)
print(predictions)

# Residual = actual - predicted.
residuals = y - predictions

# Predicted vs. actual values.
plt.scatter(y, predictions, label='Predicted')

# Diagonal y = x reference line: points on it are predicted perfectly.
# Use the Series' own min/max (vectorised) and compute each bound once.
y_min, y_max = y.min(), y.max()
plt.plot([y_min, y_max], [y_min, y_max], 'k--', lw=2, label='Perfect prediction')

# Residuals are overlaid on the same axes as small red points; the legend
# is what distinguishes them from the predictions.
plt.scatter(y, residuals, c='r', s=10, alpha=0.5, label='Residual (actual - predicted)')

plt.xlabel('Actual values')
plt.ylabel('Predicted values')
plt.title('Prediction error')
plt.legend()

# Display the figure (blocks until the window is closed in interactive use).
plt.show()