数学建模习题9.5

import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Labels for the four city columns of the sales table.
column_names = ["城市1", "城市2", "城市3", "城市4"]

# Row labels for the 12 treatment combinations: 3 locations x 2 ad forms for
# the high-grade decoration, followed by the same 6 for the low-grade one.
# The ad-form level is taken from the inner loop so every label is unique and
# the ad form has exactly two levels; the "行" prefix is kept because the
# ad-form parsing further down strips it before converting to int.
# NOTE(review): the row order must match the spreadsheet - confirm.
factor_combinations = [
    f"位置{location}广告形式行{ad_form}{grade}"
    for grade in ("高档", "低档")
    for location in range(1, 4)
    for ad_form in range(1, 3)
]

# Load the raw sales table. No header row is read from the sheet; column
# labels are supplied explicitly and the first column becomes the row index.
# NOTE(review): index_col=0 consumes one column as the index - confirm the
# sheet has a leading label column so that no city's data is dropped.
df_raw = pd.read_excel('9.5.xlsx', header=None, names=column_names, index_col=0)

# Replace the row index with the treatment-combination labels.
df_raw.index = factor_combinations

# Location level: the integer between "位置" and "广告形式" in each row label.
location_factor = [
    int(label.split('位置')[1].split('广告形式')[0].strip())
    for label in df_raw.index
    if '位置' in label
]


def _ad_form_level(label):
    """Return the ad-form level encoded in *label*, or 0 when none is found.

    Always returns exactly one value per label so the resulting list stays
    aligned with the row index.
    """
    parts = label.split('广告形式')
    if len(parts) <= 1:
        # No "广告形式" marker in the label: fall back to level 0.
        return 0
    row_part = parts[1].split('高档')[0].split('低档')[0].strip()
    if '行+1' in row_part:
        # A literal "行+1" is treated as level 1.
        return 1
    if '行' in row_part:
        # Drop the "行" prefix and read the remaining digits.
        return int(row_part.replace('行', '').strip())
    return 0


# Ad-form level for every row label.
ad_factor = [_ad_form_level(label) for label in df_raw.index]

# Decoration grade: "高档" when the label contains it, otherwise "低档".
decoration_factor = ['高档' if '高档' in label else '低档' for label in df_raw.index]

# The factor lists hold one level per spreadsheet row, while the sales values
# are flattened row by row (one value per row/city cell). Validate the
# row-level lengths first, then expand each factor to one entry per cell.
n_rows, n_cities = df_raw.shape

if not (len(location_factor) == len(ad_factor) == len(decoration_factor) == n_rows):
    raise ValueError("Arrays are not of the same length")


def _per_observation(levels):
    """Repeat each row-level factor value once per city column."""
    return [level for level in levels for _ in range(n_cities)]


# Long-format data: one record per (treatment combination, city) cell, in the
# same row-major order that flatten() produces.
data = {
    '销售量': df_raw.values.flatten(),
    '位置': _per_observation(location_factor),
    '广告形式': _per_observation(ad_factor),
    '装饰档次': _per_observation(decoration_factor)
}

df = pd.DataFrame(data)

# City label for each cell: the column labels cycle once per spreadsheet row,
# matching the row-major flattening above.
city_factor = list(df_raw.columns) * n_rows
if len(city_factor) != len(df):
    raise ValueError("City factor array is not of the same length as other arrays")
df['城市'] = city_factor

# Fit an additive linear model with all four factors treated as categorical,
# then compute the Type II ANOVA table.
model = ols('销售量 ~ C(位置) + C(广告形式) + C(装饰档次) + C(城市)', data=df).fit()
anova_table = anova_lm(model, typ=2)
print(anova_table)

# The ANOVA table is indexed by term name and has no 'source' column, so the
# significant terms are read from the index. The Residual row has a NaN
# p-value, which never compares below the threshold and is therefore excluded.
significant_factors = anova_table[anova_table['PR(>F)'] < 0.05].index.tolist()
print("在显著水平0.05下，以下因素对销售量有显著差异：")
print(significant_factors)
print("学号：05")

posted on 2024-12-20 09:31 VVV1 阅读(21) 评论(0) 收藏举报