1 #!/usr/bin/env python3
2 #-*- coding:utf-8 -*-
3 #############################################
4 #File Name: ci_ex1.py
5 #Brief: 示例1
6 #Author: frank
7 #Email: frank0903@aliyun.com
8 #Created Time:2018-08-13 19:33:11
9 #Blog: http://www.cnblogs.com/black-mamba
10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
11 #############################################
12 import matplotlib.pyplot as plt
13 import numpy as np
14 import scipy.stats
15 import random
16
17 #随机抽取36个苹果,计算200,000苹果的重量在100到124g范围的概率
18 def sampling_distribution(data,confidence=0.95,sampling_times=1,sampling_size=30):
19 list_sampling_mean = []
20
21 for i in np.arange(sampling_times):
22 #samples = np.random.choice(data,sampling_size)#重复抽样
23 samples = random.sample(list(data),sampling_size)#不重复抽样
24 #print("samples:{}".format(samples))
25 samples_mean = np.mean(samples)
26 list_sampling_mean.append(samples_mean)
27
28
29 #the sampling distribution of the sampling mean,样本均值的抽样分布
30 sam_mean = np.mean(list_sampling_mean)
31 print("样本均值分布的期望,sam_mean:{}".format(sam_mean))
32 sam_std = np.std(list_sampling_mean)
33 print("样本均值分布的标准差,sam_std:{}".format(sam_std))
34
35 #获取样本均值分布~N(sam_mean, sam_std^2)
36 norm = scipy.stats.norm(loc=sam_mean, scale=sam_std)
37
38 #获取距离sam_mean 4个sam_std的范围
39 x = np.arange(sam_mean-sam_std*4, sam_mean+sam_std*4, 1)
40 #获取x对应的概率密度函数值
41 y = norm.pdf(x)
42 #显示样本均值分布
43 plt.plot(x, y,'b--',alpha=0.7,label='sample distribution of the sample mean')
44
45 #获取置信区间,方法1
46 confidence *= 100
47 c_low = (100 - confidence) / 2
48 c_high = 100 - c_low
49 c_interval = np.percentile(list_sampling_mean,[c_low, c_high])
50 print("95%置信区间端点对应的x坐标:{}".format(c_interval))
51
52
53 #获取置信区间,方法2
54 c_interval1 = [norm.isf(1-0.025),norm.isf(1-0.975)]
55 print("95%置信区间端点对应的x坐标:{}".format(c_interval1))
56 print("[{},{}]".format(sam_mean-sam_std*1.8, sam_mean+sam_std*1.8))
57
58 #绘制置信区间对应的竖线
59 a = c_interval[0]
60 b = c_interval[1]
61 plt.vlines(a, 0, norm.pdf(a),'r')
62 plt.vlines(b, 0, norm.pdf(b),'r')
63
64 a1 = c_interval1[0]
65 b1 = c_interval1[1]
66 plt.vlines(a1, 0, norm.pdf(a1),'g',alpha=0.5)
67 plt.vlines(b1, 0, norm.pdf(b1),'g',alpha=0.5)
68
69 fill_x = np.linspace(a1,b1,100)
70 #print("type of fill_x:{}".format(type(fill_x)))
71 fill_y = norm.pdf(fill_x)
72 #print("type of fill_y:{}".format(type(fill_y)))
73 plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
74
75
76 #模拟总体200,000个苹果的重量数据
77 #用不同的数据测试
78 #data1 = np.random.randint(10, 200, size=200000)
79 data1 = np.random.randint(50, 200, size=200000)
80
81 #总体均值
82 population_mean = np.mean(data1)
83 print("population mean:{}".format(population_mean))
84
85 #总体标准差
86 population_std = np.std(data1)
87 print("population std:{}".format(population_std))
88
89 #总体分布曲线
90 norm1 = scipy.stats.norm(population_mean, population_std)
91 x = np.arange(population_mean-population_std*3.5, population_mean+population_std*3.5, 1)
92 y = norm1.pdf(x)
93 plt.plot(x, y,'r--',label='population distribution',alpha=0.7)
94
95
96 sampling_distribution(data1,sampling_times=1000,sampling_size=36)
97
98 plt.legend()
99 plt.show()
ci_ex1.py 演示了小马哥课堂-统计学-置信区间 的示例1
![]()
![]()
1 #!/usr/bin/env python3
2 #-*- coding:utf-8 -*-
3 #############################################
4 #File Name: ci_ex2.py
5 #Brief: 演示示例2
6 #Author: frank
7 #Email: frank0903@aliyun.com
8 #Created Time:2018-08-13 20:14:44
9 #Blog: http://www.cnblogs.com/black-mamba
10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
11 #############################################
12 import numpy as np
13 import matplotlib.pyplot as plt
14 import matplotlib as mpl
15 import scipy.stats
16
17 #设置中文字体
18 zhfont = mpl.font_manager.FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-microhei.ttc')
19
20 #95%置信水平的标准正态分布的置信区间演示
21 norm = scipy.stats.norm()
22 #x轴
23 x = np.linspace(norm.ppf(0.001),norm.ppf(0.999), 10000)
24 #y轴
25 y= norm.pdf(x)
26 #显示标准正态分布曲线
27 plt.plot(x,y)
28
29 #画置信区间的界限
30 plt.vlines(-1.96,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed')
31 plt.vlines(1.96,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed')
32
33 #显示 置信区间
34 fill_x = np.linspace(-1.96,1.96,100)
35 #print("type of fill_x:{}".format(type(fill_x)))
36 fill_y = norm.pdf(fill_x)
37 #print("type of fill_y:{}".format(type(fill_y)))
38 plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
39
40 #已知置信水平,求置信区间
41
42 #方法1:不太准确
43 confidence = 95
44 c_low = (100-confidence)/2
45 c_high = 100-c_low
46 c_interval = np.percentile(x,[c_low, c_high])
47 print(c_interval)
48 plt.vlines(c_interval[0] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
49 plt.vlines(c_interval[1] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
50
51 #方法2:更好的方式
52 c_interval = [norm.isf(1-0.025),norm.isf(1-0.975)]
53 print(c_interval)
54 plt.vlines(c_interval[0] ,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed',alpha=0.5)
55 plt.vlines(c_interval[1] ,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed',alpha=0.5)
56
57 plt.title('标准正态分布的95%置信区间',fontproperties=zhfont)
58 plt.savefig("ci_ex2.jpg")
59 #print("pdf(1.96):{}".format(norm.pdf(1.96)))
60 #print("cdf(1.96):{}".format(norm.cdf(1.96)))
61 #print("sf(1.96):{}".format(norm.sf(1.96)))
62 #print("isf(0.025):{}".format(norm.isf(0.025)))
63 plt.show()
ci_ex2.py 演示了小马哥课堂-统计学-置信区间 的示例2
![]()
![]()