机器学习——秃头图片识别

（一）选题背景

现在人们的生活质量越来越好，对身体健康也越来越重视，而脱发已经成为人们更为关注的一个问题。“双11”期间，不少年轻人将植发物品、假发片等悄悄放进购物车，年纪轻轻感到“秃”然，成为这一代青年人的隐痛。数据显示，我国2.5亿脱发人群中有65%为男性。脱发逐渐低龄化，30岁以下占近70%。所以识别自己的头发状态很重要，可以设计一款识别是否秃头的小程序，来帮助年轻人及时发现并进行治疗。

（二）机器学习设计案例设计方案

数据集中包含 20 万张光头人像的图像，分为测试集、训练集、验证集三个文件夹，每个文件夹也包括 Bald 和 NotBald 两种图像。

下载地址：200K图像👨‍🦲👨‍🦲秃头图像数据集 - Heywhale.com

从对应的网站下载数据集并查看其内容；然后在 Python 环境中对数据进行预处理，利用 Keras 构建网络并训练模型，最后导入图片对模型进行测试。

（三）机器学习的实现步骤

1、导入需要用到的库

 1 import numpy as np
 2 import pandas as pd
 3 import os
 4 import tensorflow as tf
 5 import matplotlib.pyplot as plt
 6 from pathlib import Path
 7 from sklearn.model_selection import train_test_split
 8 from keras.models import Sequential
 9 from keras.layers import Activation
10 from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
11 from keras.applications.resnet import preprocess_input
12 
13 from keras.models import load_model
14 from keras.utils import image_utils
15 from keras import optimizers

2、查看随机图像

 1 data_dir = "D:/JupyterFile/baldhead/Dataset/Validation"
 2 train_ds = tf.keras.preprocessing.image_dataset_from_directory(
 3     data_dir,
 4     validation_split=0.1,
 5     subset="training",
 6     seed=123,
 7     image_size=(224, 224),
 8     batch_size=32)
 9 plt.figure(figsize=(10, 5))  # 图形的宽为10高为5
10 
11 for images, labels in train_ds.take(1):
12     for i in range(8):
13         ax = plt.subplot(2, 4, i + 1)  
14         plt.imshow(images[i].numpy().astype("uint8"))
15         plt.axis("off")

3、检查一下每个分组（训练 / 测试）中分别包含多少张图像

 1 train_path="D:/JupyterFile/baldhead/Dataset/Train/"
 2 print('测试集秃头图片:', len(os.listdir(train_path+"Bald")))
 3 print('测试集不秃头图片:', len(os.listdir(train_path+"NotBald")))
 4 
 5 valid_path="D:/JupyterFile/baldhead/Dataset/Validation/"
 6 print('验证集秃头图片:', len(os.listdir(valid_path+"Bald")))
 7 print('验证集不秃头图片:', len(os.listdir(valid_path+"NotBald")))
 8 
 9 test_path="D:/JupyterFile/baldhead/Dataset/test/"
10 print('测试集图片:', len(os.listdir(test_path)))

4、搭建网络

 1 from keras import layers
 2 from keras import models
 3 model = models.Sequential()
 4 
 5 model.add(layers.Conv2D(32,(3,3),activation = 'relu',input_shape = (224,224,3)))
 6 model.add(layers.MaxPooling2D((2,2)))
 7 
 8 model.add(layers.Conv2D(64,(3,3),activation = 'relu'))
 9 model.add(layers.MaxPooling2D((2,2)))
10 
11 model.add(layers.Conv2D(128,(3,3),activation = 'relu'))
12 model.add(layers.MaxPooling2D((2,2)))
13 
14 model.add(layers.Conv2D(128,(3,3),activation = 'relu'))
15 model.add(layers.MaxPooling2D((2,2)))
16 
17 model.add(layers.Flatten())
18 model.add(layers.Dense(512,activation = 'relu'))
19 model.add(layers.Dense(1,activation = 'sigmoid'))

5、查看特征图的维度随着每层变化

# Print the layer-by-layer summary of the network built above.
model.summary()

6、配置训练方法

# Configure training: binary cross-entropy loss, RMSprop, accuracy metric.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias in
# Keras optimizers.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(learning_rate=1e-4),
    metrics=['acc'],
)

7、图像在输入神经网络之前进行数据处理，建立训练和验证数据

 1 #归一化
 2 train_datagen = ImageDataGenerator(rescale = 1./255)
 3 test_datagen = ImageDataGenerator(rescale = 1./255)
 4 
 5 #训练集图片目录路径
 6 train_dir = 'D:/JupyterFile/baldhead/Dataset/Train'     
 7 
 8 
 9 train_generator = train_datagen.flow_from_directory(
10     train_dir,
11     target_size = (224,224),
12     batch_size = 20,
13     class_mode = 'binary')
14 
15 #验证集图片目录路径
16 validation_dir = 'D:/JupyterFile/baldhead/Dataset/Validation'  
17 
18 validation_generator = test_datagen.flow_from_directory(
19     validation_dir,
20     target_size = (224,224),
21     batch_size = 20,
22     class_mode = 'binary')
23 
24 
25 for data_batch,labels_batch in train_generator:
26     print('data batch shape:',data_batch.shape)
27     print('data batch shape:',labels_batch.shape)
28     break

8、训练模型50次

# Train for 50 epochs of 100 steps each; every epoch is followed by
# 50 validation steps.  The returned History object feeds the plots.
history = model.fit(
    x=train_generator,
    epochs=50,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=50,
)

9、将训练好的模型保存为h5文件

# Write the trained model to an .h5 file so later steps can reload it.
model.save('D:/JupyterFile/baldhead/Dataset/result.h5')

10、训练结果的精度和损失曲线图

 1 accuracy = history.history['acc']
 2 loss = history.history['loss']
 3 val_loss = history.history['val_loss']
 4 val_accuracy = history.history['val_acc']
 5 plt.figure(figsize=(18, 6))
 6 plt.subplot(2, 2, 1)
 7 plt.plot(range(50), accuracy, label='Training Accuracy')
 8 plt.plot(range(50), val_accuracy, label='Validation Accuracy')
 9 plt.legend(loc='upper right')
10 plt.title('Accuracy : Training and Validation ')
11 
12 plt.subplot(2, 2, 2)
13 plt.plot(range(50), loss ,label='Training Loss')
14 plt.plot(range(50), val_loss, label='Validation Loss')
15 plt.title('Loss : Training and Validation ')
16 plt.legend(loc='upper right')
17 plt.show()

11、改变测试图片尺寸

 1 #将图片缩小到（224，224）的大小
 2 def convertjpg(jpgfile,outdir,width=224,height=224): 
 3     img=Image.open(jpgfile)
 4     try:
 5         new_img=img.resize((width,height),Image.BILINEAR)   
 6         new_img.save(os.path.join(outdir,os.path.basename(new_file)))
 7     except Exception as e:
 8         print(e)
 9 
10 jpgfile="D:/JupyterFile/baldhead/Dataset/44.jpg"        
11 new_file="D:/JupyterFile/baldhead/Dataset/new_44.jpg"
12 
13 #图像大小改变到（224,224），文件名保存
14 convertjpg(jpgfile,r"D:/JupyterFile/baldhead/Dataset") 
15 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_44.jpg')
16 
17 plt.imshow(img_scale)

12、测试

 1 model = load_model('D:/JupyterFile/baldhead/Dataset/result.h5')
 2 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_44.jpg')
 3 img_scale = img_scale.reshape(1,224,224,3).astype('float32')
 4 img_scale = img_scale/255        #归一化到0-1之间
 5 
 6 result = model.predict(img_scale) #取图片信息
 7 
 8 #print(result)
 9 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_44.jpg')
10 
11 plt.imshow(img_scale)        #显示图片
12 
13 if result>0.9:
14     print('该图片人物不秃头：',result)
15 else:
16     print('该图片人物秃头：',1-result)

13、改变图片尺寸

 1 import matplotlib.pyplot as plt
 2 from PIL import Image
 3 import os.path
 4 
 5 #将图片缩小到（224，224）的大小
 6 def convertjpg(jpgfile,outdir,width=224,height=224): 
 7     img=Image.open(jpgfile)
 8     try:
 9         new_img=img.resize((width,height),Image.BILINEAR)   
10         new_img.save(os.path.join(outdir,os.path.basename(new_file)))
11     except Exception as e:
12         print(e)
13 
14 jpgfile="D:/JupyterFile/baldhead/Dataset/11.jpg"        
15 new_file="D:/JupyterFile/baldhead/Dataset/new_11.jpg"
16 #图像大小改变到（224,224），文件名保存
17 convertjpg(jpgfile,r"D:/JupyterFile/baldhead/Dataset") 
18 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_11.jpg')
19 
20 plt.imshow(img_scale)

14、秃头测试

 1 model = load_model('D:/JupyterFile/baldhead/Dataset/result.h5')
 2 #model.summary()
 3 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_11.jpg')
 4 img_scale = img_scale.reshape(1,224,224,3).astype('float32')
 5 img_scale = img_scale/255   
 6 
 7 #取图片信息
 8 result = model.predict(img_scale) 
 9 
10 #print(result)
11 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_11.jpg')
12 
13 plt.imshow(img_scale)        
14 
15 if result>0.8:
16     print('该图片人物不秃头：',result)
17 else:
18     print('该图片人物秃头：',1-result)

15、多张图片判断

 1 test_dir="D:/JupyterFile/baldhead/Dataset/Test/"
 2 
 3 # 设置中文
 4 plt.rcParams['font.family'] = ['SimHei']
 5 # 设置子图
 6 figure,ax = plt.subplots(nrows=1, ncols=4, sharex=True, sharey=True,figsize=(24,24))
 7 # 导入训练模型
 8 model = load_model('D:/JupyterFile/baldhead/Dataset/result.h5')
 9 
10 # 图片处理，图片的大小改为224*224
11 def convertjpg(jpgfile):
12     img=Image.open(jpgfile)
13     try:
14         new_img=img.resize((224,224),Image.BILINEAR)
15         return new_img
16     except Exception as e:
17         print(e)
18 
19 # 用循环取出test_dir中的图片进行预测
20 
21 for i in range(1,5): # 选择要测试图片的数量
22     
23     img = str(i)+'.jpg'
24     print(img)
25     img_show = convertjpg(test_dir + img)
26     img_scale = image_utils.img_to_array(img_show) 
27     img_scale = img_scale.reshape(1,224,224,3) 
28     img_scale = img_scale.astype('float32')/255 # 归一化
29 
30     result = model.predict(img_scale)# 预测函数
31     if result>0.8:
32         print('该图片人物不秃头：',result)
33     else:
34         print('该图片人物秃头：',1-result)
35     
36 
37     ax = ax.flatten()   # 将子图从多维变成一维
38 
39     ax[i-1].imshow(img_show, cmap='Greys', interpolation='nearest')
40     # 子图标题
41     if result>0.8:
42         ax[i-1].set_title("该图片人物不秃头",fontsize=24)
43     else:
44         ax[i-1].set_title("该图片人物秃头",fontsize=24)
45     
46 
47     # 去掉刻度
48     ax[0].set_xticks([])
49     ax[0].set_yticks([])
50 plt.show()

（四）总结

在课程设计的过程中，还是多多少少会有点问题，比如图片的格式没有设置好，导致总是报错；keras的版本问题，导致部分功能不能实现。但都完美的解决了。在这个过程中，巩固了之前学习的知识，也对机器学习有了更深层次的理解。最终的训练模型达到了预期的效果。

#全部代码附上

  1 import numpy as np
  2 import pandas as pd
  3 import os
  4 import tensorflow as tf
  5 import matplotlib.pyplot as plt
  6 from pathlib import Path
  7 from sklearn.model_selection import train_test_split
  8 from keras.models import Sequential
  9 from keras.layers import Activation
 10 from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
 11 from keras.applications.resnet import preprocess_input
 12 
 13 from keras.models import load_model
 14 from keras.utils import image_utils
 15 from keras import optimizers
 16 
 17 data_dir = "D:/JupyterFile/baldhead/Dataset/Validation"
 18 train_ds = tf.keras.preprocessing.image_dataset_from_directory(
 19     data_dir,
 20     validation_split=0.1,
 21     subset="training",
 22     seed=123,
 23     image_size=(224, 224),
 24     batch_size=32)
 25 plt.figure(figsize=(10, 5))  # 图形的宽为10高为5
 26 
 27 for images, labels in train_ds.take(1):
 28     for i in range(8):
 29         ax = plt.subplot(2, 4, i + 1)
 30         plt.imshow(images[i].numpy().astype("uint8"))
 31         plt.axis("off")
 32         
 33 train_path="D:/JupyterFile/baldhead/Dataset/Train/"
 34 print('测试集秃头图片:', len(os.listdir(train_path+"Bald")))
 35 print('测试集不秃头图片:', len(os.listdir(train_path+"NotBald")))
 36 
 37 valid_path="D:/JupyterFile/baldhead/Dataset/Validation/"
 38 print('验证集秃头图片:', len(os.listdir(valid_path+"Bald")))
 39 print('验证集不秃头图片:', len(os.listdir(valid_path+"NotBald")))
 40 
 41 test_path="D:/JupyterFile/baldhead/Dataset/test/"
 42 print('测试集图片:', len(os.listdir(test_path)))
 43 
 44 from keras import layers
 45 from keras import models
 46 model = models.Sequential()
 47 
 48 model.add(layers.Conv2D(32,(3,3),activation = 'relu',input_shape = (224,224,3)))
 49 model.add(layers.MaxPooling2D((2,2)))
 50 
 51 model.add(layers.Conv2D(64,(3,3),activation = 'relu'))
 52 model.add(layers.MaxPooling2D((2,2)))
 53 
 54 model.add(layers.Conv2D(128,(3,3),activation = 'relu'))
 55 model.add(layers.MaxPooling2D((2,2)))
 56 
 57 model.add(layers.Conv2D(128,(3,3),activation = 'relu'))
 58 model.add(layers.MaxPooling2D((2,2)))
 59 
 60 model.add(layers.Flatten())
 61 model.add(layers.Dense(512,activation = 'relu'))
 62 model.add(layers.Dense(1,activation = 'sigmoid'))
 63 
 64 model.summary()
 65 
 66 model.compile(loss='binary_crossentropy',              
 67               optimizer=optimizers.RMSprop(lr=1e-4),
 68               metrics=['acc'])
 69 
 70 #归一化
 71 train_datagen = ImageDataGenerator(rescale = 1./255)
 72 test_datagen = ImageDataGenerator(rescale = 1./255)
 73 
 74 #训练集图片目录路径
 75 train_dir = 'D:/JupyterFile/baldhead/Dataset/Train'
 76 
 77 
 78 train_generator = train_datagen.flow_from_directory(
 79     train_dir,
 80     target_size = (224,224),
 81     batch_size = 20,
 82     class_mode = 'binary')
 83 
 84 #验证集图片目录路径
 85 validation_dir = 'D:/JupyterFile/baldhead/Dataset/Validation'
 86 
 87 validation_generator = test_datagen.flow_from_directory(
 88     validation_dir,
 89     target_size = (224,224),
 90     batch_size = 20,
 91     class_mode = 'binary')
 92 
 93 
 94 for data_batch,labels_batch in train_generator:
 95     print('data batch shape:',data_batch.shape)
 96     print('data batch shape:',labels_batch.shape)
 97     break
 98 
 99 history = model.fit(
100                     train_generator,
101                     steps_per_epoch = 100,
102                     epochs = 50,
103                     validation_data = validation_generator,
104                     validation_steps = 50)
105 
106 model.save('D:/JupyterFile/baldhead/Dataset/result.h5')
107 
108 accuracy = history.history['acc']
109 loss = history.history['loss']
110 val_loss = history.history['val_loss']
111 val_accuracy = history.history['val_acc']
112 plt.figure(figsize=(18, 6))
113 plt.subplot(2, 2, 1)
114 plt.plot(range(50), accuracy, label='Training Accuracy')
115 plt.plot(range(50), val_accuracy, label='Validation Accuracy')
116 plt.legend(loc='upper right')
117 plt.title('Accuracy : Training and Validation ')
118 
119 plt.subplot(2, 2, 2)
120 plt.plot(range(50), loss ,label='Training Loss')
121 plt.plot(range(50), val_loss, label='Validation Loss')
122 plt.title('Loss : Training and Validation ')
123 plt.legend(loc='upper right')
124 plt.show()
125 
126 #将图片缩小到（224，224）的大小
127 def convertjpg(jpgfile,outdir,width=224,height=224):
128     img=Image.open(jpgfile)
129     try:
130         new_img=img.resize((width,height),Image.BILINEAR)
131         new_img.save(os.path.join(outdir,os.path.basename(new_file)))
132     except Exception as e:
133         print(e)
134 
135 jpgfile="D:/JupyterFile/baldhead/Dataset/44.jpg"
136 new_file="D:/JupyterFile/baldhead/Dataset/new_44.jpg"
137 
138 #图像大小改变到（224,224），文件名保存
139 convertjpg(jpgfile,r"D:/JupyterFile/baldhead/Dataset")
140 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_44.jpg')
141 
142 plt.imshow(img_scale)
143 
144 model = load_model('D:/JupyterFile/baldhead/Dataset/result.h5')
145 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_44.jpg')
146 img_scale = img_scale.reshape(1,224,224,3).astype('float32')
147 img_scale = img_scale/255        #归一化到0-1之间
148 
149 result = model.predict(img_scale) #取图片信息
150 
151 #print(result)
152 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_44.jpg')
153 
154 plt.imshow(img_scale)        #显示图片
155 
156 if result>0.9:
157     print('该图片人物不秃头：',result)
158 else:
159     print('该图片人物秃头：',1-result)
160 
161 import matplotlib.pyplot as plt
162 from PIL import Image
163 import os.path
164 
165 #将图片缩小到（224，224）的大小
166 def convertjpg(jpgfile,outdir,width=224,height=224):
167     img=Image.open(jpgfile)
168     try:
169         new_img=img.resize((width,height),Image.BILINEAR)
170         new_img.save(os.path.join(outdir,os.path.basename(new_file)))
171     except Exception as e:
172         print(e)
173 
174 jpgfile="D:/JupyterFile/baldhead/Dataset/11.jpg"
175 new_file="D:/JupyterFile/baldhead/Dataset/new_11.jpg"
176 #图像大小改变到（224,224），文件名保存
177 convertjpg(jpgfile,r"D:/JupyterFile/baldhead/Dataset")
178 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_11.jpg')
179 
180 plt.imshow(img_scale)
181 
182 model = load_model('D:/JupyterFile/baldhead/Dataset/result.h5')
183 #model.summary()
184 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_11.jpg')
185 img_scale = img_scale.reshape(1,224,224,3).astype('float32')
186 img_scale = img_scale/255
187 
188 #取图片信息
189 result = model.predict(img_scale)
190 
191 #print(result)
192 img_scale = plt.imread('D:/JupyterFile/baldhead/Dataset/new_11.jpg')
193 
194 plt.imshow(img_scale)
195 
196 if result>0.8:
197     print('该图片人物不秃头：',result)
198 else:
199     print('该图片人物秃头：',1-result)
200     
201 test_dir="D:/JupyterFile/baldhead/Dataset/Test/"
202 
203 # 设置中文
204 plt.rcParams['font.family'] = ['SimHei']
205 # 设置子图
206 figure,ax = plt.subplots(nrows=1, ncols=4, sharex=True, sharey=True,figsize=(24,24))
207 # 导入训练模型
208 model = load_model('D:/JupyterFile/baldhead/Dataset/result.h5')
209 
210 # 图片处理，图片的大小改为224*224
211 def convertjpg(jpgfile):
212     img=Image.open(jpgfile)
213     try:
214         new_img=img.resize((224,224),Image.BILINEAR)
215         return new_img
216     except Exception as e:
217         print(e)
218 
219 # 用循环取出test_dir中的图片进行预测
220 
221 for i in range(1,5): # 选择要测试图片的数量
222 
223     img = str(i)+'.jpg'
224     print(img)
225     img_show = convertjpg(test_dir + img)
226     img_scale = image_utils.img_to_array(img_show)
227     img_scale = img_scale.reshape(1,224,224,3)
228     img_scale = img_scale.astype('float32')/255 # 归一化
229 
230     result = model.predict(img_scale)# 预测函数
231     if result>0.8:
232         print('该图片人物不秃头：',result)
233     else:
234         print('该图片人物秃头：',1-result)
235 
236 
237     ax = ax.flatten()   # 将子图从多维变成一维
238 
239     ax[i-1].imshow(img_show, cmap='Greys', interpolation='nearest')
240     # 子图标题
241     if result>0.8:
242         ax[i-1].set_title("该图片人物不秃头",fontsize=24)
243     else:
244         ax[i-1].set_title("该图片人物秃头",fontsize=24)
245 
246 
247     # 去掉刻度
248     ax[0].set_xticks([])
249     ax[0].set_yticks([])
250 plt.show()

posted @ 2022-12-21 13:46 Eddly 阅读(321) 评论(0) 收藏举报

刷新页面返回顶部

Eddly

机器学习——秃头图片识别

（一）选题背景

（二）机器学习设计案例设计方案

（三）机器学习的实现步骤

（四）总结

公告