Python | 使用Keras实现秃头识别
一、选题的背景
如今脱发已经成为了热门话题,在朋友圈、微博等社交平台经常看到年轻人们的哀嚎:身上全是头发,地上全是头发,大风一吹空中可以飘着头发,唯独自己的头上没有头发。一颗心还在红尘,头发却已迫不及待地“出家”。人们每天都对着自己日益发亮的头顶担忧不已,头发已经成为了人们心中地地道道的“烦恼丝”。所以,研究自己的头发已成现代年轻人的当务之急,我的选题背景也就随之而来。
二、机器学习案例设计方案
1.本次选题采用机器学习Bald Classification Dataset的数据集
据介绍,Bald Classification Dataset(秃头数据集)由 Ashish Jangra 于今年 5 月发布。
数据集中包含 20 万张光头人像的图像,分为测试集、训练集、验证集三个文件夹,每个文件夹也包括 Bald 和 NotBald 两种图像。
其中秃头人像数据集的来源,主要是欧美公众人物,包含政商界、娱乐圈、体育界人士。
Bald Classification Dataset
发布人员: Ashish Jangra
包含数量:20 万张秃头人像
数据格式:JPG
数据大小:1.3 G
发布时间:2020 年 5 月
下载地址:https://hyper.ai/datasets/12385
数据集作者:发量惊人的印度小哥
发布者 Ashish Jangra 也是个停不下来的人。他是一位 95 后,2016 年开始就读于 Lovely Professional University(印度拉夫里科技大学)的计算机专业。
2.采用的机器学习框架描述
本项目使用卷积神经网络(Convolutional Neural Network, CNN)。卷积神经网络是深度学习技术中极具代表性的网络结构之一,在图像处理领域取得了很大的成功。
本项目的扩展可以使用Keras搭建部署一个简单易用的深度学习图像网页应用,可以通过网页导入图片判断该图片人物是否秃头。
3.涉及到的技术难点与解决思路
首先,数据集中的图片尺寸都是178*218,当要验证自己所导入的图片时要注意尺寸问题。此外,由于戴着帽子的图片会难以判断是否秃头,这个问题暂没有很好的处理方式,只能在测试时尽量避免使用戴着帽子的图片进行测试。
三、机器学习实现步骤
#查看训练集和验证集中各有多少图片
import os

# Root folders of the three dataset splits. The folder name "Validation" is
# spelled with a capital V to match the path handed to flow_from_directory
# when the validation generator is built; a lowercase spelling fails on
# case-sensitive filesystems.
train_path = "work/Dataset/Train/"
valid_path = "work/Dataset/Validation/"
test_path = "work/Dataset/Test/"

# Per-class image counts for the training and validation splits.
print('total training bald images:', len(os.listdir(train_path + "Bald")))
print('total training notbald images:', len(os.listdir(train_path + "NotBald")))
print('total validation bald images:', len(os.listdir(valid_path + "Bald")))
print('total validation notbald images:', len(os.listdir(valid_path + "NotBald")))

# Walk the test split instead of listing it: when the images sit inside class
# sub-folders, a plain listdir would report the number of folders rather than
# the number of images. Walking also works for a flat folder.
print('total test images:',
      sum(len(files) for _, _, files in os.walk(test_path)))
#数据增强
import keras

from keras.preprocessing.image import ImageDataGenerator

# Edge length, in pixels, that every image is resized to before it is fed to
# the network.
IMSIZE = 224

# Training generator with on-the-fly data augmentation.
train_generator = ImageDataGenerator(
    rescale=1./255,            # scale pixel values into the range [0, 1]
    shear_range=0.5,           # shear transform
    rotation_range=30,         # random rotation
    zoom_range=0.2,            # random zoom in / out
    width_shift_range=0.2,     # horizontal shift
    height_shift_range=0.2,    # vertical shift
    horizontal_flip=True,
).flow_from_directory(
    'work/Dataset/Train/',
    target_size=(IMSIZE, IMSIZE),
    batch_size=100,
    # 'binary' produces one 0/1 label per image, which is what a network
    # ending in a single sigmoid unit trained with binary_crossentropy
    # expects. 'categorical' would produce one-hot label pairs instead.
    class_mode='binary')
#建立模型
from keras import layers
from keras import models

# Convolutional binary classifier: four Conv2D + MaxPooling2D stages, then a
# flattened dense head ending in a single sigmoid unit.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu',
                  input_shape=(IMSIZE, IMSIZE, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
#查看模型结构
# Print the layer-by-layer structure of the model.
model.summary()
#设置优化器,损失函数,准确率评判标准
from keras.optimizers import Adam

# Adam optimizer with a learning rate of 0.001; binary cross-entropy loss;
# accuracy is reported as the evaluation metric.
optimizer = Adam(lr=0.001)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
#归一化图片,设置生成器,修改输入的训练集,验证集的图片的尺寸,并输出图片尺寸
from keras.preprocessing.image import ImageDataGenerator

# Both generators only rescale pixel values into [0, 1]; no augmentation.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Images are resized to IMSIZE x IMSIZE and served in batches of 20 with one
# 0/1 label per image.
train_dir = 'work/Dataset/Train'
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMSIZE, IMSIZE),
    batch_size=20,
    class_mode='binary')

validation_dir = 'work/Dataset/Validation'
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(IMSIZE, IMSIZE),
    batch_size=20,
    class_mode='binary')

# Pull a single batch to show the tensor shapes the network will receive.
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape:', data_batch.shape)
print('labels batch shape:', labels_batch.shape)
#开始模型训练
# Training schedule: 20 epochs of 100 generator steps each, validating on 50
# steps of the validation generator after every epoch.
fit_options = dict(
    steps_per_epoch=100,
    epochs=20,
    validation_data=validation_generator,
    validation_steps=50,
)
history = model.fit_generator(train_generator, **fit_options)

# Persist the trained network so it can be reloaded for prediction.
model.save('bald.h5')
训练集上准确率达到了98.41%,基本符合我的要求,开始验证
#处理图片尺寸,处理好后并进行显示
import matplotlib.pyplot as plt
from PIL import Image
import os.path


def convertjpg(jpgfile, outdir, width=IMSIZE, height=IMSIZE):
    """Resize one image and save it under the same file name in ``outdir``.

    The image is converted to 3-channel RGB before resizing so that the saved
    file can later be reshaped to ``(1, height, width, 3)``; grayscale,
    palette or RGBA inputs would otherwise have a different channel count.

    Args:
        jpgfile: Path of the image to read.
        outdir: Directory that receives the resized copy.
        width: Target width in pixels.
        height: Target height in pixels.

    Errors raised while opening ``jpgfile`` propagate to the caller. Errors
    raised while converting, resizing or saving are printed and swallowed.
    """
    # The context manager closes the underlying file handle when done.
    with Image.open(jpgfile) as img:
        try:
            new_img = img.convert('RGB').resize((width, height), Image.BILINEAR)
            new_img.save(os.path.join(outdir, os.path.basename(jpgfile)))
        except Exception as e:
            print(e)


# Resize the first sample picture, then load and display the resized copy.
jpgfile = 'work/Dataset/data/a.jpg'
convertjpg(jpgfile, "work/Dataset")
img_scale = plt.imread('work/Dataset/a.jpg')
print(img_scale.shape)
plt.imshow(img_scale)
图片尺寸已经修改为224*224
#开始验证
from keras.models import load_model

model = load_model('bald.h5')

# Add the batch axis and apply the same 1/255 scaling the generators used.
img_scale = img_scale.reshape(1, IMSIZE, IMSIZE, 3).astype('float32')
img_scale = img_scale / 255
result = model.predict(img_scale)
print(result)

# flow_from_directory numbers the classes in alphanumeric folder order, so
# "Bald" is label 0 and "NotBald" is label 1. The sigmoid output is therefore
# the NotBald score; the bald score is its complement.
bald_score = 1.0 - float(result[0][0])
if bald_score > 0.8:
    print('该图片人物秃头')
else:
    print('该图片人物不秃头')
#接下来验证一下有头发的图片
#修改尺寸并显示
import matplotlib.pyplot as plt
from PIL import Image
import os.path


def convertjpg(jpgfile1, outdir, width=IMSIZE, height=IMSIZE):
    """Resize one image and save it under the same file name in ``outdir``.

    The image is converted to 3-channel RGB before resizing so that the saved
    file can later be reshaped to ``(1, height, width, 3)``.

    Args:
        jpgfile1: Path of the image to read.
        outdir: Directory that receives the resized copy.
        width: Target width in pixels.
        height: Target height in pixels.

    Errors raised while opening ``jpgfile1`` propagate to the caller. Errors
    raised while converting, resizing or saving are printed and swallowed.
    """
    # The context manager closes the underlying file handle when done.
    with Image.open(jpgfile1) as img:
        try:
            new_img = img.convert('RGB').resize((width, height), Image.BILINEAR)
            new_img.save(os.path.join(outdir, os.path.basename(jpgfile1)))
        except Exception as e:
            print(e)


# Resize the second sample picture, then load and display the resized copy.
# (The original listing contained this whole section twice; once is enough.)
jpgfile1 = 'work/Dataset/data/b.jpg'
convertjpg(jpgfile1, "work/Dataset")
img_scale1 = plt.imread('work/Dataset/b.jpg')
print(img_scale1.shape)
plt.imshow(img_scale1)
#验证
from keras.models import load_model

model = load_model('bald.h5')

# Add the batch axis and apply the same 1/255 scaling the generators used.
img_scale1 = img_scale1.reshape(1, IMSIZE, IMSIZE, 3).astype('float32')
img_scale1 = img_scale1 / 255
result1 = model.predict(img_scale1)
print(result1)

# flow_from_directory numbers the classes in alphanumeric folder order, so
# "Bald" is label 0 and "NotBald" is label 1. The sigmoid output is therefore
# the NotBald score; the bald score is its complement.
bald_score1 = 1.0 - float(result1[0][0])
if bald_score1 > 0.8:
    print('该图片人物秃头')
else:
    print('该图片人物不秃头')
#代码汇总
import os
import os.path

import keras
import matplotlib.pyplot as plt
from keras import layers
from keras import models
from keras.models import load_model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image

# ---------------------------------------------------------------------------
# Dataset overview
# ---------------------------------------------------------------------------
# "Validation" is spelled with a capital V everywhere so the script also runs
# on case-sensitive filesystems.
train_path = "work/Dataset/Train/"
valid_path = "work/Dataset/Validation/"
test_path = "work/Dataset/Test/"

print('total training bald images:', len(os.listdir(train_path + "Bald")))
print('total training notbald images:', len(os.listdir(train_path + "NotBald")))
print('total validation bald images:', len(os.listdir(valid_path + "Bald")))
print('total validation notbald images:', len(os.listdir(valid_path + "NotBald")))
# Walk the test split so images inside class sub-folders are counted, rather
# than the sub-folders themselves.
print('total test images:',
      sum(len(files) for _, _, files in os.walk(test_path)))

# ---------------------------------------------------------------------------
# Data augmentation
# ---------------------------------------------------------------------------
# Edge length, in pixels, that every image is resized to.
IMSIZE = 224

# NOTE: this augmenting generator is rebound further down by the plain
# rescale-only generator, which is the one actually used for training.
train_generator = ImageDataGenerator(
    rescale=1./255,            # scale pixel values into the range [0, 1]
    shear_range=0.5,           # shear transform
    rotation_range=30,         # random rotation
    zoom_range=0.2,            # random zoom in / out
    width_shift_range=0.2,     # horizontal shift
    height_shift_range=0.2,    # vertical shift
    horizontal_flip=True,
).flow_from_directory(
    'work/Dataset/Train/',
    target_size=(IMSIZE, IMSIZE),
    batch_size=100,
    # One 0/1 label per image, matching the single sigmoid output below.
    class_mode='binary')

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# The model must be built before summary()/compile() are called; the original
# summary listing omitted this section.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(IMSIZE, IMSIZE, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['accuracy'])

# ---------------------------------------------------------------------------
# Generators used for training and validation (rescaling only)
# ---------------------------------------------------------------------------
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_dir = 'work/Dataset/Train'
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMSIZE, IMSIZE),
    batch_size=20,
    class_mode='binary')

validation_dir = 'work/Dataset/Validation'
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(IMSIZE, IMSIZE),
    batch_size=20,
    class_mode='binary')

# Pull a single batch to show the tensor shapes the network will receive.
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape:', data_batch.shape)
print('labels batch shape:', labels_batch.shape)

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=20,
                              validation_data=validation_generator,
                              validation_steps=50)
model.save('bald.h5')


# ---------------------------------------------------------------------------
# Prediction helpers
# ---------------------------------------------------------------------------
def convertjpg(jpgfile, outdir, width=IMSIZE, height=IMSIZE):
    """Resize one image and save it under the same file name in ``outdir``.

    The image is converted to 3-channel RGB before resizing so that the saved
    file can later be reshaped to ``(1, height, width, 3)``.

    Args:
        jpgfile: Path of the image to read.
        outdir: Directory that receives the resized copy.
        width: Target width in pixels.
        height: Target height in pixels.

    Errors raised while opening ``jpgfile`` propagate to the caller. Errors
    raised while converting, resizing or saving are printed and swallowed.
    """
    with Image.open(jpgfile) as img:
        try:
            new_img = img.convert('RGB').resize((width, height), Image.BILINEAR)
            new_img.save(os.path.join(outdir, os.path.basename(jpgfile)))
        except Exception as e:
            print(e)


def bald_probability(prediction):
    """Return the bald score for a single-image prediction array.

    flow_from_directory numbers the classes in alphanumeric folder order, so
    "Bald" is label 0 and "NotBald" is label 1. The sigmoid output is the
    NotBald score, and the bald score is its complement.
    """
    return 1.0 - float(prediction[0][0])


model = load_model('bald.h5')

# ---------------------------------------------------------------------------
# First sample picture
# ---------------------------------------------------------------------------
jpgfile = 'work/Dataset/data/a.jpg'
convertjpg(jpgfile, "work/Dataset")
img_scale = plt.imread('work/Dataset/a.jpg')
print(img_scale.shape)
plt.imshow(img_scale)

# Add the batch axis and apply the same 1/255 scaling used by the generators.
img_scale = img_scale.reshape(1, IMSIZE, IMSIZE, 3).astype('float32')
img_scale = img_scale / 255
result = model.predict(img_scale)
print(result)
if bald_probability(result) > 0.8:
    print('该图片人物秃头')
else:
    print('该图片人物不秃头')

# ---------------------------------------------------------------------------
# Second sample picture
# ---------------------------------------------------------------------------
jpgfile1 = 'work/Dataset/data/b.jpg'
convertjpg(jpgfile1, "work/Dataset")
img_scale1 = plt.imread('work/Dataset/b.jpg')
print(img_scale1.shape)
plt.imshow(img_scale1)

img_scale1 = img_scale1.reshape(1, IMSIZE, IMSIZE, 3).astype('float32')
img_scale1 = img_scale1 / 255
result1 = model.predict(img_scale1)
print(result1)
if bald_probability(result1) > 0.8:
    print('该图片人物秃头')
else:
    print('该图片人物不秃头')
四、总结:
通过本次机器学习过程实现,巩固了之前学习的知识,也对机器学习有了更深层次的理解。本次案例是为了实现对秃头的识别,总体来说,功能较为简易,但是也让我体会到了Python功能的强大。案例使用Keras架构,最终实现对秃头图片的识别。本次机器学习案例使我受益匪浅!!!