7.简单分类识别

0.项目结构

数据集链接：http://blog.csdn.net/qq_14845119/article/details/51913171

http://www.cs.toronto.edu/~kriz/cifar.html

1.准备数据集

load_matrix.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from PIL import Image
import numpy as np

# 读取数据集
def unpickle(file):
    """Load and return the object stored in a pickled dataset file.

    Works on both Python 2 (``cPickle``) and Python 3 (``pickle``).  On
    Python 3 the file is read with ``encoding='latin1'`` so that byte strings
    written by Python 2 are decoded to ``str``; the callers in this file index
    the result with the ``str`` keys ``'labels'`` and ``'data'``.

    NOTE(security): unpickling can execute arbitrary code.  Only call this on
    dataset files obtained from a trusted source.

    :param file: path of the pickle file to read.
    :return: the unpickled object.
    """
    try:
        # Python 2: the C-accelerated pickle module.
        import cPickle as pickle_module
        load_kwargs = {}
    except ImportError:
        # Python 3: cPickle was merged into pickle.
        import pickle as pickle_module
        load_kwargs = {'encoding': 'latin1'}

    with open(file, 'rb') as fo:
        return pickle_module.load(fo, **load_kwargs)

# 生成训练样本和样本标签
def get_matrix_2():
    """Build a small two-class training set and a one-class test set.

    Training set: scans ``data_batch_1`` in order and keeps every sample whose
    label is 5 (target 1) or 0 (target 0) until 209 samples are collected.
    Test set: scans ``test_batch`` in order and keeps the first 50 samples
    whose label is 5; every test target is 1.

    Each sample matrix is transposed so that one column is one sample, and is
    divided by 255.0 (assumes 8-bit pixel values -- TODO confirm against the
    dataset files).

    :return: tuple ``(train_set_x, train_set_y, test_set_x, test_set_y)``.
        ``train_set_y`` is a 1-D array with one entry per training sample;
        ``test_set_y`` is an all-ones array of shape ``(1, n_test)`` where
        ``n_test`` is the number of test samples actually collected.
    """
    positive_label = 5   # samples with this label get target 1
    negative_label = 0   # samples with this label get target 0
    train_limit = 209
    test_limit = 50

    # ---- training samples and targets ----
    batch = unpickle("data_batch_1")
    train_rows = []
    train_targets = []
    for label, row in zip(batch['labels'], batch['data']):
        if label == positive_label:
            train_rows.append(row)
            train_targets.append(1)
        elif label == negative_label:
            train_rows.append(row)
            train_targets.append(0)
        if len(train_rows) == train_limit:
            break
    train_set_x = np.array(train_rows).T / 255.0
    train_set_y = np.array(train_targets)

    # ---- test samples and targets ----
    batch = unpickle("test_batch")
    test_rows = []
    for label, row in zip(batch['labels'], batch['data']):
        if label == positive_label:
            test_rows.append(row)
            if len(test_rows) == test_limit:
                break
    test_set_x = np.array(test_rows).T / 255.0
    # Size the targets from what was actually collected so that the labels
    # always line up with the columns of test_set_x, even when the batch
    # holds fewer than ``test_limit`` matching samples.
    test_set_y = np.ones(shape=(1, len(test_rows)))

    return train_set_x, train_set_y, test_set_x, test_set_y

2.神经网络训练学习预测

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
import load_matrix

# 1 准备数据集
def load_data():
    """Return the dataset used for training and evaluation.

    This is a thin wrapper: it hands back whatever
    ``load_matrix.get_matrix_2()`` produces, unchanged.  Callers in this file
    unpack the result as
    ``(train_set_x, train_set_y, test_set_x, test_set_y)``.
    """
    dataset = load_matrix.get_matrix_2()
    return dataset

# 4定义激活函数
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    :param z: a scalar or numpy array.
    :return: the logistic function of ``z``, with the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1.0 / denominator

# 5初始化参数w和b
def initialize_with_zeros(dim):
    """Create zero-valued starting parameters.

    :param dim: number of rows of the weight vector.
    :return: tuple ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros
        and ``b`` is the integer 0.
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0

    # Sanity checks on the values just created.
    assert weights.shape == (dim, 1)
    assert isinstance(bias, (float, int))

    return weights, bias

# 6定义正向和反向传播函数
def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    :param w: weight vector; ``w.T`` is multiplied with ``X``.
    :param b: bias added to every activation input.
    :param X: sample matrix; ``X.shape[1]`` is used as the sample count.
    :param Y: targets, broadcast against the ``(1, m)`` activations.
    :return: tuple ``(grads, cost)`` where ``grads`` is a dict with the keys
        ``"dw"`` (same shape as ``w``) and ``"db"`` (scalar), and ``cost`` is
        the mean cross-entropy as a 0-d value.
    """
    m = X.shape[1]

    # Forward pass: activations for every sample.
    A = sigmoid(np.dot(w.T, X) + b)

    # Clip only for the cost: when an activation saturates to exactly 0 or 1
    # the logarithm is -inf and the cost turns into nan/inf.  The gradients
    # below still use the unclipped activations.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -(np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))) / m

    # Backward pass: gradients of the cost with respect to w and b.
    dw = np.dot(X, (A - Y).T) / m
    db = np.sum(A - Y) / m

    # Sanity checks on shapes and types.
    assert (dw.shape == w.shape)
    assert (db.dtype == float)
    cost = np.squeeze(cost)
    assert (cost.shape == ())

    grads = {"dw": dw,
             "db": db}

    return grads, cost

# 7定义循环迭代函数
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` with plain gradient descent.

    :param w: initial weights.
    :param b: initial bias.
    :param X: sample matrix passed to ``propagate``.
    :param Y: targets passed to ``propagate``.
    :param num_iterations: number of gradient-descent steps to run.
    :param learning_rate: step size applied to each gradient.
    :param print_cost: when true, print the cost every 100 iterations.
    :return: tuple ``(params, grads, costs)``.  ``params`` holds the final
        ``"w"`` and ``"b"``; ``grads`` holds the ``"dw"`` and ``"db"`` from
        the last iteration (both ``None`` when ``num_iterations`` is 0);
        ``costs`` lists the cost recorded every 100 iterations.
    """
    costs = []
    # Defined up front so that zero iterations returns cleanly instead of
    # failing on unbound names when the result dicts are built.
    dw = None
    db = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]

        # Gradient-descent update.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record (and optionally print) the cost every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

# 8进行预测
def predict(w, b, X):
    """Classify every column of ``X`` as 0 or 1.

    :param w: weights; reshaped to ``(X.shape[0], 1)`` before use.
    :param b: bias.
    :param X: sample matrix with one sample per column.
    :return: float array of shape ``(1, m)`` holding 1 where the activation
        is greater than 0.5 and 0 elsewhere, ``m`` being ``X.shape[1]``.
    """
    sample_count = X.shape[1]
    weights = w.reshape(X.shape[0], 1)

    activations = sigmoid(np.dot(weights.T, X) + b)

    # Start from all zeros and switch on the entries above the threshold.
    predictions = np.zeros((1, sample_count))
    above_threshold = np.asarray(activations) > 0.5
    predictions[above_threshold] = 1

    assert predictions.shape == (1, sample_count)
    return predictions

# 9定义模型
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """Train a logistic-regression classifier and report its accuracy.

    :param X_train: training samples, one per column.
    :param Y_train: training targets.
    :param X_test: test samples, one per column.
    :param Y_test: test targets.
    :param num_iterations: number of gradient-descent steps.
    :param learning_rate: gradient-descent step size.
    :param print_cost: forwarded to ``optimize``; when true the cost is
        printed every 100 iterations.
    :return: dict with the keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    w, b = initialize_with_zeros(X_train.shape[0])

    # Forward the caller's print_cost flag; it used to be forced to False,
    # which silenced the cost output even when the caller asked for it.
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate,
                                        print_cost=print_cost)
    w = parameters["w"]
    b = parameters["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Accuracy = 100% minus the mean absolute error of the 0/1 predictions.
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}

    return d

# 输入图片返回是不是某个东西
def result(data):
    """Classify ``data`` with the parameters stored in the module-level ``d``.

    ``d`` is the dict that ``start()`` assigns; it must exist before this
    function is called.

    :param data: sample matrix passed straight to ``predict``.
    :return: whatever ``predict`` returns for ``data``.
    """
    trained = d
    return predict(trained['w'], trained['b'], data)

# 开始训练获取参数
def start():
    """Load the dataset, train the model and publish it as the global ``d``.

    Trains for 2000 iterations with a learning rate of 0.005 and asks for
    cost printing.

    :return: the dict returned by ``model``; the same object is stored in the
        module-level name ``d`` so that ``result()`` can use it.
    """
    global d
    train_set_x, train_set_y, test_set_x, test_set_y = load_data()
    # model() creates its own initial parameters, so none are built here.
    d = model(train_set_x, train_set_y, test_set_x, test_set_y,
              num_iterations=2000, learning_rate=0.005, print_cost=True)
    return d


def ImageToMatrix(filename):
    """Read an image file into a normalised, channel-planar row vector.

    The result holds all red values, then all green values, then all blue
    values, each plane in row-major order (left to right, top to bottom).
    Per the CIFAR dataset description this is the layout of the rows in the
    CIFAR batches used for training, so the features line up with the
    learned weights.  The image is not resized; the caller must supply an
    image with the same pixel count as the training samples.

    :param filename: path of the image file to read.
    :return: ``numpy.matrix`` of shape ``(1, 3 * width * height)`` with the
        channel values divided by 255.0.
    """
    # Convert to RGB so that every pixel unpacks into exactly three values,
    # whatever the mode of the file (RGBA, palette, greyscale, ...).  The
    # context manager closes the underlying file once the copy is made.
    with Image.open(filename) as source:
        im = source.convert("RGB")

    width, height = im.size
    pix = im.load()

    R = []
    G = []
    B = []
    # Rows outermost: pix[x, y] takes the column first, so iterating y in the
    # outer loop yields row-major order.
    for y in range(height):
        for x in range(width):
            r, g, b = pix[x, y]
            R.append(r)
            G.append(g)
            B.append(b)

    data = R + G + B
    print(data)
    data = np.matrix(data, dtype='float') / 255.0
    return data
def MatrixToImage(data):
    """Turn an array of normalised values back into a PIL image.

    The values are multiplied by 255 and cast to ``uint8`` before being
    handed to ``Image.fromarray``.

    :param data: numpy array; presumably values in the range 0..1 -- confirm
        against the caller.
    :return: the image built by ``Image.fromarray``.
    """
    scaled = data * 255
    pixels = scaled.astype(np.uint8)
    return Image.fromarray(pixels)


if __name__ == '__main__':
    # Train the classifier.  start() loads the dataset itself, so it is not
    # loaded a second time here.
    start()

    # Classify a single image file.
    data = ImageToMatrix("../../static/dog/1.png")
    print(data)

    # ImageToMatrix returns a row vector; predict expects one sample per
    # column, hence the transpose.  The outcome is kept under its own name so
    # that the result() function is not overwritten.
    prediction = result(data.T)
    if prediction[0, 0] == 1:
        print('dog')
    else:
        print('not dog')

posted @ 2017-09-21 21:56 桃源仙居 阅读(132) 评论(0) 收藏 举报

刷新页面返回顶部

桃源仙居

7.简单分类识别

0.项目结构

1.准备数据集

公告