Python 分类方法记录

使用GPU

import os
# Expose only GPUs 4-7 to this process. The variable is set before TensorFlow
# is imported below -- presumably so the CUDA runtime sees it at initialisation
# (keep this ordering).
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
import tensorflow as tf
# TF1-style session configuration object.
config = tf.ConfigProto()
# Cap this process at half of each visible GPU's memory ...
config.gpu_options.per_process_gpu_memory_fraction = 0.5
# ... and allocate GPU memory on demand instead of reserving it up front.
config.gpu_options.allow_growth = True
# NOTE(review): `config` is not passed to a session anywhere in the visible
# code; confirm it is applied elsewhere, otherwise these options have no effect.

DNN

def baseline_model():
    """Build and compile the small fully connected baseline network.

    Architecture: 21 input features -> Dense(16, relu) -> Dense(16, relu)
    -> Dense(2, sigmoid). Compiled with RMSprop (lr=0.01), the
    ``binary_crossentropy`` loss and the ``accuracy`` metric.

    Takes no arguments so it can be handed to ``KerasClassifier`` as
    ``build_fn``.

    Returns:
        The compiled ``Sequential`` model.
    """
    layer_stack = [
        Dense(16, input_shape=(21, ), activation="relu"),
        Dense(16, activation="relu"),
        Dense(2, activation="sigmoid"),
    ]
    network = Sequential(layer_stack)

    network.compile(
        loss=binary_crossentropy,
        optimizer=RMSprop(lr=0.01),
        metrics=['accuracy'],
    )

    return network

def cross_validation(X, new_y, num_feat):
    """Estimate the baseline network's accuracy with 5-fold cross-validation.

    A 10% hold-out split is carved off first and left untouched; the
    cross-validation runs on the remaining 90%. Standardisation is part of
    the cross-validated pipeline, so each fold's scaler is fit on that fold's
    training portion only. Fitting the scaler once on the whole training
    split (as before) lets statistics of the validation fold leak into
    preprocessing.

    Args:
        X: Feature table. Must expose ``.values`` (used by the debug print
            below), e.g. a pandas DataFrame.
        new_y: Class labels; one-hot encoded here with ``to_categorical``.
        num_feat: Unused; kept so existing callers keep working.

    Returns:
        The per-fold scores returned by ``cross_val_score``.
    """
    # Local import so this snippet does not depend on the file's import header.
    from sklearn.pipeline import make_pipeline

    print("X=", X[:10])
    print("X.values=", X.values[:10])
    y = to_categorical(new_y)

    # The 10% test portion is deliberately discarded here: it stays unseen.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=666)

    estimator = KerasClassifier(build_fn=baseline_model, epochs=10, batch_size=1, verbose=1)
    # Scaler + network are refit together inside every fold.
    pipeline = make_pipeline(StandardScaler(), estimator)
    kfold = KFold(n_splits=5, shuffle=True, random_state=999)
    scores = cross_val_score(pipeline, X_train, y_train, cv=kfold)
    print("Accuracy of cross validation, mean %.2f, std %.2f" %(scores.mean(), scores.std()))

    # Classical baselines previously tried in place of the network:
    # clf = LogisticRegression(penalty = 'l2', solver = 'liblinear', class_weight = 'balanced')
    # clf = KNeighborsClassifier(weights = "distance", n_neighbors = 10, p =9)
    # clf = svm.SVC(kernel = 'rbf', C = 2e4, gamma = 2e-5)
    # clf = svm.SVC(kernel= 'linear', C = 2e3)
    # clf = RandomForestClassifier(n_estimators = 1000, class_weight = "balanced")
    # clf = GaussianNB()
    # scores = cross_val_score(clf, X, new_y, cv = 10)

    return scores

画准确率和损失曲线

def show_acc(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (e.g. the return value of ``model.fit``).

    Raises:
        KeyError: If the history holds no recognised training/validation
            accuracy pair.
    """
    plt.clf()
    history_dict = history.history

    # The recorded key follows the metric name used at compile time:
    # 'binary_accuracy' when requested explicitly, 'accuracy' or 'acc'
    # (depending on the Keras version) for metrics=['accuracy'].
    for key in ('binary_accuracy', 'accuracy', 'acc'):
        if key in history_dict and 'val_' + key in history_dict:
            break
    else:
        raise KeyError(
            "no accuracy metric found in history; available keys: %s"
            % sorted(history_dict)
        )

    acc = history_dict[key]
    val_acc = history_dict['val_' + key]

    epochs = range(1, len(val_acc) + 1)

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.xlabel('Epochs')
    plt.ylabel('Acc')
    plt.legend()

    plt.show()
def show_loss(history):
    """Plot training and validation loss per epoch.

    Prints the raw history dictionary, then draws the ``'loss'`` and
    ``'val_loss'`` series against 1-based epoch numbers and shows the figure.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists.
    """
    plt.clf()
    records = history.history
    print("print history.history = ", records)

    train_curve, val_curve = records['loss'], records['val_loss']
    epoch_axis = range(1, len(val_curve) + 1)

    # (series, line format, legend label) for each curve, drawn in order.
    curves = (
        (train_curve, 'bo', 'Training loss'),
        (val_curve, 'b', 'Validation loss'),
    )
    for series, line_format, legend_text in curves:
        plt.plot(epoch_axis, series, line_format, label=legend_text)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

 

Tensorboard

def classify_data(X, y, class_names):
    """Train the small dense network while logging to TensorBoard.

    The labels are one-hot encoded, 10% of the data is set aside, the
    features are standardised with statistics from the training portion, and
    the network is trained for 20 epochs with 20% of the training data used
    for validation. The architecture diagram is written to ``model.png`` and
    TensorBoard logs go to ``my_log_dir``.

    Args:
        X: Feature matrix with 21 columns (the network's input shape); must
            support slicing and ``.astype``.
        y: Class labels; one-hot encoded here with ``to_categorical``.
        class_names: Unused in this snippet; kept for interface compatibility.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    y = to_categorical(y)

    # Standardise using the training portion only; the 10% hold-out is not
    # touched in this snippet.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=666)
    standScaler = StandardScaler()
    standScaler.fit(X_train)
    X_train = standScaler.transform(X_train)

    model = Sequential()
    model.add(Dense(16, input_shape=(21, ), activation="relu"))
    model.add(Dense(16, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))
    model.summary()

    model.compile(optimizer=RMSprop(lr=0.001), loss=binary_crossentropy, metrics=['accuracy'])
    plot_model(model, show_shapes=True, to_file='model.png')

    # NOTE(review): embeddings_data is taken from the raw, unscaled X while
    # the model is trained on standardised features -- confirm this is intended.
    callbacks = [
        keras.callbacks.TensorBoard(
            log_dir="my_log_dir",
            histogram_freq=1,
            embeddings_freq=1,
            embeddings_data=X[:20].astype("float32"),
        )
    ]
    # `epochs` is the keyword accepted by fit(); the original `epoches` is rejected.
    history = model.fit(X_train, y_train, epochs=20, batch_size=1, validation_split=0.2, callbacks=callbacks)
    return history

画混淆矩阵

def plot_confusion_matrix(cm, classes,normalize=False, title='Confusion matrix',cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    The matrix is also printed to stdout. The function does not call
    ``plt.show()``.

    Args:
        cm: 2-D confusion matrix array; rows are true classes, columns are
            predicted classes.
        classes: Class names used as tick labels on both axes.
        normalize: If true, divide each row by its sum so cells show
            per-true-class fractions instead of raw counts.
        title: Figure title.
        cmap: Matplotlib colour map for the heat map.
    """
    if normalize:
        # Row-wise normalisation: each row (true class) sums to 1.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    # imshow renders the matrix onto the current axes; it does not display
    # the figure by itself.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()  # show the colour scale
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)  # x-axis tick labels
    plt.yticks(tick_marks, classes)  # y-axis tick labels

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    # itertools.product yields every (row, column) index pair.
    # Row i is the true class and column j the predicted class, so the
    # x coordinate is the prediction and the y coordinate is the truth.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label', fontsize = 14)
    plt.xlabel('Predicted label', fontsize = 14)
    # Run the layout pass last so it accounts for the axis labels set above.
    plt.tight_layout()

def classify_data(X, y, class_names):
    """Fit a linear SVM on a train/test split and score its predictions.

    The split size comes from the module-level ``testSize``. Macro precision,
    micro recall, weighted F1 and accuracy are computed on the test portion;
    nothing is returned by the visible code.

    Args:
        X: Feature matrix.
        y: Class labels.
        class_names: Unused in the visible code.
    """
    # Declared by the original snippet; never assigned in the visible code.
    global f_cv_scores

    # Train / test split.
    features_train, features_test, labels_train, labels_test = train_test_split(
        X, y, test_size=testSize, random_state=42
    )

    # Alternatives tried in place of the linear SVM:
    #   LogisticRegression(penalty='l2', class_weight='balanced')
    #   RandomForestClassifier(n_estimators=1000, class_weight="balanced")
    #   KNeighborsClassifier(weights="distance", n_neighbors=10, p=9)
    #   svm.SVC(kernel='rbf', C=2e4, gamma=2e-5)
    #   GaussianNB()
    classifier = svm.SVC(kernel='linear', C=2e3)
    classifier.fit(features_train, labels_train)
    predictions = classifier.predict(features_test)

    # NOTE(review): each score uses a different averaging mode -- confirm
    # this is intentional.
    macro_precision = precision_score(labels_test, predictions, average='macro')
    micro_recall = recall_score(labels_test, predictions, average='micro')
    weighted_f1 = f1_score(labels_test, predictions, average='weighted')
    accuracy = accuracy_score(labels_test, predictions)

 

Keras训练集、测试集与验证集

# Training set and test set: hold out 20% of the data for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
# Keras takes the validation set from the training data.
# NOTE(review): the original note said 20%, but validation_split=0.1 holds out 10%.
history = model.fit(X_train, y_train, epochs=20, batch_size=1, shuffle=True, validation_split=0.1, verbose=1, callbacks=None, validation_data=None)

 

posted @ 2019-09-03 17:46  叶小雨  阅读(846)  评论(0编辑  收藏  举报