GPT-1 for text classification

The book 《揭秘大模型:从原理到实战》 (Demystifying Large Models: From Principles to Practice) includes the following code:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import LayerNormalization

class DecoderLayerGPT1(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, drop_rate=0.1):
        super(DecoderLayerGPT1, self).__init__()
        # Masked multi-head self-attention
        self.mha1 = MutilHeadAttention(d_model, num_heads)
        # Position-wise feed-forward network
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(drop_rate)
        self.dropout2 = layers.Dropout(drop_rate)

    def call(self, inputs, training, look_ahead_mask):
        # Masked multi-head self-attention with residual connection and layer norm
        att1, att_weight1 = self.mha1(inputs, inputs, inputs, look_ahead_mask)
        att1 = self.dropout1(att1, training=training)
        out1 = self.layernorm1(inputs + att1)
        # Feed-forward sub-layer with residual connection and layer norm
        ffn_out = self.ffn(out1)
        ffn_out = self.dropout2(ffn_out, training=training)
        out2 = self.layernorm2(out1 + ffn_out)
        return out2, att_weight1
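The layer above relies on two helpers that the book defines earlier and this excerpt omits: MutilHeadAttention (a scaled dot-product multi-head attention that returns both the output and the attention weights) and point_wise_feed_forward_network. A minimal sketch of the feed-forward helper, assuming it follows the usual two-layer Transformer FFN:

# Sketch only; the book's own definition is not reproduced in this excerpt.
def point_wise_feed_forward_network(d_model, dff):
    # Two dense layers applied to every position independently:
    # expand to dff units with ReLU, then project back to d_model.
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model),
    ])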
class GPT1(tf.keras.Model):
    def __init__(self, n_layers, d_model, n_heads, diff, target_vocab_size,
                 max_seq_len, fine_tuning_class_num, drop_rate=0.1):
        super(GPT1, self).__init__()
        # Decoder-only Transformer stack
        self.decoder = Decoder(n_layers, d_model, n_heads, diff,
                               target_vocab_size, max_seq_len, drop_rate)
        # Pre-training head: next-token logits over the vocabulary
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
        # Fine-tuning head: task-specific classification logits
        self.fine_tuning_layer = tf.keras.layers.Dense(fine_tuning_class_num)

    def call(self, targets, training, look_ahead_mask):
        # Pre-training output: language-model logits for every position
        decode_out, att_weights = self.decoder(targets, training, look_ahead_mask)
        final_out = self.final_layer(decode_out)
        # Fine-tuning output: classification logits from the flattened LM logits
        fine_tuning_out = self.fine_tuning_layer(tf.keras.layers.Flatten()(final_out))
        return final_out, fine_tuning_out, att_weights
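The Decoder used above is the book's decoder stack (token embedding, positional encoding, and n_layers DecoderLayerGPT1 blocks) and is not reproduced in this excerpt. A rough sketch of what it is assumed to look like, using a learned position embedding for brevity where the book may use a sinusoidal positional encoding:

# Sketch only; the names and details here are assumptions, not the book's exact code.
class Decoder(tf.keras.layers.Layer):
    def __init__(self, n_layers, d_model, n_heads, dff, target_vocab_size,
                 max_seq_len, drop_rate=0.1):
        super(Decoder, self).__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        # Learned position embedding (simplification)
        self.pos_embedding = tf.keras.layers.Embedding(max_seq_len, d_model)
        self.dec_layers = [DecoderLayerGPT1(d_model, n_heads, dff, drop_rate)
                           for _ in range(n_layers)]
        self.dropout = tf.keras.layers.Dropout(drop_rate)

    def call(self, inputs, training, look_ahead_mask):
        seq_len = tf.shape(inputs)[1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        # Scale token embeddings and add position information
        x = self.embedding(inputs) * tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x = x + self.pos_embedding(positions)
        x = self.dropout(x, training=training)
        att_weights = {}
        for i, layer in enumerate(self.dec_layers):
            x, w = layer(x, training, look_ahead_mask)
            att_weights['decoder_layer{}'.format(i + 1)] = w
        return x, att_weights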
# Pre-training objective: language-model loss with padding positions masked out
def loss_fun(y_true, y_pred):
    # Positions whose target token id is 0 (padding) contribute no loss
    mask = tf.math.logical_not(tf.math.equal(y_true, 0))
    loss_ = loss_object(y_true, y_pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)

# Fine-tuning objective: classification loss
def loss_fun_fine_tuning(y_true, y_pred):
    loss_ = loss_object_fine_tuning(y_true, y_pred)
    return tf.reduce_mean(loss_)
# Pre-training loss object and metric
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')
train_loss = tf.keras.metrics.Mean(name='train_loss')
# Fine-tuning loss object and metrics
loss_object_fine_tuning = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True, reduction='none')
train_loss_fine_tuning = tf.keras.metrics.Mean(name='train_loss_fine_tuning')
train_accuracy_fine_tuning = tf.keras.metrics.CategoricalAccuracy(
    name='train_accuracy_fine_tuning')
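The code below also uses a pre-training accuracy metric, an optimizer, and several hyperparameters that the book defines elsewhere and this excerpt omits (as it does the tf.data pipeline train_dataset and the title tokenizer tokenizer_title). The following placeholder definitions are assumptions made only to keep the snippet self-contained; the book's actual values and learning-rate schedule may differ:

# Assumed definitions (placeholders), not the book's exact values
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

num_layers = 4                  # decoder layers
d_model = 256                   # hidden size
num_heads = 8                   # attention heads
dff = 1024                      # feed-forward inner size
max_seq_len = 64                # longest token sequence fed to the model
MAX_LENGTH = max_seq_len + 1    # padded title length before the final token is dropped
target_vocab_size = 8002        # placeholder for tokenizer_title.vocab_size + 2 (start/end tokens)
n_class = 3                     # placeholder for len(cat_name_all), the number of categories
dropout_rate = 0.1
EPOCHS = 20

# A plain Adam optimizer as a stand-in; the book may use a warmup learning-rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)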
# Create the GPT-1 model
gpt1 = GPT1(num_layers, d_model, num_heads, dff, target_vocab_size,
            max_seq_len, n_class, dropout_rate)

# Create the checkpoint manager
checkpoint_path = './checkpoint/train_cat'
ckpt = tf.train.Checkpoint(gpt1=gpt1, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)
# Restore the latest checkpoint if one exists
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
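create_mask, used in the training and prediction code below, is defined earlier in the book and not reproduced here. It is assumed to combine a causal (look-ahead) mask with a padding mask, in the style of the TensorFlow Transformer tutorial; a sketch:

# Sketch only; assumed to match the book's earlier definition.
def create_mask(seq):
    seq_len = tf.shape(seq)[1]
    # Upper-triangular mask: position i may not attend to positions after i
    look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
    # Padding mask: 1 where the token id is 0
    padding_mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
    padding_mask = padding_mask[:, tf.newaxis, tf.newaxis, :]
    # A position is masked if it is padding or lies in the future
    return tf.maximum(look_ahead_mask, padding_mask)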
# Training step
def train_step(targets):
    # Teacher forcing: input is the title without its last token,
    # the target is the title shifted left by one position
    tar_inp = targets['title'][:, :-1]
    tar_real = targets['title'][:, 1:]
    cat_name = targets['cat']
    combined_mask = create_mask(tar_inp)
    with tf.GradientTape() as tape:
        predictions, predict_fine_tuning, _ = gpt1(tar_inp, True, combined_mask)
        # Language-model loss (pre-training objective)
        loss = loss_fun(tar_real, predictions)
        # Classification loss (fine-tuning objective)
        loss_fine_tuning = loss_fun_fine_tuning(cat_name, predict_fine_tuning)
        # Joint objective: LM loss plus classification loss
        loss_combine = loss + loss_fine_tuning
    # Compute gradients
    gradients = tape.gradient(loss_combine, gpt1.trainable_variables)
    # Apply gradients (backpropagation)
    optimizer.apply_gradients(zip(gradients, gpt1.trainable_variables))
    # Update loss and accuracy metrics
    train_loss(loss)
    train_accuracy(tar_real, predictions)
    train_loss_fine_tuning(loss_fine_tuning)
    train_accuracy_fine_tuning(cat_name, predict_fine_tuning)
# Train for multiple epochs
for epoch in range(EPOCHS):
    train_loss.reset_states()
    train_accuracy.reset_states()
    train_loss_fine_tuning.reset_states()
    train_accuracy_fine_tuning.reset_states()
    for batch, all_inputs in enumerate(train_dataset):
        train_step(all_inputs)
        if batch % 1000 == 0:
            loss = train_loss.result()
            loss_fine_tuning = train_loss_fine_tuning.result()
            print('epoch {}, batch {}, loss:{:.4f}, loss_fine:{:.4f}, acc:{:.4f}'.format(
                epoch + 1, batch, loss, loss_fine_tuning,
                train_accuracy_fine_tuning.result()))
    # Save a checkpoint periodically (the log below shows ckpt-10 at epoch 20,
    # so a save every two epochs is assumed)
    if (epoch + 1) % 2 == 0:
        ckpt_save_path = ckpt_manager.save()
        print('epoch {}, save model at {}'.format(epoch + 1, ckpt_save_path))
Training output for the final epoch:

epoch 20, batch 0, loss:3.8556, loss_fine:0.5126, acc:0.8906
epoch 20, batch 1000, loss:3.6283, loss_fine:0.2713, acc:0.9259
epoch 20, batch 2000, loss:3.6260, loss_fine:0.2715, acc:0.9256
epoch 20, batch 3000, loss:3.6289, loss_fine:0.2736, acc:0.9248
epoch 20, batch 4000, loss:3.6265, loss_fine:0.2719, acc:0.9251
epoch 20, save model at ./checkpoint/train_cat/ckpt-10
# Prediction function
def predict_func(inp_sentence):
    start_token = [tokenizer_title.vocab_size]
    end_token = [tokenizer_title.vocab_size + 1]
    # Encode the title and add start/end tokens
    inp_sentence = start_token + tokenizer_title.encode(inp_sentence) + end_token
    # Pad to MAX_LENGTH, then drop the last token to match the training input length
    n = MAX_LENGTH - len(inp_sentence)
    inp_sentence = inp_sentence + [0 for k in range(n)]
    inp_sentence = inp_sentence[:-1]
    inp_sentence = tf.expand_dims(inp_sentence, 0)
    combined_mask = create_mask(inp_sentence)
    predictions, predict_fine_tuning, _ = gpt1(inp_sentence, False, combined_mask)
    # The classification head gives the predicted category id
    predicted_id = tf.cast(tf.argmax(predict_fine_tuning, axis=-1), tf.int32)
    return predicted_id
# Map predicted_id back to a category name
def get_cat_name(sentence, plot=''):
    result = predict_func(sentence)[0]
    result = cat_name_all[result]
    print('Input: {}'.format(sentence).replace(" ", ""))
    print('Predicted: {}'.format(result))

# Get the true category name from a one-hot label
def get_real_cat(label):
    index = label.index(1)
    return cat_name_all[index]
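These helpers assume objects built during the book's preprocessing step, not shown in this excerpt: tokenizer_title, a subword tokenizer over the jieba-segmented titles (exposing vocab_size and encode), and cat_name_all, the ordered list of category names aligned with the one-hot 'cat' labels. A hypothetical illustration of how get_real_cat reads a label:

# Hypothetical values for illustration; the real cat_name_all comes from the dataset
cat_name_all = ['文化', '科技', '美食']   # Culture, Technology, Food
print(get_real_cat([0, 1, 0]))            # -> 科技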
s = "文明的坐标|乌镇融合“古韵与现代”"
s = " ".join(jieba.cut(s))
get_cat_name(s)
print("==============================================================")
s = "2030年的未来科技预测:20项技术改变世界"
s = " ".join(jieba.cut(s))
get_cat_name(s)
print("==============================================================")
s = "糖醋汁怎样调?牢记黄金比例"54321",按照这个配方,一次成功"
s = " ".join(jieba.cut(s))
get_cat_name(s)
Prediction output:

Input: 文明的坐标|乌镇融合“古韵与现代”
Predicted: 文化 (Culture)
==============================================================
Input: 2030年的未来科技预测:20项技术改变世界
Predicted: 科技 (Technology)
==============================================================
Input: 糖醋汁怎样调?牢记黄金比例“54321”,按照这个配方,一次成功
Predicted: 美食 (Food)
