Print the model structure
# Render the model architecture to an image (requires pydot and graphviz).
tf.keras.utils.plot_model(bert_encoder, show_shapes=True, dpi=48)
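bert_encoder above stands in for any tf.keras.Model. A minimal self-contained sketch on a tiny functional model (assumes pydot and Graphviz are installed; the layer sizes are arbitrary):
import tensorflow as tf
# Sketch: build a trivial functional model and plot it.
inputs = tf.keras.Input(shape=(16,))
outputs = tf.keras.layers.Dense(4)(inputs)
model = tf.keras.Model(inputs, outputs)
tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True, dpi=48)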
TFRecord read/write
Fixed-length sequences
import numpy as np
import tensorflow as tf

def _int64_feature(value):
    # value must be a numpy array.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value.flatten()))

# Write an array to a TFRecord file.
a = np.random.randint(1000, size=(100, 3))
writer = tf.io.TFRecordWriter('file')
for i in range(a.shape[0]):  # i = 0 ~ 99
    x_train = np.array(a[i])
    feature = {'i': _int64_feature(np.array([i])),
               'data': _int64_feature(x_train)}
    # Create an Example protocol buffer.
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    # Serialize to a string and write it to the file.
    writer.write(example.SerializeToString())
writer.close()
filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_proto):
keys_to_features = {'i':tf.io.FixedLenFeature([],tf.int64),
'data':tf.io.FixedLenFeature([3],tf.int64)}
parsed_features = tf.io.parse_single_example(example_proto, keys_to_features)
return parsed_features
for data in dataset.take(2):
print(_parse_function(data))
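In a real input pipeline the parser is usually applied with Dataset.map and then batched; since every element has a fixed shape, a plain batch() works. A minimal sketch (batch size 32 is an arbitrary example value):
# Sketch: map the parser over the dataset, then batch fixed-shape elements.
dataset = tf.data.TFRecordDataset(["file"]).map(_parse_function).batch(32)
for batch in dataset.take(1):
    print(batch['i'].shape, batch['data'].shape)  # (32,) (32, 3)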
Variable-length sequences
def _int64_feature(value):
    # value must be a numpy array.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value.flatten()))

# Write an array to a TFRecord file.
# a contains lists of varying length, so it has to be an object array.
a = np.array([[0, 54, 91, 153, 177],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)
writer = tf.io.TFRecordWriter('file')
for i in range(a.shape[0]):  # i = 0 ~ 4
    x_train = np.array(a[i])
    feature = {'i': _int64_feature(np.array([i])),
               'data': _int64_feature(x_train)}
    # Create an Example protocol buffer.
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    # Serialize to a string and write it to the file.
    writer.write(example.SerializeToString())
writer.close()
def _parse_function(example_proto):
    # VarLenFeature parses to SparseTensors; densify them for downstream use.
    keys_to_features = {'i': tf.io.VarLenFeature(tf.int64),
                        'data': tf.io.VarLenFeature(tf.int64)}
    parsed_features = tf.io.parse_single_example(example_proto, keys_to_features)
    return (tf.sparse.to_dense(parsed_features['i']),
            tf.sparse.to_dense(parsed_features['data']))

filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(_parse_function)
for data in dataset.take(5):
    print(data)
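Variable-length elements cannot go through a plain batch(); padded_batch pads each batch up to its longest sequence. A minimal sketch (in TF 2.2+ the padded shapes are inferred automatically; batch size 2 is arbitrary):
# Sketch: batch ragged sequences with right-padding (zeros by default).
batched = dataset.padded_batch(2)
for i_batch, data_batch in batched.take(1):
    print(data_batch)  # shorter rows are zero-padded to the batch max length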
Loss computation
NCE
cosine_loss = tf.keras.losses.CosineSimilarity(axis=2, reduction=tf.keras.losses.Reduction.NONE)
loss_func = tf.keras.losses.CategoricalCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

def compute_loss(y_pred, tau=0.05):
    # y_pred stacks 2N embeddings; row i and row i+N form a positive pair.
    batch_size = tf.shape(y_pred)[0] // 2
    label = tf.concat([tf.concat([tf.zeros((batch_size, batch_size)), tf.eye(batch_size)], axis=0),
                       tf.eye(batch_size * 2, batch_size)], axis=1)
    # Pairwise cosine similarities; the CosineSimilarity loss returns the
    # negated similarity, so flip the sign back.
    similarities = -cosine_loss(tf.expand_dims(y_pred, axis=1), tf.expand_dims(y_pred, axis=0))
    # Mask the diagonal so an embedding is never matched with itself.
    similarities = similarities - tf.eye(batch_size * 2, dtype=similarities.dtype) * 100000
    # Temperature scaling (tau) before the softmax cross-entropy.
    similarities = similarities / tau
    loss = loss_func(label, similarities)
    return tf.reduce_mean(loss)
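A quick smoke test under these assumptions (8 random 16-dim embeddings, i.e. N = 4 pairs; the numbers are arbitrary):
# Sketch: sanity-check the contrastive loss on random embeddings.
y_pred = tf.random.normal((8, 16))  # rows 0-3 pair with rows 4-7
print(compute_loss(y_pred).numpy())  # a positive scalar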