Plotting the Training and Validation Loss Curves for the Transformer Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from tensorflow.keras.metrics import Mean
from tensorflow import data, train, math, reduce_sum, cast, equal, argmax, float32, GradientTape, function
from keras.losses import sparse_categorical_crossentropy
from model import TransformerModel
from prepare_dataset import PrepareDataset
from time import time
from pickle import dump
# Define the model parameters
h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_model = 512  # Dimensionality of the model layers' outputs
d_ff = 2048  # Dimensionality of the inner fully connected layer
n = 6  # Number of layers in the encoder stack

# Define the training parameters
epochs = 20
batch_size = 64
beta_1 = 0.9
beta_2 = 0.98
epsilon = 1e-9
dropout_rate = 0.1
# Implementing a learning rate scheduler
class LRScheduler(LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000, **kwargs):
        super(LRScheduler, self).__init__(**kwargs)
        self.d_model = cast(d_model, float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step_num):
        # Linearly increasing the learning rate for the first warmup_steps, and decreasing it thereafter
        step_num = cast(step_num, float32)  # the optimizer passes an integer step counter
        arg1 = step_num ** -0.5
        arg2 = step_num * (self.warmup_steps ** -1.5)
        return (self.d_model ** -0.5) * math.minimum(arg1, arg2)

# Instantiate an Adam optimizer
optimizer = Adam(LRScheduler(d_model), beta_1, beta_2, epsilon)
# Prepare the training dataset
dataset = PrepareDataset()
trainX, trainY, valX, valY, train_orig, val_orig, enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size = dataset('english-german.pkl')
print(enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size)
# Prepare the training dataset batches
train_dataset = data.Dataset.from_tensor_slices((trainX, trainY))
train_dataset = train_dataset.batch(batch_size)
# Prepare the validation dataset batches
val_dataset = data.Dataset.from_tensor_slices((valX, valY))
val_dataset = val_dataset.batch(batch_size)
# Create the model
training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)
# Defining the loss function
def loss_fcn(target, prediction):
    # Create a mask so that the zero padding values are not included in the computation of loss
    padding_mask = math.logical_not(equal(target, 0))
    padding_mask = cast(padding_mask, float32)

    # Compute a sparse categorical cross-entropy loss on the unmasked values
    loss = sparse_categorical_crossentropy(target, prediction, from_logits=True) * padding_mask

    # Compute the mean loss over the unmasked values
    return reduce_sum(loss) / reduce_sum(padding_mask)
# Defining the accuracy function
def accuracy_fcn(target, prediction):
    # Create a mask so that the zero padding values are not included in the computation of accuracy
    padding_mask = math.logical_not(equal(target, 0))

    # Find equal prediction and target values, and apply the padding mask
    accuracy = equal(target, argmax(prediction, axis=2))
    accuracy = math.logical_and(padding_mask, accuracy)

    # Cast the True/False values to 32-bit-precision floating-point numbers
    padding_mask = cast(padding_mask, float32)
    accuracy = cast(accuracy, float32)

    # Compute the mean accuracy over the unmasked values
    return reduce_sum(accuracy) / reduce_sum(padding_mask)
# Include metrics monitoring
train_loss = Mean(name='train_loss')
train_accuracy = Mean(name='train_accuracy')
val_loss = Mean(name='val_loss')

# Create a checkpoint object and manager to manage multiple checkpoints
ckpt = train.Checkpoint(model=training_model, optimizer=optimizer)
ckpt_manager = train.CheckpointManager(ckpt, "./checkpoints", max_to_keep=None)
# Initialise dictionaries to store the training and validation losses
train_loss_dict = {}
val_loss_dict = {}
# Speeding up the training process by compiling the step into a static graph
@function
def train_step(encoder_input, decoder_input, decoder_output):
    with GradientTape() as tape:
        # Run the forward pass of the model to generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=True)

        # Compute the training loss
        loss = loss_fcn(decoder_output, prediction)

        # Compute the training accuracy
        accuracy = accuracy_fcn(decoder_output, prediction)

    # Retrieve gradients of the trainable variables with respect to the training loss
    gradients = tape.gradient(loss, training_model.trainable_weights)

    # Update the values of the trainable variables by gradient descent
    optimizer.apply_gradients(zip(gradients, training_model.trainable_weights))

    train_loss(loss)
    train_accuracy(accuracy)
# Start timing the full training run
start_time = time()

for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()
    val_loss.reset_states()

    print("\nStart of epoch %d" % (epoch + 1))
    # Iterate over the dataset batches
    for step, (train_batchX, train_batchY) in enumerate(train_dataset):
        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = train_batchX[:, 1:]
        decoder_input = train_batchY[:, :-1]
        decoder_output = train_batchY[:, 1:]

        train_step(encoder_input, decoder_input, decoder_output)

        if step % 50 == 0:
            print(f'Epoch {epoch + 1} Step {step} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')
    # Run a validation step after every epoch of training
    for val_batchX, val_batchY in val_dataset:
        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = val_batchX[:, 1:]
        decoder_input = val_batchY[:, :-1]
        decoder_output = val_batchY[:, 1:]

        # Generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=False)

        # Compute the validation loss
        loss = loss_fcn(decoder_output, prediction)
        val_loss(loss)
    # Print the epoch number and the loss and accuracy values at the end of every epoch
    print("Epoch %d: Training Loss %.4f, Training Accuracy %.4f, Validation Loss %.4f" % (epoch + 1, train_loss.result(), train_accuracy.result(), val_loss.result()))

    # Save a checkpoint after every epoch
    if (epoch + 1) % 1 == 0:
        save_path = ckpt_manager.save()
        print("Saved checkpoint at epoch %d" % (epoch + 1))

        # Save the trained model weights
        training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")

        # Record the epoch's loss values as plain floats so that they pickle cleanly
        train_loss_dict[epoch] = float(train_loss.result())
        val_loss_dict[epoch] = float(val_loss.result())
# Save the training loss values
with open('./train_loss.pkl', 'wb') as file:
    dump(train_loss_dict, file)

# Save the validation loss values
with open('./val_loss.pkl', 'wb') as file:
    dump(val_loss_dict, file)

print("Total time taken: %.2fs" % (time() - start_time))
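
With the loss values pickled to disk, the curves themselves can be drawn in a short standalone script. The following is a minimal sketch of such a script, assuming matplotlib is installed and that train_loss.pkl and val_loss.pkl were produced by the training run above; the variable names are otherwise illustrative.

from pickle import load
import matplotlib.pyplot as plt

# Load the training and validation loss dictionaries saved during training
with open('./train_loss.pkl', 'rb') as file:
    train_loss_dict = load(file)

with open('./val_loss.pkl', 'rb') as file:
    val_loss_dict = load(file)

# Retrieve each dictionary's values as plain floats
train_values = [float(v) for v in train_loss_dict.values()]
val_values = [float(v) for v in val_loss_dict.values()]

# Generate the epoch numbers for the x-axis
epoch_numbers = range(1, len(train_values) + 1)

# Plot the training and validation loss curves
plt.plot(epoch_numbers, train_values, label='Training Loss')
plt.plot(epoch_numbers, val_values, label='Validation Loss')

# Label the plot and display it
plt.title('Training and Validation Loss Curves')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()

Because the dictionaries are keyed by epoch, the same script works unchanged however many epochs were run.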