Plotting the Training and Validation Loss Curves for the Transformer Model


from pickle import dump
from time import time

from keras.losses import sparse_categorical_crossentropy
from tensorflow import (
    data,
    train,
    math,
    reduce_sum,
    cast,
    equal,
    argmax,
    float32,
    GradientTape,
    function,
)
from tensorflow.keras.metrics import Mean
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule

from model import TransformerModel
from prepare_dataset import PrepareDataset



# Define the model parameters
h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_model = 512  # Dimensionality of model layers' outputs
d_ff = 2048  # Dimensionality of the inner fully connected layer
n = 6  # Number of layers in the encoder stack

# Define the training parameters
epochs = 20
batch_size = 64
beta_1 = 0.9
beta_2 = 0.98
# Small constant added to Adam's denominator for numerical stability;
# it must be tiny (1e-9), not 1e9, or every update is scaled to ~0.
epsilon = 1e-9
dropout_rate = 0.1



# Implementing a learning rate scheduler
class LRScheduler(LearningRateSchedule):
    """Learning-rate schedule with a linear warm-up followed by a decay.

    The rate returned for a given step is

        d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

    so it grows linearly with the step while ``step < warmup_steps`` and
    decays proportionally to the inverse square root of the step afterwards.

    Args:
        d_model: Model dimensionality used to scale the rate.
        warmup_steps: Number of steps over which the rate increases.
        **kwargs: Forwarded to the ``LearningRateSchedule`` constructor.
    """

    def __init__(self, d_model, warmup_steps=4000, **kwargs):
        super(LRScheduler, self).__init__(**kwargs)

        # Stored as float32 so that it can be raised to a fractional power
        self.d_model = cast(d_model, float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step_num):
        """Return the learning rate for training step ``step_num``."""
        # The step counter may arrive as an integer tensor; negative
        # fractional powers need a floating-point operand.
        step_num = cast(step_num, float32)

        # Linearly increasing the learning rate for the first warmup_steps,
        # and decreasing it thereafter
        arg1 = step_num ** -0.5
        arg2 = step_num * (self.warmup_steps ** -1.5)

        return (self.d_model ** -0.5) * math.minimum(arg1, arg2)



# Instantiate an Adam optimizer driven by the warm-up learning rate schedule
optimizer = Adam(LRScheduler(d_model), beta_1, beta_2, epsilon)

# Prepare the training dataset
dataset = PrepareDataset()
(
    trainX,
    trainY,
    valX,
    valY,
    train_orig,
    val_orig,
    enc_seq_length,
    dec_seq_length,
    enc_vocab_size,
    dec_vocab_size,
) = dataset('english-german.pkl')

print(enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size)

# Prepare the training dataset batches
train_dataset = data.Dataset.from_tensor_slices((trainX, trainY))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset batches
val_dataset = data.Dataset.from_tensor_slices((valX, valY))
val_dataset = val_dataset.batch(batch_size)

# Create model
training_model = TransformerModel(
    enc_vocab_size,
    dec_vocab_size,
    enc_seq_length,
    dec_seq_length,
    h,
    d_k,
    d_v,
    d_model,
    d_ff,
    n,
    dropout_rate,
)



# Defining the loss function
def loss_fcn(goal, prediction):
    """Return the mean sparse categorical cross-entropy over non-padding tokens.

    Args:
        goal: Target token ids; positions equal to 0 are treated as padding
            and excluded from the loss.
        prediction: Model output for every position, interpreted as
            unnormalised logits (``from_logits=True``).

    Returns:
        The summed per-position loss divided by the number of non-padding
        positions.
    """
    # Create mask so that the zero padding values are not included in the
    # computation of loss
    padding_mask = math.logical_not(equal(goal, 0))
    padding_mask = cast(padding_mask, float32)

    # Compute a sparse categorical cross-entropy loss on the unmasked values
    loss = sparse_categorical_crossentropy(goal, prediction, from_logits=True) * padding_mask

    # Compute the mean loss over the unmasked values
    return reduce_sum(loss) / reduce_sum(padding_mask)



# Defining the accuracy function
def accuracy_fcn(goal, prediction):
    """Return the fraction of non-padding tokens that were predicted correctly.

    Args:
        goal: Target token ids; positions equal to 0 are treated as padding
            and excluded from the accuracy.
        prediction: Model output whose axis 2 holds the per-token scores;
            the predicted token is the argmax along that axis.

    Returns:
        The number of correctly predicted non-padding positions divided by
        the number of non-padding positions.
    """
    # Create mask so that the zero padding values are not included in the
    # computation of accuracy
    padding_mask = math.logical_not(equal(goal, 0))

    # Find equal prediction and target values, and apply the padding mask
    accuracy = equal(goal, argmax(prediction, axis=2))
    accuracy = math.logical_and(padding_mask, accuracy)

    # Cast the True/False values to 32-bit-precision floating-point numbers
    padding_mask = cast(padding_mask, float32)
    accuracy = cast(accuracy, float32)

    # Compute the mean accuracy over the unmasked values
    return reduce_sum(accuracy) / reduce_sum(padding_mask)



# Include metrics monitoring: running means of the per-batch values
train_loss = Mean(name='train_loss')
train_accuracy = Mean(name='train_accuracy')
val_loss = Mean(name='val_loss')

# Create a checkpoint object and manager to manage multiple checkpoints
ckpt = train.Checkpoint(model=training_model, optimizer=optimizer)
# max_to_keep=None keeps every checkpoint instead of pruning old ones
ckpt_manager = train.CheckpointManager(ckpt, "./checkpoints", max_to_keep=None)

# Initialise dictionaries to store the training and validation losses
train_loss_dict = {}
val_loss_dict = {}


# Speeding up the training process by compiling the step into a graph
@function
def train_step(encoder_input, decoder_input, decoder_output):
    """Run one optimisation step on a single batch and record its metrics.

    Args:
        encoder_input: Batch of source sequences fed to the encoder.
        decoder_input: Batch of target sequences fed to the decoder.
        decoder_output: Batch of target sequences the decoder should predict.

    Side effects:
        Updates the trainable weights of ``training_model`` through
        ``optimizer`` and adds the batch loss and accuracy to the
        ``train_loss`` and ``train_accuracy`` running means.
    """
    with GradientTape() as tape:

        # Run the forward pass of the model to generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=True)

        # Compute the training loss
        loss = loss_fcn(decoder_output, prediction)

        # Compute the training accuracy
        accuracy = accuracy_fcn(decoder_output, prediction)

    # Retrieve gradients of the trainable variables with respect to the
    # training loss
    gradients = tape.gradient(loss, training_model.trainable_weights)

    # Update the values of the trainable variables by gradient descent
    optimizer.apply_gradients(zip(gradients, training_model.trainable_weights))

    # Accumulate the batch values into the running epoch metrics
    train_loss(loss)
    train_accuracy(accuracy)






# Record the start once, so that the final report covers the whole run
# rather than only the last epoch
start_time = time()

for epoch in range(epochs):

    # Start every epoch with empty running means so that the reported
    # values describe this epoch only
    train_loss.reset_state()
    train_accuracy.reset_state()
    val_loss.reset_state()

    print("\nStart of epoch %d" % (epoch + 1))

    # Iterate over the dataset batches
    for step, (train_batchX, train_batchY) in enumerate(train_dataset):

        # Define the encoder and decoder inputs, and the decoder output.
        # The decoder input drops the last token and the decoder output
        # drops the first one, so the two are offset by one position.
        encoder_input = train_batchX[:, 1:]
        decoder_input = train_batchY[:, :-1]
        decoder_output = train_batchY[:, 1:]

        train_step(encoder_input, decoder_input, decoder_output)

        if step % 50 == 0:
            print(f'Epoch {epoch + 1} Step {step} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

    # Run a validation step after every epoch of training
    for val_batchX, val_batchY in val_dataset:

        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = val_batchX[:, 1:]
        decoder_input = val_batchY[:, :-1]
        decoder_output = val_batchY[:, 1:]

        # Generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=False)

        # Compute the validation loss and add it to the running mean
        loss = loss_fcn(decoder_output, prediction)
        val_loss(loss)

    # Print epoch number and accuracy and loss values at the end of every epoch
    print("Epoch %d: Training Loss %.4f, Training Accuracy %.4f, Validation Loss %.4f" % (epoch + 1, train_loss.result(), train_accuracy.result(), val_loss.result()))

    # Save a checkpoint after every epoch
    if (epoch + 1) % 1 == 0:

        save_path = ckpt_manager.save()
        print("Saved checkpoint at epoch %d" % (epoch + 1))

        # Save the trained model weights
        training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")

        train_loss_dict[epoch] = train_loss.result()
        val_loss_dict[epoch] = val_loss.result()

# Save the training loss values
with open('./train_loss.pkl', 'wb') as file:
    dump(train_loss_dict, file)

# Save the validation loss values
with open('./val_loss.pkl', 'wb') as file:
    dump(val_loss_dict, file)

print("Total time taken: %.2fs" % (time() - start_time))


Leave a Reply

Your email address will not be published. Required fields are marked *