I am working on a multi-class classification problem for sports prediction, and I want to compare model performance between normalized and non-normalized data. The two datasets are X_train and X_train_normalized.
I would like to know whether my implementation below for comparing them is correct, or whether it could be improved in any way.
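For context, here is a minimal sketch of how X_train_normalized and X_test_normalized could be produced; I am assuming scikit-learn's StandardScaler here, fit on the training split only so no test-set statistics leak into training:

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then reuse the fitted
# scaler on the test split to avoid leaking test statistics
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)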
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

# One-hot encode y_train and y_test for the 3-class softmax output
y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)
# Model for the raw (non-normalized) data: two hidden layers with dropout
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(100, input_dim=127, activation='relu'))  # 127 input features
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(3, activation='softmax'))
model.summary()
# Model for normalized data
model_normalized = tf.keras.models.Sequential()
model_normalized.add(tf.keras.layers.Dense(100, input_dim=127, activation='relu'))
model_normalized.add(tf.keras.layers.Dropout(0.3))
model_normalized.add(tf.keras.layers.Dense(64, activation='relu'))
model_normalized.add(tf.keras.layers.Dropout(0.3))
model_normalized.add(tf.keras.layers.Dense(3, activation='softmax'))
model_normalized.summary()
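Since the two architectures are identical, the duplication could also be avoided with a small builder function (a sketch; build_model is just an illustrative helper name):

def build_model(input_dim=127, num_classes=3):
    # Same architecture as above: two ReLU hidden layers with dropout,
    # softmax output for the 3 classes
    m = tf.keras.models.Sequential()
    m.add(tf.keras.layers.Dense(100, input_dim=input_dim, activation='relu'))
    m.add(tf.keras.layers.Dropout(0.3))
    m.add(tf.keras.layers.Dense(64, activation='relu'))
    m.add(tf.keras.layers.Dropout(0.3))
    m.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return m

model = build_model()
model_normalized = build_model()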
# Compile both models with identical settings so the comparison is fair
model.compile(loss='categorical_crossentropy', metrics=['accuracy', 'Precision', 'Recall'], optimizer='adam')
model_normalized.compile(loss='categorical_crossentropy', metrics=['accuracy', 'Precision', 'Recall'], optimizer='adam')
# Define one EarlyStopping callback per model so their states stay independent across fits
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
early_stopping_normalized = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.1, callbacks=[early_stopping])
history_normalized = model_normalized.fit(X_train_normalized, y_train, epochs=100, batch_size=64, validation_split=0.1, callbacks=[early_stopping_normalized])
# Plot accuracy history for the raw data
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy (raw data)')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
# Plot loss history for the raw data
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss (raw data)')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
# Plot accuracy history for the normalized data
plt.plot(history_normalized.history['accuracy'])
plt.plot(history_normalized.history['val_accuracy'])
plt.title('model accuracy (normalized data)')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
# Plot loss history for the normalized data
plt.plot(history_normalized.history['loss'])
plt.plot(history_normalized.history['val_loss'])
plt.title('model loss (normalized data)')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
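To compare the two runs directly, the validation curves can also be overlaid on shared axes (a sketch using the two history objects above; the lines may have different lengths because early stopping can end the runs at different epochs):

# Overlay validation accuracy and loss for a direct comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(history.history['val_accuracy'], label='raw')
ax1.plot(history_normalized.history['val_accuracy'], label='normalized')
ax1.set_title('validation accuracy')
ax1.set_xlabel('epoch')
ax1.legend()
ax2.plot(history.history['val_loss'], label='raw')
ax2.plot(history_normalized.history['val_loss'], label='normalized')
ax2.set_title('validation loss')
ax2.set_xlabel('epoch')
ax2.legend()
plt.show()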
After evaluating both models using:
# Evaluate the model on the test data
score = model.evaluate(X_test, y_test, verbose=2)
score_normalized = model_normalized.evaluate(X_test_normalized, y_test, verbose=2)
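Since evaluate() returns a plain list of numbers, labelling the values with metrics_names makes the two models easier to compare (a sketch):

# Print each metric side by side for the two models
for name, raw, norm in zip(model.metrics_names, score, score_normalized):
    print(f'{name}: raw={raw:.4f}  normalized={norm:.4f}')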
The non-normalized model gets 100% test accuracy, and the normalized model gets 82%.
As you can see in the implementation above, I have applied regularization techniques such as dropout and made sure there are no duplicate target features in my dataset. However, is it normal to get 100% accuracy on the test set as well?
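For reference, here is a sketch of the kind of train/test overlap check I mean, assuming both splits are NumPy arrays (an exact byte-level match only catches verbatim duplicate rows):

import numpy as np

# Count test rows that also appear verbatim in the training data;
# any overlap between the splits could explain a perfect test score
train_rows = {row.tobytes() for row in np.asarray(X_train)}
overlap = sum(row.tobytes() in train_rows for row in np.asarray(X_test))
print(f'{overlap} of {len(X_test)} test rows also appear in X_train')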