Simple Neural Network
29/10/2019

In this short tutorial, I show how to implement a simple neural network that classifies hand-written digits. The implementation is based on the Keras framework and uses the openly available MNIST dataset of hand-written digits.
The Jupyter Notebook can be downloaded from here. You can also download the PDF slides of a lesson on this topic that I taught at the University of St. Gallen in Switzerland.
Load Modules
import numpy as np
import pandas as pd
np.random.seed(2019)
# %pylab imports numpy and matplotlib, making plt available for the inline plots below
%pylab inline
# For retina displays:
# %config InlineBackend.figure_format = 'retina'
This notebook requires the Keras module. You can install it with the shell command pip install keras.
from keras.models import Sequential
from keras.layers import Activation
from keras.optimizers import SGD
from keras.layers import Dense
from keras.utils import to_categorical
from keras.datasets import mnist
from keras.metrics import categorical_accuracy
from keras.callbacks import EarlyStopping
The seaborn module is not required; it is only used for nicer plots. You can install it with pip install seaborn.
import seaborn as sns
sns.set(style='white', font_scale=1.3, rc={
'lines.linewidth': 3,
'axes.grid': True, 'grid.linestyle': ':',
'axes.spines.left': True,
'axes.spines.bottom': True,
'axes.spines.right': True,
'axes.spines.top': True,
'axes.edgecolor': '.5',
})
Load MNIST Dataset
(x_trai, y_trai), (x_test, y_test) = mnist.load_data()
L_trai = len(x_trai)
L_test = len(x_test)
# Reshape the images into vectors of dim 28*28=784
X_trai = x_trai.reshape((L_trai, -1))
X_test = x_test.reshape((L_test, -1))
# The labels need to be one-hot encoded into indicator vectors (see the check below)
Y_test = to_categorical(y_test)
Y_trai = to_categorical(y_trai)
print("We have %d samples in the training set"%L_trai)
print("We have %d samples in the test set"%L_test)
We have 60000 samples in the training set
We have 10000 samples in the test set
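For intuition, to_categorical performs one-hot encoding: each digit label becomes a vector of length 10 with a single 1 at the position of the digit. A quick check (the label 3 is just an example):
print(to_categorical([3], num_classes=10))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]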
Plot some random digits
NX, NY = 2, 6
fig, ax = plt.subplots(NX,NY,figsize=(1.5*NY,1.5*NX))
for i in range(NX):
    for j in range(NY):
        img = x_trai[np.random.randint(L_trai)]
        ax[i][j].imshow(img, cmap='Greys')
        ax[i][j].set_xticklabels([]); ax[i][j].set_xticks([])
        ax[i][j].set_yticklabels([]); ax[i][j].set_yticks([])
plt.tight_layout()
#plt.savefig("digits_examples.pdf")

Shallow Neural Network
Only one hidden layer
# Define the shallow neural network
# Stop training if the validation loss does not improve for 5 consecutive epochs
stop_rule = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
model = Sequential()
model.add(Dense(300, input_dim=784, activation="relu"))
model.add(Dense(10)) # 10 is the output dimension
model.add(Activation("softmax")) # Provides a probability for each digit 0,...,9 (see the illustration below)
model.compile(loss="categorical_crossentropy", optimizer=SGD(), metrics=[categorical_accuracy] )
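As an aside, softmax turns the 10 raw outputs (logits) of the last layer into positive probabilities that sum to one. A minimal numpy illustration, with made-up logits rather than values taken from the model:
logits = np.array([2.0, 1.0, 0.1])
probs = np.exp(logits) / np.exp(logits).sum()  # softmax
print(probs.round(3), probs.sum())             # ~[0.659 0.242 0.099] with a sum of 1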
history = model.fit(
X_trai, Y_trai, validation_split=0.2,
epochs=30, batch_size=512, verbose=1,
callbacks=[stop_rule],
)
df = pd.DataFrame(history.history).rename({
'loss':'Training Loss', 'val_loss':'Validation Loss'
},axis=1)
fig, ax1 = plt.subplots(1,1, figsize=(10,5))
df.iloc[1:][['Training Loss', 'Validation Loss']].plot(style='-o', ax=ax1, logy=True);

Out-of-sample predictions
preds = model.predict(X_test)
# The predicted class is the one with the highest probability (see the single-image example below)
preds = np.argmax(preds,axis=1)
reals = np.argmax(Y_test,axis=1)
accuracy_in = df.iloc[-1]['categorical_accuracy']
accuracy_out = np.mean(preds==reals)
print("In-sample accuracy: {0:0.2f}% ".format(accuracy_in*100))
print("Out-of-sample accuracy: {0:0.2f}%".format(accuracy_out*100))
In-sample accuracy: 88.45%
Out-of-sample accuracy: 86.96%
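To make the argmax step concrete: for a single test image the network returns ten probabilities, and the predicted digit is the index of the largest one. For the first test image, for example:
p = model.predict(X_test[:1])[0]  # vector of 10 probabilities for the first test image
print(p.round(3), "-> predicted digit:", p.argmax())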
NX, NY = 2, 6
fig, ax = plt.subplots(NX,NY,figsize=(1.5*NY,2*NX))
for i in range(NX):
    for j in range(NY):
        s = np.random.randint(len(preds))
        img = x_test[s]
        colors = 'Greens' if reals[s]==preds[s] else 'Reds'
        ax[i][j].imshow(img, cmap=colors)
        ax[i][j].set_title("Number: %d \nPredicted: %d"%(reals[s], preds[s]))
        ax[i][j].set_xticklabels([]); ax[i][j].set_xticks([])
        ax[i][j].set_yticklabels([]); ax[i][j].set_yticks([])
plt.tight_layout()

Go Deeper
4 hidden layers
stop_rule = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
# Define the neural network
deep_model = Sequential()
deep_model.add(Dense(300, input_dim=784, activation="relu"))
deep_model.add(Dense(150, activation="relu"))
deep_model.add(Dense(100, activation="relu"))
deep_model.add(Dense(50 , activation="relu"))
deep_model.add(Dense(10)) # 10 is the output dimension
deep_model.add(Activation("softmax")) # Provides a probability for each digit 0,...,9
deep_model.compile(loss="categorical_crossentropy", optimizer=SGD(), metrics=[categorical_accuracy] )
history = deep_model.fit(
X_trai, Y_trai, validation_split=0.2,
epochs=30, batch_size=512, verbose=1,
callbacks=[stop_rule],
)
df = pd.DataFrame(history.history).rename({
'loss':'Training Loss', 'val_loss':'Validation Loss'
},axis=1)
df.index = df.index + 1
fig, ax1 = plt.subplots(1,1, figsize=(10,5))
df[['Training Loss', 'Validation Loss']].plot(style='-o', ax=ax1, logy=True);

Out-of-sample predictions
preds = deep_model.predict(X_test)
# The predicted class is the one with the highest probability
preds = np.argmax(preds,axis=1)
reals = np.argmax(Y_test,axis=1)
accuracy_in = df.iloc[-1]['categorical_accuracy']
accuracy_out = np.mean(preds==reals)
print("In-sample accuracy: {0:0.2f}% ".format(accuracy_in*100))
print("Out-of-sample accuracy: {0:0.2f}%".format(accuracy_out*100))
In-sample accuracy: 99.58%
Out-of-sample accuracy: 94.51%
NX, NY = 2, 6
fig, ax = plt.subplots(NX,NY,figsize=(1.5*NY,2*NX))
for i in range(NX):
    for j in range(NY):
        s = np.random.randint(len(preds))
        img = x_test[s]
        colors = 'Greens' if reals[s]==preds[s] else 'Reds'
        ax[i][j].imshow(img, cmap=colors)
        ax[i][j].set_title("Number: %d \nPredicted: %d"%(reals[s], preds[s]))
        ax[i][j].set_xticklabels([]); ax[i][j].set_xticks([])
        ax[i][j].set_yticklabels([]); ax[i][j].set_yticks([])
plt.tight_layout()

Author: Andrea Barbon