#!pip install tensorview
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, losses, metrics, models
import seaborn as sns
import pandas as pd
import sklearn.metrics as skMetrics
import tensorview as tv
import scipy.spatial as sp, scipy.cluster.hierarchy as hc
Data - CIFAR-10¶
CIFAR-10 is a small color image set with 10 classes mixing animals and transportation vehicles. Images are 32x32 pixels with 3 channels (RGB). There are 60000 images in total, 50000 allocated to training and 10000 to test. Labels are provided.
(xTrain, yTrain), (xTest, yTest) = datasets.cifar10.load_data()
# Normalize pixel values to be between 0 and 1
xTrain = xTrain / 255.
xTest = xTest / 255.0
classNames = np.array(['airplane', 'automobile', 'bird', 'cat', 'deer',
                       'dog', 'frog', 'horse', 'ship', 'truck'])
xTrain.shape, xTest.shape
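The shapes should match the description above; a quick sanity check, assuming the standard Keras split:
# Sanity check: 50000 training and 10000 test images of 32x32 pixels, 3 channels, labels as column vectors
assert xTrain.shape == (50000, 32, 32, 3)
assert xTest.shape == (10000, 32, 32, 3)
assert yTrain.shape == (50000, 1) and yTest.shape == (10000, 1)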
fig, axes = plt.subplots(5, 12, figsize=(16,8))
for i, ax in enumerate(axes.ravel()):
    ax.imshow(xTrain[i])
    ax.set_xlabel(classNames[yTrain[i][0]])
plt.setp(axes, xticks=[], yticks=[], frame_on=False);
Helpers¶
def plotClusteredHeatMap(X, classes, title=None, fmt='.2g', xlabel=None, ylabel=None,
                         vmin=None, vmax=None, cbar=True):
    """ Use clustermap to plot a heatmap with clustered rows and columns """
    g = sns.clustermap(X, annot=True,
                       metric='euclidean', method='single',  # z_score=1,
                       fmt=fmt, vmin=vmin, vmax=vmax, cbar=cbar, cmap=plt.cm.bwr,
                       xticklabels=classes, yticklabels=classes)
    ax = g.ax_heatmap
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)
def plotConfusionMatrix(yTrue, yEst, classes, title=None, fmt='.2g', ax=None):
    """ Compute and plot the confusion matrix """
    cmatrix = skMetrics.confusion_matrix(yTrue, yEst)
    plotClusteredHeatMap(cmatrix, classes, title, fmt,
                         xlabel='Estimations', ylabel='True values')
    return cmatrix
The model¶
We will use the model from the Tensorflow tutorial [1], adding some regularization through a dropout layer. See the MNIST model notebook for more details on regularization and hyper-parameter tuning (HTML / Jupyter).
The model below is very similar to the LeNet5 model described in that notebook. There are however 3 convolution layers instead of 2 in LeNet5, these layers are wider (6 and 16 units in LeNet5), and there are 2 dense (fully connected) layers instead of 3.
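For comparison, here is a minimal sketch of a LeNet5-like network with the layer sizes mentioned above; kernel sizes, dense widths and activations are assumptions and this reference model is not trained here:
# LeNet5-like reference (sketch only): 2 convolution layers of 6 and 16 filters,
# followed by 3 dense layers. Kernel sizes, dense widths and activations are assumptions.
lenet5Like = models.Sequential([
    layers.Conv2D(6, (5, 5), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(16, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(120, activation='relu'),
    layers.Dense(84, activation='relu'),
    layers.Dense(10, activation='softmax')
])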
metricNames = ['Categorical cross-entropy', 'Accuracy']
metricNamesRaw = ['loss', 'accuracy']
model0 = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3), name="conv_0"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', name="conv_1"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', name="conv_2"),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu', name="dense_0"),
    layers.Dense(10, activation='softmax', name="dense_1")
])
model0.compile(optimizer='adam',
               loss=losses.sparse_categorical_crossentropy,
               metrics=['accuracy'])
model0.summary()
Training and validation are performed on the training set with a split ratio of 90-10. The test set is reserved for the later evaluation of the optimized model.
nEpochs = 50
batchSize = 128
tvPlot = tv.train.PlotMetricsOnEpoch(metrics_name=metricNames,
                                     cell_size=(6, 4), columns=2, iter_num=nEpochs, wait_num=2)
history0 = model0.fit(xTrain, yTrain,
                      epochs=nEpochs, batch_size=batchSize,
                      validation_split=0.1,
                      verbose=0,
                      callbacks=[tvPlot]);
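If tensorview is not available, the same curves can be drawn directly from the Keras history object; a minimal sketch:
# Fallback plot of the training curves from the history object (no tensorview needed)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, metric in zip(axes, metricNamesRaw):
    ax.plot(history0.history[metric], label='train')
    ax.plot(history0.history['val_' + metric], label='validation')
    ax.set_xlabel('Epoch')
    ax.set_title(metric)
    ax.legend()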
The validation loss and accuracy are better than those on the training set; this is expected given the dropout layer, which is only active during training.
Compared to the network described in the Tensorflow tutorial, the validation accuracy is not stalling. It probably means that the network is not overfitting and that the regularization is effective.
Evaluation of the model¶
loss, acc = model0.evaluate(xTest, yTest, verbose=0)
print("Model test accuracy = %.2f %%" % (acc*100))
weights0 = model0.get_weights()
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, layerIndex, weightIndex in zip(axes, [0, 1], [6, 8]):
    ax.hist(weights0[weightIndex].ravel(), density=False, bins=40)
    ax.set_title("Dense layer #%d coefficients" % layerIndex)
The distributions of the dense layer weights are well balanced and there are not many weights close to 0.
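To quantify this observation, the fraction of near-zero coefficients can be computed; the 0.01 threshold below is an arbitrary assumption:
# Fraction of dense-layer kernel coefficients close to 0 (threshold is arbitrary)
for layerIndex, weightIndex in zip([0, 1], [6, 8]):
    w = weights0[weightIndex]
    print("Dense layer #%d: %.1f %% of coefficients with |w| < 0.01"
          % (layerIndex, 100 * np.mean(np.abs(w) < 0.01)))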
yEstSoftmax = model0.predict(xTest)
yEst = np.argmax(yEstSoftmax, axis=1)
yEstFail = yEst != yTest.ravel()
firstFail = yEstFail.tolist().index(True)
print("First failing estimation at #%d" % firstFail)
fig, ax = plt.subplots(1, 1, figsize=(4,4))
ax.imshow(xTest[firstFail])
ax.set_xlabel("%s/%s" % (classNames[yEst[firstFail]], classNames[yTest[firstFail][0]]));
The confusion matrix plot below shows a hot diagonal, but due to the clustering some rows and columns are reordered.
confusion = plotConfusionMatrix(yTest, yEst, classNames)
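For reference, a plain heatmap without clustering keeps the original class order; a minimal sketch using seaborn's heatmap:
# Plain confusion matrix heatmap, rows and columns kept in the original class order
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(confusion, annot=True, fmt='d', cmap=plt.cm.bwr,
            xticklabels=classNames, yticklabels=classNames, ax=ax)
ax.set_xlabel('Estimations')
ax.set_ylabel('True values');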
The following graph shows the histograms of softmax probabilities on the test set for each subset of actual true classes.
Ideally, the probability corresponding to the true class should be peaky and located around 1. This is not the case for most of the animals.
Also, the probability histograms of the other classes should be located as close as possible to 0, which is not the case, for example, for dogs when the true class is cat.
dfEst = pd.DataFrame(yEstSoftmax, columns=classNames)
dfEst['true'] = classNames[yTest.ravel()]
pal = sns.hls_palette(len(classNames))
fg = sns.FacetGrid(dfEst, row="true", aspect=6, height=2)
for column, color in zip(classNames, pal):
    fg.map(sns.kdeplot, column, shade=True, clip_on=True, alpha=0.4, lw=2., bw=.03, color=color)
#plt.legend();
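The "peakiness" observation can also be summarized numerically by the average softmax probability assigned to the true class, per class; a minimal sketch:
# Average probability assigned to the true class, grouped by true class
trueClassProba = yEstSoftmax[np.arange(len(yTest)), yTest.ravel()]
for c, name in enumerate(classNames):
    print("%-12s mean P(true class) = %.2f" % (name, trueClassProba[yTest.ravel() == c].mean()))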
Save model¶
model0.save('models/CIFAR-10_CNN5.h5')
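To check the saved file, the model can be reloaded and re-evaluated; a minimal sketch:
# Reload the saved model and verify that it reproduces the test accuracy
reloaded = models.load_model('models/CIFAR-10_CNN5.h5')
lossR, accR = reloaded.evaluate(xTest, yTest, verbose=0)
print("Reloaded model test accuracy = %.2f %%" % (accR * 100))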
References¶
1. Tensorflow tutorial for CNN - https://www.tensorflow.org/tutorials/images/cnn