Binary classification from 2 features using TensorFlow 2.0¶
Based on the same data model as in the Binary classification notebook (HTML / Jupyter).
The classification is on a single boundary defined by a continuous function and added white noise.
Learning goals:
- Learn about TensorFlow as a deep learning computation engine
- Symbolic description of a pipeline (versus value based)
- Understand the Tensors
- Powerful gradient optimizers
- The concepts of layers and metrics
This tutorial is an intermediate step between the home made binary regression, and the fully packaged regression implemented with the Keras API of TensorFlow in Binary classification from 2 features with Keras (HTML / Jupyter)
import tensorflow as tf # Requires TF v2.0+
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
import matplotlib.colors as pltcolors
from sklearn import metrics
import pandas
import seaborn as sns
Model¶
4th-degree polynomial function as a boundary between positive and negative values
Adding some uncertainty as Gaussian noise
The values of X are uniformly distributed and independent
# Two features, Gaussian noise
nFeatures = 2


def generateBatch(N):
    """Draw N random samples and their boolean labels.

    Each sample has nFeatures values drawn uniformly between 0 and 1.
    A sample is labelled True when its second feature is below a
    4th-degree polynomial of its first feature, perturbed by Gaussian noise.

    Args:
        N: number of samples to generate.

    Returns:
        Tuple (x, labels): x is an array of shape (N, nFeatures), labels is a
        boolean array of shape (N,).
    """
    lowerBound, upperBound = 0, 1
    offset = 0.1
    noiseStd = 0.1
    x = random.uniform(lowerBound, upperBound, (N, nFeatures))
    x0 = x[:, 0]
    x1 = x[:, 1]
    # 4th degree relation to shape the boundary
    boundary = 2 * (x0**4 + (x0 - 0.3)**3 + offset)
    # Adding some gaussian noise (drawn after the features, one value per sample)
    noise = random.normal(0, noiseStd, N)
    labels = boundary + noise > x1
    return (x, labels)
Training data¶
# Number of samples in the training set
N = 2000
# x has nFeatures (2) dims in R, label has 1 dim in B
xTrain, labelTrain = generateBatch(N)
# Two-color map applied to the boolean labels in the scatter plot
colors = ['blue','red']
fig = plt.figure(figsize=(12,4))
# Left panel: samples in the (x0, x1) plane, colored by label
plt.subplot(1,3,1)
plt.scatter(xTrain[:,0], xTrain[:,1], c=labelTrain, cmap=pltcolors.ListedColormap(colors), marker=',', alpha=0.1)
plt.xlabel('x0')
plt.ylabel('x1')
plt.title('Generated train data')
plt.grid()
cb = plt.colorbar()
# One colorbar tick per color, relabelled as the classes 0 and 1
loc = np.arange(0,1,1./len(colors))
cb.set_ticks(loc)
cb.set_ticklabels([0,1])
# Middle panel: label against the first feature
plt.subplot(1,3,2)
plt.scatter(xTrain[:,0], labelTrain, marker=',', alpha=0.01)
plt.xlabel('x0')
plt.ylabel('label')
plt.grid()
# Right panel: label against the second feature
plt.subplot(1,3,3)
plt.scatter(xTrain[:,1], labelTrain, marker=',', alpha=0.01)
plt.xlabel('x1')
plt.ylabel('label')
plt.grid()
# Normalized 10-bin histogram of the labels converted to floats
count, bins, ignored = plt.hist(labelTrain*1.0, 10, density=True, alpha=0.5)
# Mean of the boolean labels, i.e. the fraction of positive samples
p = np.mean(labelTrain)
print('Bernouilli parameter of the distribution:', p)
Test data for verification of the model¶
# Second batch of N samples drawn from the same generator, used to verify the model
xTest, labelTest = generateBatch(N)
# Two-color map used when plotting the labels estimated on the test set
testColors = ['navy', 'orangered']
Helpers¶
def plotHeatMap(X, classes, title=None, fmt='.2g', ax=None, xlabel=None, ylabel=None):
    """Plot the matrix X as an annotated Seaborn heatmap with a blue color map.

    Includes a fix for the heatmap plot from Seaborn with pyplot 3.1.0, 3.1.1,
    where the first and last rows are cut in half:
    https://stackoverflow.com/questions/56942670/matplotlib-seaborn-first-and-last-row-cut-in-half-of-heatmap-plot
    The y limits are set to the exact extent of the rows rather than shifted by
    half a cell, so the plot is also correct on Matplotlib versions that do not
    have the regression (shifting there would add an empty half-cell margin).

    Args:
        X: 2-D matrix of values to display.
        classes: tick labels used on both axes.
        title: optional title of the axes.
        fmt: format string of the cell annotations.
        ax: optional Matplotlib axes to draw on, passed to Seaborn as is.
        xlabel: optional label of the x axis.
        ylabel: optional label of the y axis.
    """
    ax = sns.heatmap(X, xticklabels=classes, yticklabels=classes, annot=True, fmt=fmt,
                     cmap=plt.cm.Blues, ax=ax)  # notation: "annot" not "annote"
    # Seaborn lays the rows out from y=0 to y=nRows on an inverted y axis;
    # restoring exactly that range is a no-op when Matplotlib did not cut the rows
    nRows = np.shape(X)[0]
    ax.set_ylim(nRows, 0)
    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)
def plotConfusionMatrix(yTrue, yEst, classes, title=None, fmt='.2g', ax=None):
    """Compute the confusion matrix of yEst against yTrue and plot it as a heatmap.

    Args:
        yTrue: actual labels.
        yEst: estimated labels.
        classes: tick labels used on both axes of the heatmap.
        title: optional title of the plot.
        fmt: format string of the cell annotations.
        ax: optional Matplotlib axes to draw on.
    """
    confusion = metrics.confusion_matrix(yTrue, yEst)
    plotHeatMap(confusion, classes, title=title, fmt=fmt, ax=ax,
                xlabel='Estimations', ylabel='True values')
Using TensorFlow 2.0¶
# Labels as float {0., 1.}
labelTrainF = np.multiply(labelTrain*1.0, 1.0)
# NOTE(review): bare expression with no effect in a script; presumably kept to
# display the dtype as the output of a notebook cell
labelTrainF.dtype
# (Mini) Batch size
nBatch = 128
# Number of batches per Epoch
nBatchPerEpoch = 10
# Safe guard to stop on number of epochs
nEpochMax = 2000
# Simple custom layer exposing the logistic regression model
class MyLogisticRegressionLayer(tf.keras.layers.Layer):
    """Keras layer computing sigmoid(x . w + b) with trainable w and b."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def build(self, input_shape):
        """Create the weights: w of shape [input_shape[1], 1] and the bias b, all initialized to one."""
        print('Logistic layer input_shape', input_shape)
        featureCount = input_shape[1]
        # An L2 regularizer could be added to either weight with
        # regularizer=tf.keras.regularizers.l2(0.02)
        self.w = self.add_weight(
            shape=[featureCount, 1],
            dtype=self.dtype,
            initializer=tf.keras.initializers.ones(),
            trainable=True,
        )
        self.b = self.add_weight(
            shape=1,
            dtype=self.dtype,
            initializer=tf.keras.initializers.ones(),
            trainable=True,
        )

    @tf.function
    def call(self, x, training=None):
        """Return the sigmoid of the affine transform of x; `training` is not used."""
        return tf.math.sigmoid(tf.matmul(x, self.w) + self.b)
Accuracy is defined as: $$ \mathcal{Accuracy} = \frac 1n \sum_{i=1}^n \mathbb{1}_{\hat{y}_i = y_i}$$
With :
- $\hat{y_i} \in \{0, 1\}$ the estimated label
- $y_i \in \{0, 1\}$ the actual label
- $\mathbb{1}$ the characteristic function which is 1 when its argument is true and 0 otherwise
# Using TensorFlow 2.0 style of metrics to implement accuracy
class MyBinaryAccuracy(tf.keras.metrics.Metric):
    """Streaming binary accuracy: ratio of estimations matching the labels.

    Two accumulators are kept between calls to update_state(): the number of
    matching estimations and the number of compared estimations.
    """

    def __init__(self, name='my_accuracy', **kwargs):
        super(MyBinaryAccuracy, self).__init__(name=name, **kwargs)
        self.accuracySum = self.add_weight(name='accuracySum',
                                           initializer='zeros')
        self.accuracyCount = self.add_weight(name='accuracyCount',
                                             initializer='zeros')

    def update_state(self, labels, yEst):
        """Accumulate one batch.

        Args:
            labels: actual labels, cast to booleans.
            yEst: estimated values, counted as positive when greater than 0.5.
        """
        labels = tf.cast(labels, tf.bool)
        labelEst = tf.greater(yEst, 0.5)
        values = tf.cast(tf.equal(labels, labelEst), self.dtype)
        self.accuracySum.assign_add(tf.reduce_sum(values))
        # Count every compared element with the dynamic size: the sum above runs
        # over all elements, and the static first dimension may be unknown (None)
        # when the function is traced
        self.accuracyCount.assign_add(tf.cast(tf.size(values), self.dtype))

    def result(self):
        """Return the accuracy accumulated so far, 0 when nothing was accumulated."""
        return tf.math.divide_no_nan(self.accuracySum, self.accuracyCount)
The loss is computed as binary cross-entropy on the predictions:
As per the binary classification notebook, the formula is :
$$\begin{align} J_{\theta} & = - \sum_{i=1}^N{\log \bigl(y_i \cdot sig(w,b, x_i) + (1-y_i) \cdot \left(1-sig(w,b, x_i)\right) \bigr)} \end{align}$$ With: $$\begin{align} sig(w,b, x_i) &= \frac{1}{1+e^{-(w x_i + b)}} \\ y_i &\in \{0, 1\} \end{align}$$
# Model 1, instantiate the custom layer
# The layer is the only element of the model, takes nFeatures inputs and computes in float64
# (presumably to match the dtype of the NumPy batches it is fed with - TODO confirm)
model1 = tf.keras.Sequential([MyLogisticRegressionLayer(input_shape=[nFeatures], dtype=tf.float64)])
# Stochastic Gradient Descent Optimizer
optim1 = tf.keras.optimizers.SGD(0.01, momentum=0.01)
# Accuracy metric instance
accuracy = MyBinaryAccuracy()
# Perform a train step on a mini-batch
@tf.function
def trainStep1(x, labels):
    """Run one optimization step of model1 on a mini-batch.

    The function is decorated with tf.function, so TensorFlow traces its Python
    code into a graph instead of running it statement by statement.

    Args:
        x: features of the mini-batch.
        labels: actual labels as floats in {0., 1.}, shaped like the predictions.

    Returns:
        Tuple (loss, predictions): the cross-entropy summed over the mini-batch
        and the model outputs computed before the weights were updated.
    """
    with tf.GradientTape() as tape:
        predictions = model1(x, training=True)
        # Sigmoid cross-entropy from predictions:
        # likelihood given by the model to the actual label of each sample
        likelihood = (labels * predictions) + ((1 - labels) * (1 - predictions))
        loss = -tf.reduce_sum(tf.math.log(likelihood))
    trainables = model1.trainable_variables
    gradients = tape.gradient(loss, trainables)
    optim1.apply_gradients(zip(gradients, trainables))
    return loss, predictions
# Loop on epochs and mini batch
# One history row per epoch: (cost, accuracy, b, w0, w1)
hist = []
for epoch in range(nEpochMax + 1):
    cost_cumul = 0
    accuracy.reset_states()
    for b in range(nBatchPerEpoch):
        # A fresh mini-batch is generated for every train step
        xTrain, labelTrain = generateBatch(nBatch)
        # Labels as a float column, shaped like the predictions of the model
        labelTrainF = (labelTrain * 1.0).reshape(-1, 1)
        cost, predictions = trainStep1(xTrain, labelTrainF)
        cost_cumul += cost
        accuracy.update_state(labelTrainF, predictions)
    # Mean cost per mini-batch over the epoch
    cost_epoch = cost_cumul / nBatchPerEpoch
    # W[0] holds the weights of the features, W[1] the bias
    W = model1.get_weights()
    hist.append((
        cost_epoch.numpy(),
        accuracy.result().numpy(),
        W[1][0],
        W[0][0][0],
        W[0][1][0],
    ))
    # Displaying result on current Epoch
    if epoch % 1000 == 0:
        hNow = hist[-1]
        print("Epoch %4d, Cost: %.3f, Accuracy: %.3f, b: %.3f, W: %.3f, %.3f" % ((epoch,) + hNow))
# Save history as a Panda Data Frame
df = pandas.DataFrame(hist, columns=('cost', 'accuracy', 'b', 'w0', 'w1'))
# Estimated parameters are the ones recorded at the last epoch
bEst = df['b'].iloc[-1]
wEst = np.array([df[column].iloc[-1] for column in ('w0', 'w1')])
print('Estimated b =', bEst, ', w =', wEst)
The accuracy is quickly reaching a good level!
But weights' convergence is very noisy showing numerical instability.
# History of the training, one value per epoch, on a 2x3 grid of plots
plt.figure(figsize=(15,9))
# First row: bias and the two feature weights
plt.subplot(2,3,1)
plt.plot(df['b'], marker=',');
plt.title('b')
plt.grid()
plt.subplot(2,3,2)
plt.plot(df['w0'], marker=',');
plt.title('w0')
plt.grid()
plt.subplot(2,3,3)
plt.plot(df['w1'], marker=',');
plt.title('w1')
plt.grid()
# Second row: cost and accuracy
plt.subplot(2,3,4)
plt.plot(df['cost'])
plt.grid()
plt.title('Cost')
plt.subplot(2,3,5)
plt.plot(df['accuracy'])
plt.grid()
plt.title('Accuracy');
Testing the model¶
# Outputs of the trained model on the test set, as a NumPy array
yEst = model1(xTest).numpy()
# Threshold the outputs at 0.5 and flatten to get one boolean label per sample
labelEst = (yEst > 0.5).reshape(-1)
plt.figure(figsize=(8,4))
# Left panel: test samples colored by estimated label
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], xTest[:,1], c=labelEst, cmap=pltcolors.ListedColormap(testColors), marker='x', alpha=0.2);
plt.xlabel('x0')
plt.ylabel('x1')
plt.grid()
plt.title('Estimated')
cb = plt.colorbar()
# One colorbar tick per color, relabelled as the classes 0 and 1
loc = np.arange(0,1,1./len(testColors))
cb.set_ticks(loc)
cb.set_ticklabels([0,1]);
# Right panel: same samples colored by the label given by the generator
plt.subplot(1,2,2)
plt.scatter(xTest[:,0], xTest[:,1], c=labelTest, cmap=pltcolors.ListedColormap(colors), marker='x', alpha=0.1);
plt.xlabel('x0')
plt.ylabel('x1')
plt.grid()
plt.title('Generator')
cb = plt.colorbar()
loc = np.arange(0,1,1./len(colors))
cb.set_ticks(loc)
cb.set_ticklabels([0,1]);
# Normalized 10-bin histogram of the estimated labels converted to floats
plt.hist(labelEst*1.0, 10, density=True)
# Mean of the boolean estimations, i.e. the fraction of samples estimated positive
print('Bernouilli parameter =', np.mean(labelEst))
# Confusion matrix and per-class report of the estimations against the generator labels
plotConfusionMatrix(labelTest, labelEst, np.array(['Blue', 'Red']));
print(metrics.classification_report(labelTest, labelEst))
Conclusion¶
As we can read in this tutorial, designing a simple machine learning pipeline starting from math functions requires a lot of learning and details. Much time is spent digging into the details of the Tensors to fix dimensions.
Fortunately, TensorFlow also provides a higher level API, Keras. The same problem is solved in the Binary classification from 2 features with Keras (HTML / Jupyter).
Where to go from here ?¶
Other linear implementations and simple neural nets using "raw" Python or SciKit Learn (Notebook), using Keras (Notebook) up to a simple neural net to achieve better fit, or the K Nearest Neighbors classifier (Notebook)
More complex multi-class models on the Czech and Norwegian flags using Keras (Notebook), showing one of the main motivations for neural networks.
Compare with the two feature linear regression using simple algorithms (Notebook), or using Keras (Notebook)