Local approximation of a bivariate polynomial using linear regression with Keras¶
Given two features, generate y (the label) and perform linear regression, using the same model as in the main bivariate linear regression notebook.
Learning goals:¶
- Learn about Keras
- Port the linear regression gradient descent to Keras
- Get to know the Kullback-Leibler divergence metric
- Go further with a two-layer neural network
import tensorflow as tf # TF 2.0 required
from tensorflow import keras # TF 2.0 required
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
from sklearn import metrics
import pandas
usingTensorBoard = False
In 2D, many points would be required to cover the x plane uniformly.
It is therefore preferable to draw the x points from a 2D uniform distribution (Monte Carlo experiment).
Plotting y as a function of $x_0, x_1$ is then more challenging, as $x_0$ and $x_1$ are not continuous monotonic vectors; a scatter plot is favored.
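As a quick check of the sampling call used by the generator below, numpy.random.uniform broadcasts per-feature bounds over the requested shape, so a single call draws both coordinates of every point (a minimal sketch, using the same bounds as the generator):
# Each column of the draw respects its own [min, max] bound.
xDemo = random.uniform(np.array([0, -0.5]), np.array([0.5, 0.5]), (5, 2))
print(xDemo.shape)                            # (5, 2)
print(xDemo.min(axis=0), xDemo.max(axis=0))   # per-feature extrema stay inside the bounds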
nFeatures = 2
# f(x) as a bivariate polynomial
fPoly = np.array([[-0.002, 0], [-0.06, 0.1], [0.2, 0], [1, 0]])
# Generator
def generateBatch(N):
    # Feature bounds: x0 in [0, 0.5], x1 in [-0.5, 0.5]
    xMin = np.array([0, -0.5])
    xMax = np.array([0.5, 0.5])
    # Offset and noise standard deviation
    b = 0.35
    std = 0.01
    # Draw x uniformly in the 2D box (Monte Carlo sampling)
    x = random.uniform(xMin, xMax, (N, nFeatures))
    yClean = (x[:,0]-0.2)**4 + (x[:,0]-0.1)**3 + 0.1*x[:,1]**2 + b
    y = yClean + random.normal(0, std, N)
    return (x, y, yClean)
N = 100000
xTrain, yTrain, yTrainClean = generateBatch(N);
xTest, yTest, yTestClean = generateBatch(N);
fig = plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
plt.scatter(xTrain[:,0], yTrainClean, marker='.');
plt.xlabel('x0')
plt.subplot(1,2,2)
plt.scatter(xTrain[:,1], yTrainClean, marker='.');
plt.xlabel('x1')
plt.title('Bivariate model without noise');
It looks like a toboggan from the side ($x_0$) and from the front ($x_1$).
Single neuron model with Keras¶
# Number of epochs
nEpoch = 8
nBatch = 128 # 32 is default
# Model
model1 = keras.models.Sequential([
    keras.layers.Dense(1, activation='linear', input_shape=[nFeatures])
])
model1.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])
# TensorBoard
callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/",
                                     histogram_freq=1, write_graph=True, write_grads=True, batch_size=1)
    callbacks = [ks]
The Kullback-Leibler divergence is used as an extra metric to monitor the model.
References:
- https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
- (French) https://www.science-emergence.com/Articles/Divergence-de-Kullback-Leibler-avec-python-et-matplotlib/
$D_{KL}(P \Vert Q)=\int_{-\infty}^{+\infty} p(x) \log\Bigl(\frac{p(x)}{q(x)}\Bigr)\,dx$
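As a small numeric illustration of the discrete form, $D_{KL}(P \Vert Q)=\sum_i p_i \log\frac{p_i}{q_i}$ (the two distributions below are arbitrary and unrelated to the regression data); the Keras 'kullback_leibler_divergence' metric applies this sum to the clipped target and predicted values:
# Discrete KL divergence between two arbitrary probability vectors P and Q
p = np.array([0.1, 0.4, 0.5])
q = np.array([0.2, 0.3, 0.5])
print('D_KL(P||Q) = {:.4f}'.format(np.sum(p * np.log(p / q))))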
# Fit
hist1 = model1.fit(xTrain, yTrain, epochs=nEpoch, batch_size=nBatch, validation_split = 0.2, verbose=0, callbacks=callbacks)
weights1, biases1 = model1.get_weights()
print('Est W =', weights1.reshape(-1), ', b =', biases1[0])
plt.figure(figsize=(15,4))
plt.subplot(1,3,1)
plt.semilogy(hist1.history['loss'])
plt.semilogy(hist1.history['val_loss'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Loss')
plt.subplot(1,3,2)
plt.semilogy(hist1.history['kullback_leibler_divergence'])
plt.semilogy(hist1.history['val_kullback_leibler_divergence'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Kullback-Leibler divergence');
Test model¶
# Manual prediction from the fitted weights: yEst = x.W + b
yEst1 = np.matmul(xTest, weights1) + biases1
fig = plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], yTestClean, marker='.')
plt.scatter(xTest[:,0], yEst1, marker='.', alpha=0.05)
plt.title('x0')
plt.legend(('ori clean', 'estimated'))
plt.subplot(1,2,2)
plt.scatter(xTest[:,1], yTestClean, marker='.')
plt.scatter(xTest[:,1], yEst1, marker='.', alpha=0.05)
plt.title('x1')
plt.legend(('ori clean', 'estimated'))
mse1 = metrics.mean_squared_error(yTest, yEst1)
print('Gradient descent MSE = {:.3e}'.format(mse1));
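For reference, the coefficients of this single linear neuron can also be obtained in closed form with ordinary least squares; a minimal sketch using np.linalg.lstsq (not part of the original notebook), whose result should be close to the gradient-descent estimate since the mean-squared-error loss of a linear model is convex:
# Append a column of ones to model the intercept, then solve min ||A.theta - y||^2
A = np.hstack([xTrain, np.ones((len(xTrain), 1))])
theta, _, _, _ = np.linalg.lstsq(A, yTrain, rcond=None)
print('Closed-form W =', theta[:nFeatures], ', b =', theta[nFeatures])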
Using a two-layer neural network for a better fit¶
References:
- About regularization, see https://www.tensorflow.org/tutorials/keras/overfit_and_underfit
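For each regularized weight matrix, the L1 penalty added to the training loss is $\lambda \sum_i \vert w_i \vert$; keras.regularizers.l1(0.00001) used in the model below corresponds to $\lambda = 10^{-5}$.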
# Number of epochs
nEpoch = 50
nBatch = 256 # 32 is default
# Model
model2 = keras.models.Sequential([
    keras.layers.Dense(8, activation=keras.activations.softmax, input_shape=[nFeatures],  # <---
                       bias_regularizer=keras.regularizers.l1(0.00001),                   # <---
                       kernel_regularizer=keras.regularizers.l1(0.00001)),                # <---
    keras.layers.Dense(1, activation='linear')
])
model2.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])
# TensorBoard
callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/",
                                     histogram_freq=1, write_graph=True, write_grads=True, batch_size=1)
    callbacks = [ks]
# Fit
hist2 = model2.fit(xTrain, yTrain, epochs=nEpoch, batch_size=nBatch, validation_split = 0.2, verbose=0, callbacks=callbacks)
model2.summary()
model2.get_weights()
plt.figure(figsize=(15,4))
plt.subplot(1,3,1)
plt.semilogy(hist2.history['loss'])
plt.semilogy(hist2.history['val_loss'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Loss')
plt.subplot(1,3,2)
plt.semilogy(hist2.history['kullback_leibler_divergence'])
plt.semilogy(hist2.history['val_kullback_leibler_divergence'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Kullback-Leibler divergence');
Test model with two layers¶
yEst2 = model2.predict(xTest).reshape(-1)
fig = plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,0], yEst2, marker='.', alpha=0.05)
plt.title('x0')
plt.legend(('ori clean', 'estimated'))
plt.subplot(1,2,2)
plt.scatter(xTest[:,1], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,1], yEst2, marker='.', alpha=0.05)
plt.title('x1')
plt.legend(('ori clean', 'estimated'))
mse2 = metrics.mean_squared_error(yTest, yEst2)
print('Neural MSE = {:.3e}'.format(mse2));
Conclusion on the two-layer model¶
Adding a second layer leads to a better fit of the generating function (a degree-4 polynomial).
This is, however, mostly due to the non-linear activation function (softmax). This is shown below with the same model but a ReLU activation: the predictions exhibit the elbows of the individual ReLU units.
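To see where these elbows come from, here is a small standalone sketch (the weights and biases are arbitrary, chosen only to make the kinks visible): a weighted sum of ReLU units is piecewise linear, with a kink wherever one unit's pre-activation crosses zero.
# A weighted sum of ReLU units is piecewise linear: each unit adds an elbow
xDemo = np.linspace(-1, 1, 200)
relu = lambda z: np.maximum(0, z)
yDemo = relu(2*xDemo - 0.5) - relu(xDemo + 0.2) + 0.5*relu(-xDemo)
plt.plot(xDemo, yDemo)
plt.title('Sum of three ReLU units: piecewise linear with elbows');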
Two-layer model using ReLU¶
# Number of epochs
nEpoch = 50
nBatch = 256 # 32 is default
# Model
model3 = keras.models.Sequential([
    keras.layers.Dense(8, activation='relu', input_shape=[nFeatures],  # <---
                       bias_regularizer=keras.regularizers.l1(0.00001),
                       kernel_regularizer=keras.regularizers.l1(0.00001)),
    keras.layers.Dense(1, activation='linear')
])
model3.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])
# TensorBoard
callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/",
                                     histogram_freq=1, write_graph=True, write_grads=True, batch_size=1)
    callbacks = [ks]
# Fit
hist3 = model3.fit(xTrain, yTrain, epochs=nEpoch, batch_size=nBatch, validation_split = 0.2, verbose=0, callbacks=callbacks)
plt.figure(figsize=(15,4))
plt.subplot(1,3,1)
plt.semilogy(hist3.history['loss'])
plt.semilogy(hist3.history['val_loss'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Loss')
plt.subplot(1,3,2)
plt.semilogy(hist3.history['kullback_leibler_divergence'])
plt.semilogy(hist3.history['val_kullback_leibler_divergence'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Kullback-Leibler divergence');
yEst3 = model3.predict(xTest).reshape(-1)
fig = plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,0], yEst3, marker='.', alpha=0.05)
plt.title('x0')
plt.legend(('ori clean', 'estimated'))
plt.subplot(1,2,2)
plt.scatter(xTest[:,1], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,1], yEst3, marker='.', alpha=0.05)
plt.title('x1')
plt.legend(('ori clean', 'estimated'))
mse3 = metrics.mean_squared_error(yTest, yEst3)
print('Neural (ReLU) MSE = {:.3e}'.format(mse3));
To try on your own:¶
- Add more units (neurons) to the first layer
- Add a hidden layer (a possible starting point is sketched below)
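A possible starting point for these exercises (a sketch only; the name model4 and the layer sizes are arbitrary choices, not part of the original notebook):
# Wider first layer plus an extra hidden layer; compile and fit as for model3
model4 = keras.models.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=[nFeatures]),
    keras.layers.Dense(8, activation='relu'),    # extra hidden layer
    keras.layers.Dense(1, activation='linear')
])
model4.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])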
Where to go from here?¶
The two-feature linear regression implementation using "raw" Python (Notebook)
Compare with the two-feature binary classification using logistic regression with Keras (Notebook)