Local approximation of a bivariate polynomial using linear regression with Keras¶
Given two features, generate y (the label) and perform linear regression, using the same model as in the main bivariate linear regression notebook.
Learning goals:¶
- Learn about Keras
- Port the linear regression gradient descent to Keras
- Get to know the Kullback-Leibler divergence metric
- Go further with a two-layer neural network
import tensorflow as tf # TF 2.0 required
from tensorflow import keras # TF 2.0 required
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
from sklearn import metrics
import pandas
usingTensorBoard = False
In 2D, many points would be required to cover the x plane uniformly.
It is therefore preferable to draw the x points from a 2D uniform distribution (Monte Carlo experiment).
Plotting y as a function of $x_0, x_1$ is then more challenging, as $x_0$ and $x_1$ are not continuous monotonic vectors; a scatter plot is favored.
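As a quick check of the sampling call used by the generator below, numpy.random.uniform broadcasts per-feature bounds over the requested shape, so a single call draws both coordinates of every point (a minimal sketch, using the same bounds as the generator):
# Each column of the draw respects its own [min, max] bound.
xDemo = random.uniform(np.array([0, -0.5]), np.array([0.5, 0.5]), (5, 2))
print(xDemo.shape)                            # (5, 2)
print(xDemo.min(axis=0), xDemo.max(axis=0))   # per-feature extrema stay inside the bounds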
nFeatures = 2
# f(x) as a bivariate polynomial
fPoly = np.array([[-0.002, 0], [-0.06, 0.1], [0.2, 0], [1, 0]])
# Generator
def generateBatch(N):
    # Feature bounds: x0 in [0, 0.5], x1 in [-0.5, 0.5]
    xMin = np.array([0, -0.5])
    xMax = np.array([0.5, 0.5])
    # Offset and noise standard deviation
    b = 0.35
    std = 0.01
    # Draw x uniformly in the 2D box (Monte Carlo sampling)
    x = random.uniform(xMin, xMax, (N, nFeatures))
    yClean = (x[:,0]-0.2)**4 + (x[:,0]-0.1)**3 + 0.1*x[:,1]**2 + b
    y = yClean + random.normal(0, std, N)
    return (x, y, yClean)
N = 100000
xTrain, yTrain, yTrainClean = generateBatch(N);
xTest, yTest, yTestClean = generateBatch(N);
fig = plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
plt.scatter(xTrain[:,0], yTrainClean, marker='.');
plt.xlabel('x0')
plt.subplot(1,2,2)
plt.scatter(xTrain[:,1], yTrainClean, marker='.');
plt.xlabel('x1')
plt.title('Bivariate model without noise');
It looks like a toboggan from the side ($x_0$) and from the front ($x_1$).
Single neuron model with Keras¶
# Number of epochs
nEpoch = 8
nBatch = 128 # 32 is default
# Model
model1 = keras.models.Sequential([
    keras.layers.Dense(1, activation='linear', input_shape=[nFeatures])
])
model1.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])
# TensorBoard
callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/",
                                     histogram_freq=1, write_graph=True, write_grads=True, batch_size=1)
    callbacks = [ks]
The Kullback-Leibler divergence is used as an extra metric to monitor the model.
References:
- https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
- (French) https://www.science-emergence.com/Articles/Divergence-de-Kullback-Leibler-avec-python-et-matplotlib/
$D_{KL}(P \Vert Q)=\int_{-\infty}^{+\infty} p(x) \log\Bigl(\frac{p(x)}{q(x)}\Bigr)\,dx$
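As a small numeric illustration of the discrete form, $D_{KL}(P \Vert Q)=\sum_i p_i \log\frac{p_i}{q_i}$ (the two distributions below are arbitrary and unrelated to the regression data); the Keras 'kullback_leibler_divergence' metric applies this sum to the clipped target and predicted values:
# Discrete KL divergence between two arbitrary probability vectors P and Q
p = np.array([0.1, 0.4, 0.5])
q = np.array([0.2, 0.3, 0.5])
print('D_KL(P||Q) = {:.4f}'.format(np.sum(p * np.log(p / q))))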
# Fit
hist1 = model1.fit(xTrain, yTrain, epochs=nEpoch, batch_size=nBatch, validation_split = 0.2, verbose=0, callbacks=callbacks)
weights1, biases1 = model1.get_weights()
print('Est W =', weights1.reshape(-1), ', b =', biases1[0])
plt.figure(figsize=(15,4))
plt.subplot(1,3,1)
plt.semilogy(hist1.history['loss'])
plt.semilogy(hist1.history['val_loss'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Loss')
plt.subplot(1,3,2)
plt.semilogy(hist1.history['kullback_leibler_divergence'])
plt.semilogy(hist1.history['val_kullback_leibler_divergence'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Kullback-Leibler divergence');
Test model¶
# Manual prediction from the fitted weights: yEst = x.W + b
yEst1 = np.matmul(xTest, weights1) + biases1
fig = plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], yTestClean, marker='.')
plt.scatter(xTest[:,0], yEst1, marker='.', alpha=0.05)
plt.title('x0')
plt.legend(('ori clean', 'estimated'))
plt.subplot(1,2,2)
plt.scatter(xTest[:,1], yTestClean, marker='.')
plt.scatter(xTest[:,1], yEst1, marker='.', alpha=0.05)
plt.title('x1')
plt.legend(('ori clean', 'estimated'))
mse1 = metrics.mean_squared_error(yTest, yEst1)
print('Gradient descent MSE = {:.3e}'.format(mse1));
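For reference, the coefficients of this single linear neuron can also be obtained in closed form with ordinary least squares; a minimal sketch using np.linalg.lstsq (not part of the original notebook), whose result should be close to the gradient-descent estimate since the mean-squared-error loss of a linear model is convex:
# Append a column of ones to model the intercept, then solve min ||A.theta - y||^2
A = np.hstack([xTrain, np.ones((len(xTrain), 1))])
theta, _, _, _ = np.linalg.lstsq(A, yTrain, rcond=None)
print('Closed-form W =', theta[:nFeatures], ', b =', theta[nFeatures])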
Using a two-layer neural network for a better fit¶
References:
- About regularization, see https://www.tensorflow.org/tutorials/keras/overfit_and_underfit
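For each regularized weight matrix, the L1 penalty added to the training loss is $\lambda \sum_i \vert w_i \vert$; keras.regularizers.l1(0.00001) used in the model below corresponds to $\lambda = 10^{-5}$.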
# Number of epochs
nEpoch = 50
nBatch = 256 # 32 is default
# Model
model2 = keras.models.Sequential([
    keras.layers.Dense(8, activation=keras.activations.softmax, input_shape=[nFeatures],  # <---
                       bias_regularizer=keras.regularizers.l1(0.00001),                   # <---
                       kernel_regularizer=keras.regularizers.l1(0.00001)),                # <---
    keras.layers.Dense(1, activation='linear')
])
model2.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])
# TensorBoard
callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/",
                                     histogram_freq=1, write_graph=True, write_grads=True, batch_size=1)
    callbacks = [ks]
# Fit
hist2 = model2.fit(xTrain, yTrain, epochs=nEpoch, batch_size=nBatch, validation_split = 0.2, verbose=0, callbacks=callbacks)
model2.summary()
model2.get_weights()
plt.figure(figsize=(15,4))
plt.subplot(1,3,1)
plt.semilogy(hist2.history['loss'])
plt.semilogy(hist2.history['val_loss'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Loss')
plt.subplot(1,3,2)
plt.semilogy(hist2.history['kullback_leibler_divergence'])
plt.semilogy(hist2.history['val_kullback_leibler_divergence'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Kullback-Leibler divergence');
Test model with two layers¶
yEst2 = model2.predict(xTest).reshape(-1)
fig = plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,0], yEst2, marker='.', alpha=0.05)
plt.title('x0')
plt.legend(('ori clean', 'estimated'))
plt.subplot(1,2,2)
plt.scatter(xTest[:,1], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,1], yEst2, marker='.', alpha=0.05)
plt.title('x1')
plt.legend(('ori clean', 'estimated'))
mse2 = metrics.mean_squared_error(yTest, yEst2)
print('Neural MSE = {:.3e}'.format(mse2));
Conclusion on the two-layer model¶
Adding a second layer leads to a better fit of the generating function (a degree-4 polynomial).
This is, however, mostly due to the non-linear activation function (softmax). This is shown below with the same model but a ReLU activation: the predictions exhibit the elbows of the individual ReLU units.
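To see where these elbows come from, here is a small standalone sketch (the weights and biases are arbitrary, chosen only to make the kinks visible): a weighted sum of ReLU units is piecewise linear, with a kink wherever one unit's pre-activation crosses zero.
# A weighted sum of ReLU units is piecewise linear: each unit adds an elbow
xDemo = np.linspace(-1, 1, 200)
relu = lambda z: np.maximum(0, z)
yDemo = relu(2*xDemo - 0.5) - relu(xDemo + 0.2) + 0.5*relu(-xDemo)
plt.plot(xDemo, yDemo)
plt.title('Sum of three ReLU units: piecewise linear with elbows');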
Two-layer model using ReLU¶
# Number of epochs
nEpoch = 50
nBatch = 256 # 32 is default
# Model
model3 = keras.models.Sequential([
    keras.layers.Dense(8, activation='relu', input_shape=[nFeatures],  # <---
                       bias_regularizer=keras.regularizers.l1(0.00001),
                       kernel_regularizer=keras.regularizers.l1(0.00001)),
    keras.layers.Dense(1, activation='linear')
])
model3.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])
# TensorBoard
callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/",
                                     histogram_freq=1, write_graph=True, write_grads=True, batch_size=1)
    callbacks = [ks]
# Fit
hist3 = model3.fit(xTrain, yTrain, epochs=nEpoch, batch_size=nBatch, validation_split = 0.2, verbose=0, callbacks=callbacks)
plt.figure(figsize=(15,4))
plt.subplot(1,3,1)
plt.semilogy(hist3.history['loss'])
plt.semilogy(hist3.history['val_loss'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Loss')
plt.subplot(1,3,2)
plt.semilogy(hist3.history['kullback_leibler_divergence'])
plt.semilogy(hist3.history['val_kullback_leibler_divergence'])
plt.grid()
plt.legend(('train', 'validation'))
plt.title('Kullback-Leibler divergence');
yEst3 = model3.predict(xTest).reshape(-1)
fig = plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.scatter(xTest[:,0], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,0], yEst3, marker='.', alpha=0.05)
plt.title('x0')
plt.legend(('ori clean', 'estimated'))
plt.subplot(1,2,2)
plt.scatter(xTest[:,1], yTestClean, marker='.', alpha=0.05)
plt.scatter(xTest[:,1], yEst3, marker='.', alpha=0.05)
plt.title('x1')
plt.legend(('ori clean', 'estimated'))
mse3 = metrics.mean_squared_error(yTest, yEst3)
print('Neural (ReLU) MSE = {:.3e}'.format(mse3));
To try on your own:¶
- Add more units (neurons) to the first layer
- Add a hidden layer (a possible starting point is sketched below)
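A possible starting point for these exercises (a sketch only; the name model4 and the layer sizes are arbitrary choices, not part of the original notebook):
# Wider first layer plus an extra hidden layer; compile and fit as for model3
model4 = keras.models.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=[nFeatures]),
    keras.layers.Dense(8, activation='relu'),    # extra hidden layer
    keras.layers.Dense(1, activation='linear')
])
model4.compile(optimizer='adam',
               loss=keras.losses.mean_squared_error,
               metrics=['kullback_leibler_divergence'])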
Where to go from here?¶
The two-feature linear regression implementation using "raw" Python (Notebook)
Compare with the two-feature binary classification using logistic regression with Keras (Notebook)