Graph view of a CNN
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, models, layers
import tensorflow as tf
import seaborn as sns
import networkx as nx
import pygraphviz as pgv
from functools import reduce
In [87]:
from bokeh.plotting import figure
from bokeh.palettes import RdBu
from bokeh.io import output_notebook, show
from bokeh.models.mappers import LinearColorMapper, LogColorMapper
from bokeh.models import ColumnDataSource
from bokeh.transform import linear_cmap
output_notebook()
Model and data
In [3]:
(xTrain, yTrain),(xTest, yTest) = datasets.cifar10.load_data()
xTrain = xTrain / 255.
xTest = xTest / 255.
classNames = ['plane', 'car', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck']
xTrain.shape, xTest.shape
Out[3]:
In [4]:
model0 = models.load_model('models/CIFAR-10_CNN5.h5')
model0.summary()
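If the saved file models/CIFAR-10_CNN5.h5 is not available, a stand-in model of the same general shape can be built instead. This is only a hedged sketch: the layer names below (conv_1, max_pooling2d_1, conv_2, dense_1) mirror the ones referenced later in the notebook, but the filter and unit counts are assumptions, not the actual CIFAR-10_CNN5 architecture.
# Hypothetical fallback only: layer names match those used later in the notebook,
# filter/unit counts are placeholders, not the real CIFAR-10_CNN5 model.
def build_fallback_model():
    return models.Sequential([
        layers.Conv2D(16, (3, 3), activation='relu',
                      input_shape=(32, 32, 3), name='conv_1'),
        layers.MaxPooling2D((2, 2), name='max_pooling2d_1'),
        layers.Conv2D(32, (3, 3), activation='relu', name='conv_2'),
        layers.Flatten(name='flatten_1'),
        layers.Dense(64, activation='relu', name='dense_1'),
        layers.Dense(10, activation='softmax', name='dense_2'),
    ])

# model0 = build_fallback_model()  # uncomment if the .h5 file is missing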
Graph creation
Creation of a directed (acyclic) graph. The vertices are all neurons of the network; each edge carries the weight applied to the source neuron's output as it enters the destination neuron's input.
Specificities:
- For a 2D convolutional layer, the convolution filter is duplicated for each x-y position of the output (pixel) space
- For a 2D max-pooling layer, each value of the input matrix is assumed to have a weight of 1 on the output; the corresponding adjacency matrix is a thick diagonal matrix
- Activation functions are not applied
Within the graph, nodes (neurons) are numbered from 0 upwards across layers. Layer boundaries are saved in the layer_positions dictionary, which records the index of the first neuron at the output of each layer.
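As a minimal sketch of the indexing convention used in the cell below (added here for illustration, assuming channels-last shapes indexed as (batch, dim1, dim2, channels)), the flat node id of an output position can be written as:
def node_id(layer_offset, m, n, c, out_shape):
    # Flat node id of output position (m, n) in channel c of a layer,
    # matching the offset + m + out_shape[1] * (n + out_shape[2] * c)
    # expression used in the graph-building cell below (channels-last).
    return layer_offset + m + out_shape[1] * (n + out_shape[2] * c)
For example, with a hypothetical out_shape = (None, 30, 30, 16), channel 0 occupies ids layer_offset to layer_offset + 899, channel 1 the next 900 ids, and so on.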
In [145]:
g = nx.DiGraph()

# Select initial layer of the DNN
from_layer = 0
input_offset = 0
input_dim = reduce((lambda x, y: x * y), model0.layers[from_layer].get_input_shape_at(0)[1:])
output_offset = input_offset + input_dim
output0_offset = output_offset

# Add input nodes to graph
in_nodes = [i for i in np.arange(input_offset, output_offset)]
g.add_nodes_from(in_nodes)

layer_positions = {'begin': input_offset}
layer_seq = []

for layer in model0.layers[from_layer:]:
    if isinstance(layer, layers.Dense):
        layer_positions[layer.name] = output_offset
        layer_seq.append(layer.name)
        w = layer.weights[0].numpy()
        print('Dense', layer.name, input_offset, output_offset, w.shape[1])
        nodes = [output_offset + j for j in np.arange(w.shape[1])]
        g.add_nodes_from(nodes)
        edges = [(input_offset + i, output_offset + j, w[i, j])
                 for i in range(w.shape[0])
                 for j in range(w.shape[1])]
        g.add_weighted_edges_from(edges)
        input_offset = output_offset
        output_offset += w.shape[1]  # layer.get_output_shape_at(0)[1]
    elif isinstance(layer, layers.Conv2D):
        layer_positions[layer.name] = output_offset
        layer_seq.append(layer.name)
        in_shape = layer.get_input_shape_at(0)
        out_shape = layer.get_output_shape_at(0)
        out_cnt = out_shape[1] * out_shape[2] * out_shape[3]
        print('Conv2D', layer.name, input_offset, output_offset, out_cnt)
        w = layer.weights[0].numpy()
        nodes = [output_offset + j for j in np.arange(out_cnt)]
        g.add_nodes_from(nodes)
        # Assuming conv padding='valid', no stride
        edges = [(input_offset + i + m + in_shape[1] * (j + n + in_shape[2] * k),
                  output_offset + m + out_shape[1] * (n + out_shape[2] * l),
                  w[i, j, k, l])
                 for l in range(out_shape[3])  # Output filters
                 for k in range(in_shape[3])   # Input filters
                 for n in range(out_shape[2])  # Out image shape height
                 for j in range(w.shape[1])    # Conv filter height
                 for m in range(out_shape[1])  # Out image shape width
                 for i in range(w.shape[0])    # Conv filter width
                 ]
        g.add_weighted_edges_from(edges)
        input_offset = output_offset
        output_offset += out_cnt
        # print(edges[::36])
    elif isinstance(layer, layers.MaxPooling2D):
        layer_positions[layer.name] = output_offset
        layer_seq.append(layer.name)
        in_shape = layer.get_input_shape_at(0)
        out_shape = layer.get_output_shape_at(0)
        out_cnt = out_shape[1] * out_shape[2] * out_shape[3]
        pool_shape = layer.pool_size
        print('MaxPooling2D', layer.name, input_offset, output_offset, out_cnt)
        nodes = [output_offset + j for j in np.arange(out_cnt)]
        g.add_nodes_from(nodes)
        # Apply constant weight to all pixels
        pooling_weight = 1
        edges = [(input_offset + i + m * pool_shape[0] + in_shape[1] * (j + n * pool_shape[1] + in_shape[2] * k),
                  output_offset + m + out_shape[1] * (n + out_shape[2] * k),
                  pooling_weight)
                 for k in range(in_shape[3])    # Input/output filters
                 for n in range(out_shape[2])   # Out image shape height
                 for j in range(pool_shape[1])  # Pool height
                 for m in range(out_shape[1])   # Out image shape width
                 for i in range(pool_shape[0])  # Pool width
                 ]
        g.add_weighted_edges_from(edges)
        # print(edges[:32])
        input_offset = output_offset
        output_offset += out_cnt
    elif isinstance(layer, (layers.Flatten, layers.Dropout)):
        print('Ignored', layer.name)
    else:
        print('Not handled', type(layer))

layer_positions['end'] = output_offset
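A couple of quick sanity checks (not in the original notebook, but cheap to run) can confirm that the node count matches the final offset and that the graph is indeed acyclic:
# Illustrative checks on the constructed graph
assert g.number_of_nodes() == output_offset  # every neuron got a node
assert nx.is_directed_acyclic_graph(g)       # feed-forward network => DAG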
In [143]:
#l = model0.layers[4]
#l.get_input_shape_at(0), l.get_output_shape_at(0), #l.pool_size
In [7]:
nx.density(g)
Out[7]:
In [8]:
layer_positions
Out[8]:
Adjacency matrix
$adj_{i,j}$: weight of neuron $i$'s output on the input of neuron $j$
In [146]:
adj = nx.adjacency_matrix(g)
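As a small consistency check (illustrative, not part of the original notebook): since the nodes were inserted in increasing integer order, row/column $i$ of adj corresponds to node $i$, so any matrix entry should equal the stored edge weight:
# Pick an arbitrary edge and compare the sparse-matrix entry with the edge weight
u, v = next(iter(g.edges()))
assert np.isclose(adj[u, v], g[u][v]['weight'])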
In [147]:
if True:  # Plot adjacency matrix with Bokeh
    if False:
        offset00 = layer_positions['begin']
        offset01 = layer_positions['conv_2']
        offset10 = layer_positions['conv_1']
        offset11 = layer_positions['max_pooling2d_1']
    else:
        offset00 = layer_positions['max_pooling2d_1']
        offset01 = layer_positions['dense_1']
        offset10 = layer_positions['conv_2']
        offset11 = layer_positions['end']

    dat = adj.toarray()[offset00:offset01, offset10:offset11]

    p = figure(plot_width=900, plot_height=900, x_range=(0, offset11 - offset10))  # , y_range=(offset01 - offset00, 0))
    color_mapper = LinearColorMapper(palette=RdBu[11], low=-0.1, high=0.1)
    # Must give a vector of image data for the image parameter
    p.image(image=[dat], x=0, y=0, dw=dat.shape[1], dh=dat.shape[0], color_mapper=color_mapper)
    p.xaxis.ticker = np.array(list(layer_positions.values())) - offset10
    p.xaxis.major_label_overrides = {v - offset10: k for k, v in layer_positions.items()}
    p.xaxis.axis_label = 'Neuron output'
    p.xaxis.major_label_orientation = np.pi / 4
    p.yaxis.ticker = np.array(list(layer_positions.values())) - offset00
    p.yaxis.major_label_overrides = {v - offset00: k for k, v in layer_positions.items()}
    p.yaxis.axis_label = 'Neuron as input to next layer'
    print(dat.shape)
    show(p)
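To keep a standalone copy of the plot (an optional step, not in the original notebook), the figure can also be written to an HTML file alongside the inline display:
from bokeh.io import output_file, save

output_file('adjacency_matrix.html')  # standalone HTML next to the notebook
save(p)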