GAN on C2C ski touring outing data with application to global warming prediction

Based on ski-touring outing reports from www.camptocamp.org in Haute-Savoie (France) and local temperature reports from Megève, a Generative Adversarial Network (GAN) is used to estimate the joint probability distribution of the outing features. The GAN is then modified to exhibit a Bayesian-network structure (aka a graphical model) and to pre-constrain the model on the type of outing (max elevation, difficulty) and on temperature.

Learning goals:

  • Use GAN to model an unknown distribution
  • Mix GAN and graphical models
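The second goal amounts to factorizing the joint distribution of the outing features along a directed graphical model. As a sketch of the idea (using the three-clique split of Part 2 below), with $c_1$, $c_2$, $c_3$ the three feature groups:

$$p(x_{c_1}, x_{c_2}, x_{c_3}) = p(x_{c_1}) \, p(x_{c_2} \mid x_{c_1}) \, p(x_{c_3} \mid x_{c_1}, x_{c_2})$$

Each factor is then sampled by a dedicated sub-generator.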
In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers, losses, optimizers, metrics, activations
import tensorview as tv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from plotly import express as px, graph_objects as go, subplots as sp
import itertools
from datetime import datetime
In [2]:
area_title = 'haute-savoie'
log_dir="logs/"
In [3]:
style_true = dict(color='cadetblue')
style_generated = dict(color='fuchsia')
style_fooled = dict(color='blue')
style_warm = dict(color='darkorange')
In [4]:
# Demappers: from numerical to ordinal labels (strings)
rating_unmapper = {0: 'awful', 1: 'poor', 2: 'average', 3: 'good', 4: 'excellent'}
ski_rating_unmapper = {**{3*i + j: f'{i+1}.{j+1}' for i, j in itertools.product(range(5), range(3))}, 15: '5.4', 16: '5.5', 17: '5.6'}

Read data and select records and features

The data was already downloaded from the API server of www.camptocamp.org in the notebook DownloadC2cOutings (Jupyter / HTML)

In [5]:
df_outings = pd.read_parquet(f'data/C2C/outings_{area_title}.parquet')
In [6]:
# Features
features_c1 = {'ski_rating_num': 'Ski rating (numerical)', 'elevation_max': 'Elevation max'}
temperature_features = {'TEMPERATURE_MORNING_C': 'Morning temperature', 'temp_morning_7d': 'Morning temperature last 7 days', 'temp_morning_30d': 'Morning temperature last 30 days'}
features_c2 = {**temperature_features, 'day_of_season': 'Day of season'}
features_c3 = {'elevation_up_snow': 'Skis on, way up', 'elevation_down_snow': 'Skis off, way down', 'condition_rating_num': 'Condition rating (numerical)'}
used_cols_dict = {**features_c1, **features_c2, **features_c3}
used_cols = list(used_cols_dict.keys())

# Conditions to select outings:
# - Drop impossible elevation outliers (some values are likely in feet)
# - Keep only outings whose quality is fine, medium or great
condition = (df_outings.elevation_up_snow < 5000) & (df_outings.elevation_down_snow < 5000) & (df_outings.elevation_max < 5000) \
                & (df_outings.elevation_up_snow > 200) & (df_outings.elevation_down_snow > 200) & (df_outings.elevation_max > 200) \
               & ((df_outings.quality == 'fine') | (df_outings.quality == 'medium') | (df_outings.quality == 'great'))

df_sel = df_outings.loc[condition, used_cols]
# Remove rows containing at least 1 na
df_sel = df_sel[(~df_sel.isna().any(axis=1))]
# Shuffle the rows (np.random.permutation returns a plain ndarray)
df_sel_perm = np.random.permutation(df_sel)
len(df_sel)
Out[6]:
6656
In [7]:
len(used_cols)
Out[7]:
9
In [8]:
scaler = StandardScaler()
df_sel_scaled = scaler.fit_transform(df_sel_perm)
In [9]:
df_sel.describe()
Out[9]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num
count 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000 6656.000000
mean 6.382812 2528.794621 -5.308594 -4.789728 -4.721685 7.707482 1464.071514 1419.389573 2.945463
std 3.071074 632.926708 6.698309 5.377412 4.231169 46.914599 604.572651 538.620869 0.792452
min 0.000000 600.000000 -28.000000 -19.285714 -13.533333 -140.000000 415.000000 415.000000 0.000000
25% 4.000000 2116.000000 -10.000000 -8.321429 -7.366667 -25.000000 1128.000000 1128.000000 2.000000
50% 6.000000 2406.000000 -6.000000 -5.428571 -5.466667 7.000000 1296.500000 1280.000000 3.000000
75% 8.000000 2666.000000 -1.000000 -2.000000 -2.733333 39.000000 1453.000000 1450.000000 3.000000
max 16.000000 4825.000000 16.000000 16.285714 15.366667 167.000000 4810.000000 3842.000000 4.000000
In [133]:
sel_corr = df_sel.rename(columns=used_cols_dict).corr()
px.imshow(sel_corr, 
          title='Feature correlations in reference (train) data', height=500)

Model

In [11]:
batch_size = 512
latent_dim = 20
num_features = len(used_cols)
In [12]:
generator = models.Sequential([
    layers.Dense(32, input_dim=latent_dim, name='g_1', activation=activations.relu),
    layers.Dropout(0.3),
    layers.Dense(48, name='g_2', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(48, name='g_3', activation=activations.relu),
    layers.Dense(64, name='g_4', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(num_features, name='g_5')
], name='generator')
generator.compile()
generator.summary()
Model: "generator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
g_1 (Dense)                  (None, 32)                672       
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
g_2 (Dense)                  (None, 48)                1584      
_________________________________________________________________
dropout_1 (Dropout)          (None, 48)                0         
_________________________________________________________________
g_3 (Dense)                  (None, 48)                2352      
_________________________________________________________________
g_4 (Dense)                  (None, 64)                3136      
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
g_5 (Dense)                  (None, 9)                 585       
=================================================================
Total params: 8,329
Trainable params: 8,329
Non-trainable params: 0
_________________________________________________________________
In [13]:
discriminator = models.Sequential([
    layers.Dense(64, input_dim=num_features, name='d_1', activation=activations.relu),
    layers.Dropout(0.3),
    layers.Dense(48, name='d_2', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(48, name='d_3', activation=activations.relu),
    layers.Dense(32, name='d_4', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(1, name='d_5') # linear activation: outputs logits (sigmoid is applied in the loss)
], name='discriminator')
discriminator.compile()
In [14]:
tf.keras.utils.plot_model(generator, show_shapes=True, dpi=64)
Out[14]:
[generator architecture diagram]
Train

In [15]:
epochs = 400
batch_per_epoch = len(df_sel_scaled) // batch_size
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
In [16]:
def generator_loss(disc_generated_output):
    return loss_object(tf.ones_like(disc_generated_output), disc_generated_output)
In [17]:
def discriminator_loss(disc_real_output, disc_generated_output):

    real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output)
    generated_loss = loss_object(tf.zeros_like(disc_generated_output), disc_generated_output)

    return real_loss + generated_loss
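With $D$ outputting logits and $\sigma$ the sigmoid (applied inside BinaryCrossentropy(from_logits=True)), the two cells above implement the standard non-saturating GAN losses:

$$\mathcal{L}_D = -\,\mathbb{E}_{x \sim p_{data}}\!\left[\log \sigma(D(x))\right] - \mathbb{E}_{z}\!\left[\log\left(1 - \sigma(D(G(z)))\right)\right]$$

$$\mathcal{L}_G = -\,\mathbb{E}_{z}\!\left[\log \sigma(D(G(z)))\right]$$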
In [18]:
def get_summary_writer():
    return tf.summary.create_file_writer(log_dir + "fit/" + datetime.now().strftime("%Y%m%d-%H%M%S"))
In [19]:
@tf.function
def train_step(generator, discriminator, 
               generator_optimizer, discriminator_optimizer, 
               generator_latent, batch, 
               epoch, summary_writer):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        
        gen_latent = generator_latent()
        
        gen_output = generator(gen_latent, training=True)

        disc_real_output = discriminator(batch, training=True)
        disc_generated_output = discriminator(gen_output, training=True)

        gen_loss = generator_loss(disc_generated_output)
        disc_loss = discriminator_loss(disc_real_output, disc_generated_output)

        generator_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
        discriminator_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(generator_gradients, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(discriminator_gradients, discriminator.trainable_variables))

    with summary_writer.as_default():
        tf.summary.scalar('gen_loss', gen_loss, step=epoch)
        tf.summary.scalar('disc_loss', disc_loss, step=epoch)
        
    return gen_loss, disc_loss
In [20]:
generator_optimizer = tf.keras.optimizers.Adam(3e-4, beta_1=0.4)
discriminator_optimizer = tf.keras.optimizers.Adam(3e-4, beta_1=0.4)
In [21]:
tv_plot = tv.train.PlotMetrics(wait_num=200, columns=2, iter_num=epochs * batch_per_epoch)
summary = get_summary_writer()

def generator_latent():
    return tf.random.normal((batch_size, latent_dim), 0, 1)

for epoch in range(epochs):

    for b in range(batch_per_epoch):
        train_batch = df_sel_scaled[b * batch_size:(b+1) * batch_size]
        
        g_loss, d_loss = train_step(generator, discriminator, 
                                    generator_optimizer, discriminator_optimizer, 
                                    generator_latent, train_batch, 
                                    epoch, summary)
        # Plot
        tv_plot.update({ 'discriminator_loss': d_loss,# 'discriminator_acc': d_acc,
                        'generator_loss': g_loss, # 'generator_acc': g_acc
                       })
        tv_plot.draw()

    # saving (checkpoint) the model every 20 epochs
    #if (epoch + 1) % 20 == 0:
    #  checkpoint.save(file_prefix = checkpoint_prefix)

Helpers

In [22]:
def print_stats(sets, feature, label, format_float=False):
    """Print mean/std of `feature` for each named dataset (1 decimal place if format_float, else 3 significant digits)."""
    if format_float:
        print(f'Mean/std {label}:', ', '.join([f'for {key}={data[feature].mean():.1f}/{data[feature].std():.1f}' for key, data in sets.items()]))
    else:
        print(f'Mean/std {label}:', ', '.join([f'for {key}={data[feature].mean():.3g}/{data[feature].std():.3g}' for key, data in sets.items()]))

def print_median_stats(sets, feature, label, format_float=False):
    """Print the median of `feature` for each named dataset."""
    if format_float:
        print(f'Median {label}:', ', '.join([f'for {key}={data[feature].median():.1f}' for key, data in sets.items()]))
    else:
        print(f'Median {label}:', ', '.join([f'for {key}={data[feature].median():.3g}' for key, data in sets.items()]))

Test

In [23]:
gen_latent = np.random.normal(0, 1, (20000, latent_dim))
gen_outings = generator.predict(gen_latent)
gen_outings_unscaled = scaler.inverse_transform(gen_outings)
In [24]:
scores = tf.sigmoid(discriminator.predict(gen_outings))
fooled = scores >= 0.5
In [25]:
fooled.numpy().mean()
Out[25]:
0.4951
In [26]:
df_generated = pd.DataFrame(gen_outings_unscaled, columns=used_cols)
df_generated['ski_rating'] = df_generated['ski_rating_num'].round().clip(0, 17).replace(ski_rating_unmapper)
df_generated['condition_rating'] = df_generated['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [27]:
df_generated.describe()
Out[27]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num
count 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000
mean 6.329458 2550.330078 -6.584331 -5.365985 -4.271259 7.749375 1436.505249 1395.551025 3.096871
std 2.427202 496.805237 5.278008 4.127868 3.172476 40.778919 454.142792 472.058655 0.528780
min 1.121215 1801.565552 -22.985956 -18.644091 -11.447632 -74.470627 846.487732 860.585632 0.995582
25% 4.255796 2251.947327 -10.080088 -8.092280 -6.486943 -31.786906 1194.599426 1152.632416 2.747383
50% 6.015798 2375.582764 -6.997122 -6.071299 -4.720405 10.136626 1288.305054 1239.336670 3.155049
75% 8.052196 2641.848206 -3.478484 -3.270709 -2.801459 40.611911 1447.167816 1390.713013 3.478468
max 15.449697 5769.944824 14.766308 11.831455 12.556527 127.992081 5182.363770 5475.750000 4.500382
In [134]:
generated_corr = df_generated.rename(columns=used_cols_dict).corr()
px.imshow(generated_corr, height=500)
In [135]:
corr_diff1 = (generated_corr - sel_corr).abs()
print(f'Mean absolute error on correlations: {corr_diff1.values.mean()}')

px.imshow(corr_diff1, zmax=0.3, color_continuous_scale='viridis',
          title='Absolute correlation differences between train and generated', height=500)
Mean absolute error on correlations: 0.05382476942375972
In [30]:
print_median_stats({'true': df_sel, 'generated': df_generated}, 'elevation_up_snow', 'Skis on, way up', True)
print_median_stats({'true': df_sel, 'generated': df_generated}, 'elevation_down_snow', 'Skis off, way down', True)
print_median_stats({'true': df_sel, 'generated': df_generated}, 'elevation_max', 'Elevation max', True)
fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Elevation max'], x_title='Elevations [m]')
bins = {'start': 0, 'end': 4800, 'size': 100}
fig.add_trace(go.Histogram(x=df_sel['elevation_up_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=1)
fig.add_trace(go.Histogram(x=df_generated['elevation_up_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=1)
fig.add_trace(go.Histogram(x=df_sel['elevation_down_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=2)
fig.add_trace(go.Histogram(x=df_generated['elevation_down_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=2)
fig.add_trace(go.Histogram(x=df_sel['elevation_max'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=3)
fig.add_trace(go.Histogram(x=df_generated['elevation_max'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=3)
Median Skis on, way up: for true=1296.5, for generated=1288.3
Median Skis off, way down: for true=1280.0, for generated=1239.3
Median Elevation max: for true=2406.0, for generated=2375.6
In [31]:
px.scatter_matrix(df_generated[['elevation_up_snow', 'elevation_down_snow', 'elevation_max']], opacity=0.1, title='Generated elevations correlations', labels=used_cols_dict)
In [32]:
print_stats({'true': df_sel, 'generated': df_generated}, 'day_of_season', 'day of season')

fig = go.Figure(layout=dict(title='Day of season (mid season = Feb 15th)', bargroupgap=0.01, 
                            xaxis=dict(title='Day relative to Feb 15th'), yaxis=dict(title='%')))
fig.add_histogram(x=df_sel.day_of_season, name='true', nbinsx=52, histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated.day_of_season, name='generated', nbinsx=52, histnorm='percent', marker=style_generated)
Mean/std day of season: for true=7.71/46.9, for generated=7.75/40.8

Ski ratings

In [33]:
print_stats({'true': df_sel, 'generated': df_generated}, 'ski_rating_num', 'ski rating (numerical)')
fig = go.Figure(layout=dict(title='Ski rating', bargroupgap=0.01, 
                            xaxis=dict(title='1.1 (easy) to 5.6 (extreme)', categoryorder='array', categoryarray=list(ski_rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].ski_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated.ski_rating, name='generated', histnorm='percent', marker=style_generated)
Mean/std ski rating (numerical): for true=6.38/3.07, for generated=6.33/2.43

Condition ratings

In [34]:
print_stats({'true': df_sel, 'generated': df_generated}, 'condition_rating_num', 'condition rating (numerical)')
fig = go.Figure(layout=dict(title='Condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].condition_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated.condition_rating, name='generated', histnorm='percent', marker=style_generated)
Mean/std condition rating (numerical): for true=2.95/0.792, for generated=3.1/0.529

Temperatures

In [35]:
print(f'Mean/std morning temperature, for true={df_sel.TEMPERATURE_MORNING_C.mean():.3g}/{df_sel.TEMPERATURE_MORNING_C.std():.3g}, for generated={df_generated.TEMPERATURE_MORNING_C.mean():.3g}/{df_generated.TEMPERATURE_MORNING_C.std():.3g}')
print(f'Mean/std last 7 day morning temperature, for true={df_sel.temp_morning_7d.mean():.3g}/{df_sel.temp_morning_7d.std():.3g}, for generated={df_generated.temp_morning_7d.mean():.3g}/{df_generated.temp_morning_7d.std():.3g}')
print(f'Mean/std last 30 day morning temperature, for true={df_sel.temp_morning_30d.mean():.3g}/{df_sel.temp_morning_30d.std():.3g}, for generated={df_generated.temp_morning_30d.mean():.3g}/{df_generated.temp_morning_30d.std():.3g}')

fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['today', 'last 7 day', 'last 30 day'],
                      x_title='Morning temperature [°C]')
# fig = go.Figure(layout=dict(title='Trou de la Mouche, morning temperature [°C]', bargroupgap=0.1, yaxis=dict(title='%')))
bins = {'start': -35, 'end': 30, 'size': 1}
fig.add_histogram(x=df_sel.TEMPERATURE_MORNING_C, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=1)
fig.add_histogram(x=df_generated.TEMPERATURE_MORNING_C, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_sel.temp_morning_7d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=2)
fig.add_histogram(x=df_generated.temp_morning_7d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_sel.temp_morning_30d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=3)
fig.add_histogram(x=df_generated.temp_morning_30d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=3)
fig.update_yaxes(title='%')
Mean/std morning temperature, for true=-5.31/6.7, for generated=-6.58/5.28
Mean/std last 7 day morning temperature, for true=-4.79/5.38, for generated=-5.37/4.13
Mean/std last 30 day morning temperature, for true=-4.72/4.23, for generated=-4.27/3.17
In [36]:
px.scatter_matrix(df_generated[['TEMPERATURE_MORNING_C', 'temp_morning_7d', 'temp_morning_30d', 'elevation_up_snow', 'elevation_down_snow']], opacity=0.1, 
                  title='Temperature-elevation correlations', height=600)

Part 2: Adding independence constraints on the generator

Clique 1 features describe the type of outing and have no parents in the graph: maximum elevation and ski rating.

Clique 2 features (temperatures, day of season) depend on the clique 1 features.

The remaining features (clique 3: snow elevations, condition rating) depend on the quantity and quality of the snow and are therefore made dependent on the clique 1 and clique 2 features, including the day within the season (sunlight depends on the day of the year).
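Equivalently, generation is an ancestral pass through the graph, one sub-generator per clique, with $z_1, z_2, z_3$ independent standard-normal latent vectors:

$$x_{c_1} = G_1(z_1), \qquad x_{c_2} = G_2(x_{c_1}, z_2), \qquad x_{c_3} = G_3(x_{c_1}, x_{c_2}, z_3)$$

This is exactly the wiring of gen_c1, gen_c2 and gen_c3 below.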

In [83]:
num_features_c1 = len(features_c1)
num_features_c2 = len(features_c2)
num_features_c3 = num_features - num_features_c1 - num_features_c2
num_latent2 = 10
gen_learning_rate2 = 0.002
In [84]:
def make_generator(n_feat: int, name: str, prefix: str, num_latent: int, num_element_base: int):
    return models.Sequential([
        layers.Dense(num_element_base * 2, input_dim=num_latent, name=f'{prefix}_1', activation=activations.relu),
        layers.Dropout(0.3),
        layers.Dense(num_element_base * 4, name=f'{prefix}_2', activation=activations.relu),
        layers.Dropout(0.2),
        layers.Dense(num_element_base * 6, name=f'{prefix}_4', activation=activations.relu),
        layers.Dropout(0.2),
        layers.Dense(n_feat, name=f'{prefix}_5')
    ], name=name)

gen_c1 = make_generator(num_features_c1, 'gen_c1', 'g1', num_latent2, 12)
gen_c2 = make_generator(num_features_c2, 'gen_c2', 'g2', num_latent2, 12)
gen_c3 = make_generator(num_features_c3, 'gen_c3', 'g3', num_latent2, 16)
In [85]:
# Clique #1: outing summit elevation and ski rating
num_latent_c1 = num_latent2
input_c1 = layers.Input(num_latent_c1, name='latent_c1')
c1 = gen_c1(input_c1)
# Clique #2: temperatures and day of season
num_latent_c2 = num_latent2 - num_features_c1
input_c2 = layers.Input(num_latent_c2, name='latent_c2')
input_c2b = layers.concatenate([c1, input_c2])
c2 = gen_c2(input_c2b)
# Clique #3: all other dependent features
num_latent_c3 = num_latent2 - num_features_c1 - num_features_c2
input_c3 = layers.Input(num_latent_c3, name='latent_c3')
input_c3b = layers.concatenate([c1, c2, input_c3])
c3 = gen_c3(input_c3b)
# Output of generator
gen2_output = layers.concatenate([c1, c2, c3], name='generator_output')
generator2 = models.Model([input_c1, input_c2, input_c3], gen2_output, name='generator2')
generator2.compile()
# generator2.summary()
In [86]:
discriminator2 = models.Sequential([
    layers.Dense(64, input_dim=num_features, name='d_1', activation=activations.relu),
    layers.Dropout(0.3),
    layers.Dense(48, name='d_2', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(48, name='d_3', activation=activations.relu),
    layers.Dense(32, name='d_4', activation=activations.relu),
    layers.Dropout(0.2),
    layers.Dense(1, name='d_5') # linear activation: outputs logits (sigmoid is applied in the loss)
], name='discriminator2')
discriminator2.compile()
In [87]:
tf.keras.utils.plot_model(generator2, show_shapes=True, dpi=64)
Out[87]:
[generator2 architecture diagram]
Train 2

In [88]:
# train_step2 is identical to train_step defined in part 1; reuse it rather than duplicating the code
train_step2 = train_step
In [89]:
generator_optimizer2 = tf.keras.optimizers.Adam(4e-4, beta_1=0.3)
discriminator_optimizer2 = tf.keras.optimizers.Adam(4e-4, beta_1=0.3)
In [90]:
tv_plot = tv.train.PlotMetrics(wait_num=200, columns=2, iter_num=epochs * batch_per_epoch)
summary2 = get_summary_writer()

def generator_latent2():
    return [tf.random.normal((batch_size, num_latent_c1), 0, 1), 
            tf.random.normal((batch_size, num_latent_c2), 0, 1), 
            tf.random.normal((batch_size, num_latent_c3), 0, 1)]

for epoch in range(epochs):

    for b in range(batch_per_epoch):
        train_batch = df_sel_scaled[b * batch_size:(b+1) * batch_size]
        
        g_loss, d_loss = train_step2(generator2, discriminator2, 
                                    generator_optimizer2, discriminator_optimizer2,
                                    generator_latent2, train_batch, epoch, summary2)
        # Plot
        tv_plot.update({ 'discriminator_loss': d_loss,# 'discriminator_acc': d_acc,
                        'generator_loss': g_loss, # 'generator_acc': g_acc
                       })
        tv_plot.draw()

    # saving (checkpoint) the model every 20 epochs
    #if (epoch + 1) % 20 == 0:
    #  checkpoint.save(file_prefix = checkpoint_prefix)

Test 2

In [91]:
num_test = 20000
gen_latent2 = [np.random.normal(0, 1, (num_test, num_latent_c1)), 
               np.random.normal(0, 1, (num_test, num_latent_c2)), 
               np.random.normal(0, 1, (num_test, num_latent_c3))]
gen_outings2 = generator2.predict(gen_latent2)
gen_outings2_unscaled = scaler.inverse_transform(gen_outings2)
In [92]:
scores2 = tf.sigmoid(discriminator2.predict(gen_outings2))
fooled2 = scores2 >= 0.5
In [93]:
scores2.numpy().mean(), fooled2.numpy().mean()
Out[93]:
(0.48947456, 0.2849)
In [94]:
df_generated2 = pd.DataFrame(gen_outings2_unscaled, columns=used_cols)
df_generated2['ski_rating'] = df_generated2['ski_rating_num'].round().clip(0, 17).replace(ski_rating_unmapper)
df_generated2['condition_rating'] = df_generated2['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [95]:
df_generated2_fooled = df_generated2.loc[fooled2.numpy()]
In [96]:
df_generated2.describe()
Out[96]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num
count 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000 20000.000000
mean 6.018500 2533.811768 -3.672743 -5.175060 -4.296206 5.517342 1509.276367 1487.295410 2.925581
std 2.547170 531.023438 5.188914 4.793602 3.286110 42.345776 445.931915 395.176849 0.609582
min 1.367608 1721.672119 -19.468065 -19.969391 -12.860351 -89.718338 1144.930908 792.404297 0.290894
25% 3.848887 2191.795166 -7.513802 -8.587943 -6.721653 -33.614052 1273.189301 1282.224365 2.518061
50% 5.342710 2320.483643 -3.899468 -5.354350 -4.595440 5.211840 1331.300781 1339.589844 3.012113
75% 8.251436 2742.826416 0.171668 -2.347818 -2.309042 43.057037 1498.687622 1488.140228 3.384658
max 16.077147 5386.062012 14.215258 12.219595 9.130256 121.562439 4407.071289 4530.750000 4.526022
In [136]:
generated_corr2 = df_generated2.rename(columns=used_cols_dict).corr()
px.imshow(generated_corr2, height=500)
In [137]:
corr_diff_2 = (generated_corr2 - sel_corr).abs()
print(f'Mean absolute error on correlations: {corr_diff_2.values.mean()}')

px.imshow(corr_diff_2, zmax=0.3, color_continuous_scale='viridis',
          title='Absolute correlation differences between train and generated', height=500)
Mean absolute error on correlations: 0.059734728680878695

Elevations

In [99]:
print_stats({'true': df_sel, 'generated': df_generated2}, 'elevation_up_snow', 'Skis on, way up', True)
print_stats({'true': df_sel, 'generated': df_generated2}, 'elevation_down_snow', 'Skis off, way down', True)
print_stats({'true': df_sel, 'generated': df_generated2}, 'elevation_max', 'Elevation max', True)
fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Elevation max'], x_title='Elevation [m]')
bins = {'start': 900, 'end': 4000, 'size': 100}
fig.add_trace(go.Histogram(x=df_sel['elevation_up_snow'], xbins=bins, name='real', histnorm='percent', marker=style_true), row=1, col=1)
fig.add_trace(go.Histogram(x=df_generated2['elevation_up_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=1)
# fig.add_trace(go.Histogram(x=df_generated2_fooled['elevation_up_snow'], xbins=bins, name='fooled', histnorm='percent', marker=style_fooled), row=1, col=1)
fig.add_trace(go.Histogram(x=df_sel['elevation_down_snow'], xbins=bins, name='real', histnorm='percent', marker=style_true), row=1, col=2)
fig.add_trace(go.Histogram(x=df_generated2['elevation_down_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=2)
# fig.add_trace(go.Histogram(x=df_generated2_fooled['elevation_down_snow'], xbins=bins, name='fooled', histnorm='percent', marker=style_fooled), row=1, col=2)
fig.add_trace(go.Histogram(x=df_sel['elevation_max'], xbins=bins, name='real', histnorm='percent', marker=style_true), row=1, col=3)
fig.add_trace(go.Histogram(x=df_generated2['elevation_max'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=3)
# fig.add_trace(go.Histogram(x=df_generated2_fooled['elevation_max'], xbins=bins, name='fooled', histnorm='percent', marker=style_fooled), row=1, col=3)
Mean/std Skis on, way up: for true=1464.1/604.6, for generated=1509.3/445.9
Mean/std Skis off, way down: for true=1419.4/538.6, for generated=1487.3/395.2
Mean/std Elevation max: for true=2528.8/632.9, for generated=2533.8/531.0

Day of season

In [100]:
print_stats({'true': df_sel, 'generated': df_generated2}, 'day_of_season', 'Day of season')

fig = go.Figure(layout=dict(title='Day of season (mid season = Feb 15th)', bargroupgap=0.01, 
                            xaxis=dict(title='Day relative to Feb 15th'), yaxis=dict(title='%')))
fig.add_histogram(x=df_sel.day_of_season, name='true', nbinsx=52, histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated2.day_of_season, name='generated', nbinsx=52, histnorm='percent', marker=style_generated)
# fig.add_histogram(x=df_generated2_fooled.day_of_season, name='fooled', nbinsx=52, histnorm='percent', marker=style_fooled)
Mean/std Day of season: for true=7.71/46.9, for generated=5.52/42.3

Ski ratings

In [101]:
print(f'Mean/std ski rating num, for true={df_sel.ski_rating_num.mean():.3g}/{df_sel.ski_rating_num.std():.3g}, for generated={df_generated2.ski_rating_num.mean():.3g}/{df_generated2.ski_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Ski rating', bargroupgap=0.01, 
                            xaxis=dict(title='1.1 (easy) to 5.6 (extreme)', categoryorder='array', categoryarray=list(ski_rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].ski_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated2.ski_rating, name='generated', histnorm='percent', marker=style_generated)
# fig.add_histogram(x=df_generated2_fooled.ski_rating, name='fooled', histnorm='percent', marker=style_fooled)
Mean/std ski rating num, for true=6.38/3.07, for generated=6.02/2.55

Condition ratings

In [102]:
print(f'Mean/std condition rating num, for true={df_sel.condition_rating_num.mean():.3g}/{df_sel.condition_rating_num.std():.3g}, for generated={df_generated2.condition_rating_num.mean():.3g}/{df_generated2.condition_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_outings[condition].condition_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_generated2.condition_rating, name='generated', histnorm='percent', marker=style_generated)
Mean/std condition rating num, for true=2.95/0.792, for generated=2.93/0.61

Temperatures

In [103]:
print(f'Mean/std morning temperature, for true={df_sel.TEMPERATURE_MORNING_C.mean():.3g}/{df_sel.TEMPERATURE_MORNING_C.std():.3g}, for generated={df_generated2.TEMPERATURE_MORNING_C.mean():.3g}/{df_generated2.TEMPERATURE_MORNING_C.std():.3g}')
print(f'Mean/std last 7 day morning temperature, for true={df_sel.temp_morning_7d.mean():.3g}/{df_sel.temp_morning_7d.std():.3g}, for generated={df_generated2.temp_morning_7d.mean():.3g}/{df_generated2.temp_morning_7d.std():.3g}')
print(f'Mean/std last 30 day morning temperature, for true={df_sel.temp_morning_30d.mean():.3g}/{df_sel.temp_morning_30d.std():.3g}, for generated={df_generated2.temp_morning_30d.mean():.3g}/{df_generated2.temp_morning_30d.std():.3g}')

fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['today', 'last 7 day', 'last 30 day'],
                      x_title='Morning temperature [°C]')
# fig = go.Figure(layout=dict(title='Trou de la Mouche, morning temperature [°C]', bargroupgap=0.1, yaxis=dict(title='%')))
bins = {'start': -35, 'end': 30, 'size': 1}
fig.add_histogram(x=df_sel.TEMPERATURE_MORNING_C, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=1)
fig.add_histogram(x=df_generated2.TEMPERATURE_MORNING_C, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_sel.temp_morning_7d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=2)
fig.add_histogram(x=df_generated2.temp_morning_7d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_sel.temp_morning_30d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=3)
fig.add_histogram(x=df_generated2.temp_morning_30d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=3)
fig.update_yaxes(title='%')
Mean/std morning temperature, for true=-5.31/6.7, for generated=-3.67/5.19
Mean/std last 7 day morning temperature, for true=-4.79/5.38, for generated=-5.18/4.79
Mean/std last 30 day morning temperature, for true=-4.72/4.23, for generated=-4.3/3.29
In [104]:
px.scatter_matrix(df_generated2[['TEMPERATURE_MORNING_C', 'temp_morning_7d', 'temp_morning_30d', 'elevation_up_snow', 'elevation_down_snow']], opacity=0.1, 
                  title='Temperature-elevation correlations for generated data', height=600)

Test on a given outing

Trou de la Mouche (508 ski outings): https://www.camptocamp.org/waypoints/37312/fr/trou-de-la-mouche

Trou de la Mouche, Paccaly => Grand Crêt (100 ski outings): https://www.camptocamp.org/outings?r=46396&act=skitouring
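The cells below fix the clique 1 values (ski rating and maximum elevation of the route) and let the conditional sub-generators sample the remaining features. A minimal sketch of the same procedure as a reusable helper (hypothetical; it relies on the scale_single function defined just below):

def sample_conditioned(ski_rating, elevation_max, n=5000):
    # Fix clique 1 to the scaled outing-type values, repeated for n samples
    c1 = np.tile([scale_single(scaler, used_cols.index('ski_rating_num'), ski_rating),
                  scale_single(scaler, used_cols.index('elevation_max'), elevation_max)], (n, 1))
    # Ancestral sampling: clique 2 given clique 1, then clique 3 given both
    c2 = gen_c2.predict(np.c_[c1, np.random.normal(0, 1, [n, num_latent_c2])])
    c3 = gen_c3.predict(np.c_[c1, c2, np.random.normal(0, 1, [n, num_latent_c3])])
    return pd.DataFrame(scaler.inverse_transform(np.c_[c1, c2, c3]), columns=used_cols)

For instance, sample_conditioned(6, 2453) would reproduce the df_tdlm samples built step by step below.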

In [105]:
def scale_single(scaler, index, data):
    return (data - scaler.mean_[index]) / scaler.scale_[index]

def unscale_single(scaler, index, data):
    return (data * scaler.scale_[index]) + scaler.mean_[index]

def shift_scaled(scaler, index, data, offset):
    """ Shift all samples with a constant offset on a scaled data """
    unscaled = unscale_single(scaler, index, data)
    return scale_single(scaler, index, unscaled + offset)
In [106]:
route_title = 'trou-de-la-mouche'
df_route_true = pd.read_parquet(f'data/C2C/outings_{route_title}.parquet')
In [107]:
tdlm_label = 'Trou de la Mouche'
tdlm_elevation_max = 2453
tdlm_elevation_max_scaled = scale_single(scaler, used_cols.index('elevation_max'), tdlm_elevation_max)
tdlm_ski_rating = 6
tdlm_ski_rating_scaled = scale_single(scaler, used_cols.index('ski_rating_num'), tdlm_ski_rating)
tdlm_num = 5000
In [108]:
tdlm_c1 = np.ones([tdlm_num, 1]).dot([[tdlm_ski_rating_scaled, tdlm_elevation_max_scaled]])
In [109]:
tdlm_c2 = gen_c2.predict(np.c_[tdlm_c1, 
                               np.random.normal(0, 1, [tdlm_num, num_latent_c2])])
In [110]:
tdlm_c3 = gen_c3.predict(np.c_[tdlm_c1, 
                               tdlm_c2, 
                               np.random.normal(0, 1, [tdlm_num, num_latent_c3])])
In [111]:
df_tdlm = pd.DataFrame(scaler.inverse_transform(np.c_[tdlm_c1, 
                                                      tdlm_c2, 
                                                      tdlm_c3]), 
                       columns=used_cols)

#df_tdlm['ski_rating'] = df_tdlm['ski_rating_num'].round().clip(0, 17).replace(ski_rating_unmapper)
df_tdlm['condition_rating'] = df_tdlm['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)
In [112]:
tdlm_scores = tf.sigmoid(discriminator2.predict(np.c_[tdlm_c1, 
                                                      tdlm_c2, 
                                                      tdlm_c3]))
tdlm_fooled = tdlm_scores >= 0.5
tdlm_scores.numpy().mean(), tdlm_fooled.numpy().mean()
Out[112]:
(0.49593443, 0.3682)
In [113]:
df_tdlm_fooled = df_tdlm[tdlm_fooled.numpy()]
len(df_tdlm_fooled)
Out[113]:
1841
In [114]:
df_tdlm.head()
Out[114]:
ski_rating_num elevation_max TEMPERATURE_MORNING_C temp_morning_7d temp_morning_30d day_of_season elevation_up_snow elevation_down_snow condition_rating_num condition_rating
0 6.0 2453.0 -11.703131 -10.837787 -8.519437 -12.977037 1217.818785 1223.187886 2.689736 good
1 6.0 2453.0 2.970372 -0.236657 0.043440 55.340934 1885.975537 1894.001210 4.246062 excellent
2 6.0 2453.0 -3.678060 -5.534697 -5.347095 -33.149237 1339.128366 1345.718135 1.681390 average
3 6.0 2453.0 1.585559 -0.013359 -1.088601 52.007254 1362.770276 1382.598716 3.744000 excellent
4 6.0 2453.0 2.066717 0.135266 -0.441926 52.940486 1401.760691 1414.567226 2.695251 good

Elevations

In [115]:
fig = sp.make_subplots(rows=1, cols=2, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down'],
                      x_title='Trou de la Mouche, elevations [m]')
bins = {'start': 0, 'end': 2000, 'size': 25}
fig.add_trace(go.Histogram(x=df_route_true['elevation_up_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true), row=1, col=1)
fig.add_trace(go.Histogram(x=df_tdlm['elevation_up_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated), row=1, col=1)
# fig.add_trace(go.Histogram(x=df_tdlm_fooled['elevation_up_snow'], xbins=bins, name='fooled', histnorm='percent', cumulative_enabled=True, marker=style_fooled), row=1, col=1)
fig.add_trace(go.Histogram(x=df_route_true['elevation_down_snow'], xbins=bins, name='true', histnorm='percent', marker=style_true, showlegend=False), row=1, col=2)
fig.add_trace(go.Histogram(x=df_tdlm['elevation_down_snow'], xbins=bins, name='generated', histnorm='percent', marker=style_generated, showlegend=False), row=1, col=2)
# fig.add_trace(go.Histogram(x=df_tdlm_fooled['elevation_down_snow'], xbins=bins, name='fooled', histnorm='percent', cumulative_enabled=True, marker=style_fooled), row=1, col=2)

Day of season

In [116]:
print(f'Mean/std day of season, for true={df_route_true.day_of_season.mean():.3g}/{df_route_true.day_of_season.std():.3g}, for generated={df_generated.day_of_season.mean():.3g}/{df_generated.day_of_season.std():.3g}')

fig = go.Figure(layout=dict(title='Trou de la Mouche, day of season (mid season = Feb 15th)', bargroupgap=0.01, 
                            xaxis=dict(title='Day relative to Feb 15th'), yaxis=dict(title='%')))
fig.add_histogram(x=df_route_true.day_of_season, name='true', nbinsx=52, histnorm='percent', marker=style_true)
fig.add_histogram(x=df_tdlm.day_of_season, name='generated', nbinsx=52, histnorm='percent', marker=style_generated)
# fig.add_histogram(x=df_tdlm_fooled.day_of_season, name='fooled', nbinsx=52, histnorm='percent', marker=style_fooled)
Mean/std day of season, for true=14.2/40.6, for generated=7.75/40.8

Condition ratings

In [126]:
print(f'Mean/std condition rating num, for true={df_route_true.condition_rating_num.mean():.3g}/{df_route_true.condition_rating_num.std():.3g}, for generated={df_tdlm.condition_rating_num.mean():.3g}/{df_tdlm.condition_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Trou de la Mouche, condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_route_true.condition_rating, name='true', histnorm='percent', marker=style_true)
fig.add_histogram(x=df_tdlm.condition_rating, name='generated', histnorm='percent', marker=style_generated)
# fig.add_histogram(x=df_tdlm_fooled.condition_rating, name='fooled', histnorm='percent', marker=style_fooled)
Mean/std condition rating num, for true=2.85/0.825, for generated=2.81/0.648

Temperatures Trou de la Mouche

In [118]:
print(f'Mean/std morning temperature, for true={df_route_true.TEMPERATURE_MORNING_C.mean():.3g}/{df_route_true.TEMPERATURE_MORNING_C.std():.3g}, for generated={df_tdlm.TEMPERATURE_MORNING_C.mean():.3g}/{df_tdlm.TEMPERATURE_MORNING_C.std():.3g}')
print(f'Mean/std last 7 day morning temperature, for true={df_route_true.temp_morning_7d.mean():.3g}/{df_route_true.temp_morning_7d.std():.3g}, for generated={df_tdlm.temp_morning_7d.mean():.3g}/{df_tdlm.temp_morning_7d.std():.3g}')
print(f'Mean/std last 30 day morning temperature, for true={df_route_true.temp_morning_30d.mean():.3g}/{df_route_true.temp_morning_30d.std():.3g}, for generated={df_tdlm.temp_morning_30d.mean():.3g}/{df_tdlm.temp_morning_30d.std():.3g}')

fig = sp.make_subplots(rows=1, cols=3, shared_yaxes=True, subplot_titles=['today', 'last 7 day', 'last 30 day'],
                      x_title='Trou de la Mouche, morning temperature [°C]')
# fig = go.Figure(layout=dict(title='Trou de la Mouche, morning temperature [°C]', bargroupgap=0.1, yaxis=dict(title='%')))
bins = {'start': -35, 'end': 30, 'size': 1}
fig.add_histogram(x=df_route_true.TEMPERATURE_MORNING_C, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=1)
fig.add_histogram(x=df_tdlm.TEMPERATURE_MORNING_C, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=1)
# fig.add_histogram(x=df_tdlm_fooled.TEMPERATURE_MORNING_C, name='fooled', histnorm='percent', xbins=bins, marker=style_fooled, row=1, col=1)
fig.add_histogram(x=df_route_true.temp_morning_7d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=2)
fig.add_histogram(x=df_tdlm.temp_morning_7d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=2)
# fig.add_histogram(x=df_tdlm_fooled.temp_morning_7d, name='fooled', histnorm='percent', xbins=bins, marker=style_fooled, row=1, col=2)
fig.add_histogram(x=df_route_true.temp_morning_30d, name='true', histnorm='percent', xbins=bins, marker=style_true, row=1, col=3)
fig.add_histogram(x=df_tdlm.temp_morning_30d, name='generated', histnorm='percent', xbins=bins, marker=style_generated, row=1, col=3)
# fig.add_histogram(x=df_tdlm_fooled.temp_morning_30d, name='fooled', histnorm='percent', xbins=bins, marker=style_fooled, row=1, col=3)
fig.update_yaxes(title='%')
Mean/std morning temperature, for true=-3.73/5.45, for generated=-4.5/5.04
Mean/std last 7 day morning temperature, for true=-4.17/4.87, for generated=-6.16/4.47
Mean/std last 30 day morning temperature, for true=-4.92/3.56, for generated=-5.12/3.14
In [119]:
px.scatter_matrix(df_tdlm[['TEMPERATURE_MORNING_C', 'temp_morning_7d', 'temp_morning_30d', 'elevation_up_snow', 'elevation_down_snow']], opacity=0.1, 
                  title='Trou de la Mouche, temperature-elevation correlations of generated data', height=600)

Global warming impact

All temperature features are raised by a constant offset, swept from 1 to 6 °C in the cells below.

Warming in the Alps has been observed to be roughly double the global average.
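The shift is applied in standardized space with the shift_scaled helper defined earlier. For a feature standardized with mean $\mu$ and scale $\sigma$, shifting the raw value by $\Delta$ is equivalent to adding $\Delta/\sigma$ to the scaled value:

$$x' = \frac{(\sigma x + \mu) + \Delta - \mu}{\sigma} = x + \frac{\Delta}{\sigma}$$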

In [148]:
delta_temp_range = [1, 2, 3, 3.5, 4, 6]
tdlm_warm_series = {}
tdlm_warm_medians = pd.DataFrame(columns=['delta_temperature', 
                                          'elevation_up_median', 
                                          'elevation_down_median'])
tdlm_warm_medians = tdlm_warm_medians.append({'delta_temperature': 0, 
                             'elevation_up_median': df_tdlm['elevation_up_snow'].median(),
                             'elevation_down_median': df_tdlm['elevation_down_snow'].median(),
                            'morning_temperature': df_tdlm['TEMPERATURE_MORNING_C'].median()}, 
                             ignore_index=True)
In [149]:
for delta_temp in delta_temp_range:
    c2_warm = [*[shift_scaled(scaler, used_cols.index(feature), tdlm_c2[:,i], delta_temp) for i, feature in enumerate(temperature_features.keys())], tdlm_c2[:,-1]]
    tdlm_c2_warm = pd.DataFrame(np.array(c2_warm).T, columns=features_c2)

    tdlm_c3_warm = gen_c3.predict(np.c_[tdlm_c1, 
                                        tdlm_c2_warm,
                                        np.random.normal(0, 1, [tdlm_num, num_latent_c3])])

    df_warm = pd.DataFrame(scaler.inverse_transform(np.c_[tdlm_c1, 
                                                          tdlm_c2_warm,
                                                          tdlm_c3_warm]), 
                           columns=used_cols)

    df_warm['condition_rating'] = df_warm['condition_rating_num'].round().clip(0, 4).replace(rating_unmapper)

    tdlm_warm_medians = tdlm_warm_medians.append({'delta_temperature': delta_temp, 
                             'elevation_up_median': df_warm['elevation_up_snow'].median(),
                             'elevation_down_median': df_warm['elevation_down_snow'].median(),
                             'morning_temperature': df_warm['TEMPERATURE_MORNING_C'].median()}, 
                             ignore_index=True)
    # tdlm_scores_warm = tf.sigmoid(discriminator2.predict(np.c_[tdlm_c1, 
    #                                                           tdlm_c2_warm, 
    #                                                           tdlm_c3_warm]))
    #tdlm_fooled_warm = tdlm_scores_warm >= 0.5
    #tdlm_scores_warm.numpy().mean(), tdlm_fooled_warm.numpy().mean()
    tdlm_warm_series.update({delta_temp: df_warm})
In [150]:
tdlm_warm_medians.set_index('delta_temperature', inplace=True)
In [151]:
px.line(tdlm_warm_medians[['elevation_up_median', 'elevation_down_median']].rename(columns={'elevation_up_median': 'Skis on',
               'elevation_down_median': 'Skis off'}), 
        labels={'delta_temperature': 'Temperature increase compared to 2010s [°C]', 'value':'[m]'},
       title='Median elevation for generated outing reports')

Elevations with global warming

In [152]:
delta_temp = 3.5
df_tdlm_warm = tdlm_warm_series[delta_temp]
In [153]:
df_tdlm_warm[['elevation_up_snow', 'elevation_down_snow']].quantile([0.50, 0.60, 0.70, 0.80])
Out[153]:
elevation_up_snow elevation_down_snow
0.5 1455.788946 1454.811904
0.6 1505.276276 1504.275263
0.7 1556.602639 1555.078354
0.8 1619.889539 1617.405797
In [154]:
print(f"Skis on on the way up median: now = {df_tdlm['elevation_up_snow'].median():.1f}, with global warming of {delta_temp}°C = {df_tdlm_warm['elevation_up_snow'].median():.1f}")
print(f"Skis on on the way up median: now = {df_tdlm['elevation_down_snow'].median():.1f}, with global warming of {delta_temp}°C = {df_tdlm_warm['elevation_down_snow'].median():.1f}")
fig = sp.make_subplots(rows=1, cols=2, shared_yaxes=True, subplot_titles=['Skis on, way up', 'Skis off, way down', 'Max'],
                      x_title='Trou de la Mouche, generated elevations [m] as cumulative histograms')
bins = {'start': 900, 'end': 2000, 'size': 25}
fig.add_histogram(x=df_tdlm['elevation_up_snow'], xbins=bins, name='now', histnorm='percent', cumulative_enabled=True, marker=style_generated, row=1, col=1)
fig.add_histogram(x=df_tdlm_warm['elevation_up_snow'], xbins=bins, name='with warming', cumulative_enabled=True, histnorm='percent', marker=style_warm, row=1, col=1)
fig.add_histogram(x=df_tdlm['elevation_down_snow'], xbins=bins, name='now', histnorm='percent', cumulative_enabled=True, marker=style_generated, row=1, col=2)
fig.add_histogram(x=df_tdlm_warm['elevation_down_snow'], xbins=bins, name='with warming', histnorm='percent', cumulative_enabled=True, marker=style_warm, row=1, col=2)
Median skis-on elevation (way up): now = 1343.7, with global warming of 3.5°C = 1455.8
Median skis-off elevation (way down): now = 1348.3, with global warming of 3.5°C = 1454.8

Condition ratings with warming

In [125]:
print(f'Mean/std condition rating, now={df_tdlm.condition_rating_num.mean():.3g}/{df_tdlm.condition_rating_num.std():.3g}, with warming={df_tdlm_warm.condition_rating_num.mean():.3g}/{df_tdlm_warm.condition_rating_num.std():.3g}')
fig = go.Figure(layout=dict(title='Trou de la Mouche, condition rating', bargroupgap=0.01, 
                            xaxis=dict(categoryorder='array', categoryarray=list(rating_unmapper.values()), type="category"), yaxis=dict(title='%')))
fig.add_histogram(x=df_tdlm.condition_rating, name='now', histnorm='percent', marker=style_generated)
fig.add_histogram(x=df_tdlm_warm.condition_rating, name='with warming', histnorm='percent', marker=style_warm)
Mean/std condition rating, now=2.81/0.648, with warming=2.81/0.664