MarkusThill / bioma-tcn-ae

Minimal Working Example of a (baseline) Temporal Convolutional Autoencoder (TCN-AE) for Anomaly Detection in Time Series

Question about where "Feature map reduction" should be added #3


ZZY18 commented 9 months ago

Regarding the feature map reduction mentioned in the article [1]: I could not find it in your code or in keras_tcn, so I am unsure where this layer should be added.

Specifically, should the feature map reduction be applied before or after the residual connection in the TCN block, i.e.:

out = self.net(x)  # net is the TCN block
res = x if self.downsample is None else self.downsample(x)
return self.relu(out + res), out

Should I apply the reduction to 'out' or to 'self.relu(out + res)'? Looking forward to your response!

[1] Thill, M., Konen, W., Wang, H., et al.: Temporal convolutional autoencoder for unsupervised anomaly detection in time series. Applied Soft Computing, 2021, 112: 107751.

MarkusThill commented 9 months ago

The model in this repo is a baseline model, which does not have the Feature Map Reduction layers that you mentioned. Your model should look more like the following (the code below should run in Google Colab and give you an idea, but it is far from being polished, and a few more detailed aspects from the paper are missing):

# %%
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

def build_model(shape_X, args, verbose=1):
    sampling_factor = args['latent_sample_rate']
    i = tf.keras.Input(batch_shape=(None, shape_X[1], shape_X[2]))  

    e = i
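    # Encoder: a stack of dilated Conv1D layers; each layer additionally feeds
    # a 1x1 convolution (e1) whose output is kept as a skip connection.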
    dil_layers = list()
    for k in args["dilations"]:
        e = tf.keras.layers.Conv1D(filters=args['nb_filters'], 
                                   kernel_size=args['kernel_size'], 
                                   activation="relu",
                                   padding=args['padding'], 
                                   dilation_rate=k,
                                   kernel_initializer=args['conv_kernel_init'],
                                   bias_initializer=args['conv_bias_init'])(e)

        e1 = tf.keras.layers.Conv1D(filters=args['nb_skip_filters'], 
                                    kernel_size=1,
                                    activation=args['activation_conv1d'], 
                                    padding=args['padding'],
                                    kernel_initializer=args['conv_kernel_init'],
                                    bias_initializer=args['conv_bias_init'])(e)
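        # e1 (1x1 convolution) is the skip output of this dilated layer; it is
        # collected in dil_layers and concatenated after the loop.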

        # Feature map reduction: optionally reduce the number of feature maps
        # (channels) passed on to the next dilated layer
        if args['nb_skip_filters'] == args["nb_filters_map_reduce"]:
            e = e1
        elif args["nb_filters_map_reduce"] is not None:
            e = tf.keras.layers.Conv1D(filters=args['nb_filters_map_reduce'], 
                                       kernel_size=1, 
                                       activation="relu",
                                       padding=args['padding'],
                                       kernel_initializer=args['conv_kernel_init'],
                                       bias_initializer=args['conv_bias_init'])(e)
        dil_layers.append(e1)

    e = tf.keras.layers.Concatenate(axis=-1)(dil_layers)
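    # e now holds the concatenated 1x1 skip outputs of all dilated layers;
    # this is the representation that gets downsampled into the bottleneck.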

    if args["stepwise_updownsample"]: # Stepwise downsampling
        # Here, we downsample the time series in multiple steps by a factor of 2
        layers = int(np.log2(args["latent_sample_rate"]))  
        lost_one = []
        length = shape_X[1]
        for j in range(layers):
            lost_one.append(length % 2 > 0)
            e = tf.keras.layers.Conv1D(filters=args['nb_filters_ae'], 
                                       kernel_size=args['kernel_size_ae'],
                                       activation='relu', 
                                       padding=args['padding'],
                                       kernel_initializer=args['conv_kernel_init'],
                                       bias_initializer=args['conv_bias_init'])(e)

            e = args['pooler'](pool_size=2, 
                               strides=None, 
                               padding='valid', 
                               data_format='channels_last')(e)
            length //= 2

        enc_flat = tf.keras.layers.Conv1D(filters=args['filters_bneck'], 
                                          kernel_size=1,
                                          activation=args['activation_conv1d'], 
                                          padding=args['padding'],
                                          kernel_initializer=args['conv_kernel_init'],
                                          bias_initializer=args['conv_bias_init'])(e)
        d = enc_flat
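        # Decoder-side upsampling: mirror each downsampling step with a Conv1D
        # followed by a strided Conv1DTranspose; ZeroPadding1D restores odd
        # sequence lengths lost during pooling.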
        for j in range(layers):
            d = tf.keras.layers.Conv1D(filters=args['nb_filters_ae'], 
                                       kernel_size=args['kernel_size_ae'],
                                       activation='relu', 
                                       padding=args['padding'],
                                       kernel_initializer=args['conv_kernel_init'],
                                       bias_initializer=args['conv_bias_init'])(d)

            # One could also use UpSampling1D()
            d = tf.keras.layers.Conv1DTranspose(filters=args['nb_filters_ae'], 
                                                kernel_size=args['kernel_size_ae'], 
                                                padding="same", 
                                                strides=2, 
                                                activation="relu")(d)
            if lost_one[-(j + 1)]:
                d = tf.keras.layers.ZeroPadding1D(padding=(0, 1))(d)
    else: # Hard downsampling
        enc_flat = tf.keras.layers.Conv1D(filters=args['filters_bneck'], 
                                          kernel_size=1,
                                          activation=args['activation_conv1d'], 
                                          padding=args['padding'])(e)

        # Do some average (max) pooling to get a compressed representation 
        # of the time series (e.g. a sequence of length 8)
        enc_pooled = args['pooler'](pool_size=sampling_factor, 
                                    strides=None, 
                                    padding='valid',
                                    data_format='channels_last')(enc_flat)

        # Maybe put the pooled values through a non-linear layer first?
        # Currently, just stick with identity function...
        enc_out = tf.keras.layers.Activation("linear")(enc_pooled)
        # ...
        # Now we should have a short sequence, which we will upsample 
        # again and then attempt to reconstruct the original series

        # One could also use UpSampling1D()
        d = tf.keras.layers.Conv1DTranspose(filters=16, # hard-coded ...
                                            kernel_size=4, # also hard-coded ...
                                            padding="same", 
                                            strides=sampling_factor,
                                            activation=args['activation_conv1d'])(enc_out)
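
    # Decoder: run the (reversed) dilated convolution stack again, collecting
    # 1x1 skip outputs just like in the encoder.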

    dil_layers = list()
    for k in reversed(args["dilations"]):
        d = tf.keras.layers.Conv1D(filters=args['nb_filters'], 
                                   kernel_size=args['kernel_size'], 
                                   activation="relu",
                                   padding=args['padding'], 
                                   dilation_rate=k,
                                   kernel_initializer=args['conv_kernel_init'],
                                   bias_initializer=args['conv_bias_init'])(d)
        d1 = tf.keras.layers.Conv1D(filters=args['nb_skip_filters'], 
                                    kernel_size=1,
                                    activation=args['activation_conv1d'], 
                                    padding=args['padding'],
                                    kernel_initializer=args['conv_kernel_init'],
                                    bias_initializer=args['conv_bias_init'])(d)

        # Feature map reduction: optionally reduce the number of feature maps
        # (channels) passed on to the next dilated layer
        if args['nb_skip_filters'] == args["nb_filters_map_reduce"]:
            d = d1
        elif args["nb_filters_map_reduce"] is not None:
            d = tf.keras.layers.Conv1D(filters=args['nb_filters_map_reduce'], 
                                       kernel_size=1, 
                                       activation="relu",
                                       padding=args['padding'],
                                       kernel_initializer=args['conv_kernel_init'],
                                       bias_initializer=args['conv_bias_init'])(d)

        dil_layers.append(d1)

    d = tf.keras.layers.Concatenate(axis=-1)(dil_layers)

    # Finally, put the concatenated outputs through a dense layer, 
    # to get the reconstructed signal
    o = tf.keras.layers.Dense(shape_X[2], activation='linear')(d)
    model = tf.keras.Model(inputs=[i], outputs=[o])

    if verbose > 1:
        model.summary()
        tf.keras.utils.plot_model(
            model,
            to_file="model.png",
        )

    return model

# %%
# The parameters are far from being optimal and were just guessed for
# illustrative purposes. In general, it takes some time to get a feeling for
# the parameters and how to choose them. Also, the current model setup is
# quite rigid (e.g., there is no particular reason why certain layers should
# have the same number of filters, etc.).
args = {
    'dilations': (1, 2, 4, 8, 16, 32, 64),
    'pooler': tf.keras.layers.AveragePooling1D,
    'padding': 'same',  # 'same', 'causal'
    'activation_conv1d': 'relu',
    'conv_bias_init': 'zeros',
    'conv_kernel_init': 'glorot_normal',
    'kernel_size': 8,
    'latent_sample_rate': 32, # This specifies the bottleneck...
    'filters_bneck': 4,  # ... as well as this...
    'nb_filters': 64,
    'kernel_size_ae': 4,
    'nb_filters_ae': 16,
    'nb_filters_map_reduce': 16,
    'nb_skip_filters': 8,
    'stepwise_updownsample': True, # False: Do a hard downsampling with 1 layer
  }

# %%
# Create some arbitrary training data (batch x time x channels)
# Use some time series length which is not a power of 2 for demonstration purposes
data_shape = 200, (1<<14) + 13, 3

train_X = np.sin(0.001 * np.arange(np.prod(data_shape))).reshape(data_shape)
train_X += 0.05 * np.random.randn(*data_shape)
print("train_X.shape:", train_X.shape)

# %%
model = build_model(train_X.shape, args, verbose=2)

# Compile model
adam = tf.keras.optimizers.Adam(learning_rate=0.001, 
                                beta_1=0.9, 
                                beta_2=0.999, 
                                epsilon=1e-08, 
                                amsgrad=True)

model.compile(loss='logcosh', optimizer=adam, metrics=["mae", "mse", "logcosh"])

# %%
history = model.fit(train_X, train_X,
                    batch_size=16,
                    epochs=30,
                    validation_split=.1,
                    shuffle=True,
                    callbacks=None,
                    verbose=1)

# %%
ex = train_X[[33]]  # pick one time series (index 33); double brackets keep the batch dimension
pred = model(ex)

plt.figure()
plt.plot(ex[0,:,0], label="original") # plot 0-th channel
plt.plot(pred[0,:,0], label="reconstructed")  # plot 0-th channel
plt.legend()
plt.show()
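If you then want a simple anomaly indicator from the trained model, a minimal sketch is to look at the pointwise reconstruction error (the paper derives the actual anomaly score from the reconstruction errors in a more elaborate way, so the following is only illustrative):

# %%
# Pointwise squared reconstruction error, averaged over the channels.
# This is only a rough indicator; the paper's scoring procedure is more involved.
err = np.square(train_X - model.predict(train_X, batch_size=16))
score = err.mean(axis=-1)  # shape: (batch, time)
print("score.shape:", score.shape)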

I hope this helps a bit. Otherwise, please let me know...