chanelcolgate / hydroelectric-project


Hydrological forecasting - Training and Predicting model #9

Open chanelcolgate opened 3 years ago

chanelcolgate commented 3 years ago

Description


- Prepare data for training, validation, and testing
  - Create three folders: `data`, `components`, and `saved_model`
```python
!mkdir -p data
!mkdir -p components
!mkdir -p saved_model
```

  - Define the `WindowGenerator` class for slicing the series into input and label windows:
```python
import numpy as np
import tensorflow as tf
import IPython
import IPython.display
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

class WindowGenerator():
    def __init__(self, input_width, label_width, shift,
                 train_df=None, val_df=None, test_df=None,
                 label_columns=None):
        # Store the raw data.

        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in
                                          enumerate(label_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def split_window(self, features):
        # features: (N, A, B) = (batch, total_window_size, num_features),
        # e.g. (3, 7, 19). input_slice = slice(0, input_width) trims the
        # time axis A from 7 to the first 6 input steps.
        inputs = features[:, self.input_slice, :]
        # labels_slice = slice(label_start, None) trims the time axis A
        # from 7 to the last 1 label step.
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1)

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

    def plot(self, model=None, plot_col='height downstream', max_subplots=3):
        inputs, labels = self.example
        plt.figure(figsize=(12, 8))
        plot_col_index = self.column_indices[plot_col]
        max_n = min(max_subplots, len(inputs))
        for n in range(max_n):
            plt.subplot(max_n, 1, n+1)
            plt.ylabel(f'{plot_col} [normed]')
            plt.plot(self.input_indices, inputs[n, :, plot_col_index],
                     label='Inputs', marker='.', zorder=-10)

            if self.label_columns:
                label_col_index = self.label_columns_indices.get(plot_col, None)
            else:
                label_col_index = plot_col_index

            if label_col_index is None:
                continue

            plt.scatter(self.label_indices, labels[n, :, label_col_index],
                        edgecolors='k', label='Labels', c='#2ca02c', s=64)
            if model is not None:
                predictions = model(inputs)
                plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                            marker='X', edgecolors='k', label='Predictions', c='#ff7f0e', s=64)

            if n == 0:
                plt.legend()
        plt.xlabel('Time [h]')
        plt.show()

    def make_dataset(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=32)
        ds = ds.map(self.split_window)

        return ds

    @property
    def train(self):
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        return self.make_dataset(self.test_df)

    @property
    def example(self):
        # Get and cache an example batch of `input, label` for plotting.
        result = getattr(self, '_example', None)
        if result is None:
            # No example batch was found, so get one from the `.train` dataset
            result = next(iter(self.train))
            # And cache it for next time
            self._example = result
        return result

    def __repr__(self):
        return "\n".join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])

    def __str__(self):
        return "\n".join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])
```
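
As a quick sanity check (not part of the original notebook), the window logic can be exercised on synthetic data; `demo_df` below is a stand-in for the real hydrology frames:
```python
import numpy as np
import pandas as pd

# Synthetic stand-in for the real train/val/test frames.
demo_df = pd.DataFrame(np.random.rand(100, 2),
                       columns=['height downstream', 'flow downstream'])
w = WindowGenerator(input_width=6, label_width=1, shift=1,
                    train_df=demo_df, val_df=demo_df, test_df=demo_df,
                    label_columns=['flow downstream'])
print(w)                    # total window size, input/label indices
inputs, labels = w.example  # one shuffled batch from the train split
print(inputs.shape)         # (batch, 6, 2)
print(labels.shape)         # (batch, 1, 1)
```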
  - Write the multi-step models to `components/MultiStepModels.py`:
```python
%%writefile components/MultiStepModels.py
import logging
import pandas as pd
import numpy as np
import tensorflow as tf
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

class FeedBack(tf.keras.Model):
    def __init__(self, units, out_steps, num_features):
        super().__init__()
        self.out_steps = out_steps
        self.units = units
        self.lstm_cell = tf.keras.layers.LSTMCell(units)
        # Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
        self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
        self.dense = tf.keras.layers.Dense(num_features)

    def warmup(self, inputs):
        # inputs.shape => (batch, time, features)
        # x.shape => (batch, lstm_units)
        x, *state = self.lstm_rnn(inputs)

        # predictions.shape => (batch, features)
        prediction = self.dense(x)
        return prediction, state

    def call(self, inputs, training=None):
        # Collect the dynamically unrolled outputs in a Python list.
        predictions = []
        # Initialize the lstm state
        prediction, state = self.warmup(inputs)

        # Insert the first prediction
        predictions.append(prediction)

        # Run the rest of the prediction steps
        for n in range(1, self.out_steps):
            # Use the last prediction as input
            x = prediction
            # Execute one lstm step.
            x, state = self.lstm_cell(x, states=state, training=training)
            # Convert the lstm output to a prediction
            prediction = self.dense(x)
            # Add the prediction to the output
            predictions.append(prediction)

        # predictions.shape => (time, batch, features)
        predictions = tf.stack(predictions)
        # predictions.shape => (batch, time, features)
        predictions = tf.transpose(predictions, [1, 0, 2])
        return predictions

class MultiStepLastBaseline(tf.keras.Model):
    def __init__(self, out_steps):
        super().__init__()
        self.out_steps = out_steps

    def call(self, inputs):
        return tf.tile(inputs[:, -1:, :], [1, self.out_steps, 1])

class RepeatBaseLine(tf.keras.Model):
    def call(self, inputs):
        return inputs

class MultiStepModels():
    MAX_EPOCHS = 20
    def __init__(self, out_steps, num_features, conv_width, multi_window, plot_col):
        self.out_steps = out_steps
        self.multi_window = multi_window
        self.conv_width = conv_width
        self.num_features = num_features
        self.plot_col = plot_col

    def compile_and_fit(self, model, window, patience=2):
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                          patience=patience,
                                                          mode='min')
        model.compile(loss=tf.losses.MeanSquaredError(),
                      optimizer=tf.optimizers.Adam(),
                      metrics=[tf.metrics.MeanAbsoluteError()])
        history = model.fit(window.train, epochs=self.MAX_EPOCHS,
                            validation_data = window.val,
                            callbacks=[early_stopping])
        return history

    def startApp(self):
        # # Baselines
        # last_baseline = MultiStepLastBaseline(self.out_steps)
        # last_baseline.compile(loss=tf.losses.MeanSquaredError(),
        #                       metrics=[tf.metrics.MeanAbsoluteError()])

        multi_val_performance = {}
        multi_performance = {}

        # multi_val_performance['Last'] = last_baseline.evaluate(self.multi_window.val)
        # multi_performance['Last'] = last_baseline.evaluate(self.multi_window.test, verbose=0)
        # plot1 = self.multi_window.plot(last_baseline, plot_col=self.plot_col)

        # # Repeat baselines
        # repeat_baseline = RepeatBaseLine()
        # repeat_baseline.compile(loss=tf.losses.MeanSquaredError(),
        #                         metrics=[tf.metrics.MeanAbsoluteError()])

        # multi_val_performance['Repeat'] = repeat_baseline.evaluate(self.multi_window.val)
        # multi_performance['Repeat'] = repeat_baseline.evaluate(self.multi_window.test, verbose=0)
        # plot2 = self.multi_window.plot(repeat_baseline, plot_col=self.plot_col)

        # Linear
        # multi_linear_model = tf.keras.Sequential([
        #     #Take the last time-step.
        #     # Shape [batch, time, feature] => [batch, 1, features]
        #     tf.keras.layers.Lambda(lambda x: x[:, -1, :]),
        #     # Shape => [batch, 1, out_steps*features]
        #     tf.keras.layers.Dense(self.out_steps*self.num_features,
        #                           kernel_initializer=tf.initializers.zeros()),
        #     # Shape => [batch, out_steps, features]
        #     tf.keras.layers.Reshape([self.out_steps, self.num_features])])

        # history = self.compile_and_fit(multi_linear_model, self.multi_window)
        # IPython.display.clear_output()

        # multi_val_performance['Linear'] = multi_linear_model.evaluate(self.multi_window.val)
        # multi_performance['Linear'] = multi_linear_model.evaluate(self.multi_window.test, verbose=0)
        # plot3 = self.multi_window.plot(multi_linear_model, plot_col=self.plot_col)

        # # CNN
        # multi_conv_model = tf.keras.Sequential([
        #     # Shape [batch, time, features] => [batch, CONV_WIDTH, features]
        #     tf.keras.layers.Lambda(lambda x: x[: , -self.conv_width:, :]),
        #     # Shape => [batch, 1, conv_units]
        #     tf.keras.layers.Conv1D(256, activation='relu', kernel_size=(self.conv_width)),
        #     # Shape => [batch, 1, out_steps*features]
        #     tf.keras.layers.Dense(self.out_steps*self.num_features,
        #                           kernel_initializer=tf.initializers.zeros()),
        #     # Shape => [batch, out_steps, features]
        #     tf.keras.layers.Reshape([self.out_steps, self.num_features])])

        # history = self.compile_and_fit(multi_conv_model, self.multi_window)
        # IPython.display.clear_output()

        # multi_val_performance['Conv'] = multi_conv_model.evaluate(self.multi_window.val)
        # multi_performance['Conv'] = multi_conv_model.evaluate(self.multi_window.test, verbose=0)
        # plot4 = self.multi_window.plot(multi_conv_model, plot_col=self.plot_col)

        # RNN
        multi_lstm_model = tf.keras.Sequential([
            # Shape [batch, time, features] => [batch, lstm_units]
            # Adding more `lstm_units` just overfits more quickly.
            tf.keras.layers.LSTM(128, return_sequences=False),
            # Shape => [batch, out_steps*features]
            tf.keras.layers.Dense(self.out_steps*self.num_features,
                                  kernel_initializer=tf.initializers.zeros()),
            # Shape => [batch, out_steps, features]
            tf.keras.layers.Reshape([self.out_steps, self.num_features])
            ])

        history = self.compile_and_fit(multi_lstm_model, self.multi_window)
        IPython.display.clear_output()

        multi_val_performance['LSTM'] = multi_lstm_model.evaluate(self.multi_window.val)
        multi_performance['LSTM'] = multi_lstm_model.evaluate(self.multi_window.test, verbose=0)
        multi_lstm_model.save('saved_model/multi_lstm_model')
        plot5 = self.multi_window.plot(multi_lstm_model, plot_col=self.plot_col)

        # Advanced: Autoregressive model
        # RNN
        # feedback_model = FeedBack(units=32, out_steps=self.out_steps, num_features=self.num_features)
        # history = self.compile_and_fit(feedback_model, self.multi_window)
        # IPython.display.clear_output()

        # multi_val_performance['AR LSTM'] = feedback_model.evaluate(self.multi_window.val)
        # multi_performance['AR LSTM'] = feedback_model.evaluate(self.multi_window.test, verbose=0)
        # plot6 = self.multi_window.plot(feedback_model, plot_col=self.plot_col)

        # Performance
        x = np.arange(len(multi_performance))
        width = 0.3

        metric_index = multi_lstm_model.metrics_names.index('mean_absolute_error')
        val_mae = [v[metric_index] for v in multi_val_performance.values()]
        test_mae = [v[metric_index] for v in multi_performance.values()]

        plt.bar(x - 0.17, val_mae, width, label='Validation')
        plt.bar(x + 0.17, test_mae, width, label='Test')
        plt.xticks(ticks=x, labels=multi_performance.keys(), rotation=45)
        plt.ylabel(f'MAE (average over all times and outputs)')
        _ = plt.legend()
        plt.show()

```

  - Create the data window (90 input steps, OUT_STEPS = 1 forecast step) and the model wrapper:
```python
label_columns = []
label_columns.append(label_column)

OUT_STEPS = 1
CONV_WIDTH = 3
multi_window = WindowGenerator(
    input_width=90,
    label_width=OUT_STEPS,
    shift=OUT_STEPS,
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    label_columns=label_columns)

multiStepModels = MultiStepModels(
    out_steps=OUT_STEPS,
    num_features=num_features,
    conv_width=CONV_WIDTH,
    multi_window=multi_window,
    plot_col=label_column)
```
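
The names `train_df`, `val_df`, `test_df`, `num_features`, and `label_column` used above come from earlier preprocessing cells that are not shown in this issue. A plausible sketch, assuming a raw DataFrame `df` and the usual 70/20/10 split with z-score normalization:
```python
# Assumed preprocessing (not shown in the issue): split the raw frame `df`
# 70/20/10 and normalize with the training statistics.
n = len(df)
train_df = df[0:int(n * 0.7)]
val_df = df[int(n * 0.7):int(n * 0.9)]
test_df = df[int(n * 0.9):]
num_features = df.shape[1]
label_column = 'flow downstream'  # assumed target column

train_mean = train_df.mean()
train_std = train_df.std()
train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std
```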

  - Start the app:
```python
multiStepModels.startApp()
```
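
The `new_model` used below is not defined in this issue; presumably it is the multi-step LSTM loaded back from disk. A minimal sketch, assuming the `saved_model/multi_lstm_model` path used in `startApp()`:
```python
import tensorflow as tf

# Assumed step (not shown in the issue): reload the LSTM that
# startApp() saved under saved_model/multi_lstm_model.
new_model = tf.keras.models.load_model('saved_model/multi_lstm_model')
```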

  - Check its architecture:
```python
new_model.summary()
```

  - Prediction data is taken from the test dataframe:
```python
pred_df = pd.DataFrame(test_df.loc[1065:1065+90-1, :])
single_step_window = WindowGenerator(
    input_width=1, label_width=1, shift=1,
    train_df=pred_df,
    val_df=pred_df,
    test_df=pred_df,
    label_columns=['flow downstream'])
inputs, _ = single_step_window.example
column_indices = {name: i for i, name in
                  enumerate(pred_df.columns)}
plot_col_index = column_indices['flow downstream']
predictions = new_model(inputs)
val = predictions[0, :, plot_col_index]
val = val.numpy()*train_std + train_mean
```
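
To eyeball the forecast, the denormalized prediction can be plotted. A minimal sketch, assuming `val` from the cell above holds the denormalized predicted value(s) for `flow downstream`:
```python
import numpy as np
import matplotlib.pyplot as plt

# Sketch: visualize the denormalized prediction(s) from the cell above.
vals = np.asarray(val).ravel()
plt.plot(range(len(vals)), vals, marker='x', label='Predicted flow downstream')
plt.xlabel('Step')
plt.ylabel('flow downstream')
plt.legend()
plt.show()
```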

Estimate: OK