Inconsistent calculation results between tslearn, pyts and fastdtw

Hi,

I updated the computation of the constraint regions in tslearn in PR #104. Later, PR #31 by Hicham Janati updated the computation of the constraint regions in pyts so that they are tighter when the two time series have different lengths. I don't think that there is a consensus on the definition of the regions for time series with different lengths.

Here is an illustration for pyts and tslearn with a time series of length 8 and another one of length 4:

In the figures, I plot the lines that define the constraint regions. Any point inside the lines is a valid point; any point outside the lines is not valid. I also plot the diagonal in black.

For Sakoe-Chiba bands, you can see that for window_size=0, pyts returns the tightest possible region, while tslearn adds a large band whose height corresponds to the difference of the lengths of the time series.

Likewise for Itakura parallelograms, pyts returns a tighter region than tslearn. Note that the max_slope parameter is relative in pyts but absolute in tslearn, which is why I multiplied the max_slope values by two in tslearn.

For the FastDTW algorithm, I have not checked the code of the fastdtw package so I can't comment, but there is an issue mentioned in #72 that I have not merged yet, so it might explain the differences.

If you see differences between packages regarding DTW computations, it is very likely that they come from the computation of the constraint regions.

Hope this helps a bit and sorry for the delay.

Code for Sakoe-Chiba band figure

import matplotlib.pyplot as plt
import numpy as np
from pyts.metrics import sakoe_chiba_band
from pyts.metrics.dtw import _check_sakoe_chiba_params
from tslearn.metrics import sakoe_chiba_mask

def plot_sakoe_chiba_pyts(n_timestamps_1, n_timestamps_2, window_size=0.5,
                          ax=None):
    """Plot the Sakoe-Chiba band returned by pyts.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (horizontal axis of the plot).

    n_timestamps_2 : int
        Length of the second time series (vertical axis of the plot).

    window_size : int or float (default = 0.5)
        Window size forwarded to ``sakoe_chiba_band`` and
        ``_check_sakoe_chiba_params``.

    ax : matplotlib Axes or None (default = None)
        Axes to draw on. If None, the current axes (``plt.gca()``) are used.
    """
    # The default ``ax=None`` used to crash on ``ax.set_xticks``; fall back to
    # the current axes and draw everything on the same Axes object.
    if ax is None:
        ax = plt.gca()

    region = sakoe_chiba_band(n_timestamps_1, n_timestamps_2, window_size)
    scale, horizontal_shift, vertical_shift = \
        _check_sakoe_chiba_params(n_timestamps_1, n_timestamps_2, window_size)

    # ``region.T`` yields one (start, stop) pair per column; the rows
    # start..stop-1 of that column are marked as valid cells.
    mask = np.zeros((n_timestamps_2, n_timestamps_1))
    for i, (j, k) in enumerate(region.T):
        mask[j:k, i] = 1.

    ax.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)

    # Boundary lines of the band and the diagonal, drawn slightly beyond the
    # grid so that they cross the whole visible area.
    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)
    lower_bound = scale * (x - horizontal_shift) - vertical_shift
    upper_bound = scale * (x + horizontal_shift) + vertical_shift
    ax.plot(x, lower_bound, 'b', lw=2)
    ax.plot(x, upper_bound, 'g', lw=2)
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    ax.plot(x, diag, 'black', lw=1)

    # One marker per (i, j) cell of the grid, drawn in a single call.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    ax.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders (used for the dashed grid), major
    # ticks sit on the cell centers.
    ax.set_xticks(np.arange(-0.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    ax.grid(color='b', which='minor', linestyle='--', linewidth=1)
    ax.set_xticks(np.arange(0, n_timestamps_1, 1))
    ax.set_yticks(np.arange(0, n_timestamps_2, 1))
    ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
    ax.set_ylim((-0.5, n_timestamps_2 - 0.5))

def plot_sakoe_chiba_tslearn(n_timestamps_1, n_timestamps_2, window_size=0.5,
                             ax=None):
    """Plot the Sakoe-Chiba band returned by tslearn.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (horizontal axis of the plot).

    n_timestamps_2 : int
        Length of the second time series (vertical axis of the plot).

    window_size : int or float (default = 0.5)
        Window size forwarded to ``sakoe_chiba_mask``; it is also used as the
        vertical shift of the plotted boundary lines.

    ax : matplotlib Axes or None (default = None)
        Axes to draw on. If None, the current axes (``plt.gca()``) are used.
    """
    # The default ``ax=None`` used to crash on ``ax.set_xticks``; fall back to
    # the current axes and draw everything on the same Axes object.
    if ax is None:
        ax = plt.gca()

    vertical_shift = window_size

    # Convert the mask to a 0/1 image: cells equal to 0 become 1 and infinite
    # cells become 0 (presumably tslearn encodes valid cells as 0 and
    # forbidden cells as inf -- confirm against the tslearn documentation).
    # The transpose puts the first time series on the horizontal axis.
    mask = sakoe_chiba_mask(n_timestamps_1, n_timestamps_2, window_size)
    mask[mask == 0.] = 1.
    mask[np.isinf(mask)] = 0.
    mask = mask.T

    ax.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)

    # Boundary lines of the band and the diagonal, drawn slightly beyond the
    # grid so that they cross the whole visible area.
    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)
    lower_bound = x - vertical_shift - abs(n_timestamps_1 - n_timestamps_2)
    upper_bound = x + vertical_shift
    ax.plot(x, lower_bound, 'b', lw=2)
    ax.plot(x, upper_bound, 'g', lw=2)
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    ax.plot(x, diag, 'black', lw=1)

    # One marker per (i, j) cell of the grid, drawn in a single call.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    ax.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders (used for the dashed grid), major
    # ticks sit on the cell centers.
    ax.set_xticks(np.arange(-0.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    ax.grid(color='b', which='minor', linestyle='--', linewidth=1)
    ax.set_xticks(np.arange(0, n_timestamps_1, 1))
    ax.set_yticks(np.arange(0, n_timestamps_2, 1))
    ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
    ax.set_ylim((-0.5, n_timestamps_2 - 0.5))

n_timestamps_1, n_timestamps_2 = 8, 4

plt.figure(figsize=(12, 6))

# (plotting function, window size, subplot title), listed in subplot order:
# pyts on the top row, tslearn on the bottom row.
panels = [
    (plot_sakoe_chiba_pyts, 0, 'pyts, window-size = 0'),
    (plot_sakoe_chiba_pyts, 2, 'pyts, window-size = 2'),
    (plot_sakoe_chiba_tslearn, 0, 'tslearn, window-size = 0'),
    (plot_sakoe_chiba_tslearn, 2, 'tslearn, window-size = 2'),
]
for position, (plot_func, size, title) in enumerate(panels, start=1):
    ax = plt.subplot(2, 2, position)
    plot_func(n_timestamps_1, n_timestamps_2, window_size=size, ax=ax)
    plt.title(title, fontsize=18)

plt.suptitle('Sakoe-Chiba band', y=1.02, fontsize=24)
plt.subplots_adjust(hspace=0.3)

Code for Itakura parallelogram figure

import matplotlib.pyplot as plt
import numpy as np
from pyts.metrics import itakura_parallelogram
from pyts.metrics.dtw import _get_itakura_slopes
from tslearn.metrics import itakura_mask

def plot_itakura_pyts(n_timestamps_1, n_timestamps_2, max_slope=1., ax=None):
    """Plot the Itakura parallelogram returned by pyts.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (horizontal axis of the plot).

    n_timestamps_2 : int
        Length of the second time series (vertical axis of the plot).

    max_slope : float (default = 1.)
        Maximum slope forwarded to ``itakura_parallelogram``. The slopes of
        the plotted lines are the ones returned by ``_get_itakura_slopes``.

    ax : matplotlib Axes or None (default = None)
        Axes to draw on. If None, the current axes (``plt.gca()``) are used.
    """
    # The default ``ax=None`` used to crash on ``ax.set_xticks``; fall back to
    # the current axes and draw everything on the same Axes object.
    if ax is None:
        ax = plt.gca()

    region = itakura_parallelogram(n_timestamps_1, n_timestamps_2, max_slope)
    max_slope, min_slope = _get_itakura_slopes(
        n_timestamps_1, n_timestamps_2, max_slope)

    # ``region.T`` yields one (start, stop) pair per column; the rows
    # start..stop-1 of that column are marked as valid cells.
    mask = np.zeros((n_timestamps_2, n_timestamps_1))
    for i, (j, k) in enumerate(region.T):
        mask[j:k, i] = 1.

    ax.imshow(mask, origin='lower', cmap='Wistia')

    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)

    # Lines with the max and min slopes that pass through the last cell
    # (n_timestamps_1 - 1, n_timestamps_2 - 1).
    low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
        max_slope * x
    up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
        min_slope * x
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    ax.plot(x, diag, 'black', lw=1)
    # Lines with the max and min slopes that pass through the origin.
    ax.plot(x, max_slope * x, 'b', lw=1.5)
    ax.plot(x, min_slope * x, 'r', lw=1.5)
    ax.plot(x, low_max_line, 'g', lw=1.5)
    ax.plot(x, up_min_line, 'y', lw=1.5)

    # One marker per (i, j) cell of the grid, drawn in a single call.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    ax.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders (used for the dashed grid), major
    # ticks sit on the cell centers.
    ax.set_xticks(np.arange(-.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    ax.grid(which='minor', color='b', linestyle='--', linewidth=1)
    ax.set_xticks(np.arange(0, n_timestamps_1, 1))
    ax.set_yticks(np.arange(0, n_timestamps_2, 1))
    ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
    ax.set_ylim((-0.5, n_timestamps_2 - 0.5))

def plot_itakura_tslearn(n_timestamps_1, n_timestamps_2, max_slope=1.,
                         ax=None):
    """Plot the Itakura parallelogram returned by tslearn.

    Parameters
    ----------
    n_timestamps_1 : int
        Length of the first time series (horizontal axis of the plot).

    n_timestamps_2 : int
        Length of the second time series (vertical axis of the plot).

    max_slope : float (default = 1.)
        Maximum slope forwarded to ``itakura_mask``. For the plotted lines it
        is rescaled by ``(n_timestamps_1 - 1) / (n_timestamps_2 - 1)`` and
        the minimum slope is taken as the inverse of the rescaled value.

    ax : matplotlib Axes or None (default = None)
        Axes to draw on. If None, the current axes (``plt.gca()``) are used.
    """
    # The default ``ax=None`` used to crash on ``ax.set_xticks``; fall back to
    # the current axes and draw everything on the same Axes object.
    if ax is None:
        ax = plt.gca()

    # Convert the mask to a 0/1 image: cells equal to 0 become 1 and infinite
    # cells become 0 (presumably tslearn encodes valid cells as 0 and
    # forbidden cells as inf -- confirm against the tslearn documentation).
    # The transpose puts the first time series on the horizontal axis.
    mask = itakura_mask(n_timestamps_1, n_timestamps_2, max_slope)
    mask[mask == 0.] = 1.
    mask[np.isinf(mask)] = 0.
    mask = mask.T

    ax.imshow(mask, origin='lower', cmap='Wistia')

    sz = max(n_timestamps_1, n_timestamps_2)
    x = np.arange(-1, sz + 1)
    ratio = (n_timestamps_1 - 1) / (n_timestamps_2 - 1)
    max_slope *= ratio
    min_slope = 1 / max_slope

    # Lines with the max and min slopes that pass through the last cell
    # (n_timestamps_1 - 1, n_timestamps_2 - 1).
    low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
        max_slope * x
    up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
        min_slope * x
    diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * x
    ax.plot(x, diag, 'black', lw=1)
    # Lines with the max and min slopes that pass through the origin.
    ax.plot(x, max_slope * x, 'b', lw=1.5)
    ax.plot(x, min_slope * x, 'r', lw=1.5)
    ax.plot(x, low_max_line, 'g', lw=1.5)
    ax.plot(x, up_min_line, 'y', lw=1.5)

    # One marker per (i, j) cell of the grid, drawn in a single call.
    grid_x, grid_y = np.meshgrid(np.arange(n_timestamps_1),
                                 np.arange(n_timestamps_2))
    ax.plot(grid_x.ravel(), grid_y.ravel(), 'o', color='k', ms=3)

    # Minor ticks sit on the cell borders (used for the dashed grid), major
    # ticks sit on the cell centers.
    ax.set_xticks(np.arange(-.5, n_timestamps_1, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
    ax.grid(which='minor', color='b', linestyle='--', linewidth=1)
    ax.set_xticks(np.arange(0, n_timestamps_1, 1))
    ax.set_yticks(np.arange(0, n_timestamps_2, 1))
    ax.set_xlim((-0.5, n_timestamps_1 - 0.5))
    ax.set_ylim((-0.5, n_timestamps_2 - 0.5))

n_timestamps_1, n_timestamps_2 = 8, 4

plt.figure(figsize=(12, 6))

# (plotting function, max slope, subplot title), listed in subplot order:
# pyts on the top row, tslearn on the bottom row.
panels = [
    (plot_itakura_pyts, 1., 'pyts, max-slope = 1.'),
    (plot_itakura_pyts, 2., 'pyts, max-slope = 2.'),
    (plot_itakura_tslearn, 2., 'tslearn, max-slope = 2.'),
    (plot_itakura_tslearn, 4., 'tslearn, max-slope = 4.'),
]
for position, (plot_func, slope, title) in enumerate(panels, start=1):
    ax = plt.subplot(2, 2, position)
    plot_func(n_timestamps_1, n_timestamps_2, max_slope=slope, ax=ax)
    plt.title(title, fontsize=18)

plt.suptitle('Itakura parallelogram', y=1.02, fontsize=24)
plt.subplots_adjust(hspace=0.3)

johannfaouzi / pyts

Inconsistent calculation results between tslearn, pyts and fastdtw #76

Description

Codes

Versions

Code for Sakoe-Chiba band figure

Code for Itakura parallelogram figure