Open MichaelYin1994 opened 4 years ago
Hi,
I updated the computation of the constraint regions in tslearn in PR #104 . Later, PR #31 by Hicham Janati updated the computation of the constraint regions in pyts so that they are tighter when the two time series have different lengths. I don't think that there is a consensus on the definition of the regions for time series with different lengths.
Here is an illustration for pyts and tslearn with a time series of length 8 and another one of length 4:
In the figures I plot the lines that define the constraint regions. Any point inside the lines is a valid point, any point outside the lines is not valid. I also plot the diagonale in black.
For Sakoe-Chiba bands, you can see that for window_size=0
, pyts returns the tightest possible region, while tslearn adds a large band whose height corresponds to the difference of the lengths of the time series.
Likewise for Itakura parallelograms, pyts returns a tighter region than tslearn. Note that the max_slope
parameter is relative in pyts but absolute in tslearn, which is why I multiplied by two the max_slope
values in tslearn.
For the FastDTW algorithm, I don't have checked the code of the fastdtw
package so I can't comment, but there is an issue mentioned in #72 that I have not merged yet, so it might explain the differences.
If you see differences between packages regarding DTW computations, it is very likely that they come from the computation of the constraint regions.
Hope this helps a bit and sorry for the delay.
import matplotlib.pyplot as plt
import numpy as np
from pyts.metrics import sakoe_chiba_band
from pyts.metrics.dtw import _check_sakoe_chiba_params
from tslearn.metrics import sakoe_chiba_mask
def plot_sakoe_chiba_pyts(n_timestamps_1, n_timestamps_2, window_size=0.5, ax=None):
"""Plot the Sakoe-Chiba band."""
region = sakoe_chiba_band(n_timestamps_1, n_timestamps_2, window_size)
scale, horizontal_shift, vertical_shift = \
_check_sakoe_chiba_params(n_timestamps_1, n_timestamps_2, window_size)
mask = np.zeros((n_timestamps_2, n_timestamps_1))
for i, (j, k) in enumerate(region.T):
mask[j:k, i] = 1.
plt.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)
sz = max(n_timestamps_1, n_timestamps_2)
x = np.arange(-1, sz + 1)
lower_bound = scale * (x - horizontal_shift) - vertical_shift
upper_bound = scale * (x + horizontal_shift) + vertical_shift
plt.plot(x, lower_bound, 'b', lw=2)
plt.plot(x, upper_bound, 'g', lw=2)
diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
plt.plot(x, diag, 'black', lw=1)
for i in range(n_timestamps_1):
for j in range(n_timestamps_2):
plt.plot(i, j, 'o', color='k', ms=3)
ax.set_xticks(np.arange(-0.5, n_timestamps_1, 1), minor=True)
ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
plt.grid(color='b', which='minor', linestyle='--', linewidth=1)
plt.xticks(np.arange(0, n_timestamps_1, 1))
plt.yticks(np.arange(0, n_timestamps_2, 1))
plt.xlim((-0.5, n_timestamps_1 - 0.5))
plt.ylim((-0.5, n_timestamps_2 - 0.5))
def plot_sakoe_chiba_tslearn(n_timestamps_1, n_timestamps_2, window_size=0.5, ax=None):
"""Plot the Sakoe-Chiba band."""
vertical_shift = window_size
mask = sakoe_chiba_mask(n_timestamps_1, n_timestamps_2, window_size)
mask[mask == 0.] = 1.
mask[np.isinf(mask)] = 0.
mask = mask.T
plt.imshow(mask, origin='lower', cmap='Wistia', vmin=0, vmax=1)
sz = max(n_timestamps_1, n_timestamps_2)
x = np.arange(-1, sz + 1)
lower_bound = x - vertical_shift - abs(n_timestamps_1 - n_timestamps_2)
upper_bound = x + vertical_shift
plt.plot(x, lower_bound, 'b', lw=2)
plt.plot(x, upper_bound, 'g', lw=2)
diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
plt.plot(x, diag, 'black', lw=1)
for i in range(n_timestamps_1):
for j in range(n_timestamps_2):
plt.plot(i, j, 'o', color='k', ms=3)
ax.set_xticks(np.arange(-0.5, n_timestamps_1, 1), minor=True)
ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
plt.grid(color='b', which='minor', linestyle='--', linewidth=1)
plt.xticks(np.arange(0, n_timestamps_1, 1))
plt.yticks(np.arange(0, n_timestamps_2, 1))
plt.xlim((-0.5, n_timestamps_1 - 0.5))
plt.ylim((-0.5, n_timestamps_2 - 0.5))
n_timestamps_1, n_timestamps_2 = 8, 4
plt.figure(figsize=(12, 6))
ax = plt.subplot(2, 2, 1)
plot_sakoe_chiba_pyts(n_timestamps_1, n_timestamps_2, window_size=0, ax=ax)
plt.title('pyts, window-size = 0', fontsize=18)
ax = plt.subplot(2, 2, 2)
plot_sakoe_chiba_pyts(n_timestamps_1, n_timestamps_2, window_size=2, ax=ax)
plt.title('pyts, window-size = 2', fontsize=18)
ax = plt.subplot(2, 2, 3)
plot_sakoe_chiba_tslearn(n_timestamps_1, n_timestamps_2, window_size=0, ax=ax)
plt.title('tslearn, window-size = 0', fontsize=18)
ax = plt.subplot(2, 2, 4)
plot_sakoe_chiba_tslearn(n_timestamps_1, n_timestamps_2, window_size=2, ax=ax)
plt.title('tslearn, window-size = 2', fontsize=18)
plt.suptitle('Sakoe-Chiba band', y=1.02, fontsize=24)
plt.subplots_adjust(hspace=0.3)
import matplotlib.pyplot as plt
import numpy as np
from pyts.metrics import itakura_parallelogram
from pyts.metrics.dtw import _get_itakura_slopes
from tslearn.metrics import itakura_mask
def plot_itakura_pyts(n_timestamps_1, n_timestamps_2, max_slope=1., ax=None):
"""Plot Itakura parallelogram."""
region = itakura_parallelogram(n_timestamps_1, n_timestamps_2, max_slope)
max_slope, min_slope = _get_itakura_slopes(
n_timestamps_1, n_timestamps_2, max_slope)
mask = np.zeros((n_timestamps_2, n_timestamps_1))
for i, (j, k) in enumerate(region.T):
mask[j:k, i] = 1.
plt.imshow(mask, origin='lower', cmap='Wistia')
sz = max(n_timestamps_1, n_timestamps_2)
x = np.arange(-1, sz + 1)
low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
max_slope * np.arange(-1, sz + 1)
up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
min_slope * np.arange(-1, sz + 1)
diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
plt.plot(x, diag, 'black', lw=1)
plt.plot(x, max_slope * np.arange(-1, sz + 1), 'b', lw=1.5)
plt.plot(x, min_slope * np.arange(-1, sz + 1), 'r', lw=1.5)
plt.plot(x, low_max_line, 'g', lw=1.5)
plt.plot(x, up_min_line, 'y', lw=1.5)
for i in range(n_timestamps_1):
for j in range(n_timestamps_2):
plt.plot(i, j, 'o', color='k', ms=3)
ax.set_xticks(np.arange(-.5, n_timestamps_1, 1), minor=True)
ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
plt.grid(which='minor', color='b', linestyle='--', linewidth=1)
plt.xticks(np.arange(0, n_timestamps_1, 1))
plt.yticks(np.arange(0, n_timestamps_2, 1))
plt.xlim((-0.5, n_timestamps_1 - 0.5))
plt.ylim((-0.5, n_timestamps_2 - 0.5))
def plot_itakura_tslearn(n_timestamps_1, n_timestamps_2, max_slope=1., ax=None):
"""Plot Itakura parallelogram."""
mask = itakura_mask(n_timestamps_1, n_timestamps_2, max_slope)
mask[mask == 0.] = 1.
mask[np.isinf(mask)] = 0.
mask = mask.T
plt.imshow(mask, origin='lower', cmap='Wistia')
sz = max(n_timestamps_1, n_timestamps_2)
x = np.arange(-1, sz + 1)
ratio = (n_timestamps_1 - 1) / (n_timestamps_2 - 1)
max_slope *= ratio
min_slope = 1 / max_slope
low_max_line = ((n_timestamps_2 - 1) - max_slope * (n_timestamps_1 - 1)) +\
max_slope * np.arange(-1, sz + 1)
up_min_line = ((n_timestamps_2 - 1) - min_slope * (n_timestamps_1 - 1)) +\
min_slope * np.arange(-1, sz + 1)
diag = (n_timestamps_2 - 1) / (n_timestamps_1 - 1) * np.arange(-1, sz + 1)
plt.plot(x, diag, 'black', lw=1)
plt.plot(x, max_slope * np.arange(-1, sz + 1), 'b', lw=1.5)
plt.plot(x, min_slope * np.arange(-1, sz + 1), 'r', lw=1.5)
plt.plot(x, low_max_line, 'g', lw=1.5)
plt.plot(x, up_min_line, 'y', lw=1.5)
for i in range(n_timestamps_1):
for j in range(n_timestamps_2):
plt.plot(i, j, 'o', color='k', ms=3)
ax.set_xticks(np.arange(-.5, n_timestamps_1, 1), minor=True)
ax.set_yticks(np.arange(-.5, n_timestamps_2, 1), minor=True)
plt.grid(which='minor', color='b', linestyle='--', linewidth=1)
plt.xticks(np.arange(0, n_timestamps_1, 1))
plt.yticks(np.arange(0, n_timestamps_2, 1))
plt.xlim((-0.5, n_timestamps_1 - 0.5))
plt.ylim((-0.5, n_timestamps_2 - 0.5))
n_timestamps_1, n_timestamps_2 = 8, 4
plt.figure(figsize=(12, 6))
ax = plt.subplot(2, 2, 1)
plot_itakura_pyts(n_timestamps_1, n_timestamps_2, max_slope=1., ax=ax)
plt.title('pyts, max-slope = 1.', fontsize=18)
ax = plt.subplot(2, 2, 2)
plot_itakura_pyts(n_timestamps_1, n_timestamps_2, max_slope=2., ax=ax)
plt.title('pyts, max-slope = 2.', fontsize=18)
ax = plt.subplot(2, 2, 3)
plot_itakura_tslearn(n_timestamps_1, n_timestamps_2, max_slope=2., ax=ax)
plt.title('tslearn, max-slope = 2.', fontsize=18)
ax = plt.subplot(2, 2, 4)
plot_itakura_tslearn(n_timestamps_1, n_timestamps_2, max_slope=4., ax=ax)
plt.title('tslearn, max-slope = 4.', fontsize=18)
plt.suptitle('Itakura parallelogram', y=1.02, fontsize=24)
plt.subplots_adjust(hspace=0.3)
Description
Please forgive me for my poor English since English is not my native language.
I have read the source code and Sakoe_Chiba band generation examples from tslearn, pyts. The generation manner of the Sakoe_Chiba seems different between pyts and tslearn, which leads to different calculation results when comparing 2 sequences with different lengths. I have also read the source code of fastdtw(https://pypi.org/project/fastdtw/) when the radius parameter is different, the calculation results between fastdtw and pyts are also varied.
Codes
Versions
NumPy 1.18.1 SciPy 1.4.1 Scikit-Learn 0.22.1 Numba 0.49.1 Pyts 0.11.0 tslearn: '0.4.1' fastdtw: See pypi