pydata / bottleneck

Fast NumPy array functions written in C
BSD 2-Clause "Simplified" License
1.05k stars 101 forks source link

Ignore length check #434

Open wukan1986 opened 1 year ago

wukan1986 commented 1 year ago

Ignore length check

When using groupby, the length of the data in each group varies, so a group can be shorter than the moving window. It would be great if bottleneck behaved like talib here and did not raise an error in that case.

import numpy as np
import pandas as pd
import talib as ta
import bottleneck as bn

pd._testing._N = 10
pd._testing._K = 4

df = pd._testing.makeTimeDataFrame()
df.iloc[:5, 2:] = np.nan
print(df)
"""
                   A         B         C         D
2000-01-03 -0.871075  0.698964       NaN       NaN
2000-01-04 -0.096067  0.305798       NaN       NaN
2000-01-05 -1.691204 -1.157270       NaN       NaN
2000-01-06 -1.538781  0.688315       NaN       NaN
2000-01-07 -0.435411  0.337505       NaN       NaN
2000-01-10 -0.874476 -0.291829  0.144669  0.594654
2000-01-11  0.613167 -0.082849  1.034253 -0.036335
2000-01-12  0.452147  0.610120  1.045389  1.367216
2000-01-13  0.401962 -0.193553  0.182087 -1.482994
2000-01-14 -1.078286  0.129149  0.491651 -0.000052
"""

df = pd.DataFrame(df.stack(), columns=['CLOSE'])
df.index.names = ['date', 'asset']
print(df.tail())
"""
                     CLOSE
date       asset          
2000-01-13 D     -1.482994
2000-01-14 A     -1.078286
           B      0.129149
           C      0.491651
           D     -0.000052
"""

df['SMA'] = df['CLOSE'].groupby('asset', group_keys=False).apply(lambda x: ta.SMA(x, 5))
df['move_mean'] = df['CLOSE'].groupby('asset', group_keys=False).apply(lambda x: pd.Series(bn.move_mean(x, 5), index=x.index))
print(df.tail())
"""
                     CLOSE       SMA  move_mean
date       asset                               
2000-01-13 D     -1.482994       NaN        NaN
2000-01-14 A     -1.078286 -0.097097  -0.097097
           B      0.129149  0.034208   0.034208
           C      0.491651  0.579610   0.579610
           D     -0.000052  0.088498   0.088498
"""

df['SMA'] = df['CLOSE'].groupby('asset', group_keys=False).apply(lambda x: ta.SMA(x, 8))
df['move_mean'] = df['CLOSE'].groupby('asset', group_keys=False).apply(lambda x: pd.Series(bn.move_mean(x, 8), index=x.index))
print(df.tail())
"""
    df['move_mean'] = df['CLOSE'].groupby('asset', group_keys=False).apply(lambda x: pd.Series(bn.move_mean(x, 8), index=x.index))
                                                                                               ^^^^^^^^^^^^^^^^^^
ValueError: Moving window (=8) must between 1 and 5, inclusive
"""

There may be two places that need to be modified: https://github.com/pydata/bottleneck/blob/master/bottleneck/src/move_template.c#L993 and https://github.com/pydata/bottleneck/blob/master/bottleneck/src/nonreduce_axis_template.c#L62

wukan1986 commented 1 year ago

https://github.com/TA-Lib/ta-lib-python/issues/585

Same problem. Please stay silent and do not throw an exception.