stefan-jansen / machine-learning-for-trading

Code for Machine Learning for Algorithmic Trading, 2nd edition.
https://ml4trading.io
12.57k stars 4.03k forks source link

problem in Long-Short Strategy, Part 1: Preparing Alpha Factors and Features #300

Closed ishikabansal77 closed 1 year ago

ishikabansal77 commented 1 year ago

def compute_bb(close):
    high, mid, low = BBANDS(close, timeperiod=20)
    return pd.DataFrame({'bb_high': high, 'bb_low': low}, index=close.index)

def compute_bb(close):
    high, mid, low = BBANDS(close, timeperiod=20)
    return pd.DataFrame({'bb_high': high, 'bb_low': low}, index=close.index)

bb_data = prices.groupby(level='symbol')['close'].apply(compute_bb)
prices = pd.concat([prices, bb_data], axis=1)

ValueError Traceback (most recent call last) Cell In[33], line 1 ----> 1 prices = pd.concat([prices, bb_data], axis=1)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\reshape\concat.py:372, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy) 369 elif copy and using_copy_on_write(): 370 copy = False --> 372 op = _Concatenator( 373 objs, 374 axis=axis, 375 ignore_index=ignore_index, 376 join=join, 377 keys=keys, 378 levels=levels, 379 names=names, 380 verify_integrity=verify_integrity, 381 copy=copy, 382 sort=sort, 383 ) 385 return op.get_result()

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\reshape\concat.py:563, in _Concatenator.__init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort) 560 self.verify_integrity = verify_integrity 561 self.copy = copy --> 563 self.new_axes = self._get_new_axes()

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\reshape\concat.py:633, in _Concatenator._get_new_axes(self) 631 def _get_new_axes(self) -> list[Index]: 632 ndim = self._get_result_dim() --> 633 return [ 634 self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) 635 for i in range(ndim) 636 ]

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\reshape\concat.py:634, in <listcomp>(.0) 631 def _get_new_axes(self) -> list[Index]: 632 ndim = self._get_result_dim() 633 return [ --> 634 self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) 635 for i in range(ndim) 636 ]

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\reshape\concat.py:640, in _Concatenator._get_comb_axis(self, i) 638 def _get_comb_axis(self, i: AxisInt) -> Index: 639 data_axis = self.objs[0]._get_block_manager_axis(i) --> 640 return get_objs_combined_axis( 641 self.objs, 642 axis=data_axis, 643 intersect=self.intersect, 644 sort=self.sort, 645 copy=self.copy, 646 )

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\api.py:95, in get_objs_combined_axis(objs, intersect, axis, sort, copy) 71 """ 72 Extract combined index: return intersection or union (depending on the 73 value of "intersect") of indexes on given axis, or None if all objects (...) 92 Index 93 """ 94 obs_idxes = [obj._get_axis(axis) for obj in objs] ---> 95 return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\api.py:148, in _get_combined_index(indexes, intersect, sort, copy) 146 index = index.intersection(other) 147 else: --> 148 index = union_indexes(indexes, sort=False) 149 index = ensure_index(index) 151 if sort:

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\api.py:293, in union_indexes(indexes, sort) 290 result = indexes[0] 292 for other in indexes[1:]: --> 293 result = result.union(other, sort=None if sort else False) 294 return result 296 elif kind == "array":

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\base.py:3200, in Index.union(self, other, sort) 3197 return result.sort_values() 3198 return result -> 3200 result = self._union(other, sort=sort) 3202 return self._wrap_setop_result(other, result)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\multi.py:3546, in MultiIndex._union(self, other, sort) 3541 return MultiIndex.from_arrays( 3542 zip(*result), sortorder=None, names=result_names 3543 ) 3545 else: -> 3546 right_missing = other.difference(self, sort=False) 3547 if len(right_missing): 3548 result = self.append(right_missing)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\base.py:3501, in Index.difference(self, other, sort) 3498 return result.sort_values() 3499 return result -> 3501 result = self._difference(other, sort=sort) 3502 return self._wrap_difference_result(other, result)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\base.py:3509, in Index._difference(self, other, sort) 3504 def _difference(self, other, sort): 3505 # overridden by RangeIndex 3507 this = self.unique() -> 3509 indexer = this.get_indexer_for(other) 3510 indexer = indexer.take((indexer != -1).nonzero()[0]) 3512 label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\base.py:5859, in Index.get_indexer_for(self, target) 5841 """ 5842 Guaranteed return of an indexer even when non-unique. 5843 (...) 5856 array([0, 2]) 5857 """ 5858 if self._index_as_unique: -> 5859 return self.get_indexer(target) 5860 indexer, _ = self.get_indexer_non_unique(target) 5861 return indexer

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\base.py:3802, in Index.get_indexer(self, target, method, limit, tolerance) 3797 target = target.astype(dtype, copy=False) 3798 return this._get_indexer( 3799 target, method=method, limit=limit, tolerance=tolerance 3800 ) -> 3802 return self._get_indexer(target, method, limit, tolerance)

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\base.py:3823, in Index._get_indexer(self, target, method, limit, tolerance) 3820 engine = self._engine 3821 # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" 3822 # has no attribute "_extract_level_codes" -> 3823 tgt_values = engine._extract_level_codes( # type: ignore[union-attr] 3824 target 3825 ) 3826 else: 3827 tgt_values = target._get_engine_target()

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\_libs\index.pyx:714, in pandas._libs.index.BaseMultiIndexCodesEngine._extract_level_codes()

File ~\anaconda3\envs\baclass\lib\site-packages\pandas\core\indexes\multi.py:143, in MultiIndexUIntEngine._codes_to_ints(self, codes) 126 """ 127 Transform combination(s) of uint64 in one uint64 (each), in a strictly 128 monotonic way (i.e. respecting the lexicographic order of integer (...) 139 Integer(s) representing one combination (each). 140 """ 141 # Shift the representation of each level by the pre-calculated number 142 # of bits: --> 143 codes <<= self.offsets 145 # Now sum and OR are in fact interchangeable. This is a simple 146 # composition of the (disjunct) significant bits of each level (i.e. 147 # each column in "codes") in a single positive integer: 148 if codes.ndim == 1: 149 # Single key

ValueError: operands could not be broadcast together with shapes (2061238,2) (3,) (2061238,2)

stefan-jansen commented 1 year ago

Something is off with the objects you are trying to concatenate. I'd recommend tracing where they come from and checking why they do not have the same index length.