Closed Mr-Geekman closed 1 year ago
Script for testing `describe`:
import time
import json
import numpy as np
import pandas as pd
from loguru import logger
from etna.models import NaiveModel
from etna.datasets import TSDataset, generate_ar_df
from etna.metrics import MAE
from etna.pipeline import Pipeline
# NOTE(review): HORIZON is not referenced by any function visible in this script — confirm whether it is still needed.
HORIZON = 14
def make_df(num_segments: int, num_features: int, num_periods: int, random_state: int = 0) -> pd.DataFrame:
    """Generate an AR dataframe and append random integer feature columns.

    Parameters
    ----------
    num_segments:
        Number of segments requested from ``generate_ar_df``.
    num_features:
        Number of extra ``new_int_{i}`` columns to append.
    num_periods:
        Number of periods requested from ``generate_ar_df``.
    random_state:
        Seed for the generator that fills the extra integer columns.
        It is not forwarded to ``generate_ar_df``.

    Returns
    -------
    pd.DataFrame
        The generated dataframe with ``num_features`` additional columns.
    """
    generator = np.random.default_rng(random_state)
    frame = generate_ar_df(periods=num_periods, start_time="2020-01-01", n_segments=num_segments)

    # The row count does not change while columns are appended, so read it once.
    num_rows = frame.shape[0]
    for feature_idx in range(num_features):
        # Each extra column holds integers drawn from [-100, 100).
        frame[f"new_int_{feature_idx}"] = generator.integers(low=-100, high=100, size=num_rows)
    return frame
def check_time(num_segments: int, num_features: int, num_periods: int = 365):
    """Return the wall-clock seconds taken by one ``TSDataset.describe`` call.

    The dataset is built with ``make_df`` and converted with
    ``TSDataset.to_dataset`` before the timer starts, so only the
    ``describe`` call itself is measured.

    Parameters
    ----------
    num_segments:
        Number of segments in the generated dataset.
    num_features:
        Number of extra integer columns in the generated dataset.
    num_periods:
        Number of periods in the generated dataset.

    Returns
    -------
    float
        Elapsed time in seconds, measured with ``time.perf_counter``.
    """
    raw_df = make_df(num_segments=num_segments, num_features=num_features, num_periods=num_periods)
    ts = TSDataset(df=TSDataset.to_dataset(raw_df), freq="D")

    started_at = time.perf_counter()
    ts.describe()  # result is intentionally discarded; only the duration matters
    return time.perf_counter() - started_at
def main():
    """Benchmark ``describe`` over a grid of dataset sizes and save the timings.

    For every combination of segment count and extra-feature count, the
    elapsed time from ``check_time`` is logged as a JSON line and collected.
    After the whole grid has run, all records are written to ``records.json``
    in the current working directory.
    """
    num_segments = [10, 100, 1000, 10_000, 100_000]
    num_features = [0, 3, 10]
    results = []
    for cur_num_segments in num_segments:
        for cur_num_features in num_features:
            time_result = check_time(num_segments=cur_num_segments, num_features=cur_num_features)
            record = {"num_segments": cur_num_segments, "num_features": cur_num_features, "time": time_result}
            results.append(record)
            logger.info(json.dumps(record))

    # Use a context manager so the file is flushed and closed deterministically;
    # the original passed a bare open() handle to json.dump and never closed it.
    with open("records.json", "w", encoding="utf-8") as output_file:
        json.dump(results, output_file, indent=2)


if __name__ == "__main__":
    main()
Results before optimization:
[
{
"num_segments": 10,
"num_features": 0,
"time": 0.007862442000000414
},
{
"num_segments": 10,
"num_features": 3,
"time": 0.00775050900000096
},
{
"num_segments": 10,
"num_features": 10,
"time": 0.00862645999999856
},
{
"num_segments": 100,
"num_features": 0,
"time": 0.06804819900000147
},
{
"num_segments": 100,
"num_features": 3,
"time": 0.05528060099999976
},
{
"num_segments": 100,
"num_features": 10,
"time": 0.05490351599999954
},
{
"num_segments": 1000,
"num_features": 0,
"time": 0.511956906
},
{
"num_segments": 1000,
"num_features": 3,
"time": 0.5077033259999997
},
{
"num_segments": 1000,
"num_features": 10,
"time": 0.49680727800000035
},
{
"num_segments": 10000,
"num_features": 0,
"time": 5.198245515000002
},
{
"num_segments": 10000,
"num_features": 3,
"time": 5.023976880999999
},
{
"num_segments": 10000,
"num_features": 10,
"time": 5.116792693999997
},
{
"num_segments": 100000,
"num_features": 0,
"time": 50.777624478999996
},
{
"num_segments": 100000,
"num_features": 3,
"time": 51.87359783100001
},
{
"num_segments": 100000,
"num_features": 10,
"time": 62.32446584499996
}
]
Results after optimization:
[
{
"num_segments": 10,
"num_features": 0,
"time": 0.006445242999999934
},
{
"num_segments": 10,
"num_features": 3,
"time": 0.005044411000000082
},
{
"num_segments": 10,
"num_features": 10,
"time": 0.00412322800000009
},
{
"num_segments": 100,
"num_features": 0,
"time": 0.007066482000000818
},
{
"num_segments": 100,
"num_features": 3,
"time": 0.006966671999999008
},
{
"num_segments": 100,
"num_features": 10,
"time": 0.006875658000000229
},
{
"num_segments": 1000,
"num_features": 0,
"time": 0.015869262000000717
},
{
"num_segments": 1000,
"num_features": 3,
"time": 0.018922749999999766
},
{
"num_segments": 1000,
"num_features": 10,
"time": 0.019158535000000754
},
{
"num_segments": 10000,
"num_features": 0,
"time": 0.05820048300000025
},
{
"num_segments": 10000,
"num_features": 3,
"time": 0.07253477299999922
},
{
"num_segments": 10000,
"num_features": 10,
"time": 0.0792398090000006
},
{
"num_segments": 100000,
"num_features": 0,
"time": 0.47934153599999973
},
{
"num_segments": 100000,
"num_features": 3,
"time": 0.6095439070000026
},
{
"num_segments": 100000,
"num_features": 10,
"time": 0.9615795119999859
}
]
Results for script from #1338:
[
{
"num_segments": 10,
"num_features": 0,
"time": 0.18644669199999964
},
{
"num_segments": 10,
"num_features": 3,
"time": 0.44299768199999967
},
{
"num_segments": 10,
"num_features": 10,
"time": 0.3184416309999998
},
{
"num_segments": 100,
"num_features": 0,
"time": 0.40837533100000023
},
{
"num_segments": 100,
"num_features": 3,
"time": 0.49695419899999926
},
{
"num_segments": 100,
"num_features": 10,
"time": 0.6302544880000003
},
{
"num_segments": 1000,
"num_features": 0,
"time": 2.3735116309999995
},
{
"num_segments": 1000,
"num_features": 3,
"time": 2.3557946890000014
},
{
"num_segments": 1000,
"num_features": 10,
"time": 3.484642255999999
},
{
"num_segments": 10000,
"num_features": 0,
"time": 18.414344812
},
{
"num_segments": 10000,
"num_features": 3,
"time": 23.947836302000006
},
{
"num_segments": 10000,
"num_features": 10,
"time": 37.481222474999996
}
]
🚀 Deployed on https://deploy-preview-1344--etna-docs.netlify.app
Merging #1344 (d7bdcf5) into master (ddc1711) will increase coverage by 0.30%. The diff coverage is 100.00%.
:exclamation: Your organization is not using the GitHub App Integration. As a result you may experience degraded service beginning May 15th. Please install the Github App Integration for your organization. Read more.
@@ Coverage Diff @@
## master #1344 +/- ##
==========================================
+ Coverage 88.84% 89.15% +0.30%
==========================================
Files 204 204
Lines 12665 12675 +10
==========================================
+ Hits 11252 11300 +48
+ Misses 1413 1375 -38
Files Changed | Coverage Δ | |
---|---|---|
etna/datasets/tsdataset.py | 93.09% <100.00%> (+0.13%) | :arrow_up: |
... and 4 files with indirect coverage changes
:mega: We’re building smart automated test selection to slash your CI/CD build times. Learn more
Before submitting (must do checklist)
Proposed Changes
Look at #1341.
Closing issues
Closes #1341.