Closed SverreNystad closed 7 months ago
def remove_outliers(df, value_col, period=None, robust=True, iqr_factor=3):
    """Flag outliers in a series via STL residuals and interpolate over them.

    The column ``df[value_col]`` is decomposed with ``STL`` (presumably
    ``statsmodels.tsa.seasonal.STL``, imported elsewhere in this file --
    confirm). Residuals lying outside the fences
    ``[q1 - iqr_factor * iqr, q3 + iqr_factor * iqr]`` are treated as
    anomalies, blanked out, and replaced by linear interpolation between
    the neighbouring non-anomalous values.

    Args:
        df: Input frame. It is not modified; a new frame is returned.
        value_col: Name of the column holding the series to clean.
        period: Forwarded to ``STL`` as ``period``.
        robust: Forwarded to ``STL`` as ``robust``.
        iqr_factor: Multiplier applied to the residual inter-quartile range
            to build the outlier fences. Defaults to 3, the previously
            hard-coded value.

    Returns:
        A copy of ``df`` with two extra columns:
        ``anomaly`` (bool, True where the residual is outside the fences)
        and ``value_corrected`` (the series with anomalies interpolated).
    """
    serie = df[value_col]
    resid = STL(serie, period=period, robust=robust).fit().resid

    q1 = resid.quantile(0.25)
    q3 = resid.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - iqr_factor * iqr
    upper = q3 + iqr_factor * iqr

    # Strict comparison on BOTH sides: with ``>=`` a zero IQR (e.g. a
    # perfectly periodic series) would flag every point sitting exactly on
    # the upper fence as an anomaly.
    # The mask is applied positionally so duplicated index labels do not
    # cause unrelated rows to be flagged.
    is_anomaly = ((resid < lower) | (resid > upper)).to_numpy()

    # ``assign`` returns a new frame, so the caller's ``df`` stays untouched.
    df = df.assign(anomaly=is_anomaly)

    # Interpolate only the corrected series. Calling ``df.interpolate`` on
    # the whole frame would also rewrite gaps in every other column.
    corrected = serie.mask(is_anomaly).interpolate(method="linear")

    # Linear interpolation cannot fill anomalies at the very start of the
    # series (no left neighbour); fall back to the original value there.
    df["value_corrected"] = corrected.fillna(serie)
    return df
This made it worse