fslaborg / Deedle

Easy to use .NET library for data and time series manipulation and for scientific programming
http://fslab.org/Deedle/
BSD 2-Clause "Simplified" License
924 stars 196 forks source link

Converted code to Deedle #528

Closed Hulkstance closed 3 years ago

Hulkstance commented 3 years ago

I'm recreating this from Python (pandas). I'm new to the library and I'd like some feedback on whether anything could be done better. Also, @zyzhu, thanks for your examples on SO and here in the issues. By the way, the strategy differs from the Python code, but the idea stays the same.

I don't think ZipInner is the most human-readable way to represent the following. It would be nice if you know a better way.

# Flag a buy on rows where all three conditions hold at once.
ema_cross_up = qtpylib.crossed_above(dataframe['ema20'], dataframe['ema50'])
close_above_ema20 = dataframe['ha_close'] > dataframe['ema20']
green_bar = dataframe['ha_open'] < dataframe['ha_close']  # green bar
dataframe.loc[ema_cross_up & close_above_ema20 & green_bar, 'buy'] = 1

Full code

/// <summary>
/// Template for a strategy: subclasses provide the indicator, buy-trend and
/// sell-trend steps, and <see cref="AnalyzeTicker"/> chains them in that order.
/// </summary>
public abstract class BaseStrategy
{
    /// <summary>
    /// Runs the three steps in sequence, feeding each step the frame returned
    /// by the previous one.
    /// </summary>
    /// <param name="df">Frame handed to <see cref="PopulateIndicators"/>.</param>
    /// <returns>The frame returned by <see cref="PopulateSellTrend"/>.</returns>
    public Frame<int, string> AnalyzeTicker(Frame<int, string> df)
    {
        var withIndicators = PopulateIndicators(df);
        var withBuyTrend = PopulateBuyTrend(withIndicators);
        var withSellTrend = PopulateSellTrend(withBuyTrend);

        return withSellTrend;
    }

    /// <summary>First step of <see cref="AnalyzeTicker"/>.</summary>
    /// <param name="df">Input frame.</param>
    /// <returns>The frame to pass on to <see cref="PopulateBuyTrend"/>.</returns>
    public abstract Frame<int, string> PopulateIndicators(Frame<int, string> df);

    /// <summary>Second step of <see cref="AnalyzeTicker"/>.</summary>
    /// <param name="df">Frame produced by <see cref="PopulateIndicators"/>.</param>
    /// <returns>The frame to pass on to <see cref="PopulateSellTrend"/>.</returns>
    public abstract Frame<int, string> PopulateBuyTrend(Frame<int, string> df);

    /// <summary>Third and last step of <see cref="AnalyzeTicker"/>.</summary>
    /// <param name="df">Frame produced by <see cref="PopulateBuyTrend"/>.</param>
    /// <returns>The frame that <see cref="AnalyzeTicker"/> returns to its caller.</returns>
    public abstract Frame<int, string> PopulateSellTrend(Frame<int, string> df);
}

/// <summary>
/// Strategy driven by a short-period RSI stored in the "rsi" column:
/// "buy" is set where the RSI is below <see cref="BuyBelow"/> and higher than on
/// the previous row; "sell" is set where the RSI is above <see cref="SellAbove"/>.
/// </summary>
/// <remarks>
/// Every step mutates the frame it receives via <c>AddColumn</c> and returns that
/// same instance. <c>AddColumn</c> fails when the column already exists, so the
/// steps are not meant to be run twice on the same frame.
/// </remarks>
public class RsiStrategy : BaseStrategy
{
    /// <summary>Look-back period passed to the RSI indicator.</summary>
    private const int RsiPeriod = 2;

    /// <summary>RSI must be strictly below this value for a buy signal.</summary>
    private const decimal BuyBelow = 45m;

    /// <summary>RSI must be strictly above this value for a sell signal.</summary>
    private const decimal SellAbove = 70m;

    /// <summary>
    /// Converts every row into an <see cref="Ohlcv"/> candle, computes the RSI over
    /// the candles and adds it to the frame as the "rsi" column.
    /// </summary>
    /// <param name="df">Frame with Timestamp, Open, High, Low, Close and Volume columns.</param>
    /// <returns>The same frame instance, now containing "rsi".</returns>
    public override Frame<int, string> PopulateIndicators(Frame<int, string> df)
    {
        var candles = df.Rows.Select(kvp => new Ohlcv
        {
            Timestamp = kvp.Value.GetAs<DateTime>("Timestamp"),
            Open = kvp.Value.GetAs<decimal>("Open"),
            High = kvp.Value.GetAs<decimal>("High"),
            Low = kvp.Value.GetAs<decimal>("Low"),
            Close = kvp.Value.GetAs<decimal>("Close"),
            Volume = kvp.Value.GetAs<decimal>("Volume")
        }).Observations.Select(e => e.Value).ToList<IOhlcv>();

        df.AddColumn("rsi", candles.Rsi(RsiPeriod));

        return df;
    }

    /// <summary>
    /// Adds a boolean "buy" column: true where the RSI is below
    /// <see cref="BuyBelow"/> and greater than the RSI of the previous row.
    /// </summary>
    /// <param name="df">Frame that already contains the "rsi" column.</param>
    /// <returns>The same frame instance, now containing "buy".</returns>
    public override Frame<int, string> PopulateBuyTrend(Frame<int, string> df)
    {
        var rsi = DenseRsi(df);
        var previousRsi = DenseRsi(df, shift: 1);

        // Both series share the keys 0..RowCount-1, so the inner zip keeps every row.
        var buy = rsi.ZipInner(previousRsi).Select(kvp =>
        {
            var current = kvp.Value.Item1;
            var previous = kvp.Value.Item2;

            return current < BuyBelow && current > previous;
        });
        df.AddColumn("buy", buy);

        return df;
    }

    /// <summary>
    /// Adds a boolean "sell" column: true where the RSI is above <see cref="SellAbove"/>.
    /// </summary>
    /// <param name="df">Frame that already contains the "rsi" column.</param>
    /// <returns>The same frame instance, now containing "sell".</returns>
    public override Frame<int, string> PopulateSellTrend(Frame<int, string> df)
    {
        // Only the current RSI matters here; no shifted series is needed.
        var sell = DenseRsi(df).Select(kvp => kvp.Value > SellAbove);
        df.AddColumn("sell", sell);

        return df;
    }

    /// <summary>
    /// Reads the "rsi" column, optionally shifted, realigned to the keys
    /// 0..RowCount-1 with missing values replaced by 0.
    /// </summary>
    /// <remarks>
    /// NOTE(review): because missing values become 0, the first row with a real RSI
    /// is compared against 0 rather than skipped; confirm this is intended.
    /// Assumes the frame's row keys are 0..RowCount-1 - TODO confirm against callers.
    /// </remarks>
    private static Series<int, decimal> DenseRsi(Frame<int, string> df, int shift = 0)
    {
        var rsi = df.GetColumn<decimal>("rsi");
        if (shift != 0)
        {
            rsi = rsi.Shift(shift);
        }

        return rsi.Realign(Enumerable.Range(0, df.RowCount)).FillMissing(0m);
    }
}

/// <summary>
/// Read-only view of one candle as consumed by the indicator extensions
/// (the RSI extension reads <see cref="Close"/>).
/// </summary>
public interface IOhlcv
{
    /// <summary>Timestamp of the candle.</summary>
    DateTime Timestamp { get; }
    /// <summary>Open value of the candle.</summary>
    decimal Open { get; }
    /// <summary>High value of the candle.</summary>
    decimal High { get; }
    /// <summary>Low value of the candle.</summary>
    decimal Low { get; }
    /// <summary>Close value of the candle; the input to the RSI calculation.</summary>
    decimal Close { get; }
    /// <summary>Volume value of the candle.</summary>
    decimal Volume { get; }
}

/// <summary>
/// Mutable <see cref="IOhlcv"/> implementation; the strategy fills it with an
/// object initializer from the columns of a frame row.
/// </summary>
public class Ohlcv : IOhlcv
{
    /// <summary>Timestamp of the candle.</summary>
    public DateTime Timestamp { get; set; }
    /// <summary>Open value of the candle.</summary>
    public decimal Open { get; set; }
    /// <summary>High value of the candle.</summary>
    public decimal High { get; set; }
    /// <summary>Low value of the candle.</summary>
    public decimal Low { get; set; }
    /// <summary>Close value of the candle.</summary>
    public decimal Close { get; set; }
    /// <summary>Volume value of the candle.</summary>
    public decimal Volume { get; set; }
}

/// <summary>
/// Indicator extension methods that wrap TA-Lib and return one value per input
/// element, with <c>null</c> where the indicator has no value yet.
/// </summary>
public static partial class IndicatorExtensions
{
    /// <summary>
    /// Re-positions a TA-Lib output buffer so that it lines up with the input:
    /// <paramref name="outBegIdx"/> leading <c>null</c> entries followed by the
    /// first <paramref name="outNbElement"/> values of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Output buffer filled by TA-Lib, valid values first.</param>
    /// <param name="outBegIdx">Number of leading positions without a value.</param>
    /// <param name="outNbElement">Number of valid values at the start of <paramref name="source"/>.</param>
    /// <returns>A new list of <paramref name="outBegIdx"/> nulls followed by the valid values.</returns>
    private static IList<decimal?> FixIndicatorOrdering(IList<decimal> source, int outBegIdx, int outNbElement)
    {
        var outValues = new List<decimal?>();

        for (int i = 0; i < outBegIdx; i++)
        {
            outValues.Add(null);
        }

        foreach (var value in source.Take(outNbElement))
        {
            outValues.Add(value);
        }

        return outValues;
    }

    /// <summary>
    /// Computes the RSI over the <see cref="IOhlcv.Close"/> values of the candles.
    /// </summary>
    /// <param name="source">Candles in calculation order.</param>
    /// <param name="period">RSI look-back period.</param>
    /// <returns>One entry per candle; <c>null</c> where the RSI is not available.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="InvalidOperationException">TA-Lib did not report success.</exception>
    public static IList<decimal?> Rsi(this IList<IOhlcv> source, int period = 14)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        // Delegate to the decimal overload so the TA-Lib call lives in one place.
        IList<decimal> closes = source.Select(e => e.Close).ToArray();

        return closes.Rsi(period);
    }

    /// <summary>
    /// Computes the RSI over a list of values.
    /// </summary>
    /// <param name="source">Input values in calculation order.</param>
    /// <param name="period">RSI look-back period.</param>
    /// <returns>
    /// One entry per input value; <c>null</c> where the RSI is not available.
    /// An empty input yields an empty list.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="InvalidOperationException">TA-Lib did not report success.</exception>
    public static IList<decimal?> Rsi(this IList<decimal> source, int period = 14)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        var input = source.ToArray();
        if (input.Length == 0)
        {
            // Nothing to compute; avoids handing TA-Lib an end index of -1.
            return new List<decimal?>();
        }

        var rsiValues = new decimal[input.Length];

        var result = TALib.Core.Rsi(input, 0, input.Length - 1, rsiValues, out int outBegIdx, out int outNbElement, period);

        if (result != TALib.Core.RetCode.Success)
        {
            // Surface the return code so the failure can be diagnosed.
            throw new InvalidOperationException($"Could not calculate RSI. TA-Lib returned {result}.");
        }

        return FixIndicatorOrdering(rsiValues, outBegIdx, outNbElement);
    }
}
zyzhu commented 3 years ago

The following snippet only checks close and open prices. You may expand it to make more complex logic assuming you already calculated ema indicators

// One value per row: 1.0 when Open is greater than Close, otherwise 0.0.
var buy = df.Rows.Select(row =>
{
    var prices = row.Value.As<float>();
    var openPrice = prices.Get("Open");
    var closePrice = prices.Get("Close");

    if (openPrice > closePrice)
    {
        return 1.0;
    }

    return 0.0;
});
df.AddColumn("Buy", buy);
Hulkstance commented 3 years ago

@zyzhu, thanks for your answer. By following your way,

kvp.Value.GetAs<decimal>("Rsi")

throws Deedle.MissingValueException: 'Value at the key Rsi is missing', probably because the values are <missing> rather than filled with zeroes.

This is the actual strategy

def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Store the 2-period RSI of the ``close`` column as ``rsi`` and return the same frame."""
    rsi = ta.RSI(dataframe['close'], timeperiod=2)
    dataframe['rsi'] = rsi
    return dataframe

def informative_pairs(self):
    """Return the list of informative pairs; this strategy has none, so it is empty."""
    return []

def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Set ``buy`` to 1 on rows where ``rsi`` is below 45 and above the previous row's ``rsi``.

    The frame is modified in place and returned; rows that do not match are
    left without a ``buy`` value.
    """
    rsi = dataframe['rsi']

    # Build each comparison separately: `&` binds tighter than `<`, so writing
    # `rsi < 45 & (...)` would evaluate `45 & (...)` first.
    below_threshold = rsi < 45
    rising = rsi > rsi.shift(1)

    dataframe.loc[below_threshold & rising, 'buy'] = 1
    return dataframe

def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Write 0 into ``sell`` on rows where ``rsi`` is greater than 70 and return the same frame.

    NOTE(review): the value written is 0, while the buy step writes 1 - confirm
    that this is intentional.
    """
    above_threshold = dataframe['rsi'] > 70
    dataframe.loc[above_threshold, 'sell'] = 0
    return dataframe

Zip works fine, but Item1 is not descriptive.

csharp
// Item1 comes from a, Item2 from b.
var buy = a.ZipInner(b).Select(pair =>
    pair.Value.Item1 < 45 && pair.Value.Item1 > pair.Value.Item2);
zyzhu commented 3 years ago

You may either fill missing first or use TryGet to get an OptionalValue. Then check it like the following. The actual logic is up to you.

// One value per row: 1.0 when both Open and Close are present and Open is
// greater than Close; 0.0 otherwise (including when either value is missing).
var buy = df.Rows.Select(row =>
{
    var prices = row.Value.As<float>();
    var openPrice = prices.TryGet("Open");
    var closePrice = prices.TryGet("Close");

    if (!openPrice.HasValue || !closePrice.HasValue)
    {
        return 0.0;
    }

    return openPrice.Value > closePrice.Value ? 1.0 : 0.0;
});
df.AddColumn("Buy", buy);
Hulkstance commented 3 years ago

@zyzhu, it seems like I need to use Zip anyway. Does this look good? P.S. I don't want to use TryGet because it's not generic.

/// <summary>
/// Turns every row into an <see cref="Ohlcv"/> candle, computes the 2-period RSI
/// over the candles and adds it to the frame as the "Rsi" column.
/// </summary>
/// <param name="df">Frame with Timestamp, Open, High, Low, Close and Volume columns.</param>
/// <returns>The same frame instance, now containing "Rsi".</returns>
public override Frame<int, string> PopulateIndicators(Frame<int, string> df)
{
    var candleSeries = df.Rows.Select(row =>
    {
        var bar = row.Value;

        return new Ohlcv
        {
            Timestamp = bar.GetAs<DateTime>("Timestamp"),
            Open = bar.GetAs<decimal>("Open"),
            High = bar.GetAs<decimal>("High"),
            Low = bar.GetAs<decimal>("Low"),
            Close = bar.GetAs<decimal>("Close"),
            Volume = bar.GetAs<decimal>("Volume")
        };
    });

    var candles = candleSeries.Observations
        .Select(observation => observation.Value)
        .ToList<IOhlcv>();
    var rsi = candles.Rsi(2);

    df.AddColumn("Rsi", rsi);

    return df;
}

/// <summary>
/// Adds a boolean "Buy" column: true where the RSI is below 45 and greater than
/// the RSI of the previous row. Missing RSI values are treated as 0.
/// </summary>
/// <param name="df">Frame that already contains the "Rsi" column.</param>
/// <returns>The same frame instance, now containing "Buy".</returns>
public override Frame<int, string> PopulateBuyTrend(Frame<int, string> df)
{
    var rsiColumn = df.GetColumn<decimal>("Rsi");

    var current = rsiColumn
        .Realign(Enumerable.Range(0, df.RowCount))
        .FillMissing(0m);
    var previous = rsiColumn
        .Shift(1)
        .Realign(Enumerable.Range(0, df.RowCount))
        .FillMissing(0m);

    // Item1 is the current RSI, Item2 the RSI shifted by one row.
    var buy = current.ZipInner(previous).Select(pair =>
    {
        var now = pair.Value.Item1;
        var before = pair.Value.Item2;

        return now < 45 && now > before;
    });

    df.AddColumn("Buy", buy);

    return df;
}

/// <summary>
/// Adds a boolean "Sell" column: true where the RSI is above 70.
/// Missing RSI values are treated as 0.
/// </summary>
/// <param name="df">Frame that already contains the "Rsi" column.</param>
/// <returns>The same frame instance, now containing "Sell".</returns>
public override Frame<int, string> PopulateSellTrend(Frame<int, string> df)
{
    var sell = df.GetColumn<decimal>("Rsi")
        .Realign(Enumerable.Range(0, df.RowCount))
        .FillMissing(0m)
        .Select(entry => entry.Value > 70);

    df.AddColumn("Sell", sell);

    return df;
}
zyzhu commented 3 years ago

That looks good. Just one piece of feedback: you may return the buy or sell series directly instead of adding it to the df, so that the original df stays intact. If a "Buy" column already exists in df the next time you run it, AddColumn will throw an error.

Hulkstance commented 3 years ago

@zyzhu, thanks a lot! :)