migueldeicaza / TensorFlowSharp

TensorFlow API for .NET languages

Help please #229

Open nisbus opened 6 years ago

nisbus commented 6 years ago

Hello,

I have limited experience with Tensorflow but am working my way through the book "Machine Learning with Tensorflow".

I have got the code working in a Jupyter notebook, but then I found out about TensorFlowSharp, which suits me much better as a C# programmer.

I'm trying to translate a Q-learning decision policy from the book into TensorFlowSharp, but I'm getting stuck on a lot of things :)

Here is the code I'm trying to get working:

```csharp

public class QDecisionPolicy : DecisionPolicy
{
    #region Fields

    double _Epsilon = 0.95;
    double _Gamma = 0.3;
    IEnumerable<string> _Actions;
    int _OutputDim;
    int _h1Dim = 20;
    TFSession session;
    TFGraph graph;
    Random random = new Random();
    TFOutput q;
    Variable b2;
    Variable W2;
    Variable b1;
    Variable W1;
    TFOutput x;
    TFOutput y;
    TFOutput h1;
    TFOutput train_op;

    #endregion

    #region CTOR

    public QDecisionPolicy(int _InputDim, IEnumerable<string> actions = null)
    {
        _Actions = actions ?? new List<string>() { "Buy", "Sell", "Hold" };
        _OutputDim = _Actions.Count();

        var a = new List<float>(_h1Dim);        // float to match the TFDataType.Float placeholders
        var b = new List<float>(_OutputDim);
        for (int i = 0; i < _h1Dim; i++)
            a.Add(0.1f);                        // the constructor argument is only the capacity, so Add rather than index
        for (int i = 0; i < _OutputDim; i++)
            b.Add(0.1f);
        session = new TFSession();

        graph  = session.Graph;
        x  = graph.Placeholder(TFDataType.Float, new TFShape(-1, _InputDim));   // -1 = unknown batch size
        y  = graph.Placeholder(TFDataType.Float, new TFShape(_OutputDim));
        W1 = graph.Variable(graph.RandomNormal(new TFShape(_InputDim, _h1Dim)));            
        b1 = graph.Variable(graph.Const(a.ToArray()));
        h1 = graph.Relu(graph.MatMul(x, W1) + b1);                   //Operator + cannot be applied to operands of type TFOutput and Variable
        W2 = graph.Variable(graph.RandomNormal(new TFShape(_h1Dim, _OutputDim)));
        b2 = graph.Variable(graph.Const(b.ToArray()));
        q  = graph.Relu(graph.MatMul(h1, W2) + b2);                  //Operator + cannot be applied to operands of type TFOutput and Variable
        var loss = graph.Square(y - q);                              //Operator - cannot be applied to operands of type TFOutput and TFOutput
        train_op = graph.ApplyAdagrad(0.01).minimize(loss);          //How would this get translated?
        session.Run(graph.GetGlobalVariablesInitializer());          //And this one?
    }

    #endregion

    public override string SelectAction(State state, int? step)
    {
        string action = string.Empty;
        var threshold = Math.Min(_Epsilon, step.Value / 1000.0);    // 1000.0 to avoid integer division
        if (random.NextDouble() < threshold)                        // NextDouble() gives [0,1); Next() returns an int
        {
            var action_q_vals = session.Run(q, feed_dict: { x, state});//How do I construct the feed_dict from Tensorflow?
            var action_idx = graph.ArgMax(action_q_vals);              //??
            return _Actions.ElementAt(action_idx);                     // This should be an int from ArgMax
        }
        else
            action = _Actions.ElementAt(random.Next(_Actions.Count()));  // Next(n) already excludes n
        return action;
    }

    public override void UpdateQ(State state, string action, double reward, State nextState)
    {
        var action_q_vals = session.Run(q, feed_dict: { x, state});       // feed x, not q
        var next_action_q_vals = session.Run(q, feed_dict: { x, nextState});
        var next_action_idx = graph.ArgMax(next_action_q_vals);             
        action_q_vals[0, next_action_idx] = reward + _Gamma * next_action_q_vals[0, next_action_idx];
        action_q_vals = graph.Squeeze(action_q_vals.ToArray());
        session.Run(train_op, feed_dict={ x: state, y: action_q_vals});
    }
}

```

Of course this does not compile, but I'm pretty much out of my depth when it comes to finding the corresponding methods etc. in TensorFlowSharp.
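
In case it helps to show how far I've got: from reading the TensorFlowSharp source my guess is that the arithmetic has to go through explicit graph ops (graph.Add, graph.Sub, graph.Square) and that the Variable objects are used via their Read output. I can't find a ready-made equivalent of AdagradOptimizer(0.01).minimize(loss), so I've sketched plain gradient descent with AddGradients plus one AssignSub per variable instead (ApplyAdagrad seems to need an extra accumulator variable per weight). Completely untested, so please correct me:

```csharp
// My guess at the graph construction, with explicit ops instead of +/-.
x = graph.Placeholder(TFDataType.Float, new TFShape(-1, _InputDim));   // -1 = unknown batch size
y = graph.Placeholder(TFDataType.Float, new TFShape(_OutputDim));

W1 = graph.Variable(graph.RandomNormal(new TFShape(_InputDim, _h1Dim)));
b1 = graph.Variable(graph.Const(new TFTensor(a.ToArray())));
// h1 = relu(x * W1 + b1): '+' becomes graph.Add, and the variable is read via .Read
h1 = graph.Relu(graph.Add(graph.MatMul(x, W1.Read), b1.Read));

W2 = graph.Variable(graph.RandomNormal(new TFShape(_h1Dim, _OutputDim)));
b2 = graph.Variable(graph.Const(new TFTensor(b.ToArray())));
q  = graph.Relu(graph.Add(graph.MatMul(h1, W2.Read), b2.Read));

// loss = sum((y - q)^2): '-' becomes graph.Sub
var loss = graph.ReduceSum(graph.Square(graph.Sub(y, q)));

// Plain gradient descent in place of AdagradOptimizer(0.01).minimize(loss):
// one gradient per variable, one AssignSub per variable.
var lr    = graph.Const(0.01f);
var grads = graph.AddGradients(new[] { loss },
                               new[] { W1.Read, b1.Read, W2.Read, b2.Read });
var trainOps = new[]
{
    graph.AssignSub(W1, graph.Mul(lr, grads[0])).Operation,
    graph.AssignSub(b1, graph.Mul(lr, grads[1])).Operation,
    graph.AssignSub(W2, graph.Mul(lr, grads[2])).Operation,
    graph.AssignSub(b2, graph.Mul(lr, grads[3])).Operation,
};  // I would store these in a TFOperation[] field instead of the TFOutput train_op

// tf.global_variables_initializer().run() equivalent: run the init ops once
session.GetRunner().AddTarget(graph.GetGlobalVariablesInitializer()).Run();
```

I'm particularly unsure whether AddGradients is happy taking the Read outputs, or whether it needs the variable ops themselves.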

Any help would be greatly appreciated.
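
To be specific about where I'm stuck: for the feed_dict parts my best guess is TFSession.Runner (AddInput to feed a placeholder, Fetch to read values back, AddTarget to run the training ops), with the argmax done over the fetched array in plain C#. Something like this for SelectAction, where ToArray(state) is just a stand-in for a State -> float[1, _InputDim] conversion I would still have to write:

```csharp
public override string SelectAction(State state, int? step)
{
    var threshold = Math.Min(_Epsilon, step.Value / 1000.0);
    if (random.NextDouble() < threshold)
    {
        float[,] stateArray = ToArray(state);            // hypothetical State -> float[1, _InputDim] helper
        var results = session.GetRunner()
                             .AddInput(x, new TFTensor(stateArray))   // feed_dict = {x: state}
                             .Fetch(q)                                // fetch the q values
                             .Run();
        var qVals = (float[,])results[0].GetValue();

        // np.argmax equivalent, done client-side over the action dimension
        int best = 0;
        for (int i = 1; i < qVals.GetLength(1); i++)
            if (qVals[0, i] > qVals[0, best])
                best = i;
        return _Actions.ElementAt(best);
    }
    return _Actions.ElementAt(random.Next(_Actions.Count()));
}
```

UpdateQ would presumably follow the same pattern: AddInput for x (and y), Fetch q for the current and next state, patch the target value in the fetched array, then run the training ops with AddTarget.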

Here is the code from the book (or very similar)

BTW, is there somewhere other than here for submitting these kinds of questions? I feel bad about creating an issue for this, since the only bug here is me :)

Thank you

axmand commented 6 years ago

I have the same question...

var trainOp = _graph.ApplyGradientDescent(w1, loss, delta);

When there are more than two layers, how can it just be `w1`?

I found something that might help you @nisbus: Learn TensorflowSharp
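
My guess (not verified) is that you call AddGradients once over all of the variables and then add one update op per variable, something like:

```csharp
// Guess: one gradient per trainable variable, one ApplyGradientDescent per variable.
// w1, b1, w2, b2 are the variables used by the loss.
var grads = _graph.AddGradients(new[] { loss }, new[] { w1, b1, w2, b2 });
var alpha = _graph.Const(0.01f);   // learning rate
var trainOps = new[]
{
    _graph.ApplyGradientDescent(w1, alpha, grads[0]).Operation,
    _graph.ApplyGradientDescent(b1, alpha, grads[1]).Operation,
    _graph.ApplyGradientDescent(w2, alpha, grads[2]).Operation,
    _graph.ApplyGradientDescent(b2, alpha, grads[3]).Operation,
};
// then run all of trainOps as targets on each training step
```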

nisbus commented 6 years ago

Thanks for that @axmand, I'll take a look.

A bit easier to read: google translate