ammarbinfaisal / sahl

a programming language with channels and coroutines/threads
MIT License
9 stars 1 forks source link

neural net doesn't correctly learn XOR #65

Open ammarbinfaisal opened 9 months ago

ammarbinfaisal commented 9 months ago

Figure out what's wrong in the code: https://github.com/ammarbinfaisal/sahl/blob/master/samples/neuralnet.sahl

image

ammarbinfaisal commented 9 months ago

This Python code behaves the same way, so the bug is in the training logic itself rather than in sahl.

import numpy as np

def randf():
    """Return one uniform random float drawn from [0, 1)."""
    sample = np.random.rand()
    return sample

def pow(a, b):
    """Return ``a`` raised to the power ``b``.

    NOTE: shadows the builtin ``pow`` (kept for parity with the sahl code).
    """
    result = a ** b
    return result

def exit(code):
    """Terminate the process with the given exit code.

    NOTE: shadows the builtin ``exit``. ``sys.exit(code)`` is defined as
    ``raise SystemExit(code)``, so this raises it directly.
    """
    raise SystemExit(code)

def exp(a):
    """Element-wise e**a, delegating to numpy."""
    value = np.exp(a)
    return value

def randmat(r, c):
    """Return an ``r`` x ``c`` matrix of uniform [0, 1) samples."""
    shape = (r, c)
    return np.random.rand(*shape)

def submat(m1, m2):
    """Element-wise difference ``m1 - m2``."""
    diff = np.subtract(m1, m2)
    return diff

def powmat(m, p):
    """Raise every element of ``m`` to the power ``p``."""
    raised = np.power(m, p)
    return raised

def max(a, b):
    """Element-wise maximum of ``a`` and ``b``.

    NOTE: shadows the builtin ``max`` (kept for parity with the sahl code).
    """
    biggest = np.maximum(a, b)
    return biggest

def relu(m):
    """Rectified linear unit: clamp negative entries of ``m`` to zero."""
    # np.maximum is commutative, so the operand order is immaterial.
    return np.maximum(m, 0.0)

def sigmoid(m):
    """Logistic sigmoid 1 / (1 + e^-m), applied element-wise."""
    denom = 1.0 + np.exp(-m)
    return 1.0 / denom

def summat(m):
    """Sum of all elements of ``m`` (scalar)."""
    total = np.sum(m)
    return total

def matmul(m1, m2):
    """Matrix product ``m1 @ m2`` (via np.dot, matching the original)."""
    product = np.dot(m1, m2)
    return product

def matmulc(mat, v):
    """Scale ``mat`` element-wise by ``v`` (scalar or broadcastable array)."""
    scaled = np.multiply(mat, v)
    return scaled

def transpose(m):
    """Return the transpose of ``m``."""
    flipped = np.transpose(m)
    return flipped

def print_size(m):
    """Print the dimensions of a 2-D matrix as 'size: R x C'."""
    rows = len(m)
    cols = len(m[0])
    print("size: {} x {}".format(rows, cols))

def main():
    input_size = 2
    hidden_size = 36
    output_size = 1

    w1 = randmat(hidden_size, input_size)
    w2 = randmat(output_size, hidden_size)

    print("w1:\n", w1)
    print("w2:\n", w2)

    lr = 0.01

    # training xor
    _x = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
    x = np.transpose(_x)
    y = np.array([[0.0, 1.0, 1.0, 0.0]])

    print("x:\n", x)

    for i in range(10000):
        # forward
        h = relu(matmul(w1, x))
        h2 = sigmoid(matmul(w2, h))

        # loss
        loss = summat(powmat(submat(h2, y), 2.0))
        if i % 1000 == 0:
            print("i: {}".format(i))
            print("loss: {}".format(loss))
            print("preds:\n", h2)

        # backward
        grad_h2 = submat(h2, y)
        grad_w2 = matmul(grad_h2, transpose(h))
        grad_h = matmul(transpose(w2), grad_h2)
        grad_w1 = matmul(grad_h, transpose(x))

        # update
        w1 = submat(w1, matmulc(grad_w1, lr))
        w2 = submat(w2, matmulc(grad_w2, lr))

if __name__ == "__main__":
    main()
ammarbinfaisal commented 9 months ago

Also, this Python code takes 0.25 seconds while the sahl code takes 3.3 s :(