tensorflow / swift-apis

Swift for TensorFlow Deep Learning Library
Apache License 2.0

differentiableReduce error #941

Closed: jackharmer closed this issue 1 year ago

jackharmer commented 4 years ago

Hello, using the following code on Ubuntu 18.04:

public struct MLP: Layer {
    public typealias Input = Tensor<Float>
    public typealias Output = Tensor<Float>

    var denseLayers = [Dense<Float>]()

    init(inputSize: Int, hiddenSizes: [Int]) {
        let allSizes = [inputSize] + hiddenSizes
        for cc in 0..<hiddenSizes.count { 
            denseLayers.append(
                Dense(
                    inputSize: allSizes[cc], 
                    outputSize: hiddenSizes[cc], 
                    activation: relu
                )
            )
        }
    }

    @differentiable
    public func callAsFunction(_ input: Input) -> Output {
         return denseLayers.differentiableReduce(input) { x, layer in layer(x) }
    }
}

produces the error (tested on s4tf 0.8 and 0.9, gpu and cpu):

Precondition failed: cannot move Array.DifferentiableView with count 2 along direction with different count 0: file /swift-base/swift/stdlib/public/core/../Differentiation/ArrayDifferentiation.swift, line 58

A version of this that hard-codes the 2 layers works fine, as expected, i.e. this variant:

public struct MLP: Layer {
    public typealias Input = Tensor<Float>
    public typealias Output = Tensor<Float>

    var denseLayer1: Dense<Float>
    var denseLayer2: Dense<Float>

    init(inputSize: Int, hiddenSizes: [Int]) {
        let allSizes = [inputSize] + hiddenSizes
        denseLayer1 = Dense(
            inputSize: allSizes[0], 
            outputSize: allSizes[1], 
            activation: relu
        )
        denseLayer2 = Dense(
            inputSize: allSizes[1], 
            outputSize: allSizes[2], 
            activation: relu
        )
    }

    @differentiable
    public func callAsFunction(_ input: Input) -> Output {
        return input.sequenced(through: denseLayer1, denseLayer2)
    }
}
dan-zheng commented 4 years ago

produces the error (tested on s4tf 0.8 and 0.9, gpu and cpu):

Precondition failed: cannot move Array.DifferentiableView with count 2 along direction with different count 0: file /swift-base/swift/stdlib/public/core/../Differentiation/ArrayDifferentiation.swift, line 58

Could you please share a complete reproducer code snippet? The snippet you shared doesn't trigger differentiation (e.g. by calling gradient(at:in:)), so it doesn't actually reproduce the precondition failure.
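For example, something along these lines would do (a rough sketch with placeholder sizes; the point is just that gradient(at:in:) or an optimizer update actually runs on the model):

import TensorFlow

let model = MLP(inputSize: 100, hiddenSizes: [200, 10])
let input = Tensor<Float>(randomNormal: TensorShape(50, 100))

// Actually differentiate through the model so the array tangent code paths run.
let grads = gradient(at: model) { model -> Tensor<Float> in
    model(input).sum()
}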

At first glance, this array differentiation error looks like TF-1005: a lack of per-instance zero tangent vectors. I've been making progress on that issue for a while, but it will take some time (2-3 weeks) to land.
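If I'm reading the precondition right, the count mismatch comes from the static zero tangent: Array.DifferentiableView's .zero is empty no matter how many layers the model's array holds, so moving a 2-element layer array along it trips the count check. Roughly (a sketch, not the exact code path the optimizer takes):

import TensorFlow

// The layer array inside the model has count 2...
let layers = [Dense<Float>(inputSize: 4, outputSize: 4),
              Dense<Float>(inputSize: 4, outputSize: 4)]

// ...but the static zero tangent vector for [Dense<Float>] is an empty view,
// so it cannot stand in for a per-instance zero with a matching count.
let zeroTangent = Array<Dense<Float>>.TangentVector.zero
print(layers.count)            // 2
print(zeroTangent.base.count)  // 0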

jackharmer commented 4 years ago

Thanks, this is about as succinct as I could get it:

import TensorFlow

public struct MLPHack: Layer {
    var layer1: Dense<Float>
    var layer2: Dense<Float>

    init(sizes: [Int]) {
        layer1 = Dense( inputSize: sizes[0], outputSize: sizes[1], activation: relu)
        layer2 = Dense( inputSize: sizes[1], outputSize: sizes[2], activation: relu)
    }

    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        return input.sequenced(through: layer1, layer2)
    }
}

public struct MLP: Layer {
    var layers = [Dense<Float>]()

    public init(sizes: [Int]) {
        for cc in 0..<(sizes.count-1) { 
            layers.append( Dense( inputSize: sizes[cc], outputSize: sizes[cc+1], activation: relu))
        }
    }

    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
         return layers.differentiableReduce(input) { x, layer in layer(x) }
    }
}

public struct Network: Module {
    public typealias NetType = MLP  // This doesn't work
    // public typealias NetType = MLPHack // This works.

    public var netA: NetType
    public var netB: NetType

    public init(sizes: [Int]) {
        netA = NetType(sizes: sizes)

        // The existence of netB seems to cause problems when using MLP, 
        // but not when using MLPHack. NB, in my actual project, netB 
        // is required for another task and shares an encoder with netA.
        netB = NetType(sizes: sizes)
    }

    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> { return netA(input) }
}

var net = Network(sizes: [100, 200, 10])
let optimizer = Adam(for: net, learningRate: 0.0001)

Context.local.learningPhase = .training
print("Start")
for _ in 0..<40 {
    let data = Tensor<Float>(randomNormal: TensorShape(50, 100))
    let (loss, grads) = TensorFlow.valueWithGradient(at: net) { net -> Tensor<Float> in
        let y = net(data*data)
        return (y*y).sum()
    }
    print(loss)
    optimizer.update(&net, along: grads)
}
print("End")

If you switch the typealias in the Network struct, this should work. It seems to be related to the second network (netB); if you comment it out, both versions work.
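For what it's worth, a quick way to isolate the Network wrapper from the MLP itself is to run the same training step on a standalone MLP (a sketch, reusing the definitions above):

var mlp = MLP(sizes: [100, 200, 10])
let mlpOptimizer = Adam(for: mlp, learningRate: 0.0001)
let data = Tensor<Float>(randomNormal: TensorShape(50, 100))

// Same loss and update as the loop above, but without the netA/netB wrapping.
let (loss, grads) = TensorFlow.valueWithGradient(at: mlp) { mlp -> Tensor<Float> in
    let y = mlp(data * data)
    return (y * y).sum()
}
mlpOptimizer.update(&mlp, along: grads)
print(loss)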

wasd96040501 commented 4 years ago

I encountered the same problem.