tensorflow / swift

Swift for TensorFlow
https://tensorflow.org/swift
Apache License 2.0

Cannot use custom Differentiable structure in a Layer input #582

Closed: laclouis5 closed this issue 3 years ago

laclouis5 commented 3 years ago

I'm trying to implement a UNet-like segmentation network; here is the complete definition:

```swift
import TensorFlow

struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}

struct ReLU<Scalar: TensorFlowFloatingPoint>: ParameterlessLayer {
    typealias TangentVector = EmptyTangentVector

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        relu(input)
    }
}

struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    let relu = ReLU<Scalar>()

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu)
    }
}

struct ResidualBlock<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(channels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        conv2 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: channels)
        bn2 = BatchNorm(featureCount: channels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu) + input
    }
}

struct Down<Scalar: TensorFlowFloatingPoint>: Layer {
    var downsample: Conv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        downsample = Conv2D(filterShape: (3, 3, inChannels, outChannels), strides: (2, 2), padding: .same)
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: downsample, conv)
    }
}

struct Up<Scalar: TensorFlowFloatingPoint>: Layer {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: TensorPair<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input.lhs) + input.rhs)
    }
}

struct Head<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv: Conv2D<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        self.conv = Conv2D(filterShape: (1, 1, inChannels, outChannels), padding: .same)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(input)
    }
}

struct UNet<Scalar: TensorFlowFloatingPoint>: Layer {
    var adapter: DoubleConv<Scalar>
    var down1, down2, down3, down4: Down<Scalar>
    var up1, up2, up3, up4: Up<Scalar>
    var head: Head<Scalar>

    init(_ inChannels: Int = 3, _ outChannels: Int = 1) {
        adapter = DoubleConv(inChannels, 64)
        down1 = Down(64, 128)
        down2 = Down(128, 256)
        down3 = Down(256, 512)
        down4 = Down(512, 1024)
        up1 = Up(1024, 512)
        up2 = Up(512, 256)
        up3 = Up(256, 128)
        up4 = Up(128, 64)
        head = Head(64, outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let d0 = adapter(input)

        let d1 = down1(d0)
        let d2 = down2(d1)
        let d3 = down3(d2)
        let d4 = down4(d3)

        let u1 = up1(TensorPair(d4, d3))
        let u2 = up1(TensorPair(u1, d2))
        let u3 = up1(TensorPair(u2, d1))
        let u4 = up1(TensorPair(u3, d0))

        let output = head(u4)

        return output
    }
}
```

The problem is that the Up layer must accept two inputs (the main input and the shortcut connection), so I created a Differentiable struct to handle this case, since the callAsFunction() method only accepts one input. However, this leads to an error:

```
error: <Cell 11>:32:22: error: expression is not differentiable
        let u4 = up1(TensorPair(u3, d0))
                     ^

<Cell 11>:32:22: note: cannot differentiate functions that have not been marked '@differentiable' and that are defined in other files
        let u4 = up1(TensorPair(u3, d0))
```

Same error for the three other Up layers.

dan-zheng commented 3 years ago

Thanks for reporting!

  • Any idea on how to solve this?

Could you please share the compiler version you're using (swift --version)?

Your code snippet compiles fine for me, with Swift for TensorFlow 0.12:

```
$ swift --version
Swift version 5.3-dev (LLVM 69d8678431d3eee, Swift e1aef96b7fea59b)
Target: x86_64-apple-darwin19.6.0
```


  • Is there a better alternative to write layers that accept more than one input?
  • Will the Layer protocol support a callAsFunction() requirement with multiple inputs in the future (when generics allow that, of course), i.e. a callAsFunction(_ input1: Input1, _ input2: Input2, ...) or callAsFunction(_ inputs: Input...)? I also think that the TensorPair struct should be part of the Swift for TensorFlow API, or that tuples should automatically conform to Differentiable when possible.

There isn't a better way to write layers taking or returning more than one value currently, unfortunately.

Layer.callAsFunction takes a single Differentiable-conforming argument and returns a single Differentiable-conforming result. To encode multiple arguments or results, a tuple-representing struct like TensorPair is the best solution.
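
For illustration, here's a rough sketch of the general pattern (the DifferentiablePair name is purely illustrative, not part of the Swift for TensorFlow API): any struct whose stored properties all conform to Differentiable can serve as a layer's Input or Output, and the compiler synthesizes its TangentVector. Marking members such as the initializer @differentiable can also matter when the struct is defined in a different file or notebook cell, which is what the "defined in other files" diagnostic above hints at.

```swift
import TensorFlow

// Sketch only: a generic two-value input for layers. The name and shape of
// this struct are illustrative; both stored properties conform to
// Differentiable, so the TangentVector is synthesized by the compiler.
struct DifferentiablePair<First: Differentiable, Second: Differentiable>: Differentiable {
    var first: First
    var second: Second

    // Marking the initializer @differentiable helps when this struct lives in
    // a different file (or Colab cell) than the code that differentiates it.
    @differentiable
    init(_ first: First, _ second: Second) {
        self.first = first
        self.second = second
    }
}
```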

Tuple types cannot yet conform to protocols like Differentiable. Some hardcoded support is being added to let tuples conform to Equatable, Comparable, and Hashable (https://github.com/apple/swift/pull/28833), but that is quite involved and not extensible. There's also the nuance that different tuple types with the same arity may have different TangentVector types, depending on whether their elements conform to Differentiable (e.g. (Float, Float) vs (Float, Int)).
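
As a concrete sketch of that nuance (the struct name here is hypothetical), a struct standing in for (Float, Int) has to exclude the Int member from differentiation, so its synthesized TangentVector only carries the Float part:

```swift
// Illustrative struct analogue of the tuple (Float, Int). Int does not
// conform to Differentiable, so it must be marked @noDerivative; the
// synthesized TangentVector then contains only the Float component.
struct FloatAndInt: Differentiable {
    var value: Float
    @noDerivative var count: Int
}
```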


By the way, here's an alternative (simpler) UNet definition. It directly calls functions like relu in callAsFunction methods instead of creating a dedicated ReLU layer.
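
As a rough sketch of what that style looks like (a sketch only, not the exact definition linked), the DoubleConv block above could drop its ReLU property entirely:

```swift
// Sketch of the "inline activation" style: relu is called directly inside
// callAsFunction instead of being stored as a ReLU layer property.
struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let x = relu(bn1(conv1(input)))
        return relu(bn2(conv2(x)))
    }
}
```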

laclouis5 commented 3 years ago

Thank you for the quick and detailed answer! I'm using S4TF in a Google Colab notebook, but I was not able to find the Swift version.

I see that the simpler implementation you linked inlines everything in the UNet module, which is great but not flexible enough for my application.

I'll try a different environment, since the code seems to compile correctly on your side.

marcrasi commented 3 years ago

Please open a new issue if you have any more questions :)