SciSharp / TensorFlow.NET

.NET Standard bindings for Google's TensorFlow for developing, training and deploying Machine Learning models in C# and F#.
https://scisharp.github.io/tensorflow-net-docs
Apache License 2.0

[Question]: How to Nest a Sequential within a Sequential #1084

Closed · dogvane closed 1 year ago

dogvane commented 1 year ago

Description

...

        ILayer l = keras.layers.Normalization();
        var input = tf.ones((3, 3), name: "input");
        var block1 = keras.Sequential(new[] { l });
        block1.Apply(input);

        var block2 = keras.Sequential(new[] { keras.Sequential(new[] { l }) });
        var output = block2.Apply(input);

        print(output);

...

block1 works fine, but block2 raises an error.


AsakusaRinne commented 1 year ago

Thank you for reporting this issue. Is the code above a simplification? If `new[] { l }` is used as the argument to `Sequential`, the resulting sequential model is actually empty, which may lead to an error.
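
As a point of comparison, here is a rough sketch of the same repro that builds each model incrementally with `add` instead of passing a layer array (untested here, and it assumes `Sequential.add` accepts a nested `Sequential` as a layer):

        var l = keras.layers.Normalization();
        var input = tf.ones((3, 3), name: "input");

        // Build the inner model one layer at a time instead of passing new[] { l }.
        var inner = keras.Sequential();
        inner.add(l);

        // Nest the inner model inside an outer one, the same way block2 does.
        var outer = keras.Sequential();
        outer.add(inner);

        var output = outer.Apply(input);
        print(output);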

dogvane commented 1 year ago

Yes, it is a simplified demo. I wrote a DenseNet model that uses `Sequential` to create a block and adds it to another block. There may be some issues. The full code:

class DenseNet
    {
        // BN -> LeakyReLU -> Conv block; Call concatenates the block input with its output (dense connectivity).
        class ConvBlock : Layer
        {
            static int layerId;

            public ConvBlock(int num_channels) : base(new LayerArgs { Name = "ConvBlock" + ++layerId })
            {
                Layers.Add(keras.layers.BatchNormalization());
                Layers.Add(keras.layers.LeakyReLU());
                Layers.Add(keras.layers.Conv2D(filters: num_channels, kernel_size: (3, 3), padding: "same", activation: "relu"));
            }

            protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
            {
                print($"layer.name {this.Name} x.shape:{inputs.shape}");
                var y = inputs;

                foreach (var lay in Layers)
                {
                    y = lay.Apply(y);
                }

                // Dense connectivity: concatenate the original input with the conv output along the channel axis.
                return keras.layers.Concatenate().Apply((inputs, y));
            }
        }

        public class DenseBlock : Layer
        {
            static int layerId;

            public DenseBlock(int num_convs, int num_channels) : base(new LayerArgs { Name = "DenseBlock" + ++layerId })
            {
                for (var i = 0; i < num_convs; i++)
                {
                    Layers.Add(new ConvBlock(num_channels));
                }
            }

            protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
            {
                var x = inputs;
                foreach (var layer in Layers)
                {
                    x = layer.Apply(x);
                }

                return x;
            }
        }

        // BN -> LeakyReLU -> 1x1 Conv -> AveragePooling; halves channels and spatial resolution between dense blocks.
        class TransitionBlock : Layer
        {
            static int layerId;

            public TransitionBlock(int num_channels) : base(new LayerArgs { Name = "TransitionBlock" + ++layerId })
            {
                Layers.Add(keras.layers.BatchNormalization());
                Layers.Add(keras.layers.LeakyReLU());
                Layers.Add(keras.layers.Conv2D(num_channels, kernel_size: 1, activation: "relu"));
                Layers.Add(keras.layers.AveragePooling2D(pool_size: 2, strides: 2));
            }

            protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
            {
                var x = inputs;
                foreach (var layer in Layers)
                {
                    x = layer.Apply(x);
                }
                return x;
            }
        }

        public static void DenseNet_Test()
        {
            var block_1 = () =>
            {
                return new[] {
                                keras.layers.Conv2D(64, kernel_size: 7, strides:2, padding:"same", activation:"relu"),
                                keras.layers.BatchNormalization(),
                                keras.layers.LeakyReLU(),
                                keras.layers.MaxPooling2D(pool_size:3, strides:2, padding:"same")
                };
            };

            var block_2 = () => {
                var layers = new List<ILayer>();
                layers.AddRange(block_1());

                // Note: this repeats the same stem layers already added via block_1() above.
                layers.AddRange(new[] {
                                keras.layers.Conv2D(64, kernel_size: 7, strides: 2, padding: "same", activation: "relu"),
                                keras.layers.BatchNormalization(),
                                keras.layers.LeakyReLU(),
                                keras.layers.MaxPooling2D(pool_size: 3, strides: 2, padding: "same")
                });

                var num_channels = 64;
                var growth_rate = 32;
                var num_convs_in_dense_blocks = new[] { 4, 4, 4, 4 };

                for (var i = 0; i < num_convs_in_dense_blocks.Length; i++)
                {
                    var num_convs = num_convs_in_dense_blocks[i];

                    // Each dense block adds num_convs * growth_rate channels.
                    layers.Add(new DenseBlock(num_convs, growth_rate));
                    num_channels += (num_convs * growth_rate);
                    if (i != num_convs_in_dense_blocks.Length - 1)
                    {
                        // A transition block between dense blocks halves the channel count.
                        num_channels = num_channels / 2;
                        layers.Add(new TransitionBlock(num_channels));
                    }
                }

                return keras.Sequential(layers);
            };

            var model = keras.Sequential(new[] {
                block_2(),
                keras.layers.BatchNormalization(),
                keras.layers.LeakyReLU(),
                keras.layers.GlobalAveragePooling2D(),
                keras.layers.Flatten(),
                keras.layers.Dense(10),
            });

            model.build(new Tensorflow.Keras.Saving.KerasShapesWrapper((-1, 96, 96, 1)));
            model.summary();

            var X = tf.random.uniform((1, 96, 96, 1));

            foreach (var l in model.Layers)
            {
                var X1 = X;
                X = l.Apply(X);
                print($"layer.name:{l.Name} input.shape:{X1.shape} output.shape:{X.shape}");
            }
            print("X:", X);

        }
    }
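
As a sanity check on the channel arithmetic in `block_2` (worked through by hand, not output from the code above): the stem ends at 64 channels, each `DenseBlock` adds 4 × 32 = 128 channels, and each `TransitionBlock` halves the count, so `num_channels` runs 64 → 192 → 96 → 224 → 112 → 240 → 120 → 248 before the final `BatchNormalization` and the `Dense(10)` head.
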
dogvane commented 1 year ago

The code is adapted from https://en.d2l.ai/chapter_convolutional-modern/densenet.html.

AsakusaRinne commented 1 year ago

Hi, I've made a quick fix for this problem in #1086. Unfortunately, the root cause lies in the basic framework, which may change before v1.0.0. Therefore we may not merge #1086, but will instead fix this by optimizing the framework in the future. Would you mind using the code from #1086 as a temporary solution? We'll complete the fix before v1.0.0.