AlexeyAB / darknet

YOLOv4 / Scaled-YOLOv4 / YOLO - Neural Networks for Object Detection (Windows and Linux version of Darknet )
http://pjreddie.com/darknet/
Other
21.77k stars 7.96k forks source link

Could you check parameters in cudnnSetPooling2dDescriptor() of maxpool_layer? #8302

Open Namdosa opened 2 years ago

Namdosa commented 2 years ago

Hi all,

The cudnnSetPooling2dDescriptor function is declared as below.

cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride);

But the code is as below. void cudnn_maxpool_setup(layer *l) {

ifdef CUDNN

CHECK_CUDNN(cudnnSetPooling2dDescriptor(
    l->poolingDesc,
    CUDNN_POOLING_MAX,
    CUDNN_NOT_PROPAGATE_NAN,    // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN
    l->size,
    l->size,
    l->pad/2, //0, //l.pad,
    l->pad/2, //0, //l.pad,
    **l->stride_x,
    l->stride_y));**

.. void cudnn_local_avgpool_setup(layer *l) {

ifdef CUDNN

CHECK_CUDNN(cudnnSetPooling2dDescriptor(
    l->poolingDesc,
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING,
    CUDNN_NOT_PROPAGATE_NAN,    // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN
    l->size,
    l->size,
    l->pad / 2, //0, //l.pad,
    l->pad / 2, //0, //l.pad,
    **l->stride_x,
    l->stride_y));**

It seems the positions of stride_x and stride_y should be swapped: in the cuDNN signature the vertical stride (stride along the height, i.e. stride_y) comes before the horizontal stride (stride_x), but the code passes stride_x first. What do you think about this?

zzk2021 commented 1 month ago

I think so. I would also like to know why the backward propagation for max pooling does not use the cuDNN API — and why the forward pass does not either. Also, why is the guard in the #ifdef CUDNN_DISABLED? It's very strange.

// GPU forward pass for the "local average pooling" layer.
// NOTE(review): the parameter type is maxpool_layer even though this performs
// average pooling — presumably the same layer struct is shared by both pooling
// variants in this codebase; confirm against maxpool_layer.h.
extern "C" void forward_local_avgpool_layer_gpu(maxpool_layer layer, network_state state)
{

// NOTE(review): guarded by CUDNN_DISABLED rather than CUDNN, so this cuDNN
// fast path is compiled out and only the custom kernel below ever runs.
// Confirm whether disabling it was intentional — the issue discussion
// suggests this path looks unfinished.
#ifdef CUDNN_DISABLED
    // cuDNN path: inference only, and only when the pooling windows do not
    // overlap (stride == window size), matching the descriptor configured in
    // cudnn_local_avgpool_setup().
    if (!state.train && layer.stride == layer.size) {
        // Calls cudnnPoolingForward (the earlier comment naming
        // cudnnPoolingBackward here was misleading).
        cudnnStatus_t maxpool_status;

        // y = alpha * pool(x) + beta * y; alpha=1, beta=0 overwrites output.
        float alpha = 1, beta = 0;
        maxpool_status = cudnnPoolingForward(
            cudnn_handle(),
            layer.poolingDesc,
            &alpha,
            layer.srcTensorDesc,
            state.input,
            &beta,
            layer.dstTensorDesc,
            layer.output_gpu);

        // Descriptor teardown left disabled — descriptors appear to be owned
        // and reused by the layer rather than destroyed per call.
        //maxpool_status = cudnnDestroyPoolingDescriptor(poolingDesc);
        //cudnnDestroyTensorDescriptor(layer.srcTensorDesc);
        //cudnnDestroyTensorDescriptor(layer.dstTensorDesc);

    }
    else
#endif
    {
        // Custom CUDA kernel path (the only path actually compiled in).
        int h = layer.out_h;
        int w = layer.out_w;
        int c = layer.out_c;

        // One thread per output element across the whole batch.
        size_t n = h*w*c*layer.batch;

        forward_local_avgpool_layer_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu);
        CHECK_CUDA(cudaPeekAtLastError());
    }
}
zzk2021 commented 1 month ago

This looks unfinished.