How to Cooperative use udmabuf with PL DMA and RAM in vivado 2018.2 block design?

honorpeter commented 5 years ago

I had insmod udmabuf.ko on zcu102 board and succeed.but it's only a dma_buf, how to Cooperative use it with PL DMA and RAM in vivado 2018.2 block design ？ Simple and efficient methods?

honorpeter commented 5 years ago

By the way , Can this be used directly? https://github.com/pulp-platform/udma_external_per

ikwzm commented 5 years ago

Here is a simple example using Vivado-HLS.

https://github.com/ikwzm/ZynqMP-FPGA-Linux-Example-2-Ultra96

In this example, the following C code is implemented in PL using Vivado-HLS.

int negative(volatile int *in, volatile int *out, int size){
#pragma HLS INTERFACE m_axi depth=10 port=out offset=slave
#pragma HLS INTERFACE m_axi depth=10 port=in  offset=slave
#pragma HLS INTERFACE s_axilite port=size
#pragma HLS INTERFACE s_axilite port=return
    int i;

    for (i = 0; i < size; i++){
        out[i] = -in[i];
    }

    return(0);
}

From PS, it is executed by script file written in Python.

from udmabuf import Udmabuf
from uio     import Uio
import numpy as np
import time

if __name__ == '__main__':
    uio1       = Uio('uio1')
    regs       = uio1.regs()
    udmabuf4   = Udmabuf('udmabuf4')
    udmabuf5   = Udmabuf('udmabuf5')
    test_dtype = np.uint32
    test_size  = min(int(udmabuf4.buf_size/(np.dtype(test_dtype).itemsize)),
                     int(udmabuf5.buf_size/(np.dtype(test_dtype).itemsize)))

    udmabuf4_array    = udmabuf4.memmap(dtype=test_dtype, shape=(test_size))
    udmabuf4_array[:] = np.random.randint(-21474836478,2147483647,(test_size))
    udmabuf4.set_sync_to_device(0, test_size*(np.dtype(test_dtype).itemsize))

    udmabuf5_array    = udmabuf5.memmap(dtype=test_dtype, shape=(test_size))
    udmabuf5_array[:] = np.random.randint(-21474836478,2147483647,(test_size))
    udmabuf5.set_sync_to_cpu(   0, test_size*(np.dtype(test_dtype).itemsize))

    total_setup_time   = 0
    total_cleanup_time = 0
    total_xfer_time    = 0
    total_xfer_size    = 0
    count              = 0

    for i in range (0,9):

        start_time  = time.time()
        udmabuf4.sync_for_device()
        udmabuf5.sync_for_device()
        regs.write_word(0x18, udmabuf4.phys_addr & 0xFFFFFFFF)
        regs.write_word(0x20, udmabuf5.phys_addr & 0xFFFFFFFF)
        regs.write_word(0x28, test_size)
        regs.write_word(0x04, 0x000000001)
        regs.write_word(0x08, 0x000000001)
        regs.write_word(0x0C, 0x000000001)
        uio1.irq_on()
        phase0_time = time.time()
        regs.write_word(0x00, 0x000000001)
        uio1.wait_irq()

        phase1_time = time.time()
        regs.write_word(0x0C, 0x000000001)
        udmabuf4.sync_for_cpu()
        udmabuf5.sync_for_cpu()

        end_time     = time.time()
        setup_time   = phase0_time - start_time
        xfer_time    = phase1_time - phase0_time
        cleanup_time = end_time    - phase1_time
        total_time   = end_time    - start_time

        total_setup_time   = total_setup_time   + setup_time
        total_cleanup_time = total_cleanup_time + cleanup_time
        total_xfer_time    = total_xfer_time    + xfer_time
        total_xfer_size    = total_xfer_size    + test_size
        count              = count              + 1
        print ("total:{0:.3f}[msec] setup:{1:.3f}[msec] xfer:{2:.3f}[msec] cleanup:{3:.3f}[msec]".format(round(total_time*1000.0,3), round(setup_time*1000.0,3), round(xfer_time*1000.0,3), round(cleanup_time*1000.0,3)))

    print ("average_setup_time  :{0:.3f}".format(round((total_setup_time  /count)*1000.0,3)) + "[msec]")
    print ("average_cleanup_time:{0:.3f}".format(round((total_cleanup_time/count)*1000.0,3)) + "[msec]")
    print ("average_xfer_time   :{0:.3f}".format(round((total_xfer_time   /count)*1000.0,3)) + "[msec]")
    print ("throughput          :{0:.3f}".format(round(((total_xfer_size/total_xfer_time)/(1000*1000)),3)) + "[MByte/sec]")

    udmabuf4_negative_array = np.negative(udmabuf4_array)
    if np.array_equal(udmabuf4_negative_array, udmabuf5_array):
         print("np.negative(udmabuf4) == udmabuf5 : OK")
    else:
         print("np.negative(udmabuf4) == udmabuf5 : NG")
         count = 0
         for i in range(test_size):
             if udmabuf4_negative_array[i] != udmabuf5_array[i] :
                 count = count + 1
                 if count < 16:
                     print("udmabuf4_negative_array[0x{0:08X}] = 0x{1:08X} udmabuf5_array[0x{0:08X}] = 0x{2:08X}".format(i, udmabuf4_negative_array[i], udmabuf5_array[i]))
         print("NG Count:{0}".format(count))

honorpeter commented 5 years ago

@ikwzm Thanks ,I will try. BUT there were not PL DMA to Cooperative use with the udma buf.

ikwzm / udmabuf

How to Cooperative use udmabuf with PL DMA and RAM in vivado 2018.2 block design? #21