chel-data / chel-fs

GNU General Public License v3.0
0 stars 1 forks source link

Investigate on RPC Frameworks in RUST #22

Closed josephaug26 closed 1 week ago

josephaug26 commented 2 weeks ago

The intention is to identify an RPC framework in Rust for:

  1. libchelfs <-> chelfs-mds
  2. chelfs-mds <-> chelfs-mds

For now, we are fine if it works on TCP (to start with)

josephaug26 commented 1 week ago

Requirement:

  1. We need an RPC framework that supports both TCP and RDMA and provides an abstraction over the underlying transport details.
  2. It is built directly on the transport layer of the network stack rather than on the application layer (unlike gRPC).
  3. Good to have: aligned with the DAOS data-path network layer, CART.

Solution : Mercury (https://github.com/mercury-hpc/mercury)

  1. Works on both TCP and RDMA.
  2. Provides an abstraction API that hides the details of handling the transport layer.
  3. Unlike gRPC, it is built directly on the transport layer of the network stack.
  4. Aligns with DAOS's CART RPC, which also uses Mercury under the hood.

cc: @paul356 @lpabon

Reference:

  1. https://github.com/mercury-hpc/mercury
  2. https://github.com/ofiwg/libfabric
  3. https://github.com/pdlfs/deltafs
  4. https://github.com/pdlfs/deltafs/issues/8
  5. https://mochi.readthedocs.io/en/latest/mercury/02_hello.html
  6. What is Mochi : https://github.com/mochi-hpc/
josephaug26 commented 1 week ago

Tried on Rocky Linux 8.6

Install libfabric:

git clone https://github.com/ofiwg/libfabric.git
cd libfabric
git submodule update --init
sudo ./autogen.sh
sudo ./configure
sudo make -j 32
sudo make install

Install Mercury

# Fetch Mercury and enter the checkout so that the submodule and build
# steps below run inside the Mercury source tree.
git clone https://github.com/mercury-hpc/mercury.git
cd mercury
# Pull in the submodules referenced by the repository.
git submodule update --init
# Configure out-of-tree in a dedicated build directory; ccmake opens the
# interactive configuration screen for the parent (source) directory.
mkdir build
cd build
ccmake ..
#
Set the following configuration, then press c to configure:

 BUILD_DOCUMENTATION              OFF
 BUILD_EXAMPLES                   OFF
 BUILD_SHARED_LIBS                ON
 BUILD_TESTING                    ON
 CMAKE_ARCHIVE_OUTPUT_DIRECTORY   /root/code/mercury/build/bin
 CMAKE_BUILD_TYPE                 RelWithDebInfo
 CMAKE_INSTALL_PREFIX             /usr/local
 CMAKE_LIBRARY_OUTPUT_DIRECTORY   /root/code/mercury/build/bin
 CMAKE_RUNTIME_OUTPUT_DIRECTORY   /root/code/mercury/build/bin
 MCHECKSUM_ENABLE_DEBUG           OFF
 MCHECKSUM_USE_ISAL               OFF
 MCHECKSUM_USE_SSE4_2             ON
 MCHECKSUM_USE_ZLIB               OFF
 MERCURY_ENABLE_COVERAGE          OFF
  MERCURY_ENABLE_DEBUG             ON
 MERCURY_USE_BOOST_PP             ON
 MERCURY_USE_CHECKSUMS            ON
 MERCURY_USE_SYSTEM_BOOST         OFF
 MERCURY_USE_SYSTEM_MCHECKSUM     OFF
 MERCURY_USE_XDR                  OFF
 NA_USE_BMI                       OFF
 NA_USE_DYNAMIC_PLUGINS           OFF
 NA_USE_MPI                       OFF
 NA_USE_OFI                       ON
 NA_USE_PSM                       OFF
 NA_USE_PSM2                      OFF
 NA_USE_SM                        ON
 NA_USE_UCX                       OFF
 json-c_DIR                       json-c_DIR-NOTFOUND

 Once the configuration is set, press g to generate the build files and exit.
#
sudo make -j 32
sudo make install
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib

Disable Firewall

systemctl stop firewalld
systemctl status firewalld

Now create a test folder outside the Mercury source tree, e.g. /root/code/test/

server.c


#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <mercury.h>

static hg_class_t*     hg_class   = NULL; /* Mercury class; assigned from HG_Init() in main() */
static hg_context_t*   hg_context = NULL; /* Mercury context; assigned from HG_Context_create() in main() */

/* Number of RPCs to serve; main() leaves its loop once num_rpcs reaches it. */
static const int TOTAL_RPCS = 10;
/* Number of RPCs received so far; incremented by hello_world(). */
static int num_rpcs = 0;

/*
 * hello_world: function exposed as an RPC.
 * It prints "Hello World!", increments the num_rpcs counter
 * and destroys the handle it was given.
 *
 * All Mercury RPCs must have the signature
 *   hg_return_t f(hg_handle_t h)
 */
hg_return_t hello_world(hg_handle_t h);

/*
 * Server entry point.
 *
 * Usage: server <protocol>
 *
 * Initializes Mercury in listening mode for the given protocol, prints the
 * address the server can be reached at, registers the "hello" RPC and then
 * alternates between triggering callbacks and making progress until
 * TOTAL_RPCS requests have been counted in num_rpcs.
 *
 * Returns 0 after a clean shutdown. A wrong argument count prints the usage
 * to stderr and exits with EXIT_FAILURE. Mercury calls that fail during
 * setup or teardown trip an assert().
 */
int main(int argc, char** argv)
{
    hg_return_t ret;

    if(argc != 2) {
        fprintf(stderr, "Usage: %s <protocol>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* HG_TRUE: initialize in listening (server) mode. */
    hg_class = HG_Init(argv[1], HG_TRUE);
    assert(hg_class != NULL);

    /* Resolve and print our own address so that clients know where to connect.
     * Every step is checked: printing the buffer after a failed conversion
     * would read memory that was never written.
     */
    char hostname[128] = "";
    hg_size_t hostname_size = sizeof(hostname);
    hg_addr_t self_addr;
    ret = HG_Addr_self(hg_class, &self_addr);
    assert(ret == HG_SUCCESS);
    ret = HG_Addr_to_string(hg_class, hostname, &hostname_size, self_addr);
    assert(ret == HG_SUCCESS);
    printf("Server running at address %s\n",hostname);
    ret = HG_Addr_free(hg_class, self_addr);
    assert(ret == HG_SUCCESS);

    hg_context = HG_Context_create(hg_class);
    assert(hg_context != NULL);

    /* Register the RPC by its name ("hello").
     * The two NULL arguments correspond to the functions used to
     * serialize/deserialize the input and output parameters
     * (hello_world doesn't have parameters and doesn't return anything, hence NULL).
     */
    hg_id_t rpc_id = HG_Register_name(hg_class, "hello", NULL, NULL, hello_world);

    /* Tell Mercury that hello_world will not send any response back
     * to the client.
     */
    ret = HG_Registered_disable_response(hg_class, rpc_id, HG_TRUE);
    assert(ret == HG_SUCCESS);

    do
    {
        unsigned int count = 0;
        /* Run ready callbacks one at a time until none is left. */
        do {
            ret = HG_Trigger(hg_context, 0, 1, &count);
        } while((ret == HG_SUCCESS) && count);
        /* Then wait up to 100 ms for network progress. The result is
         * deliberately ignored: the loop simply polls again
         * (NOTE(review): a timeout is presumably the common outcome here).
         */
        HG_Progress(hg_context, 100);
    } while(num_rpcs < TOTAL_RPCS);
    /* Exit the loop once the given number of RPCs has been reached. */

    ret = HG_Context_destroy(hg_context);
    assert(ret == HG_SUCCESS);

    ret = HG_Finalize(hg_class);
    assert(ret == HG_SUCCESS);

    return 0;
}

/*
 * Handler for the "hello" RPC.
 *
 * Prints the greeting, counts the request in num_rpcs and releases the
 * handle. Always returns HG_SUCCESS; a failing HG_Destroy() trips the assert.
 */
hg_return_t hello_world(hg_handle_t h)
{
    fputs("Hello World!\n", stdout);
    ++num_rpcs;

    /* The handle is of no further use to us, so release it right away. */
    const hg_return_t destroy_status = HG_Destroy(h);
    assert(destroy_status == HG_SUCCESS);

    return HG_SUCCESS;
}

client.c

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <mercury.h>

static hg_class_t*     hg_class   = NULL; /* Mercury class; assigned from HG_Init() in main() */
static hg_context_t*   hg_context = NULL; /* Mercury context; assigned from HG_Context_create() in main() */
static hg_id_t         hello_rpc_id;      /* ID of the "hello" RPC, as returned by HG_Register_name() */
static int completed = 0;                 /* Set to 1 by lookup_callback() once the RPC has been sent */

/*
 * Callback invoked once the lookup of the server's address has finished.
 * It also sends the RPC to the server and then sets the completed
 * variable to 1, which ends the event loop in main().
 */
hg_return_t lookup_callback(const struct hg_cb_info *callback_info);

/*
 * Client entry point.
 *
 * Usage: client <protocol> <server_address>
 *
 * Initializes Mercury in non-listening mode, registers the "hello" RPC,
 * starts an asynchronous lookup of the server address and runs the event
 * loop until lookup_callback() has sent the RPC and set `completed`.
 *
 * Returns 0 when the event loop finished. A wrong argument count prints the
 * usage to stderr and exits with EXIT_FAILURE. Mercury calls that fail
 * during setup trip an assert().
 */
int main(int argc, char** argv)
{
    hg_return_t ret;

    if(argc != 3) {
        fprintf(stderr, "Usage: %s <protocol> <server_address>\n",argv[0]);
        fprintf(stderr, "Example: %s tcp ofi+tcp://1.2.3.4:1234\n",argv[0]);
        exit(EXIT_FAILURE);
    }

    char* protocol = argv[1];
    char* server_address = argv[2];

    /* HG_FALSE: the client does not listen for incoming RPCs. */
    hg_class = HG_Init(protocol, HG_FALSE);
    assert(hg_class != NULL);

    hg_context = HG_Context_create(hg_class);
    assert(hg_context != NULL);

    /* Register the RPC.
     * The first two NULLs stand in for the serialization/deserialization
     * functions of the input and output datatypes (not used in this example).
     * The third NULL is the handler function, which lives on the server,
     * hence NULL here on the client.
     */
    hello_rpc_id = HG_Register_name(hg_class, "hello", NULL, NULL, NULL);

    /* Tell Mercury not to expect a response from the server for this RPC. */
    ret = HG_Registered_disable_response(hg_class, hello_rpc_id, HG_TRUE);
    assert(ret == HG_SUCCESS);

    /* Look up the address of the server. This is asynchronous: the result is
     * handled by lookup_callback once the progress loop runs.
     * The NULL is the user-data pointer handed to lookup_callback (unused).
     * The 4th argument is the address of the server.
     * The 5th argument would receive an hg_op_id_t identifying the operation,
     * e.g. to cancel it with HG_Cancel; we don't need it, hence HG_OP_ID_IGNORE.
     */
    ret = HG_Addr_lookup(hg_context, lookup_callback, NULL, server_address, HG_OP_ID_IGNORE);
    /* If the lookup could not even be posted, the callback never fires and
     * the loop below would spin forever, so fail fast instead.
     */
    assert(ret == HG_SUCCESS);

    /* Main event loop: make progress until completed becomes non-zero. */
    while(!completed)
    {
        unsigned int count = 0;
        do {
            ret = HG_Trigger(hg_context, 0, 1, &count);
        } while((ret == HG_SUCCESS) && count && !completed);
        HG_Progress(hg_context, 100);
    }

    ret = HG_Context_destroy(hg_context);
    printf("HG_Context_destroy : %d \n", (int)ret);
    assert(ret == HG_SUCCESS);

    /* Finalize the hg_class. The result is reported but deliberately not
     * asserted, so a teardown failure does not turn a run whose RPC was
     * already sent into a crash.
     */
    ret = HG_Finalize(hg_class);
    printf("HG_Finalize : %d \n", (int)ret);
    return 0;
}

/*
 * This function is called when the address lookup operation has completed.
 */
hg_return_t lookup_callback(const struct hg_cb_info *callback_info)
{
    hg_return_t ret;

    /* First, check that the lookup went fine. */
    assert(callback_info->ret == 0);

    /* Get the address of the server. */
    hg_addr_t addr = callback_info->info.lookup.addr;

    /* Create a call to the hello_world RPC. */
    hg_handle_t handle;
    ret = HG_Create(hg_context, addr, hello_rpc_id, &handle);
    assert(ret == HG_SUCCESS);

    /* Send the RPC. The first NULL correspond to the callback
     * function to call when receiving the response from the server
     * (we don't expect a response, hence NULL here).
     * The second NULL is a pointer to user-specified data that will
     * be passed to the response callback.
     * The third NULL is a pointer to the RPC's argument (we don't
     * use any here).
     */
    ret = HG_Forward(handle, NULL, NULL, NULL);
    assert(ret == HG_SUCCESS);

    /* Free the handle */
    ret = HG_Destroy(handle);
    assert(ret == HG_SUCCESS);

    /* Set completed to 1 so we terminate the loop. */
    completed = 1;
    return HG_SUCCESS;
}

Build test:

g++ server.c -o server -lmercury
g++ client.c -o client -lmercury

Run server:

[root@localhost mercury-test]# ./server tcp
Server running at address ofi+tcp://172.16.204.176:44049

Run Client:

[root@localhost mercury-test]# ./client tcp  ofi+tcp://172.16.204.176:44049
HG_Finalize : 0 
HG_Finalize : 8 
[root@localhost mercury-test]# 

Server output:


[root@localhost mercury-test]# ./server tcp
Server running at address ofi+tcp://172.16.204.176:44049
Hello World!
josephaug26 commented 1 week ago

Next step is to interface mercury with RUST.

https://medium.com/@aidagetoeva/async-c-rust-interoperability-39ece4cd3dcf

josephaug26 commented 1 week ago

Investigation done.