Closed josephaug26 closed 1 week ago
Requirement:
Solution : Mercury (https://github.com/mercury-hpc/mercury)
cc: @paul356 @lpabon
Reference:
Tried on Rocky Linux 8.6
Install libfabric:
git clone https://github.com/ofiwg/libfabric.git
cd libfabric
git submodule update --init
sudo ./autogen.sh
sudo ./configure
sudo make -j 32
sudo make install
Install Mercury
git clone https://github.com/mercury-hpc/mercury.git
cd mercury
git submodule update --init
mkdir build
cd build
ccmake ..
#
Use the following configuration, then press c to configure:
BUILD_DOCUMENTATION OFF
BUILD_EXAMPLES OFF
BUILD_SHARED_LIBS ON
BUILD_TESTING ON
CMAKE_ARCHIVE_OUTPUT_DIRECTORY /root/code/mercury/build/bin
CMAKE_BUILD_TYPE RelWithDebInfo
CMAKE_INSTALL_PREFIX /usr/local
CMAKE_LIBRARY_OUTPUT_DIRECTORY /root/code/mercury/build/bin
CMAKE_RUNTIME_OUTPUT_DIRECTORY /root/code/mercury/build/bin
MCHECKSUM_ENABLE_DEBUG OFF
MCHECKSUM_USE_ISAL OFF
MCHECKSUM_USE_SSE4_2 ON
MCHECKSUM_USE_ZLIB OFF
MERCURY_ENABLE_COVERAGE OFF
MERCURY_ENABLE_DEBUG ON
MERCURY_USE_BOOST_PP ON
MERCURY_USE_CHECKSUMS ON
MERCURY_USE_SYSTEM_BOOST OFF
MERCURY_USE_SYSTEM_MCHECKSUM OFF
MERCURY_USE_XDR OFF
NA_USE_BMI OFF
NA_USE_DYNAMIC_PLUGINS OFF
NA_USE_MPI OFF
NA_USE_OFI ON
NA_USE_PSM OFF
NA_USE_PSM2 OFF
NA_USE_SM ON
NA_USE_UCX OFF
json-c_DIR json-c_DIR-NOTFOUND
Once the configuration is set, press g to generate the build files and exit.
#
sudo make -j 32
sudo make install
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
Disable Firewall
systemctl stop firewalld
systemctl status firewalld
Now create a test folder outside the Mercury source tree, e.g. /root/code/test/
server.c
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <mercury.h>
/* Server-wide Mercury state. */
static hg_class_t *hg_class = NULL;     /* the Mercury class */
static hg_context_t *hg_context = NULL; /* the Mercury context */

/* The server shuts down once it has served this many RPCs. */
static const int TOTAL_RPCS = 10;

/* How many RPCs have been received so far. */
static int num_rpcs = 0;

/*
 * Function exposed as an RPC: it prints "Hello World!" and
 * increments num_rpcs.
 *
 * Every Mercury RPC handler must have the signature
 *     hg_return_t f(hg_handle_t h)
 */
hg_return_t hello_world(hg_handle_t h);
/*
 * Entry point of the hello-world server.
 *
 * Usage: server <protocol>
 *
 * Initializes Mercury with argv[1] and HG_TRUE, prints the server's own
 * address, registers the "hello" RPC and then alternates HG_Trigger /
 * HG_Progress until TOTAL_RPCS requests have been counted by hello_world().
 *
 * Returns 0 on a clean shutdown. Exits with EXIT_FAILURE on a usage error
 * or when any Mercury set-up / tear-down call reports a failure.
 */
int main(int argc, char** argv)
{
    hg_return_t ret;

    if (argc != 2) {
        /* A usage error is a failure: report on stderr, exit non-zero. */
        fprintf(stderr, "Usage: %s <protocol>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    hg_class = HG_Init(argv[1], HG_TRUE);
    if (hg_class == NULL) {
        fprintf(stderr, "HG_Init(\"%s\") failed\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    /* Resolve and print our own address so clients know where to connect. */
    char hostname[128];
    hg_size_t hostname_size = sizeof(hostname);
    hg_addr_t self_addr;

    ret = HG_Addr_self(hg_class, &self_addr);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "HG_Addr_self failed: %d\n", (int)ret);
        HG_Finalize(hg_class);
        exit(EXIT_FAILURE);
    }

    ret = HG_Addr_to_string(hg_class, hostname, &hostname_size, self_addr);
    /* The address handle is no longer needed whether or not the conversion
     * succeeded, so release it before looking at the result. */
    HG_Addr_free(hg_class, self_addr);
    if (ret != HG_SUCCESS) {
        /* hostname is not guaranteed to be filled in here: do not print it. */
        fprintf(stderr, "HG_Addr_to_string failed: %d\n", (int)ret);
        HG_Finalize(hg_class);
        exit(EXIT_FAILURE);
    }
    printf("Server running at address %s\n", hostname);

    hg_context = HG_Context_create(hg_class);
    if (hg_context == NULL) {
        fprintf(stderr, "HG_Context_create failed\n");
        HG_Finalize(hg_class);
        exit(EXIT_FAILURE);
    }

    /* Register the RPC by its name ("hello").
     * The two NULL arguments correspond to the functions used to
     * serialize/deserialize the input and output parameters
     * (hello_world doesn't have parameters and doesn't return anything,
     * hence NULL).
     */
    hg_id_t rpc_id = HG_Register_name(hg_class, "hello", NULL, NULL, hello_world);

    /* Tell Mercury that hello_world will not send any response back to
     * the client.
     */
    ret = HG_Registered_disable_response(hg_class, rpc_id, HG_TRUE);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "HG_Registered_disable_response failed: %d\n", (int)ret);
        HG_Context_destroy(hg_context);
        HG_Finalize(hg_class);
        exit(EXIT_FAILURE);
    }

    /* Event loop: drain every callback that is ready, then make progress.
     * hello_world() runs from HG_Trigger and increments num_rpcs; the loop
     * ends once TOTAL_RPCS requests have been served.
     */
    do {
        unsigned int count;
        do {
            ret = HG_Trigger(hg_context, 0, 1, &count);
        } while ((ret == HG_SUCCESS) && count);
        /* NOTE(review): the timeout argument (100) is presumably in
         * milliseconds; confirm against the Mercury API documentation. */
        HG_Progress(hg_context, 100);
    } while (num_rpcs < TOTAL_RPCS);

    /* Tear down in reverse order of creation. */
    ret = HG_Context_destroy(hg_context);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "HG_Context_destroy failed: %d\n", (int)ret);
        exit(EXIT_FAILURE);
    }

    ret = HG_Finalize(hg_class);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "HG_Finalize failed: %d\n", (int)ret);
        exit(EXIT_FAILURE);
    }

    return 0;
}
/*
 * Handler for the "hello" RPC.
 *
 * Prints the greeting, counts the request in num_rpcs and releases the
 * handle it was given. Always returns HG_SUCCESS.
 */
hg_return_t hello_world(hg_handle_t h)
{
    fputs("Hello World!\n", stdout);
    ++num_rpcs;

    /* The handle is not used past this point, so destroy it here. */
    hg_return_t status = HG_Destroy(h);
    assert(status == HG_SUCCESS);

    return HG_SUCCESS;
}
client.c
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <mercury.h>
/* Client-wide Mercury state. */
static hg_class_t *hg_class = NULL;     /* Mercury class */
static hg_context_t *hg_context = NULL; /* Mercury context */

/* ID of the "hello" RPC, set in main() by HG_Register_name. */
static hg_id_t hello_rpc_id;

/* Becomes 1 once lookup_callback has sent the RPC; ends the event loop. */
static int completed = 0;

/*
 * Callback invoked once the server's address has been looked up.
 * It also sends the RPC to the server and then sets completed to 1.
 */
hg_return_t lookup_callback(const struct hg_cb_info *callback_info);
/*
 * Entry point of the hello-world client.
 *
 * Usage: client <protocol> <server_address>
 *
 * Initializes Mercury with argv[1] and HG_FALSE, registers the "hello" RPC,
 * starts an asynchronous lookup of the server address and runs the
 * trigger/progress loop until lookup_callback() sets `completed`.
 *
 * Returns 0 after tear-down. Exits with EXIT_FAILURE on a usage error or
 * when the address lookup cannot be submitted.
 */
int main(int argc, char** argv)
{
    hg_return_t ret;

    if (argc != 3) {
        /* A usage error is a failure: report on stderr, exit non-zero. */
        fprintf(stderr, "Usage: %s <protocol> <server_address>\n", argv[0]);
        fprintf(stderr, "Example: %s tcp ofi+tcp://1.2.3.4:1234\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    char* protocol = argv[1];
    char* server_address = argv[2];

    hg_class = HG_Init(protocol, HG_FALSE);
    assert(hg_class != NULL);

    hg_context = HG_Context_create(hg_class);
    assert(hg_context != NULL);

    /* Register an RPC function.
     * The first two NULL correspond to what would be pointers to
     * serialization/deserialization functions for input and output datatypes
     * (not used in this example).
     * The third NULL is the pointer to the function (which is on the server,
     * so NULL here on the client).
     */
    hello_rpc_id = HG_Register_name(hg_class, "hello", NULL, NULL, NULL);

    /* Tell Mercury that we shouldn't expect a response from the server
     * when calling this RPC.
     */
    HG_Registered_disable_response(hg_class, hello_rpc_id, HG_TRUE);

    /* Look up the address of the server. This is asynchronous: the result is
     * handled by lookup_callback once the progress loop below runs.
     * The NULL is the user-data pointer passed to lookup_callback (unused).
     * The 4th argument is the address of the server.
     * The 5th argument would receive an hg_op_id_t identifying the operation
     * (useful for HG_Cancel); it is not needed here, hence HG_OP_ID_IGNORE.
     */
    ret = HG_Addr_lookup(hg_context, lookup_callback, NULL, server_address, HG_OP_ID_IGNORE);
    if (ret != HG_SUCCESS) {
        /* If the lookup was never submitted, lookup_callback will not run and
         * `completed` would stay 0 forever: bail out instead of spinning. */
        fprintf(stderr, "HG_Addr_lookup(\"%s\") failed: %d\n", server_address, (int)ret);
        HG_Context_destroy(hg_context);
        HG_Finalize(hg_class);
        exit(EXIT_FAILURE);
    }

    /* Main event loop: make progress until completed becomes true. */
    while (!completed) {
        unsigned int count;
        do {
            ret = HG_Trigger(hg_context, 0, 1, &count);
        } while ((ret == HG_SUCCESS) && count && !completed);
        HG_Progress(hg_context, 100);
    }

    ret = HG_Context_destroy(hg_context);
    /* This line reports the context tear-down, so label it accordingly. */
    printf("HG_Context_destroy : %d \n", (int)ret);
    assert(ret == HG_SUCCESS);

    /* Finalize the hg_class. A failure here is reported but deliberately not
     * treated as fatal: the RPC has already been sent at this point. */
    hg_return_t err = HG_Finalize(hg_class);
    printf("HG_Finalize : %d \n", (int)err);

    return 0;
}
/*
* This function is called when the address lookup operation has completed.
*/
hg_return_t lookup_callback(const struct hg_cb_info *callback_info)
{
hg_return_t ret;
/* First, check that the lookup went fine. */
assert(callback_info->ret == 0);
/* Get the address of the server. */
hg_addr_t addr = callback_info->info.lookup.addr;
/* Create a call to the hello_world RPC. */
hg_handle_t handle;
ret = HG_Create(hg_context, addr, hello_rpc_id, &handle);
assert(ret == HG_SUCCESS);
/* Send the RPC. The first NULL correspond to the callback
* function to call when receiving the response from the server
* (we don't expect a response, hence NULL here).
* The second NULL is a pointer to user-specified data that will
* be passed to the response callback.
* The third NULL is a pointer to the RPC's argument (we don't
* use any here).
*/
ret = HG_Forward(handle, NULL, NULL, NULL);
assert(ret == HG_SUCCESS);
/* Free the handle */
ret = HG_Destroy(handle);
assert(ret == HG_SUCCESS);
/* Set completed to 1 so we terminate the loop. */
completed = 1;
return HG_SUCCESS;
}
Build test:
g++ server.c -o server -lmercury
g++ client.c -o client -lmercury
Run server:
[root@localhost mercury-test]# ./server tcp
Server running at address ofi+tcp://172.16.204.176:44049
Run Client:
[root@localhost mercury-test]# ./client tcp ofi+tcp://172.16.204.176:44049
HG_Finalize : 0
HG_Finalize : 8
[root@localhost mercury-test]#
Server output:
[root@localhost mercury-test]# ./server tcp
Server running at address ofi+tcp://172.16.204.176:44049
Hello World!
The next step is to interface Mercury with Rust.
https://medium.com/@aidagetoeva/async-c-rust-interoperability-39ece4cd3dcf
Investigation done.
The intention is to identify an RPC framework that can be used from Rust.
For now, it is fine if it only works over TCP (to start with).