open-quantum-safe / liboqs

C library for prototyping and experimenting with quantum-resistant cryptography
https://openquantumsafe.org/
Other
1.92k stars 466 forks source link

Multi-threaded performance testing issues #1859

Closed MMDHBZ closed 3 months ago

MMDHBZ commented 4 months ago

I wrote a multi-threaded test function myself, but when I measure performance, the multi-threaded and single-threaded results are almost the same, and sometimes the multi-threaded run is even slower than the single-threaded one. I want to know why.

CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz
OS: CentOS 7

liboqs was compiled as follows:

    mkdir build && cd build
    cmake -GNinja .. -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=../install -DOQS_USE_PTHREADS=ON
    ninja
    ninja install

The function that calls liboqs.so for performance testing is as follows:

/* NOTE(review): the page rendering stripped the leading '#' and the <...>
 * header names from this include list. The system headers below are
 * reconstructed from the functions the program calls -- confirm against the
 * original file. */
#include "oqs/kem_kyber.h"
#include <oqs/oqs.h> /* OQS_KEM, OQS_KEM_new, OQS_KEM_free */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#ifdef WIN32
#include <process.h>
#else
#include <pthread.h>
#include <unistd.h>
#endif

/* Pause used by the statistics threads and at shutdown: sleeps one second. */
#define SLEEP sleep(1)

/*
 * Run flag. test_speed() sets it to 1 before the threads start and back to 0
 * to ask them to stop; the worker and statistics loops poll it.
 * NOTE(review): it is read and written from several threads without any
 * synchronization.
 */
unsigned int isContinue = 0;

/* Per-worker operation counters: worker i is handed &g_unTotalTimes[i]. */
unsigned int g_unTotalTimes[100];

/* Number of worker threads, entered by the user in test_speed(). */
unsigned int loop = 0;

/*****TEST_KYBER****/
/**
 * Statistics thread for the Kyber512 key-generation benchmark.
 *
 * While the global run flag isContinue is non-zero, sleeps for one second
 * (SLEEP), then prints the wall-clock time elapsed since this function
 * started, the sum of all counters in g_unTotalTimes, and the resulting
 * average rate in operations per second. Once the flag is cleared, a final
 * summary in the same form is printed.
 *
 * All rates are computed from wall-clock time (time()). The previous final
 * rate used clock() / CLOCKS_PER_SEC / loop as the divisor, which truncates
 * to 0 for short runs (and divides by loop itself when loop == 0), so the
 * following integer division could trap.
 *
 * @param argv Unused; present so the function can be handed to a thread API.
 *
 * NOTE(review): isContinue and g_unTotalTimes are read here without
 * synchronization while other threads write them, so totals are approximate.
 */
void PerformanceTestStatFunc_Kyber512_Keygen(void* argv)
{
    const size_t slots = sizeof g_unTotalTimes / sizeof g_unTotalTimes[0];
    unsigned int total;
    long elapsed;
    size_t i;
    time_t t0, t1;

    (void)argv;
    t0 = time(0);

    while (isContinue)
    {
        SLEEP;
        t1 = time(0);
        elapsed = (long)difftime(t1, t0);
        printf("once: test time:%ld sec\n", elapsed);

        total = 0;
        for (i = 0; i < slots; i++)
        {
            total += g_unTotalTimes[i];
        }
        /* Clamp to one second so an interrupted sleep cannot divide by zero. */
        if (elapsed < 1)
        {
            elapsed = 1;
        }
        printf("once: Kyber512_Keygen times:%u times\n", total);
        printf("once: performance: %uTps\n\n", total / (unsigned int)elapsed);
    }

    t1 = time(0);
    elapsed = (long)difftime(t1, t0);
    printf("test time:%ld sec\n", elapsed);

    total = 0;
    for (i = 0; i < slots; i++)
    {
        total += g_unTotalTimes[i];
    }
    /* Same clamp as above: a run shorter than one second counts as one. */
    if (elapsed < 1)
    {
        elapsed = 1;
    }
    printf("Kyber512_Keygen times:%u times\n", total);
    printf("performance: %uTps\n", total / (unsigned int)elapsed);
}

int myThread_Kyber512_Keygen(void* argv) { unsigned char public_key[OQS_KEM_kyber_512_length_public_key]; unsigned char secret_key[OQS_KEM_kyber_512_length_secret_key]; unsigned char ciphertext[OQS_KEM_kyber_512_length_ciphertext]; unsigned char shared_secreta[OQS_KEM_kyber_512_length_shared_secret]; unsigned char shared_secretb[OQS_KEM_kyber_512_length_shared_secret]; int ret = 5, i;

OQS_KEM_new("Kyber512");
ret = OQS_KEM_kyber_512_keypair(public_key, secret_key);
ret = OQS_KEM_kyber_512_encaps(ciphertext, shared_secreta, public_key);
ret = OQS_KEM_kyber_512_decaps(shared_secretb, ciphertext, secret_key);
if(ret !=0 || memcmp(shared_secreta, shared_secretb, OQS_KEM_kyber_512_length_shared_secret) !=0)
{printf("\n OQS_KEM_kyber_512 failed\n");}
else{printf("\n OQS_KEM_kyber_512 sucess\n");}

while (isContinue)
{
    ret = OQS_KEM_kyber_512_keypair(public_key, secret_key);
    if (ret != 0)
    {
        printf("OQS_KEM_kyber_512_keypair: ERROR");
        break;
        return 0;
    }
    (*(int*)argv)++;

}
return 1;

}

/**
 * Statistics thread for the Kyber512 encapsulation benchmark.
 *
 * While the global run flag isContinue is non-zero, sleeps for one second
 * (SLEEP), then prints the wall-clock time elapsed since this function
 * started, the sum of all counters in g_unTotalTimes, and the resulting
 * average rate in operations per second. Once the flag is cleared, a final
 * summary in the same form is printed.
 *
 * All rates are computed from wall-clock time (time()). The previous final
 * rate used clock() / CLOCKS_PER_SEC / loop as the divisor, which truncates
 * to 0 for short runs (and divides by loop itself when loop == 0), so the
 * following integer division could trap.
 *
 * @param argv Unused; present so the function can be handed to a thread API.
 *
 * NOTE(review): isContinue and g_unTotalTimes are read here without
 * synchronization while other threads write them, so totals are approximate.
 */
void PerformanceTestStatFunc_Kyber512_Enc(void* argv)
{
    const size_t slots = sizeof g_unTotalTimes / sizeof g_unTotalTimes[0];
    unsigned int total;
    long elapsed;
    size_t i;
    time_t t0, t1;

    (void)argv;
    t0 = time(0);

    while (isContinue)
    {
        SLEEP;
        t1 = time(0);
        elapsed = (long)difftime(t1, t0);
        printf("once: test time:%ld sec\n", elapsed);

        total = 0;
        for (i = 0; i < slots; i++)
        {
            total += g_unTotalTimes[i];
        }
        /* Clamp to one second so an interrupted sleep cannot divide by zero. */
        if (elapsed < 1)
        {
            elapsed = 1;
        }
        printf("once: Kyber512_Enc times:%u times\n", total);
        printf("once: performance: %uTps\n\n", total / (unsigned int)elapsed);
    }

    t1 = time(0);
    elapsed = (long)difftime(t1, t0);
    printf("test time:%ld sec\n", elapsed);

    total = 0;
    for (i = 0; i < slots; i++)
    {
        total += g_unTotalTimes[i];
    }
    /* Same clamp as above: a run shorter than one second counts as one. */
    if (elapsed < 1)
    {
        elapsed = 1;
    }
    printf("Kyber512_Enc times:%u times\n", total);
    printf("performance: %uTps\n", total / (unsigned int)elapsed);
}

int myThread_Kyber512_Enc(void* argv) {

unsigned char  public_key[OQS_KEM_kyber_512_length_public_key];
unsigned char secret_key[OQS_KEM_kyber_512_length_secret_key];
unsigned char ciphertext[OQS_KEM_kyber_512_length_ciphertext];
unsigned char shared_secreta[OQS_KEM_kyber_512_length_shared_secret];
unsigned char shared_secretb[OQS_KEM_kyber_512_length_shared_secret];
int ret = 5, i;

OQS_KEM_new("Kyber512");
ret = OQS_KEM_kyber_512_keypair(public_key, secret_key);
ret = OQS_KEM_kyber_512_encaps(ciphertext, shared_secreta, public_key);
ret = OQS_KEM_kyber_512_decaps(shared_secretb, ciphertext, secret_key);
if(ret !=0 || memcmp(shared_secreta, shared_secretb, OQS_KEM_kyber_512_length_shared_secret) !=0)
{printf("\n OQS_KEM_kyber_512 failed\n");}
else{printf("\n OQS_KEM_kyber_512 sucess\n");}

while (isContinue)
{
    ret =  OQS_KEM_kyber_512_encaps(ciphertext, shared_secreta, public_key);
    if (ret != 0)
    {
        printf("Kyber512_Enc: ERROR");
        break;
        return 0;
    }
    (*(int*)argv)++;

}
return 1;

}

#ifndef WIN32
/*
 * pthread entry-point adapters. pthread_create() needs a function of type
 * void *(*)(void *); the benchmark functions return void or int, so calling
 * them through a cast pointer (as before) is undefined behaviour.
 */

/* Runs the key-generation statistics loop as a pthread start routine. */
static void *keygen_stats_entry(void *arg)
{
    PerformanceTestStatFunc_Kyber512_Keygen(arg);
    return NULL;
}

/* Runs one key-generation worker as a pthread start routine. */
static void *keygen_worker_entry(void *arg)
{
    (void)myThread_Kyber512_Keygen(arg);
    return NULL;
}

/* Runs the encapsulation statistics loop as a pthread start routine. */
static void *enc_stats_entry(void *arg)
{
    PerformanceTestStatFunc_Kyber512_Enc(arg);
    return NULL;
}

/* Runs one encapsulation worker as a pthread start routine. */
static void *enc_worker_entry(void *arg)
{
    (void)myThread_Kyber512_Enc(arg);
    return NULL;
}
#endif

/**
 * Interactive driver for the benchmark.
 *
 * Reads the number of worker threads and the test ID (0 = Kyber512 key
 * generation, 1 = Kyber512 encapsulation) from stdin, starts one statistics
 * thread plus the requested workers, waits for a key press, then clears
 * isContinue so the threads leave their loops. The threads are not joined;
 * the user is told to terminate the process with Ctrl+C.
 *
 * Changes from the previous version:
 *  - ID 1 now starts the encapsulation functions; it used to start the
 *    key-generation ones again (copy/paste slip).
 *  - The thread count is read with %u (loop is unsigned), both inputs are
 *    checked, and the count is limited to the size of g_unTotalTimes, since
 *    worker i writes g_unTotalTimes[i].
 *  - pthread_create() results are checked and threads are started through
 *    correctly typed adapters.
 */
void test_speed(void)
{
#ifndef WIN32
    pthread_t ntid;
    void *(*stats_entry)(void *);
    void *(*worker_entry)(void *);
#endif
    const unsigned int max_workers =
        (unsigned int)(sizeof g_unTotalTimes / sizeof g_unTotalTimes[0]);
    unsigned int i;
    int ID;

    getchar();
    printf("\n please scanf loops (threads'number)....\n");
    if (scanf("%u", &loop) != 1 || loop == 0 || loop > max_workers)
    {
        printf("invalid thread count, expected 1..%u\n", max_workers);
        loop = 0;
        return;
    }
    printf("loops:%u\n", loop);
    printf("\n Choosed ID....\n");
    printf("\n0:Kyber512_Keygen\n");
    printf("\n1:Kyber512_Enc\n");

    printf("\n please scanf ID....\n");
    if (scanf("%d", &ID) != 1 || (ID != 0 && ID != 1))
    {
        printf("invalid ID, expected 0 or 1\n");
        return;
    }
    printf("ID:%d\n", ID);

    /* Raise the run flag only once the input is known to be valid. */
    isContinue = 1;

#ifdef WIN32
    /* performance statistics thread */
    _beginthread(ID == 0 ? PerformanceTestStatFunc_Kyber512_Keygen
                         : PerformanceTestStatFunc_Kyber512_Enc, 0, NULL);
    for (i = 0; i < loop; i++)
    {
        _beginthread(ID == 0 ? myThread_Kyber512_Keygen : myThread_Kyber512_Enc,
                     0, g_unTotalTimes + i);
    }
#else
    stats_entry = (ID == 0) ? keygen_stats_entry : enc_stats_entry;
    worker_entry = (ID == 0) ? keygen_worker_entry : enc_worker_entry;

    /* performance statistics thread */
    if (pthread_create(&ntid, NULL, stats_entry, NULL) != 0)
    {
        printf("pthread_create failed for the statistics thread\n");
        isContinue = 0;
        return;
    }

    for (i = 0; i < loop; i++)
    {
        if (pthread_create(&ntid, NULL, worker_entry, g_unTotalTimes + i) != 0)
        {
            printf("pthread_create failed for worker %u\n", i);
            break;
        }
    }
#endif

    printf("testing....press any key to stop...\n");
    getchar(); /* consumes the newline left behind by scanf */
    getchar(); /* waits for the actual key press */
    isContinue = 0;
    getchar();
    puts("press CTL+C to exit");
    SLEEP;
}

/* Program entry point: runs the interactive benchmark and reports success.
 * Declared as int main(void) because `void main()` is not a standard
 * signature for a hosted C program. */
int main(void)
{
    test_speed();
    return 0;
}

cothan commented 4 months ago

First of all, you clearly didn't put any effort into formatting your code properly. It's hard to read; I stopped reading after the CMake command. Instead of posting the code and asking the community to "hey, figure out my code", can you briefly explain what numbers you got? If the numbers from A are slower compared to B, can you show the numbers from B as well?

MMDHBZ commented 4 months ago

Sorry, I'm just trying to figure out the performance of calling the dynamic library to test kyber, why multi-threaded and single-threaded performance is about the same, measured in Tps.

The following is a screenshot of a single-threaded test: [image: 单线程 ("single thread")]

Below is a screenshot of the 12-thread test: [image: 12线程 ("12 threads")]

baentsch commented 3 months ago

Mirroring @cothan 's comments the above is not concise enough to judge whether there is a problem with OQS or your code @MMDHBZ . In general, multi-threaded code is notoriously difficult to understand and debug, so best would be for you to dig deeper and analyze where (e.g., in which function) there is resource contention (as seems to be the case as you don't see any speedup). Also, please note that not everyone understands Chinese (comments).

baentsch commented 3 months ago

Closing until further input received.