HewlettPackard / quartz

Quartz: A DRAM-based performance emulator for NVM
https://github.com/HewlettPackard/quartz
Other
159 stars 66 forks source link

Statistics showing 0 NVM accesses for a simple linked list code using pmalloc #16

Open dannyi96 opened 6 years ago

dannyi96 commented 6 years ago

I have used this sample code where I have used pmalloc for a linked list

#include<stdio.h>
#include<stdlib.h>
#include "pmalloc.h"

// Node of a singly linked list of ints.
typedef struct node
{
    int data;            // value stored in this node
    struct node *next;   // following node; NULL marks the end of the list
}NODE;

/*
 * Push a new node holding x onto the front of the list.
 *
 * head: address of the list's head pointer; on success it is updated to
 *       point at the new node.
 * x:    value stored in the new node.
 *
 * If the allocation fails, the list is left untouched and a message is
 * written to stderr.
 */
void insertAtFront(NODE **head,int x)
{
    NODE *new_node = (NODE*)pmalloc(sizeof(NODE));
    // assumes pmalloc() reports failure with NULL, like malloc() -- TODO confirm
    if(new_node==NULL)
    {
        fprintf(stderr,"insertAtFront: pmalloc failed\n");
        return;
    }
    new_node->data = x;
    new_node->next = *head;   // old head (possibly NULL) becomes the second node
    *head = new_node;
}

/*
 * Insert a new node holding x immediately after prev.
 *
 * prev: node after which the new node is linked; a NULL prev is rejected
 *       with a message on stdout and nothing is inserted.
 * x:    value stored in the new node.
 *
 * If the allocation fails, the list is left untouched and a message is
 * written to stderr.
 */
void insertAfter(NODE *prev,int x)
{
    if(prev==NULL)
    {
        printf("prev can't be NULL\n");
        return;
    }
    NODE *new_node = (NODE*)pmalloc(sizeof(NODE));
    // assumes pmalloc() reports failure with NULL, like malloc() -- TODO confirm
    if(new_node==NULL)
    {
        fprintf(stderr,"insertAfter: pmalloc failed\n");
        return;
    }
    new_node->data = x;
    new_node->next = prev->next;   // splice in between prev and its old successor
    prev->next = new_node;
}

/*
 * Add a new node holding x at the end of the list.
 *
 * head: address of the list's head pointer; updated only when the list
 *       was empty.
 * x:    value stored in the new node.
 *
 * Walks the whole list to find the last node. If the allocation fails,
 * the list is left untouched and a message is written to stderr.
 */
void append(NODE **head,int x)
{
    NODE *new_node = (NODE*)pmalloc(sizeof(NODE));
    // assumes pmalloc() reports failure with NULL, like malloc() -- TODO confirm
    if(new_node==NULL)
    {
        fprintf(stderr,"append: pmalloc failed\n");
        return;
    }
    new_node->data = x;
    new_node->next = NULL;

    // Empty list: the new node becomes the head.
    if(*head==NULL)
    {
        *head = new_node;
        return;
    }

    NODE *last = *head;
    while(last->next != NULL)
        last = last->next;
    last->next = new_node;
}

/*
 * Print every value in the list to stdout, each followed by "->",
 * then a terminating newline. An empty list prints just the newline.
 */
void printList(NODE *p)
{
    NODE *cur;

    for(cur = p; cur != NULL; cur = cur->next)
    {
        printf("%d->",cur->data);
    }
    printf("\n");
}

/*
 * Remove the first node whose data equals elem, if there is one.
 *
 * p:    address of the list's head pointer; updated when the head node is
 *       the one removed.
 * elem: value to search for.
 *
 * The head and non-head cases share one search loop: prev stays NULL only
 * when the match is the head itself, so the node is unlinked and freed
 * exactly once.
 */
void deleteElement(NODE **p,int elem)
{
    NODE *temp = *p;
    NODE *prev = NULL;

    while(temp!=NULL && temp->data!=elem)
    {
        prev = temp;
        temp = temp->next;
    }
    if(temp==NULL) return; // no such element (or empty list)

    if(prev==NULL)
        *p = temp->next;          // elem is at the first node
    else
        prev->next = temp->next;  // bypass the matching node

    // NOTE(review): nodes in this file are obtained from pmalloc() but
    // released with free(); confirm whether pmalloc.h requires a dedicated
    // release routine instead.
    free(temp);
}

/*
 * Remove the node at zero-based index pos.
 *
 * p:   address of the list's head pointer; updated when pos is 0.
 * pos: index of the node to remove. Negative values and indexes past the
 *      end of the list leave the list unchanged.
 */
void deleteAtPosition(NODE **p,int pos)
{
    // A negative index would skip the search loop below and wrongly remove
    // the node at index 1, so reject it up front.
    if(*p==NULL || pos<0) return;

    NODE *temp = *p;
    if(pos==0)
    {
        *p = temp->next;
        free(temp);
        return;
    }

    int i;
    for(i=0;temp!=NULL && i<pos-1;i++)
        temp = temp->next;   // ends on the node just before the one to delete
    if(temp==NULL || temp->next==NULL)
        return;              // pos is beyond the end of the list

    NODE *next = temp->next->next;
    // NOTE(review): nodes in this file are obtained from pmalloc() but
    // released with free(); confirm whether pmalloc.h requires a dedicated
    // release routine instead.
    free(temp->next);
    temp->next = next;
}

/*
 * Count the nodes in the list iteratively.
 * Returns 0 for an empty (NULL) list.
 */
int getLength(NODE *p)
{
    int n = 0;
    NODE *cur;

    for(cur = p; cur != NULL; cur = cur->next)
        n++;
    return n;
}

/*
 * Count the nodes in the list recursively: an empty list has length 0,
 * otherwise the length is one more than the length of the rest.
 */
int getLengthRecursive(NODE *p)
{
    return (p != NULL) ? 1 + getLengthRecursive(p->next) : 0;
}

/*
 * Swap the positions of the first node holding x and the first node
 * holding y by relinking pointers (the data fields are not touched).
 *
 * p: address of the list's head pointer; updated when one of the two
 *    nodes is the head.
 *
 * Does nothing when x == y or when either value is absent from the list.
 */
void swapNodes(NODE **p,int x, int y)
{
    NODE *beforeFirst = NULL;
    NODE *beforeSecond = NULL;
    NODE *first;
    NODE *second;

    if(x == y)
        return;

    // Locate the node holding x and remember its predecessor.
    for(first = *p; first != NULL && first->data != x; first = first->next)
        beforeFirst = first;

    // Locate the node holding y and remember its predecessor.
    for(second = *p; second != NULL && second->data != y; second = second->next)
        beforeSecond = second;

    if(!(first != NULL && second != NULL))
        return;

    // Point whatever preceded each node (or the head pointer) at the other.
    if(beforeFirst != NULL)
        beforeFirst->next = second;
    else
        *p = second;

    if(beforeSecond != NULL)
        beforeSecond->next = first;
    else
        *p = first;

    // Finally exchange the successors; this must come after the relinking
    // above so that adjacent nodes are handled correctly.
    NODE *savedNext = first->next;
    first->next = second->next;
    second->next = savedNext;
}

/*
 * Reverse the list in place, iteratively.
 *
 * p: address of the list's head pointer; on return it points at what used
 *    to be the last node (or stays NULL for an empty list).
 */
void reverse(NODE **p)
{
    NODE *reversed = NULL;   // head of the already-reversed prefix
    NODE *cur = *p;

    while(cur != NULL)
    {
        NODE *following = cur->next;   // save before the link is overwritten
        cur->next = reversed;
        reversed = cur;
        cur = following;
    }
    *p = reversed;
}

/*
 * Reverse the list in place, recursively.
 *
 * p: address of the list's head pointer; on return it points at the new
 *    head. Lists with zero or one node are left unchanged.
 */
void reverseRecursive(NODE **p)
{
    NODE *first = *p;

    if(first == NULL || first->next == NULL)
        return;

    // Reverse everything after the first node; tail ends up pointing at
    // the head of that reversed remainder.
    NODE *tail = first->next;
    reverseRecursive(&tail);

    // first->next still refers to the original second node, which is now
    // the last node of the reversed remainder: hang first behind it.
    first->next->next = first;
    first->next = NULL;
    *p = tail;
}

/*
 * Demo driver: builds a small list, prints it and its length, then
 * exercises swapNodes, reverse and reverseRecursive, printing the list
 * after each step.
 */
int main()
{
    NODE *head = NULL;

    // Build the list 2->1->10->3.
    append(&head, 1);
    insertAtFront(&head, 2);
    append(&head, 3);
    insertAfter(head->next, 10);

    printList(head);
    printf("Length: %d \n", getLength(head));
    printf("Length Recursive: %d \n", getLengthRecursive(head));

    // The two deletion calls are disabled, so the list is printed unchanged.
    //deleteElement(&head,1);
    printList(head);
    //deleteAtPosition(&head,1);
    printList(head);

    printf("Length: %d \n", getLength(head));
    printf("Length Recursive: %d \n", getLengthRecursive(head));

    swapNodes(&head, 2, 1);
    printList(head);

    reverse(&head);
    printList(head);

    reverseRecursive(&head);
    printList(head);

    return 0;
}

My current directory contents looks like this 1) plinkedlist.c 2) src < src directory of quartz > 3) scripts < scripts directory of quartz> 4) build < build file of quartz> 5) nvmemul.ini 6) nvmemul.dox 7) nvmemul-orig.ini 8) a.out < the program executable>

I compiled and ran the file using the following commands:

gcc -I src/lib/ plinkedlist.c -L build/src/lib/ -lnvmemul
sudo scripts/setupdev.sh load
scripts/runenv.sh ./a.out

I get the correct program output but in the statistics I get 0 NVM accesses, even though this is untrue.

Statistics Output:


===== STATISTICS (Thu Nov 23 22:22:17 2017) =====

PID: 18718
Initialization duration: 2136458 usec
Running threads: 0
Terminated threads: 1

== Running threads == 

== Terminated threads == 
    Thread id [18718]
        : cpu id: 0
        : spawn timestamp: 632629839714
        : termination timestamp: 632629839811
        : execution time: 97 usecs
        : stall cycles: 0
        : NVM accesses: 0
        : latency calculation overhead cycles: 0
        : injected delay cycles: 0
        : injected delay in usec: 0
        : longest epoch duration: 0 usec
        : shortest epoch duration: 0 usec
        : average epoch duration: 0 usec
        : number of epochs: 0
        : epochs which didn't reach min duration: 0
        : static epochs requested: 0

Is there any reason/mistake I'm making?

guimagalhaes commented 6 years ago

What is the size of an 'epoch' you are using? Make sure the program runs longer than the max epoch time. Please run your program in loop for some seconds and check the results again.

dannyi96 commented 6 years ago

Thanks for the response. I tried running the same code in loops for 2000 iterations and then it shows NVM accesses value in the statistics output as expected. I'm not sure of how exactly to check the current 'epoch' size? And is there a way to modify it so that even if there is 1 NVM access, it displays in the output.

guimagalhaes commented 6 years ago

You can change the epoch time in the nvmemul.ini file. However, very small epochs may lead to extra overhead and additional deviations in the modeling.

dannyi96 commented 6 years ago

Ok. Thanks a lot!

dannyi96 commented 6 years ago

Also, I wanted to address another issue. I've used this as sample code

#include<stdio.h>
#include<stdlib.h>
#include "pmalloc.h"

// Node of a singly linked list of ints.
typedef struct node
{
    int data;            // value stored in this node
    struct node *next;   // pointer to another node (only ever set to NULL in this sample)
}NODE;

/*
 * Allocate one node with pmalloc() and store x into it.
 *
 * x: value written to the node's data field.
 *
 * The node is only written, never read back. If the allocation fails,
 * nothing is written and a message goes to stderr.
 *
 * NOTE(review): the node is neither returned nor released, so every call
 * leaks it; presumably deliberate for this experiment -- confirm.
 */
void dummyUse(int x)
{
    NODE *new_node = (NODE*)pmalloc(sizeof(NODE));
    // assumes pmalloc() reports failure with NULL, like malloc() -- TODO confirm
    if(new_node==NULL)
    {
        fprintf(stderr,"dummyUse: pmalloc failed\n");
        return;
    }
    new_node->data = x;
    new_node->next = NULL;
}

/*
 * Driver: calls dummyUse() 20000 times, each time with a pseudo-random
 * value in the range 0..99.
 */
int main()
{
    int i = 0;

    while(i < 20000)
    {
        dummyUse(rand()%100);
        i++;
    }
    return 0;
}

But when I try running the code I get the wrong value of NVM accesses(it should be 20000, if I'm not mistaken). But I get only 169 NVM accesses

== Terminated threads == 
    Thread id [30300]
        : cpu id: 0
        : spawn timestamp: 766953101986
        : termination timestamp: 766953167483
        : execution time: 65497 usecs
        : stall cycles: 54152
        : NVM accesses: 169
        : latency calculation overhead cycles: 0
        : injected delay cycles: 278789
        : injected delay in usec: 132
        : longest epoch duration: 10051 usec
        : shortest epoch duration: 10010 usec
        : average epoch duration: 10026 usec
        : number of epochs: 6
        : epochs which didn't reach min duration: 0
        : static epochs requested: 6

Is there any reason for this?

Edit: Also when I unload and load the kernel module and then run, I get 658 NVM accesses shown in the statistics output

guimagalhaes commented 6 years ago

@danny311296, this code example is not reading/loading any persistent memory allocated with pmalloc(). A small number of 'NVM' accesses may happen for architectural reasons, but if you want to see 2000 accesses or more, you have to randomly access/load/read the allocated 'new_node's. Also make sure that the compiler does not optimize out the memory accesses in your code example, and make the memory accesses random so that CPU prefetching does not hide the memory latency (which would prevent CPU stalls).

dannyi96 commented 6 years ago

Ok. Thanks a lot for the help!

usergit19 commented 6 years ago

@danny311296 I've picked your first code to try it as an initiation to know how to use the emulator but I don't get the correct program output especially for NVM access, is it due to this : "tee: /sys/bus/event_source/devices/cpu/rdpmc: No file or folder of this type" ?

guimagalhaes commented 6 years ago

What Linux kernel version are you using? Are Linux perf events enabled in the kernel? Did you try accessing that file and running the application as root?

usergit19 commented 6 years ago

@guimagalhaes I'm using version 4.4, I've tried to run the app as root but in vain and I didn't find that file. I'm working on a virtual machine Ubuntu 14.04, Intel(R) Xeon(R) CPU E3-1245 v3 @ 3.40GHz.

guimagalhaes commented 6 years ago

Please check that the kernel has performance counters enabled.

usergit19 commented 6 years ago

@guimagalhaes In fact, the kernel has performance counters enabled.

Statistics Output:

== Terminated threads == 
    Thread id [3582]
        : cpu id: 0
        : spawn timestamp: 635334690
        : termination timestamp: 679622253
        : execution time: 44287563 usecs
        : stall cycles: 0
        : NVM accesses: 0
        : latency calculation overhead cycles: 1048035309
        : injected delay cycles: 0
        : injected delay in usec: 0
        : longest epoch duration: 18604 usec
        : shortest epoch duration: 509 usec
        : average epoch duration: 1427 usec
        : number of epochs: 31020
        : epochs which didn't reach min duration: 0
        : static epochs requested: 31020

I don't really understand why, and I still get this error: "tee: /sys/bus/event_source/devices/cpu/rdpmc: No file or folder of this type".

guimagalhaes commented 6 years ago

Can you access '/sys/devices/cpu/rdpmc'?

usergit19 commented 6 years ago

@guimagalhaes No I can't.

guimagalhaes commented 6 years ago

Are you using a modified or official kernel version? Could you install the latest stable kernel version?

usergit19 commented 6 years ago

Yes, I've installed Linux 4.10.0-27-generic x86_64

guimagalhaes commented 6 years ago

I am using 4.16.11 and I don't see the problem. Please install 4.16 and retry.

ParkWANG commented 5 years ago

> Yes, I've installed Linux 4.10.0-27-generic x86_64

Hello. I ran into the same problem as you ("tee: /sys/bus/event_source/devices/cpu/rdpmc: No file or folder of this type"), and our hardware configuration is the same (Intel(R) Xeon(R) CPU E3). I also tried Linux version 4.16.11, but it still didn't work. Have you fixed the problem? Please give me some help if possible.