dmlc / treelite

Universal model exchange and serialization format for decision tree forests
https://treelite.readthedocs.io/en/latest/
Apache License 2.0
738 stars 100 forks source link

Segmentation fault on predictor load with C runtime API #19

Closed aivarasbaranauskas closed 6 years ago

aivarasbaranauskas commented 6 years ago

Treelite version: 0.31 Compiler: gcc-7 (Homebrew GCC 7.3.0_1) 7.3.0 Cmake version: 3.10.3 Environment: OS X 10.13.3 high sierra

Using Treelite through the C API causes a segmentation fault in TreelitePredictorLoad. It worked fine on 0.3.

hcho3 commented 6 years ago

Thanks for the report. Could you post some logs or scripts to help us diagnose the issue? Do keep in mind that the C API for Predictor has changed significantly for 0.31.

hcho3 commented 6 years ago

Also, the latest version uses a custom implementation of a thread pool for Predictor. Does your application do anything unusual that interferes with multithreading, e.g. forking the process in the middle?

aivarasbaranauskas commented 6 years ago

Yes, I am aware of changes, code was updated. Also tried on VM: Ubuntu 16.04; gcc-7 (Ubuntu 7.1.0-10ubuntu1~16.04.york0) 7.1.0; cmake 3.5.1. Backtrace from GDB in VM (antivirus kills gdb on OS X...):

#0  0x0000000000000000 in ?? ()
#1  0x00007ffff7bbeb81 in dmlc::CustomLogMessage::~CustomLogMessage() () from /usr/lib/libtreelite_runtime.so
#2  0x00007ffff7bbf21d in dmlc::LogMessageFatal::~LogMessageFatal() () from /usr/lib/libtreelite_runtime.so
#3  0x00007ffff7bca2b9 in treelite::Predictor::Load(char const*) () from /usr/lib/libtreelite_runtime.so
#4  0x00007ffff7bbd99f in TreelitePredictorLoad () from /usr/lib/libtreelite_runtime.so
#5  0x0000000000400a09 in main (argc=1, argv=0x7fffffffe5c8) at main.c:9

Edit: The application does not interfere with threads.

aivarasbaranauskas commented 6 years ago

The application is pretty basic, built to benchmark performance with our model.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <c_api_runtime.h>

// Benchmark driver from the bug report: loads a compiled model, reads a CSV
// file into a dense row-major float matrix, and times repeated batch
// predictions.
// NOTE(review): this is the reporter's reproducer and is kept as posted; the
// review notes below mark the lines that look responsible for the crash.
int main(int argc, char **argv) {
    PredictorHandle predictor;
    // NOTE(review): 'predictor' is uninitialized and passed by value; the API
    // presumably expects an out-parameter (&predictor) -- the backtrace shows
    // out=0x0. The second argument (0) is the worker-thread count, which the
    // backtrace shows failing the 'num_worker_ > 0' check.
    if (TreelitePredictorLoad("model/predictor.so", 0, 1, predictor) == -1) {
        printf("Predictor load failed");
        return -1;
    }

    // Dimensions of the input matrix: rows read from the CSV x columns per row.
    int lines = 1000;
    int features = 374;

    //Open and load raw data from file
    FILE *fstream = fopen("input_data_no_header.csv","r");
    if (fstream == NULL)
    {
        printf("\n file opening failed ");
        return -1 ;
    }

    // Row-major buffer of lines * features floats; the malloc result is not
    // checked and the contents are not initialized.
    float *data = (float *)malloc(lines * features * sizeof(float));
    char *line = NULL;
    char *pt;
    size_t len = 0;
    ssize_t read;
    int x = 0;  // current row
    int y = 0;  // current column within the row

    // Read at most 'lines' rows, splitting each on ',' and converting every
    // field with atof().
    while ((read = getline(&line, &len, fstream)) != -1 && x < lines) {
        y = 0;
        pt = strtok(line, ",");
        while (pt != NULL) {
            // NOTE(review): y is not bounded by 'features'; a row with more
            // fields than expected writes past the end of the row.
            data[x * features + y] = (float)atof(pt);
            pt = strtok (NULL, ",");
            y++;
        }
        x++;
    }

    printf("File read.\n\n");

    DenseBatchHandle batch;
    // NOTE(review): 'batch' is uninitialized and passed by value, same pattern
    // as 'predictor' above; presumably &batch is expected -- confirm against
    // c_api_runtime.h.
    if (TreeliteAssembleDenseBatch(data, 0, (size_t)lines, (size_t)features, batch) == -1) {
        printf("Creating batch failed");
        return -1;
    }

    float* predicitons = (float*)malloc(lines * sizeof(float));
    float size;
    int reps = 100;

    // Time 'reps' identical predictions over the whole batch.
    clock_t start = clock();
    for (int i=0;i<reps;i++) {
        // NOTE(review): 'size' is a float whose address is cast to size_t*; if
        // the callee stores a size_t through it, the write can exceed the
        // storage of 'size' where size_t is wider than float.
        if (TreelitePredictorPredictBatch(predictor, batch, 0, 1, 1, predicitons, (size_t*)&size) == -1) {
            printf("Prediction failed");
            return -1;
        }
    }
    clock_t end = clock();
    // Converts clock ticks to milliseconds assuming 1,000,000 ticks per second
    // (CLOCKS_PER_SEC is not used), then reports the mean per repetition.
    float diff = ((float)(end - start) / 1000000.0F ) * 1000;   
    printf("Duration %f\n", diff/reps);
}

Compile command: gcc-7 main.c -o main -Ltreelite/lib -ltreelite_runtime -Itreelite/include/treelite

aivarasbaranauskas commented 6 years ago

Actual backtrace:

#0  0x0000000000000000 in ?? ()
#1  0x00007ffff7bcf03d in dmlc::CustomLogMessage::Log (msg="[19:01:08] /srv/treelite/dmlc-core/include/dmlc/logging.h:308: [19:01:08] /srv/treelite/src/thread_pool/thread_pool.h:30: Check failed: num_worker_ > 0 && num_worker_ + 1 <= std::thread::hardware_conc"...)
    at /srv/treelite/src/logging.cc:16
#2  0x00007ffff7bc3cf8 in dmlc::CustomLogMessage::~CustomLogMessage (this=0x7fffffffde70, __in_chrg=<optimized out>) at /srv/treelite/dmlc-core/include/dmlc/logging.h:240
#3  0x00007ffff7bc3fe7 in dmlc::LogMessageFatal::~LogMessageFatal (this=0x7fffffffe050, __in_chrg=<optimized out>) at /srv/treelite/dmlc-core/include/dmlc/logging.h:308
#4  0x00007ffff7bc6025 in treelite::ThreadPool<(anonymous namespace)::InputToken, (anonymous namespace)::OutputToken, treelite::Predictor>::ThreadPool (this=0x615290, num_worker=0, context=0x614c20,
    task=0x7ffff7bc45ef <treelite::Predictor::<lambda(SpscQueue<(anonymous namespace)::InputToken>*, SpscQueue<(anonymous namespace)::OutputToken>*, const treelite::Predictor*)>::_FUN(SpscQueue<(anonymous namespace)::InputToken> *, SpscQueue<(anonymous namespace)::OutputToken> *, const treelite::Predictor *)>) at /srv/treelite/src/thread_pool/thread_pool.h:29
#5  0x00007ffff7bc4adb in treelite::Predictor::Load (this=0x614c20, name=0x400d18 "model/predictor.so") at /srv/treelite/src/predictor.cc:276
#6  0x00007ffff7bc3804 in TreelitePredictorLoad (library_path=0x400d18 "model/predictor.so", num_worker_thread=0, include_master_thread=1, out=0x0) at /srv/treelite/src/c_api/c_api_runtime.cc:79
#7  0x0000000000400a09 in main (argc=1, argv=0x7fffffffe5c8) at main.c:9

P.S. Learning by doing...

aivarasbaranauskas commented 6 years ago

Works now, closing the issue. Thanks!

hcho3 commented 6 years ago

That's good to hear. Just for your reference, I've modified your code to get it to work:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <treelite/c_api.h>
#include <treelite/c_api_runtime.h>

// Evaluate x once; if it yields -1, print the failing expression's source text
// to stderr and terminate the process with exit(-1).
// Wrapped in do { ... } while (0) so the macro expands to exactly one
// statement and stays correct inside an unbraced if/else.
#define CHECK_CALL(x) do { if ( (x) == -1 ) { fprintf(stderr, "%s", #x); exit(-1); } } while (0)

/*
 * Benchmark driver: loads the compiled model, reads up to `lines` rows of
 * `features` comma-separated values from a CSV file into a dense row-major
 * matrix, runs batch prediction `reps` times and prints the mean processor
 * time per run in milliseconds.
 *
 * Returns 0 on success and -1 if the input file cannot be opened or memory
 * cannot be allocated. Any Treelite call that reports failure terminates the
 * process through CHECK_CALL.
 */
int main(void) {
  PredictorHandle predictor;
  CHECK_CALL(TreelitePredictorLoad("model/predictor.so", -1, 1, &predictor));

  // Open CSV file and assemble a dense batch
  FILE* fstream = fopen("input_data_no_header.csv", "r");
  if (fstream == NULL) {
    printf("file opening failed\n");
    return -1;
  }

  int lines = 1000;
  int features = 374;
  // calloc zero-fills the matrix, so rows or fields missing from the file are
  // read as 0 instead of indeterminate values.
  float* data = (float*)calloc((size_t)lines * (size_t)features, sizeof(float));
  if (data == NULL) {
    fprintf(stderr, "out of memory\n");
    fclose(fstream);
    return -1;
  }

  char* line = NULL;  // buffer owned by getline(); released with free() below
  size_t len = 0;
  int x = 0;          // current row

  while (x < lines && getline(&line, &len, fstream) != -1) {
    int y = 0;        // current column
    char* pt = strtok(line, ",");
    // Stop at `features` fields so an over-long row cannot write past its row
    // (and, for the last row, past the end of the buffer).
    while (pt != NULL && y < features) {
      data[(size_t)x * (size_t)features + (size_t)y] = (float)atof(pt);
      pt = strtok(NULL, ",");
      y++;
    }
    x++;
  }
  free(line);
  fclose(fstream);

  printf("File read.\n\n");

  DenseBatchHandle batch;
  CHECK_CALL(TreeliteAssembleDenseBatch(data, 0, (size_t)lines,
                                        (size_t)features, &batch));

  // NOTE(review): assumes one output value per row; presumably a model with
  // several outputs per row needs a larger buffer -- confirm against the
  // runtime API before reusing this with such a model.
  float* predictions = (float*)malloc((size_t)lines * sizeof(float));
  if (predictions == NULL) {
    fprintf(stderr, "out of memory\n");
    free(data);
    return -1;
  }
  size_t size = 0;
  int reps = 100;

  // clock() reports processor time, not wall-clock time.
  clock_t start = clock();
  for (int i = 0; i < reps; i++) {
    CHECK_CALL(TreelitePredictorPredictBatch(predictor, batch, 0, 0, 0,
               predictions, &size));
  }
  clock_t end = clock();
  // Convert ticks to milliseconds with CLOCKS_PER_SEC rather than assuming
  // one million ticks per second.
  double elapsed_ms = (double)(end - start) * 1000.0 / (double)CLOCKS_PER_SEC;
  printf("Duration %f\n", elapsed_ms / reps);

  // NOTE(review): the predictor and batch handles are not released here; the
  // runtime API presumably provides matching free/delete calls -- confirm in
  // c_api_runtime.h.
  free(predictions);
  free(data);

  return 0;
}