radareorg / sdb

Simple and fast string based key-value database with support for arrays and json
https://www.radare.org/
MIT License
218 stars 62 forks source link

Implement Journal bulk mode #126

Open radare opened 7 years ago

radare commented 7 years ago

The idea behind this is to optimize load times when a program that uses sdb creates a LOT of keys in a short period of time. Doing so currently results in many hashtable lookups, which makes loading times much worse.

The proposed solution is to make sdb_set() append each key=value pair to a linear buffer in memory.

Here's the PoC patch for r2 dbginfo, which makes loading a kernel take ~10s instead of more than 2 minutes:

diff --git a/libr/bin/dwarf.c b/libr/bin/dwarf.c
index 9455c41..3ff0659 100644
--- a/libr/bin/dwarf.c
+++ b/libr/bin/dwarf.c
@@ -16,6 +16,8 @@
 #include <r_bin.h>
 #include <r_bin_dwarf.h>
 #include <r_core.h>
+FILE *JOURNAL = NULL;
+RBuffer *JOURNALBUF = NULL;

 #define STANDARD_OPERAND_COUNT_DWARF2 9
 #define STANDARD_OPERAND_COUNT_DWARF3 12
@@ -398,10 +400,28 @@ static inline void add_sdb_addrline(Sdb *s, ut64 addr, const char *file, ut64 li
 #else
    p = file;
 #endif
+   if (!p) {
+       p = "";
+   }
    fileline = r_str_newf ("%s|%"PFMT64d, p, line);
    offset_ptr = sdb_itoa (addr, offset, 16);
+   if (!JOURNAL) {
+       JOURNAL = fopen ("TheDwarfJournal.txt", "w");
+   }
+   if (!JOURNALBUF) {
+       JOURNALBUF = r_buf_new ();
+   }
+#if 0
    sdb_add (s, offset_ptr, fileline, 0);
    sdb_add (s, fileline, offset_ptr, 0);
+#else
+   char *q = r_str_newf ("%s=%s\n%s=%s\n", offset_ptr, fileline, fileline, offset_ptr);
+   r_buf_append_string (JOURNALBUF, q);
+   free (q);
+   // fprintf (JOURNAL, "%s=%s\n%s=%s\n", offset_ptr, fileline, fileline, offset_ptr);
+#endif
+   free (fileline);
+// free (offset_ptr);
 }

 static const ut8* r_bin_dwarf_parse_ext_opcode(const RBin *a, const ut8 *obuf,
@@ -574,16 +594,16 @@ static const ut8* r_bin_dwarf_parse_std_opcode(
        }
        break;
    case DW_LNS_advance_line:
-       buf = r_leb128(buf, &sbuf);
+       buf = r_leb128 (buf, &sbuf);
        regs->line += sbuf;
        if (f) {
-           fprintf(f, "Advance line by %"PFMT64d", to %"PFMT64d"\n", sbuf, regs->line);
+           fprintf (f, "Advance line by %"PFMT64d", to %"PFMT64d"\n", sbuf, regs->line);
        }
        break;
    case DW_LNS_set_file:
        buf = r_uleb128 (buf, ST32_MAX, &addr);
        if (f) {
-           fprintf(f, "Set file to %"PFMT64d"\n", addr);
+           fprintf (f, "Set file to %"PFMT64d"\n", addr);
        }
        regs->file = addr;
        break;
@@ -1486,11 +1506,12 @@ R_API int r_bin_dwarf_parse_info(RBinDwarfDebugAbbrev *da, RBin *a, int mode) {

 static RBinDwarfRow *r_bin_dwarf_row_new (ut64 addr, const char *file, int line, int col) {
    RBinDwarfRow *row = R_NEW0 (RBinDwarfRow);
-   if (!row) return NULL;
-   row->file = strdup (file);
-   row->address = addr;
-   row->line = line;
-   row->column = 0;
+   if (row) {
+       row->file = strdup (file);
+       row->address = addr;
+       row->line = line;
+       row->column = 0;
+   }
    return row;
 }

@@ -1499,6 +1520,7 @@ static void r_bin_dwarf_row_free(void *p) {
    free (row->file);
    free (row);
 }
+bool singleton = false;

 R_API RList *r_bin_dwarf_parse_line(RBin *a, int mode) {
    ut8 *buf;
@@ -1506,6 +1528,15 @@ R_API RList *r_bin_dwarf_parse_line(RBin *a, int mode) {
    int len, ret;
    RBinSection *section = getsection (a, "debug_line");
    RBinFile *binfile = a ? a->cur: NULL;
+if (singleton) {
+   return NULL;
+}
+singleton = true;
+eprintf ("LOAD DEBUG INFO\n");
+if (JOURNAL) {
+eprintf ("DONT LAOD AGAIN\n");
+   return NULL;
+}
    if (binfile && section) {
        len = section->size;
        if (len < 1) {
@@ -1556,6 +1587,10 @@ R_API RList *r_bin_dwarf_parse_line(RBin *a, int mode) {
        ls_free (ls);
        free (buf);
    }
+   if (JOURNAL) {
+       fclose (JOURNAL);
+       JOURNAL = NULL;
+   }
    return list;
 }

diff --git a/libr/cons/dietline.c b/libr/cons/dietline.c
index f92295c..085472d 100644
--- a/libr/cons/dietline.c
+++ b/libr/cons/dietline.c
@@ -1013,7 +1013,7 @@ R_API const char *r_line_readline_cb(RLineReadCallback cb, void *user) {
        case 2: // ^b // emacs left
 #if USE_UTF8
        {
-           char *s = I.buffer.data+I.buffer.index-1;
+           char *s = I.buffer.data + I.buffer.index - 1;
            utflen = 1;
            while (s > I.buffer.data && (*s & 0xc0) == 0x80) {
                utflen++;
diff --git a/libr/core/cbin.c b/libr/core/cbin.c
index ce71f40..7ebf01e 100644
--- a/libr/core/cbin.c
+++ b/libr/core/cbin.c
@@ -15,6 +15,21 @@
 #define IS_MODE_RAD(mode) (mode & R_CORE_BIN_RADARE)
 #define IS_MODE_NORMAL(mode) (!mode)

+typedef struct {
+   RCore *core;
+   int mode;
+} DwarfThreadState;
+
+static int bin_dwarf(RCore *core, int mode);
+
+static int backgroundDwarf(RThread *th) {
+   DwarfThreadState *dts = th->user;
+   eprintf ("Loading dwarf info in a thread\n");
+   bin_dwarf (dts->core, dts->mode);
+   eprintf ("Dwarf info loaded\n");
+   return 0;
+}
+
 // dup from cmd_info
 #define PAIR_WIDTH 9
 static void pair(const char *a, const char *b, int mode, bool last) {
@@ -2687,7 +2702,15 @@ R_API int r_core_bin_info(RCore *core, int action, int mode, int va, RCoreBinFil
    if ((action & R_CORE_BIN_ACC_RAW_STRINGS)) ret &= bin_raw_strings (core, mode, va);
    if ((action & R_CORE_BIN_ACC_INFO)) ret &= bin_info (core, mode);
    if ((action & R_CORE_BIN_ACC_MAIN)) ret &= bin_main (core, mode, va);
-   if ((action & R_CORE_BIN_ACC_DWARF)) ret &= bin_dwarf (core, mode);
+   if ((action & R_CORE_BIN_ACC_DWARF)) {
+       if (r_config_get_i (core->config, "bin.dbginfo.bg")) {
+           DwarfThreadState dts = { core, mode };
+           RThread* th = r_th_new (backgroundDwarf, &dts, false);
+           r_th_start (th, true);
+       } else {
+           ret &= bin_dwarf (core, mode);
+       }
+   }
    if ((action & R_CORE_BIN_ACC_PDB)) ret &= bin_pdb (core, mode);
    if ((action & R_CORE_BIN_ACC_ENTRIES)) ret &= bin_entry (core, mode, loadaddr, va);
    if ((action & R_CORE_BIN_ACC_SECTIONS)) ret &= bin_sections (core, mode, loadaddr, va, at, name, chksum);
diff --git a/libr/core/cconfig.c b/libr/core/cconfig.c
index 306f69a..e83c8f1 100755
--- a/libr/core/cconfig.c
+++ b/libr/core/cconfig.c
@@ -1876,6 +1876,7 @@ R_API int r_core_config_init(RCore *core) {
    SETI("bin.baddr", -1, "Base address of the binary");
    SETI("bin.laddr", 0, "Base address for loading library ('*.so')");
    SETPREF("bin.dbginfo", "true", "Load debug information on startup if available");
+   SETPREF("bin.dbginfo.bg", "false", "Load debug information in a background thread");
    SETICB("bin.minstr", 0, &cb_binminstr, "Minimum string length for r_bin");
    SETICB("bin.maxstr", 0, &cb_binmaxstr, "Maximum string length for r_bin");
    SETICB("bin.maxstrbuf", 1024*1024*10, & cb_binmaxstrbuf, "Maximum size of range to load strings from");
diff --git a/libr/util/buf.c b/libr/util/buf.c
index 5fc2e4e..4d401a3 100644
--- a/libr/util/buf.c
+++ b/libr/util/buf.c
@@ -397,7 +397,9 @@ R_API bool r_buf_append_ut64(RBuffer *b, ut64 n) {
 }

 R_API bool r_buf_append_buf(RBuffer *b, RBuffer *a) {
-   if (!b) return false;
+   if (!b) {
+       return false;
+   }
    if (b->fd != -1) {
        r_buf_append_bytes (b, a->buf, a->length);
        return true;
@@ -406,6 +408,7 @@ R_API bool r_buf_append_buf(RBuffer *b, RBuffer *a) {
        b->length = 0;
        b->empty = 0;
    }
+   /* TODO: optimize this, realloc is not optiomal */
    if ((b->buf = realloc (b->buf, b->length + a->length))) {
        memmove (b->buf+b->length, a->buf, a->length);
        b->length += a->length;
jpenalbae commented 7 years ago

seems to do the job for me.