minzu0306 / tasks

0 stars 0 forks source link

[Server] btree delete 성능개선 #5

Open minzu0306 opened 8 years ago

minzu0306 commented 8 years ago

do_coll_all_elem_delete(struct default_engine engine, hash_item it) (void)do_btree_elem_delete(engine, info, BKEY_RANGE_TYPE_ASC, &bkrange_space, NULL, 0, NULL, ELEM_DELETE_COLL)

do_btree_elem_delete(struct default engine engine, btree_meta_info info, const int bkrtype, const bkey_range bkrange, const eflag_filter efilter, const uint32_t count, uint32_t *access_count, enum elem_delete_cause cause)

minzu0306 commented 8 years ago

do_btree_elem_delete(~~, const uint32_t count,~)

if (count > 0 && (tot_found+cur_found) >= count) break; -> 삭제할 btree의 elem의 개수가 count보다 커지면 break -> 0일 경우에는 삭제할 btree의 elem개수 상관없이 전부 삭제 진행

minzu0306 commented 8 years ago

do_btree_elem_delete()함수 세부 구조

minzu0306 commented 8 years ago
#define DELETE_NO_MERGE
#ifdef DELETE_NO_MERGE
 enum elem_delete_cause {
     ELEM_DELETE_NORMAL = 1, /* delete by normal request */
     ELEM_DELETE_COLL,       /* delete by collection deletion */
     ELEM_DELETE_TRIM,       /* delete by overflow trim */
     ELEM_DELETE_COLL_INIT   /* delete by collection deletion when initializing */
 };
#else
 enum elem_delete_cause {
     ELEM_DELETE_NORMAL = 1, /* delete by normal request */
     ELEM_DELETE_COLL,       /* delete by collection deletion */
     ELEM_DELETE_TRIM        /* delete by overflow trim*/
 };

collection_delete_thread()

#ifdef DELETE_NO_MERGE
                (void)do_btree_elem_delete(engine, info, BKEY_RANGE_TYPE_ASC, &bkrange_space,
                                           NULL, 100, NULL, ELEM_DELETE_COLL_INIT);
#else
                (void)do_btree_elem_delete(engine, info, BKEY_RANGE_TYPE_ASC, &bkrange_space,
                                           NULL, 100, NULL, ELEM_DELETE_COLL);
#endif

do_btree_elem_delete()

#ifdef DELETE_NO_MERGE
                if(cause == ELEM_DELETE_COLL_INIT) {
                    do_btree_node_remove(engine, info, path, forward, node_cnt);
                }else{
                    do_btree_node_merge(engine, info, path, forward, node_cnt);
                }
#else
                do_btree_node_merge(engine, info, path, forward, node_cnt);

#endif

do_btree_node_remove()

#ifdef DELETE_NO_MERGE
static void do_btree_node_remove(struct default_engine *engine,
                                btree_meta_info *info, btree_elem_posi *path,
                                const bool forward, const int leaf_node_count)
{
    btree_indx_node *node;
    int cur_node_count = leaf_node_count;
    int par_node_count;
    uint8_t btree_depth = 0;

    /*
     * leaf_node_count : # of leaf nodes to be removed
     * cur_node_count  : # of current nodes to be removed in the current btree depth.
     * par_node_count  : # of parent nodes that might be removed after the current remove.
     */
    while (cur_node_count > 0)
    {
        par_node_count = 0;
        if (cur_node_count == 1) {
            node = path[btree_depth].node;
            if (node == info->root) {
                if (node->used_count == 0) {
                    do_btree_node_unlink(engine, info, node, NULL);
                }
            } else {
                if (node->used_count == 0) {
                    do_btree_node_unlink(engine, info, node, &path[btree_depth+1]);
                    par_node_count = 1;
                }
            }
        } else { /* cur_node_count > 1 */
            btree_elem_posi  upth[BTREE_MAX_DEPTH]; /* upper node path */
            btree_elem_posi  s_posi;
            int tot_unlink_cnt = 0;
            int cur_unlink_cnt = 0;
            int i, upp_depth = btree_depth+1;

            /* prepare upper node path */
            for (i = upp_depth; i <= info->root->ndepth; i++) {
                upth[i] = path[i];
            }
            s_posi = upth[upp_depth];

            for (i = 1; i <= cur_node_count; i++) {
                node = BTREE_GET_NODE_ITEM(upth[upp_depth].node, upth[upp_depth].indx);
                if (node->used_count == 0) {
                    do_btree_node_detach(engine, node);
                    upth[upp_depth].node->item[upth[upp_depth].indx] = NULL;
                    assert(upth[upp_depth].node->ecnt[upth[upp_depth].indx] == 0);
                    cur_unlink_cnt++;
                }

                if (i == cur_node_count) break;

                if (forward) do_btree_incr_path(upth, upp_depth);
                else         do_btree_decr_path(upth, upp_depth);

                if (s_posi.node != upth[upp_depth].node) {
                    if (cur_unlink_cnt > 0) {
                        do_btree_node_remove_null_items(&s_posi, forward, cur_unlink_cnt);
                        tot_unlink_cnt += cur_unlink_cnt; cur_unlink_cnt = 0;
                    }
                    s_posi = upth[upp_depth];
                    par_node_count += 1;
                }
            }
            if (cur_unlink_cnt > 0) {
                do_btree_node_remove_null_items(&s_posi, forward, cur_unlink_cnt);
                tot_unlink_cnt += cur_unlink_cnt;
                par_node_count += 1;
            }
            if (tot_unlink_cnt > 0 && info->stotal > 0) { /* apply memory space */
                size_t stotal;
                if (btree_depth > 0) stotal = tot_unlink_cnt * slabs_space_size(engine, sizeof(btree_indx_node));
                else                 stotal = tot_unlink_cnt * slabs_space_size(engine, sizeof(btree_leaf_node));
                decrease_collection_space(engine, ITEM_TYPE_BTREE, (coll_meta_info *)info, stotal);
            }
        }
        btree_depth += 1;
        cur_node_count = par_node_count;
    }
    if (btree_position_debug) {
        do_btree_consistency_check(info->root, info->ccnt, true);
    }
}

#endif
minzu0306 commented 8 years ago

B+tree collection 관련 구조체

typedef struct _btree_elem_item {
    uint16_t refcount;
    uint8_t  slabs_clsid;        /* which slab class we're in */
    uint8_t  status;             /* 3(used), 2(insert mark), 1(delete_mark), or 0(free) */
    uint8_t  nbkey;              /* length of bkey */
    uint8_t  neflag;             /* length of element flag */
    uint16_t nbytes;             /**< The total size of the data (in bytes) */
    unsigned char data[1];       /* data: <bkey, [eflag,] value> */
} btree_elem_item;
typedef struct _btree_leaf_node {
    uint16_t refcount;
    uint8_t  slabs_clsid;      /* which slab class we're in */
    uint8_t  ndepth;
    uint16_t used_count;    /* element count */
    uint16_t reserved;
    struct _btree_indx_node *prev;
    struct _btree_indx_node *next;
    void    *item[BTREE_ITEM_COUNT];
} btree_leaf_node;
typedef struct _btree_indx_node {
    uint16_t refcount;
    uint8_t  slabs_clsid;      /* which slab class we're in */
    uint8_t  ndepth;
    uint16_t used_count;   /* child node count */
    uint16_t reserved;
    struct _btree_indx_node *prev;
    struct _btree_indx_node *next;
    void    *item[BTREE_ITEM_COUNT];
    uint32_t ecnt[BTREE_ITEM_COUNT];  /* 각 child node밑에 있는 elem이나 node의 개수  */
} btree_indx_node;
typedef struct _btree_meta_info {
    int32_t  mcnt;      /* maximum count */
    int32_t  ccnt;      /* current count */
    uint8_t  ovflact;   /* overflow action */
    uint8_t  mflags;    /* sticky, readable, trimmed flags */
    uint8_t  itdist;    /* distance from hash item (unit: sizeof(size_t)) */
    uint8_t  bktype;    /* bkey type : BKEY_TYPE_UINT64 or BKEY_TYPE_BINARY */
    uint32_t stotal;    /* total space */
    void    *prefix;    /* pointer to prefix meta info */
    bkey_t   maxbkeyrange;
    btree_indx_node *root;
} btree_meta_info;
minzu0306 commented 8 years ago
btree_indx_node *node;
node = do_btree_find_leaf(root, ...);
elem = do_btree_find_first(node);
c_posi.node = node
c_posi.indx 
while(1){
if (node->used_count >0){
   for(i = 0; i<node->used->count; i++)
        elem = node->item[i];
        stotal += slabs_space_size();
        do_btree_elem_free(engine, elem);

        node->item[i] = NULL;
        node->used_count -= 1;
    }
 }
 assert(node->used_count ==0);
  if(node->next == NULL){
  break;
}
else  node = node->next;
}
}

if(flag == 0){

}
minzu0306 commented 8 years ago
static btree_indx_node *do_btree_find_leaf(btree_indx_node *root,
                                           const unsigned char *bkey, const int nbkey,
                                           btree_elem_posi *path,
                                           btree_elem_item **found_elem)
{
    btree_indx_node *node = root;
    btree_elem_item *elem;
    int mid, left, right, comp;

    *found_elem = NULL; /* the same bkey is not found */

    while (node->ndepth > 0) {
        left  = 1;
        right = node->used_count-1;

        while (left <= right) {
            mid  = (left + right) / 2;
            elem = do_btree_get_first_elem((btree_indx_node *)(node->item[mid])); /* separator */
            comp = BKEY_COMP(bkey, nbkey, elem->data, elem->nbkey);
            if (comp == 0) break;
            if (comp <  0) right = mid-1;
            else           left  = mid+1;
        }

        if (left <= right) { /* found the element */
            *found_elem = elem; /* the same bkey is found */
            if (path) { 
                path[node->ndepth].node = node;
                path[node->ndepth].indx = mid;
            } /* node가 있는 ndepth 위치의 node는 node */
            /* node의 child node 중 왼쪽 node 가져옴 */
            node = do_btree_get_first_leaf((btree_indx_node *)(node->item[mid]), path);
            assert(node->ndepth == 0);
            break;
        }

        if (path) {
            path[node->ndepth].node = node;
            path[node->ndepth].indx = right;
        }
        /* node의 child node 중 왼쪽 node 가져옴 */
        node = (btree_indx_node *)(node->item[right]);
    }
    return node;
}
minzu0306 commented 8 years ago

새로운 구조체 추가

typedef struct _btree_delete_posi{
    btree_elem_posi path[BTREE_MAX_DEPTH]; 
    btree_indx_node cur_node;
    int node_cnt;
    int flag = 0;
}btree_delete_posi;

함수 구조

flag == 0 : 처음 btree delete 
flag == 1 : 중간 elem 삭제중
flag == 2 : node 삭제중  do_btree_node_remove()함수 이용
minzu0306 commented 8 years ago

do_btree_elem_delete()함수

static uint32_t do_btree_elem_delete(struct default_engine *engine, btree_meta_info *info,
                                     const int bkrtype, const bkey_range *bkrange,
                                     const eflag_filter *efilter, const uint32_t count,
                                     uint32_t *access_count, enum elem_delete_cause cause)
{
    btree_elem_posi  path[BTREE_MAX_DEPTH];
    btree_elem_item *elem;
    uint32_t tot_found = 0; /* found count */
    uint32_t tot_access = 0; /* access count */

    if (info->root == NULL) {
        if (access_count)
            *access_count = 0;
        return 0;
    } /*root가 null이면 return 0*/

    assert(info->root->ndepth < BTREE_MAX_DEPTH);

    /*삭제해야할 첫번째 리프의 첫번째 elem을 찾음 path에 ndepth별 맨 왼쪽 node 정보를 담아옴 */
    elem = do_btree_find_first(info->root, bkrtype, bkrange, path, true);

    if (elem != NULL) {
        if (bkrtype == BKEY_RANGE_TYPE_SIN) {
            assert(path[0].bkeq == true);
            tot_access++;
            if (efilter == NULL || do_btree_elem_filter(elem, efilter)) {
                /* cause == ELEM_DELETE_NORMAL */
                do_btree_elem_unlink(engine, info, path, cause);
                tot_found = 1;
            }
        } else {
            btree_elem_posi upth[BTREE_MAX_DEPTH]; /* upper node path */
            btree_elem_posi c_posi = path[0];
            btree_elem_posi s_posi = c_posi;
            size_t stotal = 0;
            int cur_found = 0;
            int node_cnt = 1;
            bool forward = (bkrtype == BKEY_RANGE_TYPE_ASC ? true : false);
            int i;

            /* prepare upper node path
             * used to incr/decr element counts  in upper nodes.
             */
            for (i = 1; i <= info->root->ndepth; i++) {
                upth[i] = path[i];
            } /* upth에 path에 대한 정보 담음 parent node에서 child node 비교하기 위해 */

            c_posi.bkeq = false;
            do {
                tot_access++;
                if (efilter == NULL || do_btree_elem_filter(elem, efilter)) {
                    stotal += slabs_space_size(engine, do_btree_elem_ntotal(elem));

                   /* elem 삭제, 아직 node에서 연결을 끊은건 아니다 */
                    if (elem->refcount > 0) {
                        elem->status = BTREE_ITEM_STATUS_UNLINK;
                    } else {
                        elem->status = BTREE_ITEM_STATUS_FREE;
                        do_btree_elem_free(engine, elem);
                    }
                    c_posi.node->item[c_posi.indx] = NULL;

                    cur_found++;
                    if (count > 0 && (tot_found+cur_found) >= count) break;
                }

                if (c_posi.bkeq == true) {
                    elem = NULL; break;
                }
                /* 다음 elem으로 연결 이때 node에 더이상 elem이 없으면 옆의 node로 이동한다. */
                elem = (forward ? do_btree_find_next(&c_posi, bkrange)
                                : do_btree_find_prev(&c_posi, bkrange));
                if (elem == NULL) break;

                /* node가 옆으로 이동하여 s_posi와 c_posi가 같지 않은 상황 */
                if (s_posi.node != c_posi.node) {
                    if (cur_found > 0) {
                        /*
                         * 삭제된 elem의 수만큼 node에서도 연결을 끊어준다.
                         * */
                        do_btree_node_remove_null_items(&s_posi, forward, cur_found);
                        /* decrement element count in upper nodes */
                        for (i = 1; i <= info->root->ndepth; i++) {
                            assert(upth[i].node->ecnt[upth[i].indx] >= cur_found);
                            upth[i].node->ecnt[upth[i].indx] -= cur_found;
                        }
                        tot_found += cur_found; cur_found = 0;
                    }

                    /* 부모노드도 자식 노드의 변경에 맞게 이동 만약 자식 노드가 옆으로 이동할 때 부모노드가 변경되면 이에 맞게 upth도 조정해줌 */
                    if (info->root->ndepth > 0) {
                        /* adjust upper node path */
                        if (forward) do_btree_incr_path(upth, 1);
                        else         do_btree_decr_path(upth, 1);
                    }
                    s_posi = c_posi;
                    node_cnt += 1;
                }
            } while (elem != NULL);

            if (cur_found > 0) {
                /* 삭제된 elem의 개수만큼 node->used_count 감소 */
                do_btree_node_remove_null_items(&s_posi, forward, cur_found);

                /* decrement element count in upper nodes */
                for (i = 1; i <= info->root->ndepth; i++) {
                    assert(upth[i].node->ecnt[upth[i].indx] >= cur_found);
                    upth[i].node->ecnt[upth[i].indx] -= cur_found;
                }
                tot_found += cur_found;
            }
            if (tot_found > 0) {
                /* 삭제한 개수만큼 space 축소 */
                info->ccnt -= tot_found;
                if (info->stotal > 0) { /* apply memory space */
                    /* The btree has already been unlinked from hash table.
                     * If then, the btree doesn't have prefix info and has stotal of 0.
                     * So, do not need to descrese space total info.
                     */
                    assert(stotal > 0 && stotal <= info->stotal);
                    decrease_collection_space(engine, ITEM_TYPE_BTREE, (coll_meta_info *)info, stotal);
                }
                /* 노드 병합 */
                do_btree_node_merge(engine, info, path, forward, node_cnt);
            }
        }
    }
    if (access_count)
        *access_count = tot_access;
    return tot_found;
}
minzu0306 commented 8 years ago
do_btree_elem_delete_fast(engine, cur_path, count)
{
/* cur_node가 NULL이면 처음 삭제이므로 가장 첫번째 node가져옴 
   NULL이 아니면 그 node부터 삭제 진행 */
cur_path->cur_node == NULL : node = do_btree_get_fist_leaf_fast(info->root)
                                 != NULL  : node = cur_path->cur_node;
stotal = 0;
 while(node != NULL){
      ucnt = node->used_count;
      /* node에 있는 elem을 차례차례 삭제해나감 */
      for(int i=0 i<node->ucnt; i++){
        stotal += slabs_space_size(engine, do_btree_elem_ntotal(elem));
        elem = (btree_elem_item *) node->item[i];
        do_btree_elem_free(engine, elem);
        node->item[i] = NULL;
        node->used_count--;
        tot_found++;
      }
      node = node->next;
      if (tot_found > 100) {
         cur_path->cur_node = node;
         info->ccnt -= tot_found;
         if (info->stotal >0) {
             assert(stotal > 0 && stotal <= info->stotal);
             decrease_collection_space(engine,..); 
         return ;
      }
   }
 do_btree_node_remove(info);
}
do_btree_node_remove(info, path)
{
  ndepth = 0;
   tot_found=0;
  while(node == info->root){
     ccnt =0;
     node = path[ndepth+1].node;
      for (int i=0;i<node->used_count; i++){
         childnode = (btree_indx_node *)node->item[i];
         do_btree_node_detach(engine, childnode);
         node->item[i] = NULL;
         ccnt++;
      }
      node->used_count -= ccnt;
      assert(node->used_count ==0);
      tot_found += ccnt;
      if (node->next !=NULL) node = node->next;
      else ndepth++;
  }
  if(node==info->root){
    do_btree_node_unlink;
  }
  info->ccnt -= tot_found;
  if (info->stotal >0) {
       assert(stotal > 0 && stotal <= info->stotal);
       decrease_collection_space(engine,..); 
  }
}
}
do_btree_node_unlink_fast(p_node, c_node)
{
    c_node->prev = NULL;
    c_node->next = NULL;

    do_btree_node_free(engine, c_node);
}
do_btree_node_remove(info, path)
{ 
    btree_depth = 1;
    int tot_unlink = 0;
    size_t stotal ;
    node = path[btree_depth].node;

    do {
        int i =0;
        int ucnt = node->used_count;
        for (i = 0; i < ucnt; i++) {
               childnode = node->item[i];
               do_btree_node_unlink_fast();
               node->item[i] = NULL;
               node->ecnt[i] = 0;
               node->used_count--;
               tot_unlink++;
        } 
        assert(node->used_count == 0);
        if (node->next != NULL) { 
            node = node->next;
        } else { 
            if (btree_depth == 1) {
                 stotal = tot_unlink * slab_space_size(engine, sizeof(btree_leaf_node));
                 tot_unlink = 0;
            }
            btree_depth ++;  
            node = path[btree_depth].node;
        }
    } while(node == info->root);

   assert(node == info->root);
   info->root = NULL;
   stotal += slabs_space_size(engine, sizeof(btree_indx_node));

   if (info->stotal > 0) {
        stotal += tot_unlink * slabs_space_size(engine, sizeof(btree_indx_node));
        decrease_collection_space(engine, ITEM_TYPE_BTREE, (coll_meta_indo *)info, stotal);
   }

}