tkida / MR-Repair

6 stars 0 forks source link

replaceMaximalRepeat改良 #4

Open da0ka opened 4 years ago

da0ka commented 4 years ago

置換不要な規則を除外できます。例えば aaa のような文字列。

`static uint replaceMaximalRepeat(RDS rds, MAXREP maximal_repeat, CODE new_code){ SEQ seq = rds->seq; PAIR *origin_pair = maximal_repeat.origin_pair; uint mr_first_origin_pos = origin_pair->f_pos; uint mr_last_origin_pos = origin_pair->b_pos; uint mr_current_origin_pos = mr_first_origin_pos; uint mr_current_leftmost_pos; uint mr_current_rightmost_pos; uint mr_next_origin_pos; uint left_pos_diff = mr_first_origin_pos - maximal_repeat.first_pos; uint right_pos_diff = maximal_repeat.last_pos - mr_first_origin_pos; uint l_pos, r_pos; uint i_pos, j_pos; uint num_replaced = 0;

LOG("# %u=%u,%u\n", new_code, origin_pair->left, origin_pair->right);

while (mr_current_origin_pos <= mr_last_origin_pos) {
    mr_current_leftmost_pos = mr_current_origin_pos - left_pos_diff;
    mr_current_rightmost_pos = mr_current_origin_pos + right_pos_diff;
    mr_next_origin_pos = seq[mr_current_origin_pos].next; // save next because current is overwritten
    if (mr_next_origin_pos - left_pos_diff <= mr_current_rightmost_pos) { // if next overlap, then skip
        mr_next_origin_pos = seq[mr_next_origin_pos].next;
    }

    l_pos = leftPos_SQ(rds, mr_current_leftmost_pos);
    r_pos = rightPos_SQ(rds, mr_current_rightmost_pos);
    // check a[bwc] and decrement ab
    if (l_pos != DUMMY_POS) {
        decrementPairAtThePosition(rds, l_pos);
        removeLink_SQ(rds, l_pos);
    }
    // decrement all the pairs within [bwc]
    if(mr_next_origin_pos<=mr_last_origin_pos||num_replaced)
        for (i_pos = mr_current_leftmost_pos; i_pos < mr_current_rightmost_pos; ) {
            j_pos = rightPos_SQ(rds, i_pos);
            decrementPairAtThePosition(rds, i_pos);
            removeLink_SQ(rds, i_pos);
            seq[i_pos].code = DUMMY_CODE;
            i_pos = j_pos;
        }
    // check [bwc]d and decrement cd
    if (r_pos != DUMMY_POS) {
        decrementPairAtThePosition(rds, mr_current_rightmost_pos);
        removeLink_SQ(rds, mr_current_rightmost_pos);
    }
    if(mr_next_origin_pos>mr_last_origin_pos&&!num_replaced)break; // hit only 1 time
    seq[mr_current_rightmost_pos].code = DUMMY_CODE;

    // update block [bwc] to X then increment aX and Xd
    updateBlockToNewcode(rds, mr_current_leftmost_pos, mr_current_rightmost_pos, r_pos, new_code);
    incrementNewPairs(rds, l_pos, mr_current_leftmost_pos, r_pos, new_code);

    num_replaced += maximal_repeat.length - 1;
    mr_current_origin_pos = mr_next_origin_pos;
}
destructAllUniquePairs(rds);

ifndef NDEBUG

for (int j = 0; j < rds->text_length; j++) {
    LOG("s[%d]=[%d (%d) %d]\n", j, seq[j].prev, seq[j].code, seq[j].next);
}
puts("");

endif

return num_replaced;

} DICT CreateDictByRepair(FILE input){ RDS rds; DICT dict; PAIR *max_pair; MAXREP maximal_repeat; CODE new_code; uint cseqlen,c;

rds  = createRDS(input);
dict = createDictStructure(rds->text_length);
cseqlen = rds->text_length;

printf("Generating CFG..."); fflush(stdout);
while ((max_pair = getMaxPair(rds)) != NULL) {
    maximal_repeat = findMaximalRepeat(rds, max_pair);
    new_code = addMaximalRepeatAsNewRule(rds, dict, maximal_repeat);
    cseqlen -= c=replaceMaximalRepeat(rds, maximal_repeat, new_code);
    if(!c)--dict->num_rules, dict->size_rules -= maximal_repeat.length;
}
getCompSeq(rds, dict);
destructRDS(rds);
puts("Finished!");
return dict;

}`