Open da0ka opened 4 years ago
置換不要な規則を除外できます。例えば aaa のような文字列。
`static uint replaceMaximalRepeat(RDS rds, MAXREP maximal_repeat, CODE new_code){ SEQ seq = rds->seq; PAIR *origin_pair = maximal_repeat.origin_pair; uint mr_first_origin_pos = origin_pair->f_pos; uint mr_last_origin_pos = origin_pair->b_pos; uint mr_current_origin_pos = mr_first_origin_pos; uint mr_current_leftmost_pos; uint mr_current_rightmost_pos; uint mr_next_origin_pos; uint left_pos_diff = mr_first_origin_pos - maximal_repeat.first_pos; uint right_pos_diff = maximal_repeat.last_pos - mr_first_origin_pos; uint l_pos, r_pos; uint i_pos, j_pos; uint num_replaced = 0;
LOG("# %u=%u,%u\n", new_code, origin_pair->left, origin_pair->right); while (mr_current_origin_pos <= mr_last_origin_pos) { mr_current_leftmost_pos = mr_current_origin_pos - left_pos_diff; mr_current_rightmost_pos = mr_current_origin_pos + right_pos_diff; mr_next_origin_pos = seq[mr_current_origin_pos].next; // save next because current is overwritten if (mr_next_origin_pos - left_pos_diff <= mr_current_rightmost_pos) { // if next overlap, then skip mr_next_origin_pos = seq[mr_next_origin_pos].next; } l_pos = leftPos_SQ(rds, mr_current_leftmost_pos); r_pos = rightPos_SQ(rds, mr_current_rightmost_pos); // check a[bwc] and decrement ab if (l_pos != DUMMY_POS) { decrementPairAtThePosition(rds, l_pos); removeLink_SQ(rds, l_pos); } // decrement all the pairs within [bwc] if(mr_next_origin_pos<=mr_last_origin_pos||num_replaced) for (i_pos = mr_current_leftmost_pos; i_pos < mr_current_rightmost_pos; ) { j_pos = rightPos_SQ(rds, i_pos); decrementPairAtThePosition(rds, i_pos); removeLink_SQ(rds, i_pos); seq[i_pos].code = DUMMY_CODE; i_pos = j_pos; } // check [bwc]d and decrement cd if (r_pos != DUMMY_POS) { decrementPairAtThePosition(rds, mr_current_rightmost_pos); removeLink_SQ(rds, mr_current_rightmost_pos); } if(mr_next_origin_pos>mr_last_origin_pos&&!num_replaced)break; // hit only 1 time seq[mr_current_rightmost_pos].code = DUMMY_CODE; // update block [bwc] to X then increment aX and Xd updateBlockToNewcode(rds, mr_current_leftmost_pos, mr_current_rightmost_pos, r_pos, new_code); incrementNewPairs(rds, l_pos, mr_current_leftmost_pos, r_pos, new_code); num_replaced += maximal_repeat.length - 1; mr_current_origin_pos = mr_next_origin_pos; } destructAllUniquePairs(rds);
for (int j = 0; j < rds->text_length; j++) { LOG("s[%d]=[%d (%d) %d]\n", j, seq[j].prev, seq[j].code, seq[j].next); } puts("");
return num_replaced;
} DICT CreateDictByRepair(FILE input){ RDS rds; DICT dict; PAIR *max_pair; MAXREP maximal_repeat; CODE new_code; uint cseqlen,c;
rds = createRDS(input); dict = createDictStructure(rds->text_length); cseqlen = rds->text_length; printf("Generating CFG..."); fflush(stdout); while ((max_pair = getMaxPair(rds)) != NULL) { maximal_repeat = findMaximalRepeat(rds, max_pair); new_code = addMaximalRepeatAsNewRule(rds, dict, maximal_repeat); cseqlen -= c=replaceMaximalRepeat(rds, maximal_repeat, new_code); if(!c)--dict->num_rules, dict->size_rules -= maximal_repeat.length; } getCompSeq(rds, dict); destructRDS(rds); puts("Finished!"); return dict;
}`
置換不要な規則を除外できます。例えば aaa のような文字列。
`static uint replaceMaximalRepeat(RDS rds, MAXREP maximal_repeat, CODE new_code){ SEQ seq = rds->seq; PAIR *origin_pair = maximal_repeat.origin_pair; uint mr_first_origin_pos = origin_pair->f_pos; uint mr_last_origin_pos = origin_pair->b_pos; uint mr_current_origin_pos = mr_first_origin_pos; uint mr_current_leftmost_pos; uint mr_current_rightmost_pos; uint mr_next_origin_pos; uint left_pos_diff = mr_first_origin_pos - maximal_repeat.first_pos; uint right_pos_diff = maximal_repeat.last_pos - mr_first_origin_pos; uint l_pos, r_pos; uint i_pos, j_pos; uint num_replaced = 0;
ifndef NDEBUG
endif
} DICT CreateDictByRepair(FILE input){ RDS rds; DICT dict; PAIR *max_pair; MAXREP maximal_repeat; CODE new_code; uint cseqlen,c;
}`