/**
 * Returns a copy of {@code text} in which every sensitive word found in the trie is
 * overwritten with {@code replace}.
 *
 * <p>A scan is started at each position whose character is not rejected by {@code skip}.
 * During a scan, characters for which {@code skip} returns true are stepped over without
 * advancing in the trie, and ASCII upper-case letters are folded to lower case before the
 * lookup. Each time the walk reaches a node flagged {@code end}, every character from the scan
 * start through the current position (stepped-over characters included) is overwritten. The
 * walk does not stop at the first hit, so the longest word starting at that position is masked.
 *
 * @param text the text to filter
 * @return the filtered text
 */
public String filter(String text) {
    StringBuilder result = new StringBuilder(text);
    int index = 0;
    while (index < result.length()) {
        if (skip(result.charAt(index))) {
            index++;
            continue;
        }
        Word word = root;
        int start = index;
        for (int i = index; i < result.length(); i++) {
            char c = result.charAt(i);
            if (skip(c)) {
                continue;
            }
            // ASCII-only case folding: 'A'..'Z' -> 'a'..'z'.
            if (c >= 'A' && c <= 'Z') {
                c += 32;
            }
            word = word.next.get(c);
            if (word == null) {
                break;
            }
            if (word.end) {
                for (int j = start; j <= i; j++) {
                    result.setCharAt(j, replace);
                }
                // Remember the last masked position; a longer word may still extend it.
                index = i;
            }
        }
        // Always step forward. After a hit, index is the last masked position, so this moves
        // to the first unprocessed character instead of re-scanning a character this method
        // has just overwritten. Without a hit, index is unchanged and simply advances by one.
        index++;
    }
    return result.toString();
}
ACProTrie类,修复bug&优化代码&代码格式化
/**
 * Returns a copy of {@code matchWord} in which every sensitive word found by walking the
 * trie (with fail-over links) is overwritten with {@code MASK}.
 *
 * <p>When a word ends at the current position, the method looks ahead along the same trie
 * branch for a longer word and masks the longest one found. Scanning then resumes at the
 * character immediately after the masked span.
 *
 * <p>NOTE(review): the index arithmetic treats {@code Word.depth} as the number of characters
 * on the path from the root to that node — confirm against the trie construction code.
 *
 * @param matchWord the text to scan
 * @return the text with matched words masked
 */
public String match(String matchWord) {
    Word walkNode = root;
    char[] wordArray = matchWord.toCharArray();
    for (int i = 0; i < wordArray.length; i++) {
        // Failure "backtracking": follow fail-over links until a node can consume the
        // current character or no further fail-over link exists.
        while (!walkNode.hasChild(wordArray[i]) && walkNode.failOver != null) {
            walkNode = walkNode.failOver;
        }
        if (!walkNode.hasChild(wordArray[i])) {
            continue;
        }
        walkNode = walkNode.next.get(wordArray[i]);
        if (!walkNode.end) {
            continue;
        }

        // A word ends at i. Probe ahead on the same branch for the longest word that
        // extends it; "longest" stays at walkNode when no longer word exists.
        Word longest = walkNode;
        Word probe = walkNode;
        for (int k = i + 1; k < wordArray.length && probe.hasChild(wordArray[k]); k++) {
            probe = probe.next.get(wordArray[k]);
            if (probe.end) {
                longest = probe;
            }
        }

        // i is the last index of the word ending at walkNode, so the span starts at
        // i - walkNode.depth + 1 and the longest word ends longest.depth - 1 after that.
        int first = i - walkNode.depth + 1;
        int last = first + longest.depth - 1;
        for (int j = first; j <= last; j++) {
            wordArray[j] = MASK;
        }

        // Resume right after the masked span. Advancing by longest.depth alone would
        // overshoot by walkNode.depth, because i already sits at the end of the shorter
        // word; the characters in between would never be scanned.
        i = last;
        walkNode = longest.failOver;
    }
    return new String(wordArray);
}
DFAFilter类filter方法,优化代码,减少一次循环。
ACProTrie类,修复bug&优化代码&代码格式化
SensitiveTest类,添加一类测试数据