nlpinaction / learning-nlp

nlp in action
1.03k stars 823 forks source link

更简洁的IMM #23

Open Freakwill opened 5 years ago

Freakwill commented 5 years ago
# 更加简洁(more pythonic)的IMM算法(可用OOP)

# Toy lexicon used by the demo; membership in this set decides whether a
# candidate substring counts as a word.
_dictionary = {
    '南京', '南京市', '南京市长',
    '市长',
    '长江', '长江大桥',
    '江大桥', '大桥', '桥',
}

def imm(text, maxlen=4, dictionary=None):
    """Segment *text* by inverse (backward) maximum matching.

    Scanning starts at the end of *text*.  At each position the longest
    candidate ending there, at most *maxlen* characters long, that is
    contained in the dictionary is taken as the next word.  If no candidate
    matches, that single character is skipped and does not appear in the
    result.

    Args:
        text: The string to segment.
        maxlen: Maximum candidate word length, in characters.
        dictionary: Container of known words, tested with ``in``.  Defaults
            to the module-level ``_dictionary`` when omitted.

    Returns:
        The matched words as a list, in their left-to-right order in *text*.
    """
    if dictionary is None:
        dictionary = _dictionary
    # Words are discovered right-to-left.  Append (O(1)) and reverse once at
    # the end rather than paying O(n) for every insert at the front.
    found = []
    index = len(text)
    while index > 0:
        # Try the longest window first; it can never start before the text.
        for size in range(min(index, maxlen), 0, -1):
            piece = text[index - size:index]
            if piece in dictionary:
                found.append(piece)
                index -= size
                break
        else:
            # No dictionary word ends at this position: skip one character.
            index -= 1
    found.reverse()
    return found

# Demo: segment a classic ambiguous phrase with the default window of 4.
# With the lexicon above this prints ['南京市', '长江大桥'].
result = imm(text='南京市长江大桥')
print(result)