The split_2_short_text core code is as follows:
def split_2_short_text(text, include_symbol=False):
    """
    Split a long sentence into short segments, each paired with an offset.

    The text is cut with the module-level ``re_han`` pattern (defined
    elsewhere in the file). Empty pieces are discarded without advancing
    the offset.

    :param text: str, the text to split
    :param include_symbol: bool, if true every non-empty piece is returned;
        otherwise only pieces that ``re_han`` matches at their start are kept
    :return: list of (segment, start_idx) tuples; start_idx is the summed
        length of all non-empty pieces seen before the segment
    """
    # NOTE(review): start_idx only equals the real position in ``text`` if
    # ``re_han.split`` also returns the separators (i.e. the pattern has a
    # capturing group) - confirm against the definition of ``re_han``.
    segments = []
    offset = 0
    for piece in re_han.split(text):
        if not piece:
            continue
        # Skipped pieces still advance the offset below.
        if include_symbol or re_han.match(piece):
            segments.append((piece, offset))
        offset += len(piece)
    return segments
# The split_2_short_text core code is as follows:
def split_2_short_text(text, include_symbol=False):
    """
    Long sentences are divided into short sentences.

    The text is split with the module-level ``re_han`` pattern (defined
    elsewhere in the file) and empty pieces are dropped.

    :param text: str, the text to split
    :param include_symbol: bool, if true all non-empty pieces are returned;
        if false only the pieces that ``re_han`` matches at their start
    :return: list of (sentence, idx) tuples, where idx is the total length
        of the non-empty pieces that precede the sentence
    """
    pieces = [p for p in re_han.split(text) if p]
    result = []
    pos = 0
    for p in pieces:
        if include_symbol:
            keep = True
        else:
            keep = bool(re_han.match(p))
        if keep:
            result.append((p, pos))
        # The position moves forward for kept and dropped pieces alike.
        pos += len(p)
    return result