// Demo script: splits a Japanese sentence with the project's `Tokenizer`, then
// prints, for every word CoreFoundation finds inside each token, its Latin
// transcription and that transcription converted to hiragana.
let text = "8年前、東京電力福島第一原発で事故がありました。事故のあと、福島県では、放射線を出す物質で汚れた土や草、木などを取る作業をしています。"
print(text)

// The locale does not depend on the token being processed, so it is created
// once instead of once per loop iteration. "ja" is the ISO 639-1 language code
// for Japanese.
let japaneseLocale = CFLocaleCreate(kCFAllocatorDefault,
                                    CFLocaleIdentifier("ja" as CFString))

let tokens = Tokenizer.tokenize(text: text)
for token in tokens {
    let cfToken = token as CFString
    // CFRange is expressed in UTF-16 code units. `String.count` counts
    // Characters (grapheme clusters) and would produce a too-short range for
    // text containing surrogate pairs or combining sequences.
    let fullRange = CFRangeMake(0, CFStringGetLength(cfToken))

    // Skip this token rather than crash if the tokenizer cannot be created.
    guard let tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault,
                                                  cfToken,
                                                  fullRange,
                                                  kCFStringTokenizerUnitWord,
                                                  japaneseLocale) else {
        continue
    }

    var tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer)
    // The token type is an option set; "no more tokens" is the empty set.
    // Testing `isEmpty` avoids `!= .none`, which may be resolved against
    // `Optional.none` and then never terminate the loop by itself.
    while !tokenType.isEmpty {
        // Stop scanning this token at the first word that has no Latin
        // transcription (or whose conversion fails); nothing is force-cast or
        // force-unwrapped along the way.
        guard let attribute = CFStringTokenizerCopyCurrentTokenAttribute(
                  tokenizer, kCFStringTokenizerAttributeLatinTranscription),
              let romaji = attribute as? String,
              let hiragana = romaji.applyingTransform(.latinToHiragana, reverse: false) else {
            break
        }
        print("\(token) => \(romaji) => \(hiragana)")
        tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer)
    }
}