Unfortunately, the framework is not well trained for Japanese.
/// Prints `text`, then one `token->lexicalClass` line for each word in it.
///
/// - Parameters:
///   - text: The string to tokenize and tag.
///   - language: The language the tagger is told to assume for the whole
///     string. Defaults to `.english`.
func tag(text: String, language: NLLanguage = .english) {
    // Blank separator line, then echo the input being analysed.
    print()
    print(text)

    let tagger = NLTagger(tagSchemes: [.lexicalClass])
    tagger.string = text
    let wholeText = text.startIndex..<text.endIndex
    tagger.setLanguage(language, range: wholeText)

    let options: NLTagger.Options = [.omitWhitespace, .omitPunctuation]
    tagger.enumerateTags(in: wholeText, unit: .word, scheme: .lexicalClass, options: options) { tag, range in
        // The tag handed to this closure is optional; force-unwrapping it
        // would crash on any token the tagger leaves untagged, so fall back
        // to a placeholder instead.
        let label = tag?.rawValue ?? "nil"
        print("\(text[range])->\(label)")
        return true // keep enumerating the remaining tokens
    }
}
// Run the tagger over each Japanese sample sentence, in order.
let japaneseSamples = [
    "目の前を犬が歩いた",
    "これをやっといてちょうだい",
]
for sample in japaneseSamples {
    tag(text: sample, language: .japanese)
}
Output:
目の前を犬が歩いた
目->OtherWord
の->OtherWord
前->OtherWord
を->OtherWord
犬->OtherWord
が->OtherWord
歩い->OtherWord
た->OtherWord
これをやっといてちょうだい
これ->OtherWord
を->OtherWord
やっ->OtherWord
とい->OtherWord
て->OtherWord
ちょうだい->OtherWord
All words are tagged as `OtherWord`.
The `rawValue` of an `NLTag` seems to be just a symbolic identifier, so you may need to localize it yourself. (In Swift, `NLTag` is a thin wrapper around `NSString`.)
I hope a future version of the framework will support more languages and syntactic parsing.
Pasting the suggested code below into a clean playground with Xcode Version 14.3 (14E222b) I get:
The dog walked The->OtherWord dog->OtherWord walked->OtherWord
Do this please Do->OtherWord this->OtherWord please->OtherWord
Is the natural language parse function no longer working?