KEINOS / go-joyokanjis

A simple Go library for linting Jōyō Kanji (常用漢字) characters in Japanese text.
MIT License
3 stars 0 forks source link
go golang kanji kanjidic linter

go1.19+ Go Reference

go-joyokanjis

go-joyokanjis is a simple Go library for linting Japanese text: it determines whether a given kanji character is a jōyō-kanji (常用漢字, the regular-use Chinese characters in Japan) or not.

It is useful for making the kanji characters in a piece of writing uniform, for example by replacing old character forms with their jōyō-kanji equivalents. Currently, it supports 2,136 jōyō-kanji characters.

Note: This library works on Unicode (UTF-8) text only and does not support other Japanese character encodings such as JIS, SJIS, or EUC.

Usage

go get "github.com/KEINOS/go-joyokanjis"
import "github.com/KEINOS/go-joyokanjis/kanjis"

Examples

Detection

// ExampleIsJoyokanji detects whether a given kanji is a Joyo Kanji or not.
//
// The two runes used here render almost identically but are different code
// points: U+6F22 is the regular-use form, while U+FA47 is a CJK compatibility
// ideograph (the old form). The second line of the expected output must
// contain the U+FA47 glyph itself, because that is what the example prints.
func ExampleIsJoyokanji() {
    newKanji := '漢' // U+6F22
    if kanjis.IsJoyokanji(newKanji) {
        fmt.Printf("%s (0x%x) is Joyo Kanji\n", string(newKanji), newKanji)
    }

    // Written as an escape on purpose: Unicode normalization (NFC), which
    // editors and copy-and-paste may apply silently, folds U+FA47 into U+6F22
    // and would make a literal glyph here identical to newKanji.
    oldKanji := '\uFA47'
    if !kanjis.IsJoyokanji(oldKanji) {
        fmt.Printf("%s (0x%x) is not a Joyo Kanji\n", string(oldKanji), oldKanji)
    }

    // Output:
    // 漢 (0x6f22) is Joyo Kanji
    // 漢 (0xfa47) is not a Joyo Kanji
}

Fixing

// ExampleFixStringAsJoyo converts the old kanji characters in a string to
// their Joyo Kanji counterparts. Only old kanji that have a Joyo Kanji
// assigned are replaced; the rest of the text is printed as it was given.
//
// Prefer this function when the input is smaller than 320 Bytes.
func ExampleFixStringAsJoyo() {
    const text = "これは舊漢字です。And this is not a kanji."

    fmt.Println(kanjis.FixStringAsJoyo(text))
    // Output: これは旧漢字です。And this is not a kanji.
}
// ExampleFixFileAsJoyo reads text from a reader and writes it to a writer
// with the old kanji characters replaced by their Joyo Kanji counterparts
// (only where a Joyo Kanji is assigned to the old kanji).
//
// Prefer this function when the input is larger than 320 Bytes.
func ExampleFixFileAsJoyo() {
    // Stand-in for the content of a file.
    src := strings.NewReader(heredoc.Doc(`
        いざ、これより樂しまむ、
        仕置を受くる憂なく、
        遊びたのしむ時ぞ來ぬ、
        時ぞ來ぬれば、いちはやく、
        讀本などは投げ捨てて行く。
        ――學校休暇の歌`))

    // Destination that collects the converted text.
    dst := new(bytes.Buffer)

    // Convert to Joyo Kanji while copying from src to dst.
    err := kanjis.FixFileAsJoyo(src, dst)
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println(dst.String())
    // Output:
    // いざ、これより楽しまむ、
    // 仕置を受くる憂なく、
    // 遊びたのしむ時ぞ来ぬ、
    // 時ぞ来ぬれば、いちはやく、
    // 読本などは投げ捨てて行く。
    // ――学校休暇の歌
}
// ExampleIgnore shows how to exclude specific kanji from being fixed and how
// to clear that exclusion list again.
func ExampleIgnore() {
    const sentence = "私は渡邉です。"

    // Register '邉' and '邊' so that fixing leaves them as they are.
    kanjis.Ignore('邉', '邊')
    fmt.Println("Fix with Ignore:", kanjis.FixStringAsJoyo(sentence))

    // Empty the ignore list; '邉' is replaced again from here on.
    kanjis.ResetIgnore()
    fmt.Println("Fix with no-ignore:", kanjis.FixStringAsJoyo(sentence))

    // Output:
    // Fix with Ignore: 私は渡邉です。
    // Fix with no-ignore: 私は渡辺です。
}

Benchmark

goos: darwin
goarch: amd64
pkg: github.com/KEINOS/go-joyokanjis/kanjis
cpu: Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz
                              │ bench_results.txt │
                              │      sec/op       │
_small_size/FixStringAsJoyo-4        384.7n ±  3%
_small_size/FixFileAsJoyo-4          5.935µ ± 89%
_big_size/FixStringAsJoyo-4          308.5µ ±  2%
_big_size/FixFileAsJoyo-4            4.705µ ± 15%
geomean                              7.587µ

                              │ bench_results.txt │
                              │       B/op        │
_small_size/FixStringAsJoyo-4          32.00 ± 0%
_small_size/FixFileAsJoyo-4          8.156Ki ± 0%
_big_size/FixStringAsJoyo-4          46.62Ki ± 0%
_big_size/FixFileAsJoyo-4            8.156Ki ± 0%
geomean                              3.138Ki

                              │ bench_results.txt │
                              │     allocs/op     │
_small_size/FixStringAsJoyo-4          1.000 ± 0%
_small_size/FixFileAsJoyo-4            4.000 ± 0%
_big_size/FixStringAsJoyo-4            2.000 ± 0%
_big_size/FixFileAsJoyo-4              4.000 ± 0%
geomean                                2.378