GitHub 網址: https://github.com/mozillazg/go-pinyin
demo:
package pinyin
import (
"fmt"
"github.com/mozillazg/go-pinyin"
)
// Pinyin is a demo that prints the go-pinyin conversion of a sample string
// under several Args configurations. The comment after each call shows the
// output the original author recorded for it.
func Pinyin() {
	text := "中國人"

	// Ranging over a string yields runes; print each one as a rune value,
	// as an int, and converted back to a string.
	for _, ch := range text {
		fmt.Println("r==", ch)
		fmt.Println("int(r)==", int(ch))
		fmt.Println("string(r)==", string(ch))
	}

	// Default arguments.
	args := pinyin.NewArgs()
	fmt.Println(pinyin.Pinyin(text, args))
	// [[zhong] [guo] [ren]]

	// Include tone marks.
	args.Style = pinyin.Tone
	fmt.Println(pinyin.Pinyin(text, args))
	// [[zhōng] [guó] [rén]]

	// Represent tones with digits.
	args.Style = pinyin.Tone2
	fmt.Println(pinyin.Pinyin(text, args))
	// [[zho1ng] [guo2] [re2n]]

	// Enable heteronym (multiple readings) mode, starting from fresh defaults.
	args = pinyin.NewArgs()
	args.Heteronym = true
	fmt.Println(pinyin.Pinyin(text, args))
	// [[zhong zhong] [guo] [ren]]

	args.Style = pinyin.Tone2
	fmt.Println(pinyin.Pinyin(text, args))
	// [[zho1ng zho4ng] [guo2] [re2n]]

	// Lazy variant with default arguments.
	fmt.Println(pinyin.LazyPinyin(text, pinyin.NewArgs()))
	// [zhong guo ren]

	// Convert and LazyConvert are called with nil arguments.
	fmt.Println(pinyin.Convert(text, nil))
	// [[zhong] [guo] [ren]]

	fmt.Println(pinyin.LazyConvert(text, nil))
	// [zhong guo ren]
}
源碼簡單分析:
Pinyin 函數:
// Pinyin converts the Chinese characters in s to pinyin; heteronym mode is
// supported through a.
//
// Each rune of s is passed to SinglePinyin together with a. Runes for which
// SinglePinyin returns an empty slice are skipped; every other result is
// appended, in order, to the returned slice. The return value is never nil.
func Pinyin(s string, a Args) [][]string {
	result := [][]string{}
	for _, ch := range s {
		// ch is a rune (int32).
		readings := SinglePinyin(ch, a)
		if len(readings) == 0 {
			continue
		}
		result = append(result, readings)
	}
	return result
}
SinglePinyin 函數:
// SinglePinyin converts a single Chinese character, given as a rune, to its
// pinyin readings.
//
// The rune is looked up in PinyinDict by its integer value. On a hit the
// comma-separated dictionary entry is split into individual readings; on a
// miss a.Fallback supplies the result (the package-level Fallback is used
// when a.Fallback is nil). An empty result is returned as is. Otherwise,
// unless a.Heteronym is set, only the first reading is kept, and the readings
// are passed through applyStyle before being returned.
func SinglePinyin(r rune, a Args) []string {
	// a is received by value, so this default is set on the local copy.
	if a.Fallback == nil {
		a.Fallback = Fallback
	}

	var readings []string
	if entry, found := PinyinDict[int(r)]; found {
		readings = strings.Split(entry, ",")
	} else {
		readings = a.Fallback(r, a)
	}

	// Nothing to style: hand back whatever the lookup or fallback produced.
	if len(readings) == 0 {
		return readings
	}

	if !a.Heteronym {
		// Keep only the first reading, in a fresh one-element slice.
		readings = []string{readings[0]}
	}
	return applyStyle(readings, a)
}
Fallback:對沒有拼音的字符直接返回空,即忽略該字符
// Fallback is the default policy for characters that have no pinyin: it
// returns an empty, non-nil slice for every input, which means the character
// is ignored.
var Fallback = func(_ rune, _ Args) []string {
	return make([]string, 0)
}
PinyinDict 是一個 map[int]string 結構的字典,里面保存的是: "十六進制:pinyin". 這里不清楚為啥不直接用十進制呢。(鍵其實就是漢字的 Unicode 碼點,十六進制只是碼點的慣用寫法,例如 U+3007;0x3007 與十進制 12295 在 Go 里是同一個 int 值,所以 SinglePinyin 可以直接用 int(r) 查表。)
// PinyinDict is data map
// Warning: Auto-generated file, don't edit.
//
// Keys are the integer value of a rune (SinglePinyin indexes the map with
// int(r)), written here as hexadecimal literals. Each value is a
// comma-separated list of readings, which SinglePinyin splits on ",".
// The "......" line below is the article's elision of the remaining
// entries, not Go syntax.
var PinyinDict = map[int]string{
0x3007: "líng,yuán,xīng",
0x3400: "qiū",
0x3401: "tiàn",
0x3404: "kuà",
0x3405: "wǔ",
0x3406: "yǐn",
0x340C: "yí",
......
}
applyStyle:這個函數就是決定采用什么風格,如聲母、聲標等一些風格;若想了解,自行參考代碼,就 200 多行。