代码拉取完成,页面将自动刷新
GoJieba is a Jieba Chinese Word Segmentation lib written by Go。
go get gitee.com/zxcvzxcv/gojieba
Chinese Word Segmentation Example:
package main
import (
"fmt"
"strings"
"gitee.com/zxcvzxcv/gojieba"
)
func main() {
var s string
var words []string
use_hmm := true
x := gojieba.NewJieba()
defer x.Free()
s = "我来到北京清华大学"
words = x.CutAll(s)
fmt.Println(s)
fmt.Println("全模式:", strings.Join(words, "/"))
words = x.Cut(s, use_hmm)
fmt.Println(s)
fmt.Println("精确模式:", strings.Join(words, "/"))
s = "比特币"
words = x.Cut(s, use_hmm)
fmt.Println(s)
fmt.Println("精确模式:", strings.Join(words, "/"))
x.AddWord("比特币")
s = "比特币"
words = x.Cut(s, use_hmm)
fmt.Println(s)
fmt.Println("添加词典后,精确模式:", strings.Join(words, "/"))
s = "他来到了网易杭研大厦"
words = x.Cut(s, use_hmm)
fmt.Println(s)
fmt.Println("新词识别:", strings.Join(words, "/"))
s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造"
words = x.CutForSearch(s, use_hmm)
fmt.Println(s)
fmt.Println("搜索引擎模式:", strings.Join(words, "/"))
s = "长春市长春药店"
words = x.Tag(s)
fmt.Println(s)
fmt.Println("词性标注:", strings.Join(words, ","))
s = "区块链"
words = x.Tag(s)
fmt.Println(s)
fmt.Println("词性标注:", strings.Join(words, ","))
s = "长江大桥"
words = x.CutForSearch(s, !use_hmm)
fmt.Println(s)
fmt.Println("搜索引擎模式:", strings.Join(words, "/"))
wordinfos := x.Tokenize(s, gojieba.SearchMode, !use_hmm)
fmt.Println(s)
fmt.Println("Tokenize:(搜索引擎模式)", wordinfos)
wordinfos = x.Tokenize(s, gojieba.DefaultMode, !use_hmm)
fmt.Println(s)
fmt.Println("Tokenize:(默认模式)", wordinfos)
keywords := x.ExtractWithWeight(s, 5)
fmt.Println("Extract:", keywords)
}
See example in jieba_test, extractor_test
GoJieba has a good enough performance, it maybe is the best of all the Chinese Word Segmentation lib from the angle of high performance.
Please see more details in jieba-performance-comparison, but the article is written by Chinese, Maybe someday it will be transferred to English.
i@yanyiwu.com
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。