1 Star 0 Fork 0

h79/goutils

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
api
auth
build
common
algorithm
app
archive
attributes
banner
bus
coder
compress
config
data
debug
file
git
http
images
json
list
logger
mapper
option
perf
queue
random
result
scheduler
secret
server
ssh
stringutil
svc
system
tag
template
timer
trie
trie.go
trie_test.go
version
watcher
xml
yaml
bit_flag.go
common.go
integer.go
dao
discovery
loader
mq
plugins
request
rpc
sensitive
thrift
.gitignore
LICENSE
Makefile
README.md
doc.go
error.md
go.mod
go.sum
goutils.go
goutils_test.go
version.go
克隆/下载
trie.go 6.49 KB
一键复制 编辑 原始数据 按行查看 历史
huqiuyun 提交于 2年前 . tire use rune
package trie
import (
"strconv"
"sync"
)
const (
Empty = ""
)
type Group struct {
No string
Text string `xml:"text" json:"text"`
Index int `xml:"index" json:"index"`
}
type Trie struct {
Root *Node
}
type Node struct {
end bool
character rune
weight int
rm sync.Mutex
children map[rune]*Node
}
func newNode(character rune) *Node {
return &Node{
weight: 0,
character: character,
children: make(map[rune]*Node, 0),
}
}
func newRootNode() *Node {
return newNode(0)
}
func (node *Node) Get(r rune) (*Node, bool) {
node.rm.Lock()
defer node.rm.Unlock()
node, ok := node.children[r]
return node, ok
}
func (node *Node) Add(r rune, n *Node) *Node {
node.rm.Lock()
defer node.rm.Unlock()
node.children[r] = n
n.weight = len(node.children)
return n
}
// IsLeaf 是否叶子节点
func (node *Node) IsLeaf() bool {
return len(node.children) == 0
}
// IsRoot 是否为根节点
func (node *Node) IsRoot() bool {
return node.character == 0
}
// IsPathEnd 某个路径的结束
func (node *Node) IsPathEnd() bool {
return node.end
}
// SoftDel 置软删除状态
func (node *Node) SoftDel() {
node.end = false
}
func NewTrie() *Trie {
return &Trie{
Root: newRootNode(),
}
}
// Add 添加, return no 唯一的编号
func (tree *Trie) Add(word string) (no string) {
return tree.AddRune([]rune(word))
}
func (tree *Trie) AddRune(word []rune) (no string) {
var (
parent = tree.Root
position = 0
found = false
r rune
current *Node
)
for position = 0; position < len(word); position++ {
if position > 0 {
no += "-"
}
r = word[position]
if current, found = parent.Get(r); found {
parent = current
} else {
parent = parent.Add(r, newNode(r))
}
no += strconv.Itoa(parent.weight)
if position == len(word)-1 {
parent.end = true
}
}
return
}
func (tree *Trie) Del(word string) {
tree.DelRune([]rune(word))
}
func (tree *Trie) DelRune(word []rune) {
var (
current = tree.Root
position = 0
found = false
)
for position = 0; position < len(word); position++ {
if current, found = current.Get(word[position]); !found {
return
}
if position == len(word)-1 {
current.SoftDel()
}
}
}
// Replace 词语替换
func (tree *Trie) Replace(text string, character rune) string {
return tree.ReplaceRune([]rune(text), character)
}
func (tree *Trie) ReplaceRune(text []rune, character rune) string {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
next = func() {
parent = tree.Root
position = left
left++
}
current *Node
)
for position = 0; position < len(text); position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if current.IsPathEnd() && left <= position {
for i := left; i <= position; i++ {
text[i] = character
}
}
parent = current
}
return string(text)
}
// Filter 直接过滤掉字符串中的敏感词
func (tree *Trie) Filter(text string) string {
return tree.FilterRune([]rune(text))
}
func (tree *Trie) FilterRune(text []rune) string {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
next = func() {
parent = tree.Root
position = left
left++
}
current *Node
resultRunes []rune
)
for position = 0; position < length; position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
resultRunes = append(resultRunes, text[left])
next()
continue
}
if current.IsPathEnd() {
left = position + 1
parent = tree.Root
} else {
parent = current
}
}
resultRunes = append(resultRunes, text[left:]...)
return string(resultRunes)
}
func (tree *Trie) Validate(text string) (bool, string) {
var validated, g = tree.ValidateRune([]rune(text))
return validated, g.Text
}
// ValidateReturnNo 验证字符串是否合法,如不合法则返回false和检测到
// 的第一个敏感词
func (tree *Trie) ValidateReturnNo(text string) (bool, string, string) {
var validated, g = tree.ValidateRune([]rune(text))
return validated, g.Text, g.No
}
func (tree *Trie) ValidateRune(text []rune) (bool, Group) {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
no = Empty
next = func() {
no = Empty
parent = tree.Root
position = left
left++
}
current *Node
)
for position = 0; position < len(text); position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if len(no) > 0 {
no += "-"
}
no += strconv.Itoa(current.weight)
if current.IsPathEnd() && left <= position {
return false, Group{No: no, Text: string(text[left : position+1]), Index: left}
}
parent = current
}
return true, Group{No: "", Text: "", Index: -1}
}
// FindInReturnNo 判断text中是否含有词库中的词
func (tree *Trie) FindInReturnNo(text string) (bool, string, string) {
var validated, g = tree.ValidateRune([]rune(text))
return !validated, g.Text, g.No
}
func (tree *Trie) FindInGroup(text string) (bool, Group) {
var validated, g = tree.ValidateRune([]rune(text))
return !validated, g
}
func (tree *Trie) FindRune(text []rune) (bool, Group) {
var validated, g = tree.ValidateRune(text)
return !validated, g
}
func (tree *Trie) FindIn(text string) (bool, string) {
validated, g := tree.ValidateRune([]rune(text))
return !validated, g.Text
}
func (tree *Trie) FindAll(text string) []*Group {
return tree.FindRunes([]rune(text))
}
// FindRunes 找有所有包含在词库中的词
func (tree *Trie) FindRunes(text []rune) []*Group {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
no = Empty
next = func() {
no = Empty
parent = tree.Root
position = left
left++
}
matches []*Group
current *Node
)
for position = 0; position < length; position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if len(no) > 0 {
no += "-"
}
no += strconv.Itoa(current.weight)
if current.IsPathEnd() && left <= position {
matches = append(matches, &Group{No: no, Text: string(text[left : position+1]), Index: left})
}
parent = current
}
return matches
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/h79/goutils.git
git@gitee.com:h79/goutils.git
h79
goutils
goutils
v1.9.6

搜索帮助