1 Star 0 Fork 0

h79/goutils

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
trie.go 5.75 KB
一键复制 编辑 原始数据 按行查看 历史
package trie
import (
"strconv"
"sync"
)
const (
Empty = ""
)
type Group struct {
No string
Text string `xml:"text" json:"text"`
Index int `xml:"index" json:"index"`
}
type Trie struct {
Root *Node
}
type Node struct {
end bool
character rune
weight int
rm sync.Mutex
children map[rune]*Node
}
func newNode(character rune) *Node {
return &Node{
weight: 0,
character: character,
children: make(map[rune]*Node, 0),
}
}
func newRootNode() *Node {
return newNode(0)
}
func (node *Node) Get(r rune) (*Node, bool) {
node.rm.Lock()
defer node.rm.Unlock()
node, ok := node.children[r]
return node, ok
}
func (node *Node) Add(r rune, n *Node) *Node {
node.rm.Lock()
defer node.rm.Unlock()
node.children[r] = n
n.weight = len(node.children)
return n
}
// IsLeaf 是否叶子节点
func (node *Node) IsLeaf() bool {
return len(node.children) == 0
}
// IsRoot 是否为根节点
func (node *Node) IsRoot() bool {
return node.character == 0
}
// IsPathEnd 某个路径的结束
func (node *Node) IsPathEnd() bool {
return node.end
}
// SoftDel 置软删除状态
func (node *Node) SoftDel() {
node.end = false
}
func NewTrie() *Trie {
return &Trie{
Root: newRootNode(),
}
}
// Add 添加, return no 唯一的编号
func (tree *Trie) Add(word string) (no string) {
var (
parent = tree.Root
position = 0
found = false
runes = []rune(word)
r rune
current *Node
)
for position = 0; position < len(runes); position++ {
if position > 0 {
no += "-"
}
r = runes[position]
if current, found = parent.Get(r); found {
parent = current
} else {
parent = parent.Add(r, newNode(r))
}
no += strconv.Itoa(parent.weight)
if position == len(runes)-1 {
parent.end = true
}
}
return
}
func (tree *Trie) Del(word string) {
var (
current = tree.Root
runes = []rune(word)
position = 0
found = false
)
for position = 0; position < len(runes); position++ {
if current, found = current.Get(runes[position]); !found {
return
}
if position == len(runes)-1 {
current.SoftDel()
}
}
}
// Replace 词语替换
func (tree *Trie) Replace(text string, character rune) string {
var (
parent = tree.Root
runes = []rune(text)
length = len(runes)
left = 0
position = 0
found = false
next = func() {
parent = tree.Root
position = left
left++
}
current *Node
)
for position = 0; position < len(runes); position++ {
if current, found = parent.children[runes[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if current.IsPathEnd() && left <= position {
for i := left; i <= position; i++ {
runes[i] = character
}
}
parent = current
}
return string(runes)
}
// Filter 直接过滤掉字符串中的敏感词
func (tree *Trie) Filter(text string) string {
var (
parent = tree.Root
runes = []rune(text)
length = len(runes)
left = 0
position = 0
found = false
next = func() {
parent = tree.Root
position = left
left++
}
current *Node
resultRunes []rune
)
for position = 0; position < length; position++ {
if current, found = parent.children[runes[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
resultRunes = append(resultRunes, runes[left])
next()
continue
}
if current.IsPathEnd() {
left = position + 1
parent = tree.Root
} else {
parent = current
}
}
resultRunes = append(resultRunes, runes[left:]...)
return string(resultRunes)
}
func (tree *Trie) Validate(text string) (bool, string) {
va, te, _ := tree.ValidateReturnNo(text)
return va, te
}
// ValidateReturnNo 验证字符串是否合法,如不合法则返回false和检测到
// 的第一个敏感词
func (tree *Trie) ValidateReturnNo(text string) (bool, string, string) {
var (
parent = tree.Root
runes = []rune(text)
length = len(runes)
left = 0
position = 0
found = false
no = Empty
next = func() {
no = Empty
parent = tree.Root
position = left
left++
}
current *Node
)
for position = 0; position < len(runes); position++ {
if current, found = parent.children[runes[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if len(no) > 0 {
no += "-"
}
no += strconv.Itoa(current.weight)
if current.IsPathEnd() && left <= position {
return false, string(runes[left : position+1]), no
}
parent = current
}
return true, Empty, Empty
}
// FindInReturnNo 判断text中是否含有词库中的词
func (tree *Trie) FindInReturnNo(text string) (bool, string, string) {
validated, first, no := tree.ValidateReturnNo(text)
return !validated, first, no
}
func (tree *Trie) FindIn(text string) (bool, string) {
validated, first, _ := tree.ValidateReturnNo(text)
return !validated, first
}
// FindAll 找有所有包含在词库中的词
func (tree *Trie) FindAll(text string) []*Group {
var (
parent = tree.Root
runes = []rune(text)
length = len(runes)
left = 0
position = 0
found = false
no = Empty
next = func() {
no = Empty
parent = tree.Root
position = left
left++
}
matches []*Group
current *Node
)
for position = 0; position < length; position++ {
if current, found = parent.children[runes[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if len(no) > 0 {
no += "-"
}
no += strconv.Itoa(current.weight)
if current.IsPathEnd() && left <= position {
s := string(runes[left : position+1])
matches = append(matches, &Group{No: no, Text: s, Index: left})
no = ""
}
parent = current
}
return matches
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/h79/goutils.git
git@gitee.com:h79/goutils.git
h79
goutils
goutils
v1.2.21

搜索帮助