1 Star 0 Fork 0

h79/goutils

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
trie.go 6.49 KB
一键复制 编辑 原始数据 按行查看 历史
huqiuyun 提交于 2023-04-09 04:45 . tire use rune
package trie
import (
"strconv"
"sync"
)
const (
Empty = ""
)
type Group struct {
No string
Text string `xml:"text" json:"text"`
Index int `xml:"index" json:"index"`
}
type Trie struct {
Root *Node
}
type Node struct {
end bool
character rune
weight int
rm sync.Mutex
children map[rune]*Node
}
func newNode(character rune) *Node {
return &Node{
weight: 0,
character: character,
children: make(map[rune]*Node, 0),
}
}
func newRootNode() *Node {
return newNode(0)
}
func (node *Node) Get(r rune) (*Node, bool) {
node.rm.Lock()
defer node.rm.Unlock()
node, ok := node.children[r]
return node, ok
}
func (node *Node) Add(r rune, n *Node) *Node {
node.rm.Lock()
defer node.rm.Unlock()
node.children[r] = n
n.weight = len(node.children)
return n
}
// IsLeaf 是否叶子节点
func (node *Node) IsLeaf() bool {
return len(node.children) == 0
}
// IsRoot 是否为根节点
func (node *Node) IsRoot() bool {
return node.character == 0
}
// IsPathEnd 某个路径的结束
func (node *Node) IsPathEnd() bool {
return node.end
}
// SoftDel 置软删除状态
func (node *Node) SoftDel() {
node.end = false
}
func NewTrie() *Trie {
return &Trie{
Root: newRootNode(),
}
}
// Add 添加, return no 唯一的编号
func (tree *Trie) Add(word string) (no string) {
return tree.AddRune([]rune(word))
}
func (tree *Trie) AddRune(word []rune) (no string) {
var (
parent = tree.Root
position = 0
found = false
r rune
current *Node
)
for position = 0; position < len(word); position++ {
if position > 0 {
no += "-"
}
r = word[position]
if current, found = parent.Get(r); found {
parent = current
} else {
parent = parent.Add(r, newNode(r))
}
no += strconv.Itoa(parent.weight)
if position == len(word)-1 {
parent.end = true
}
}
return
}
func (tree *Trie) Del(word string) {
tree.DelRune([]rune(word))
}
func (tree *Trie) DelRune(word []rune) {
var (
current = tree.Root
position = 0
found = false
)
for position = 0; position < len(word); position++ {
if current, found = current.Get(word[position]); !found {
return
}
if position == len(word)-1 {
current.SoftDel()
}
}
}
// Replace 词语替换
func (tree *Trie) Replace(text string, character rune) string {
return tree.ReplaceRune([]rune(text), character)
}
func (tree *Trie) ReplaceRune(text []rune, character rune) string {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
next = func() {
parent = tree.Root
position = left
left++
}
current *Node
)
for position = 0; position < len(text); position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if current.IsPathEnd() && left <= position {
for i := left; i <= position; i++ {
text[i] = character
}
}
parent = current
}
return string(text)
}
// Filter 直接过滤掉字符串中的敏感词
func (tree *Trie) Filter(text string) string {
return tree.FilterRune([]rune(text))
}
func (tree *Trie) FilterRune(text []rune) string {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
next = func() {
parent = tree.Root
position = left
left++
}
current *Node
resultRunes []rune
)
for position = 0; position < length; position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
resultRunes = append(resultRunes, text[left])
next()
continue
}
if current.IsPathEnd() {
left = position + 1
parent = tree.Root
} else {
parent = current
}
}
resultRunes = append(resultRunes, text[left:]...)
return string(resultRunes)
}
func (tree *Trie) Validate(text string) (bool, string) {
var validated, g = tree.ValidateRune([]rune(text))
return validated, g.Text
}
// ValidateReturnNo 验证字符串是否合法,如不合法则返回false和检测到
// 的第一个敏感词
func (tree *Trie) ValidateReturnNo(text string) (bool, string, string) {
var validated, g = tree.ValidateRune([]rune(text))
return validated, g.Text, g.No
}
func (tree *Trie) ValidateRune(text []rune) (bool, Group) {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
no = Empty
next = func() {
no = Empty
parent = tree.Root
position = left
left++
}
current *Node
)
for position = 0; position < len(text); position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if len(no) > 0 {
no += "-"
}
no += strconv.Itoa(current.weight)
if current.IsPathEnd() && left <= position {
return false, Group{No: no, Text: string(text[left : position+1]), Index: left}
}
parent = current
}
return true, Group{No: "", Text: "", Index: -1}
}
// FindInReturnNo 判断text中是否含有词库中的词
func (tree *Trie) FindInReturnNo(text string) (bool, string, string) {
var validated, g = tree.ValidateRune([]rune(text))
return !validated, g.Text, g.No
}
func (tree *Trie) FindInGroup(text string) (bool, Group) {
var validated, g = tree.ValidateRune([]rune(text))
return !validated, g
}
func (tree *Trie) FindRune(text []rune) (bool, Group) {
var validated, g = tree.ValidateRune(text)
return !validated, g
}
func (tree *Trie) FindIn(text string) (bool, string) {
validated, g := tree.ValidateRune([]rune(text))
return !validated, g.Text
}
func (tree *Trie) FindAll(text string) []*Group {
return tree.FindRunes([]rune(text))
}
// FindRunes 找有所有包含在词库中的词
func (tree *Trie) FindRunes(text []rune) []*Group {
var (
parent = tree.Root
length = len(text)
left = 0
position = 0
found = false
no = Empty
next = func() {
no = Empty
parent = tree.Root
position = left
left++
}
matches []*Group
current *Node
)
for position = 0; position < length; position++ {
if current, found = parent.children[text[position]]; !found {
next()
continue
}
if !current.IsPathEnd() && position == length-1 {
next()
continue
}
if len(no) > 0 {
no += "-"
}
no += strconv.Itoa(current.weight)
if current.IsPathEnd() && left <= position {
matches = append(matches, &Group{No: no, Text: string(text[left : position+1]), Index: left})
}
parent = current
}
return matches
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/h79/goutils.git
git@gitee.com:h79/goutils.git
h79
goutils
goutils
v1.8.54

搜索帮助