1 Star 0 Fork 0

h79/goutils

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
filter.go 3.16 KB
一键复制 编辑 原始数据 按行查看 历史
huqiuyun 提交于 2022-10-30 21:01 . trie
package sensitive
import (
	"bufio"
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"time"

	trie "gitee.com/h79/goutils/common/trie"
)
// Filter is a sensitive-word filter.
//
// It is backed by exactly one of two matchers, chosen in New: a regexp-based
// matcher or a trie-based matcher. The field for the unused backend is nil,
// and the methods dispatch on which one is set.
type Filter struct {
// regexp is the regexp-based backend; New sets it only when reg is true.
regexp *Regexp
// trie is the trie-based backend; New sets it only when reg is false.
trie *trie.Trie
// noise matches the characters that RemoveNoise strips from input text.
noise *regexp.Regexp
}
// New returns a sensitive-word filter.
//
// When reg is true the filter uses the regexp-based backend (NewRegexp);
// otherwise it uses the trie-based backend (trie.NewTrie). In both cases the
// default noise pattern strips runs of '|', whitespace, '&', '%', '$', '@'
// and '*'.
func New(reg bool) *Filter {
	f := &Filter{
		noise: regexp.MustCompile(`[\|\s&%$@*]+`),
	}
	if reg {
		f.regexp = NewRegexp()
	} else {
		f.trie = trie.NewTrie()
	}
	return f
}
// UpdateNoisePattern replaces the noise-removal pattern used by RemoveNoise.
//
// The pattern is compiled with regexp.MustCompile, so an invalid pattern
// panics; in that case the previous pattern is left in place.
func (filter *Filter) UpdateNoisePattern(pattern string) {
	compiled := regexp.MustCompile(pattern)
	filter.noise = compiled
}
// LoadWordDict loads a sensitive-word dictionary from the file at path.
//
// The file is opened read-only, handed to Load, and closed before returning.
// It returns the error from os.Open or from Load, unmodified.
func (filter *Filter) LoadWordDict(path string) error {
	file, openErr := os.Open(path)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	return filter.Load(file)
}
// LoadWordDictByNetworkFile loads a sensitive-word dictionary from a text
// file served over HTTP(S) at url.
//
// The request uses a client with a 5-second timeout. A response whose status
// is outside the 2xx range is rejected with an error instead of being parsed:
// Client.Get does not treat such responses as errors, so without the check an
// error page body would be loaded as if it were the word list.
//
// It returns the transport error, the status error, or the error from Load.
func (filter *Filter) LoadWordDictByNetworkFile(url string) error {
	client := http.Client{
		Timeout: 5 * time.Second,
	}
	rsp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer rsp.Body.Close()

	if rsp.StatusCode < http.StatusOK || rsp.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("load word dict from %q: unexpected status %s", url, rsp.Status)
	}
	return filter.Load(rsp.Body)
}
// Load reads rd line by line and adds every line as one word to the active
// backend.
//
// Lines are read with bufio.Reader.ReadLine, which strips a trailing "\n" or
// "\r\n". A line longer than the reader's internal buffer is delivered in
// several fragments (isPrefix == true); the fragments are joined here so the
// line is added as a single word rather than as several truncated ones.
// Empty lines are passed to the backend unchanged.
//
// It returns the first read error other than io.EOF, or nil once the input
// is exhausted.
func (filter *Filter) Load(rd io.Reader) error {
	reader := bufio.NewReader(rd)
	var (
		pending []byte // fragments of a line that overflowed the buffer
		partial bool   // true while pending holds an unfinished line
	)
	for {
		chunk, isPrefix, err := reader.ReadLine()
		if err != nil {
			if err != io.EOF {
				return err
			}
			// The input can end right after a full-buffer fragment, in
			// which case no closing fragment is ever delivered.
			if partial {
				filter.add(string(pending))
			}
			return nil
		}
		if isPrefix || partial {
			pending = append(pending, chunk...)
			partial = isPrefix
			if !partial {
				filter.add(string(pending))
				pending = pending[:0]
			}
			continue
		}
		filter.add(string(chunk))
	}
}
// add registers words with whichever backend is configured. The trie takes
// precedence when both are set; with neither set it does nothing.
func (filter *Filter) add(words string) {
	switch {
	case filter.trie != nil:
		filter.trie.Add(words)
	case filter.regexp != nil:
		// Results of the regexp backend's Add are deliberately discarded.
		_, _ = filter.regexp.Add(words)
	}
}
// del removes word from whichever backend is configured. The trie takes
// precedence when both are set; with neither set it does nothing.
func (filter *Filter) del(word string) {
	switch {
	case filter.trie != nil:
		filter.trie.Del(word)
	case filter.regexp != nil:
		filter.regexp.Del(word)
	}
}
// AddWord adds each of the given sensitive words to the filter, in order.
func (filter *Filter) AddWord(words ...string) {
	for _, word := range words {
		filter.add(word)
	}
}
// DelWord removes each of the given sensitive words from the filter, in order.
func (filter *Filter) DelWord(words ...string) {
	for _, word := range words {
		filter.del(word)
	}
}
// Filter filters sensitive words out of text by delegating to the active
// backend's Filter method: the trie when it is set, the regexp backend
// otherwise. The exact output format is defined by the backend.
func (filter *Filter) Filter(text string) string {
	if filter.trie == nil {
		return filter.regexp.Filter(text)
	}
	return filter.trie.Filter(text)
}
// Replace censors sensitive words in text by delegating to the active backend.
//
// With the trie backend, repl is passed through directly and replaceF is not
// called. With the regexp backend, replaceF(repl) is evaluated once and its
// result is passed as the replacement string, so replaceF must be non-nil in
// that mode.
func (filter *Filter) Replace(text string, repl rune, replaceF func(repl rune) string) string {
	if filter.trie == nil {
		replacement := replaceF(repl)
		return filter.regexp.Replace(text, replacement)
	}
	return filter.trie.Replace(text, repl)
}
// FindIn checks text for sensitive words.
//
// Noise characters are stripped from text first (see RemoveNoise), then the
// cleaned text is passed to the active backend's FindIn. The returned pair is
// whatever the backend reports — presumably whether a word was found and
// which one; confirm against the backend.
func (filter *Filter) FindIn(text string) (bool, string) {
	cleaned := filter.RemoveNoise(text)
	if filter.trie == nil {
		return filter.regexp.FindIn(cleaned)
	}
	return filter.trie.FindIn(cleaned)
}
// FindAll returns all matches in text as reported by the active backend.
// Unlike FindIn and Validate, the text is not de-noised before matching.
func (filter *Filter) FindAll(text string) []*trie.Group {
	if filter.trie == nil {
		return filter.regexp.FindAll(text)
	}
	return filter.trie.FindAll(text)
}
// Validate checks whether text is acceptable.
//
// Noise characters are stripped from text first (see RemoveNoise), then the
// cleaned text is passed to the active backend's Validate. The returned pair
// is whatever the backend reports — presumably a validity flag and the
// offending word; confirm against the backend.
func (filter *Filter) Validate(text string) (bool, string) {
	cleaned := filter.RemoveNoise(text)
	if filter.trie == nil {
		return filter.regexp.Validate(cleaned)
	}
	return filter.trie.Validate(cleaned)
}
// RemoveNoise strips noise such as whitespace from text by deleting every
// match of the filter's current noise pattern.
func (filter *Filter) RemoveNoise(text string) string {
	const empty = ""
	return filter.noise.ReplaceAllString(text, empty)
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/h79/goutils.git
git@gitee.com:h79/goutils.git
h79
goutils
goutils
v1.3.19

搜索帮助