代码拉取完成,页面将自动刷新
package heap
import (
C "gitee.com/ljfirst/algo-go-sdk/common/constant"
"reflect"
)
// WordFrequency provides word-frequency statistics over a list of words.
//
// The original description states the goal as counting how often each word
// occurs in an English article and reporting the three most frequent words
// together with their occurrence counts. Note that Statistics, as written in
// this file, returns only the words and not their counts.
//
// Author:  ljfirst
// Version: V1.0
// Date:    2023/6/27 01:12
// Email:   ljfirst@mail.ustc.edu.cn
// Blog:    https://blog.csdn.net/ljfirst
type WordFrequency struct{}
// Statistics tallies how many times each entry of words occurs while keeping a
// priority queue limited to three entries, and returns the words that remain
// in that queue, in the order the queue polls them.
//
// It returns nil when words is empty. Otherwise the result is a non-nil slice.
//
// NOTE(review): the exact ordering of the result and the eviction behaviour of
// Offer on a full queue depend on the priority queue implementation, which is
// defined elsewhere in this package — confirm against that code.
func (m *WordFrequency) Statistics(words []string) []string {
	if len(words) == 0 {
		return nil
	}

	// Number of entries the queue is allowed to hold.
	limit := 3
	opts := []C.Options{
		C.WithQueueType(true),
		C.WithLimitSize(limit),
		C.WithGT(m.gt()),
		C.WithChange(m.change()),
		C.WithCompare(m.comp()),
	}
	queue := NewPriorityQueue(opts...)

	// counts maps each distinct word to its shared, mutable tally record.
	counts := make(map[string]*wordsType)
	for _, word := range words {
		entry, seen := counts[word]
		if seen {
			entry.Frequency++
		} else {
			entry = &wordsType{Word: word, Frequency: 1}
			counts[word] = entry
		}

		switch {
		case entry.QueueExist:
			// Already queued: let the queue react to the new frequency.
			queue.Update(entry)
		case queue.Size() < queue.limitSize:
			// Queue still has room: admit the entry unconditionally.
			entry.QueueExist = true
			queue.Offer(entry)
		case queue.Comp(entry, queue.Peak()):
			// Queue is full and the entry outranks the current head: flag the
			// head as no longer queued before offering the newcomer.
			if head, ok := queue.Peak().(*wordsType); ok {
				head.QueueExist = false
			}
			entry.QueueExist = true
			queue.Offer(entry)
		}
	}

	// Drain the queue; anything that is not a *wordsType is skipped.
	result := make([]string, 0)
	for queue.Size() != 0 {
		if polled, ok := queue.Poll().(*wordsType); ok {
			result = append(result, polled.Word)
		}
	}
	return result
}
// gt returns the index-based ordering predicate handed to the priority queue.
//
// The returned function reports whether the element at index1 has a strictly
// greater Frequency than the element at index2. It returns false instead of
// panicking when arrayV is not an array or slice, when either index is
// outside [0, len), when arrayV is not a []interface{}, or when either
// element is not a *wordsType.
func (m *WordFrequency) gt() C.GT {
	return func(arrayV interface{}, index1 int, index2 int) bool {
		value := reflect.ValueOf(arrayV)
		if kind := value.Kind(); kind != reflect.Array && kind != reflect.Slice {
			return false
		}
		// Reject indices on both sides of the valid range; a negative index
		// would otherwise panic at the element access below.
		n := value.Len()
		if index1 < 0 || index2 < 0 || index1 >= n || index2 >= n {
			return false
		}
		array, ok := arrayV.([]interface{})
		if !ok {
			return false
		}
		w1, ok := array[index1].(*wordsType)
		if !ok {
			return false
		}
		w2, ok := array[index2].(*wordsType)
		if !ok {
			return false
		}
		return w1.Frequency > w2.Frequency
	}
}
// change returns the swap function handed to the priority queue.
//
// The returned function exchanges the elements at index1 and index2 of
// arrayV in place. It does nothing when arrayV is not an array or slice, when
// either index is outside [0, len), or when arrayV is not a []interface{}.
func (m *WordFrequency) change() C.Change {
	return func(arrayV interface{}, index1 int, index2 int) {
		value := reflect.ValueOf(arrayV)
		if kind := value.Kind(); kind != reflect.Array && kind != reflect.Slice {
			return
		}
		// Reject indices on both sides of the valid range; a negative index
		// would otherwise panic at the swap below.
		n := value.Len()
		if index1 < 0 || index2 < 0 || index1 >= n || index2 >= n {
			return
		}
		array, ok := arrayV.([]interface{})
		if !ok {
			return
		}
		array[index1], array[index2] = array[index2], array[index1]
	}
}
// comp returns the value-based comparison handed to the priority queue.
//
// The returned function reports whether value1 has a strictly greater
// Frequency than value2. It returns false when either argument is not a
// *wordsType.
func (m *WordFrequency) comp() C.Compare {
	return func(value1 interface{}, value2 interface{}) bool {
		left, leftOK := value1.(*wordsType)
		right, rightOK := value2.(*wordsType)
		if !leftOK || !rightOK {
			return false
		}
		return left.Frequency > right.Frequency
	}
}
// wordsType is the per-word tally record that Statistics stores in its map
// and places in the priority queue.
type wordsType struct {
	// Word is the word being counted.
	Word string
	// Frequency is the number of occurrences seen so far.
	Frequency int
	// QueueExist marks whether this record is currently held in the priority
	// queue. The original note also mentions using it to obtain an index;
	// no such use is visible in this file.
	QueueExist bool
}
// GetAttribute returns the descriptive metadata for this algorithm: it is
// tagged with C.Heap and carries its English and Chinese display names.
func (m *WordFrequency) GetAttribute() *C.Attribute {
	desc := &C.Desc{
		Name:   "WordFrequency",
		NameCn: "词频统计",
	}
	attr := C.Attribute{
		Tags: []string{C.Heap},
		Desc: desc,
	}
	return &attr
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。