v1.0.3

分支 (1)

标签 (3)

管理

管理

master

v1.0.3

v1.0.2

v1.0.1

se
/
hit.go

package se

import (
	"io/ioutil"
	"reflect"
	"strings"
	"unicode"

	"gitee.com/xuender/se/pb"
)

// Hit reads the file named by file.Name and stores in file.Hits the lines
// that match any of words, as computed by Hits.
//
// When the file cannot be read, file.Hits is set to the result of Hits on
// empty text and p.Delete is called with the file name (presumably to drop the
// unreadable file from the service; confirm against Delete).
func (p *FileService) Hit(file *pb.File, words []string) {
	content, err := ioutil.ReadFile(file.Name)
	if err == nil {
		file.Hits = Hits(string(content), words)

		return
	}

	// Unreadable file: record the hits of empty text, then hand the name to Delete.
	file.Hits = Hits("", words)
	p.Delete(file.Name)
}

// Hits splits text on "\n" and returns one pb.Hit for every line in which at
// least one of words is found by Index.
//
// Each hit carries the original line text, its 1-based line number, and a
// Words map from the start position of a match to its end position, both as
// returned by Index. Matching is done against a lowercased copy of the line
// in which "_" has been replaced by " ". The result is never nil.
func Hits(text string, words []string) []*pb.Hit {
	result := []*pb.Hit{}

	for n, line := range strings.Split(text, "\n") {
		normalized := strings.ReplaceAll(strings.ToLower(line), "_", " ")
		found := make(map[uint32]uint32)

		for _, word := range words {
			if start, end := Index(normalized, line, word); start >= 0 {
				found[uint32(start)] = uint32(end)
			}
		}

		// Lines without any match are not reported.
		if len(found) == 0 {
			continue
		}

		result = append(result, &pb.Hit{
			Text:  line,
			Line:  uint32(n + 1),
			Words: found,
		})
	}

	return result
}

// Index finds the first whole-word occurrence of word in a line.
//
// lower is the normalized form of the line that word is compared against
// (Hits passes the lowercased line with "_" replaced by " "), and str is the
// original line, used for the word-boundary and letter-case checks. Both are
// walked rune by rune and are expected to contain the same number of runes.
//
// It returns the start and the exclusive end of the match, counted in runes of
// str, or (-1, 0) when word is empty or no acceptable match exists.
func Index(lower, str, word string) (int, int) {
	wr := []rune(word)
	size := len(wr)

	// An empty word would "match" at every position and make the boundary
	// check index into an empty slice.
	if size == 0 {
		return -1, 0
	}

	sr := []rune(str)
	lr := []rune(lower)

	// Never read past the shorter of the two views of the line.
	limit := len(sr)
	if len(lr) < limit {
		limit = len(lr)
	}

	for i := 0; i+size <= limit; i++ {
		// Cheap first-rune test before the reflective slice comparison.
		if lr[i] != wr[0] || !reflect.DeepEqual(lr[i:i+size], wr) {
			continue
		}

		before := rune(0)
		if i > 0 {
			before = sr[i-1]
		}

		after := rune(0)
		if i+size < len(sr) {
			after = sr[i+size]
		}

		now := sr[i : i+size]

		// A rejected candidate simply moves the scan forward. Staying on the
		// rune slices keeps every offset in runes (re-slicing the strings with
		// a rune index would cut by bytes) and keeps the rune preceding the
		// next candidate available for its boundary check.
		if isOneWord(now, before, after) && isWord(now) {
			return i, i + size
		}
	}

	return -1, 0
}

// isSplit reports whether r, the rune next to a candidate match, separates
// that match from its surroundings. one is the rune of the match that touches
// r.
//
// r is not a separator only when it is a non-Han letter, one is not a Han
// rune either, and both agree on being lowercase or not. A non-positive r
// (no neighbour) always counts as a separator.
func isSplit(r, one rune) bool {
	if r <= 0 {
		return true
	}

	if !unicode.IsLetter(r) {
		return true
	}

	// Han runes on either side always form a boundary.
	if unicode.Is(unicode.Han, r) || unicode.Is(unicode.Han, one) {
		return true
	}

	// Two non-Han runes belong together only when their case class matches.
	return unicode.IsLower(r) != unicode.IsLower(one)
}

// isOneWord reports whether rs stands alone as a word: the rune before its
// first rune and the rune after its last rune must both be separators
// according to isSplit. rs must not be empty.
func isOneWord(rs []rune, before, after rune) bool {
	// The word must not continue to the left of its first rune, nor to the
	// right of its last rune.
	return isSplit(before, rs[0]) && isSplit(after, rs[len(rs)-1])
}

// isWord reports whether the letter case of rs is consistent with a single
// word.
//
// An empty slice is not a word and a single rune always is. Otherwise, when
// the first or the second rune is lowercase, every rune after the first must
// be lowercase; when neither is, no rune may be lowercase.
func isWord(rs []rune) bool {
	switch len(rs) {
	case 0:
		return false
	case 1:
		return true
	}

	// If neither of the first two runes is lowercase, the first rune already
	// satisfies the "no lowercase" rule, so only the tail needs checking in
	// both cases.
	wantLower := unicode.IsLower(rs[0]) || unicode.IsLower(rs[1])

	for _, r := range rs[1:] {
		if unicode.IsLower(r) != wantLower {
			return false
		}
	}

	return true
}