24 Star 65 Fork 17

wgliang / goreporter

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
syntax.go 3.51 KB
一键复制 编辑 原始数据 按行查看 历史
package syntax
import (
"crypto/sha1"
"github.com/360EntSecGroup-Skylar/goreporter/linters/copycheck/suffixtree"
)
// Node is one element of a syntax tree. Besides linking to its children it
// records where it came from and how large its subtree is.
type Node struct {
	// Type is the node's kind as an integer code; Val reports it.
	Type int
	// Filename names the file the node belongs to; spansMultipleFiles
	// compares it across nodes.
	Filename string
	// Pos is presumably the start offset of the node in Filename — TODO confirm
	// against the code that builds the tree.
	Pos int
	// End is presumably the end offset of the node in Filename — TODO confirm.
	End int
	// Children are the direct descendants, in insertion order.
	Children []*Node
	// Owns is the number of descendants below this node. It is filled in by
	// serial (and therefore by Serialize); it stays zero until then.
	Owns int
}

// NewNode allocates a zero-valued Node and returns a pointer to it.
func NewNode() *Node {
	return new(Node)
}

// AddChildren appends the given nodes, in order, to n's list of children.
func (n *Node) AddChildren(children ...*Node) {
	n.Children = append(n.Children, children...)
}

// Val reports the node's Type.
func (n *Node) Val() int {
	return n.Type
}
// Match is the result type of FindSyntaxUnits. Its zero value is what
// FindSyntaxUnits returns when no usable clone was found.
type Match struct {
	// Hash is the digest FindSyntaxUnits computes with hashSeq over the node
	// types of the first occurrence.
	Hash string
	// Frags holds one entry per occurrence of the clone; each entry lists the
	// nodes that start the matched syntax units in that occurrence.
	Frags [][]*Node
}
// Serialize flattens the tree rooted at n into a slice, parents before their
// children and children in their stored order. It delegates to serial, which
// also updates the Owns field of every node it visits.
func Serialize(n *Node) []*Node {
	// Start with room for a handful of nodes; append grows it as needed.
	flat := make([]*Node, 0, 10)
	serial(n, &flat)
	return flat
}
// serial appends n to *stream and then recurses into each child in order, so
// a node always precedes its descendants. It records the number of
// descendants in n.Owns and returns the size of the subtree including n.
func serial(n *Node, stream *[]*Node) int {
	*stream = append(*stream, n)

	descendants := 0
	kids := n.Children
	for i := range kids {
		// Each call returns the size of that child's whole subtree.
		descendants += serial(kids[i], stream)
	}

	n.Owns = descendants
	return descendants + 1
}
// FindSyntaxUnits picks the complete syntax units out of the match group m and
// returns them, one fragment per occurrence, together with a hash of the
// first occurrence.
//
// data is the node sequence that the positions in m.Ps index into (presumably
// the output of Serialize — confirm against the caller). m.Len is the number
// of nodes in every occurrence and threshold is handed to getUnitsIndexes as
// the minimum unit size.
//
// The zero Match is returned when m has no positions, when no unit qualifies,
// when isCyclic reports the units as a repetition, or when
// spansMultipleFiles reports that they cross a file boundary.
func FindSyntaxUnits(data []*Node, m suffixtree.Match, threshold int) Match {
	if len(m.Ps) == 0 {
		return Match{}
	}

	start := m.Ps[0]
	firstSeq := data[start : start+m.Len]
	indexes := getUnitsIndexes(firstSeq, threshold)

	// TODO: is this really working?
	// The last unit is kept only if the node at the same offset owns the same
	// number of descendants in every other occurrence.
	if unitCnt := len(indexes); unitCnt > 0 {
		tail := indexes[unitCnt-1]
		ref := firstSeq[tail]
		for _, pos := range m.Ps[1:] {
			other := data[int(pos)+tail]
			if ref.Owns != other.Owns {
				indexes = indexes[:unitCnt-1]
				break
			}
		}
	}

	if len(indexes) == 0 {
		return Match{}
	}
	if isCyclic(indexes, firstSeq) || spansMultipleFiles(indexes, firstSeq) {
		return Match{}
	}

	// For every occurrence collect the node found at each unit offset.
	frags := make([][]*Node, len(m.Ps))
	for i, pos := range m.Ps {
		base := int(pos)
		frag := make([]*Node, len(indexes))
		for j, offset := range indexes {
			frag[j] = data[base+offset]
		}
		frags[i] = frag
	}

	last := indexes[len(indexes)-1]
	return Match{
		Hash:  hashSeq(firstSeq[indexes[0] : last+firstSeq[last].Owns]),
		Frags: frags,
	}
}
// getUnitsIndexes walks nodeSeq unit by unit and returns the start indexes of
// the last uninterrupted run of complete syntax units whose size (the node
// plus the Owns nodes below it) is at least threshold.
//
// A node whose subtree does not fit into the rest of nodeSeq is not a
// complete unit; it is stepped over one node at a time. Such a node, or a
// unit smaller than threshold, interrupts the current run, so the next
// qualifying unit starts a fresh result.
func getUnitsIndexes(nodeSeq []*Node, threshold int) []int {
	var (
		units       []int
		interrupted bool
	)

	total := len(nodeSeq)
	for pos := 0; pos < total; {
		node := nodeSeq[pos]

		if node.Owns >= total-pos {
			// not complete syntax unit
			pos++
			interrupted = true
			continue
		}

		if node.Owns+1 < threshold {
			interrupted = true
		} else {
			if interrupted {
				units = units[:0]
				interrupted = false
			}
			units = append(units, pos)
		}

		// Jump over the node together with everything it owns.
		pos += node.Owns + 1
	}
	return units
}
// isCyclic reports whether the units selected by indexes form a repetitive
// pattern inside nodes. It returns true for such a repetition, which
// FindSyntaxUnits treats as a redundant clone and discards, and false when
// there are fewer than two units or no repetition period holds.
//
// The candidate periods are the divisors of the unit count up to half of it.
// For every node offset below indexes[cnt/2] the node at that offset from the
// first unit is compared, by Owns and Type, with the node at the same offset
// from every unit that is a multiple of the period away. A period is dropped
// on the first mismatch.
func isCyclic(indexes []int, nodes []*Node) bool {
	cnt := len(indexes)
	if cnt <= 1 {
		return false
	}

	periods := make(map[int]bool)
	for p := 1; p <= cnt/2; p++ {
		if cnt%p == 0 {
			periods[p] = true
		}
	}

	limit := indexes[cnt/2]
	for offset := 0; offset < limit; offset++ {
		ref := nodes[offset+indexes[0]]

		for p := range periods {
			holds := true
			for j := p; j < cnt; j += p {
				at := offset + indexes[j]
				if at >= len(nodes) {
					// Ran past the end of the sequence: once the offset has
					// reached indexes[p] the pattern counts as cyclic.
					if offset >= indexes[p] {
						return true
					}
					holds = false
					break
				}
				other := nodes[at]
				if ref.Owns != other.Owns || ref.Type != other.Type {
					holds = false
					break
				}
			}
			if !holds {
				// Removing the current key while ranging over a map is safe.
				delete(periods, p)
			}
		}

		if len(periods) == 0 {
			return false
		}
	}
	return true
}
// spansMultipleFiles reports whether the nodes selected by indexes come from
// more than one file, judged by comparing each node's Filename with that of
// the first selected node. With fewer than two indexes the answer is false
// and nodes is not inspected.
func spansMultipleFiles(indexes []int, nodes []*Node) bool {
	if len(indexes) < 2 {
		return false
	}

	first := nodes[indexes[0]].Filename
	for _, idx := range indexes[1:] {
		if nodes[idx].Filename != first {
			return true
		}
	}
	return false
}
// hashSeq returns the SHA-1 digest of the node types in nodes as a raw
// 20-byte string (not hex encoded). Each Type contributes a single byte, so
// only its low eight bits take part in the hash.
func hashSeq(nodes []*Node) string {
	types := make([]byte, 0, len(nodes))
	for _, nd := range nodes {
		types = append(types, byte(nd.Type))
	}
	digest := sha1.Sum(types)
	return string(digest[:])
}
Go
1
https://gitee.com/wgliang/goreporter.git
git@gitee.com:wgliang/goreporter.git
wgliang
goreporter
goreporter
df1b20f7c5d0

搜索帮助