From ef9a50f7389f15275497ecf980302ed0662729e1 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Sat, 23 Jul 2022 09:52:48 +0800 Subject: [PATCH 01/13] =?UTF-8?q?=E4=BF=AE=E6=94=B9spdx=E9=83=A8=E5=88=86?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/engine/engine.go | 2 +- util/report/spdx.go | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/analyzer/engine/engine.go b/analyzer/engine/engine.go index 4a06505..bd07a32 100644 --- a/analyzer/engine/engine.go +++ b/analyzer/engine/engine.go @@ -57,7 +57,7 @@ func (e Engine) ParseFile(filepath string) (depRoot *model.DepTree, taskInfo rep dirRoot := model.NewDirTree() depRoot = model.NewDepTree(nil) taskInfo = report.TaskInfo{ - AppName: filepath, + AppName: strings.TrimSuffix(path.Base(filepath), path.Ext(path.Base(filepath))), StartTime: time.Now().Format("2006-01-02 15:04:05"), } s := time.Now() diff --git a/util/report/spdx.go b/util/report/spdx.go index 47270c3..beb1ff0 100644 --- a/util/report/spdx.go +++ b/util/report/spdx.go @@ -103,7 +103,8 @@ func addPkgToDoc(root *model.DepTree, doc *Document) { if root.Name == "" { root.Name = doc.DocumentName } - q := []*model.DepTree{root} + q := []*model.DepTree{} + q = append(q, root.Children...) for len(q) > 0 { n := q[0] q = append(q[1:], n.Children...) @@ -129,7 +130,7 @@ func buildPkg(dep *model.DepTree) Package { PackageComment: setPkgComments(dep), RootPackage: isParent(dep), } - pkg.SPDXID = setPkgSPDXID(dep.Name, dep.VersionStr) + pkg.SPDXID = setPkgSPDXID(path.Base(dep.Name), dep.VersionStr) nodePkg[dep] = pkg return pkg } @@ -140,7 +141,7 @@ func buildDocument(root *model.DepTree, taskInfo TaskInfo) *Document { SPDXVersion: "SPDX-2.2", DataLicense: "", SPDXID: "SPDXRef-DOCUMENT", - DocumentName: path.Base(taskInfo.AppName), + DocumentName: taskInfo.AppName, DocumentNamespace: "", CreationInfo: CreationInfo{ Creators: []string{"OpenSCA-Cli"}, -- Gitee From dfebca20f5a8c7ce7be9efc0ab044083d387f638 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Fri, 29 Jul 2022 17:48:18 +0800 Subject: [PATCH 02/13] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=87=AA=E5=AE=9A?= =?UTF-8?q?=E4=B9=89maven=E4=BB=93=E5=BA=93=E6=8B=89=E5=8F=96=E6=A3=80?= =?UTF-8?q?=E6=B5=8B=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/java/ext.go | 58 +++++++++++++++++++++++++++++++++----------- util/args/args.go | 7 ++++++ 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/analyzer/java/ext.go b/analyzer/java/ext.go index 2afd04f..45d8cd4 100644 --- a/analyzer/java/ext.go +++ b/analyzer/java/ext.go @@ -7,6 +7,7 @@ package java import ( "bytes" + "crypto/tls" "fmt" "io/ioutil" "net/http" @@ -14,6 +15,7 @@ import ( "os/exec" "regexp" "strings" + "util/args" "util/cache" "util/enum/language" "util/logs" @@ -103,27 +105,37 @@ func downloadPom(dep model.Dependency, repos ...string) (data []byte, err error) tags = append(tags, dep.Name) tags = append(tags, dep.Version.Org) tags = append(tags, fmt.Sprintf("%s-%s.pom", dep.Name, dep.Version.Org)) - // 遍历仓库地址, 默认maven仓库 - for i, repo := range append(repos, `https://repo.maven.apache.org/maven2/`) { + if len(args.Config.RepUrl) > 0 { + for _, url := range args.Config.RepUrl { + url = strings.TrimSuffix(url, `/`) + `/` + repos = append(repos, url) + } + } + repos = append(repos, `https://repo.maven.apache.org/maven2/`) + // 遍历仓库地址, 默认maven仓库,可以指定私有仓库 + for i, repo := range repos { // 是否是最后一个仓库(默认的maven仓库) - last := i == len(repos) + last := i == len(repos)-1 // 拼接完整的pom下载地址 url := repo + strings.Join(tags, "/") - if rep, err := http.Get(url); err != nil { - if last { - return nil, err - } else { + if !last { + name := args.Config.RepName + password := args.Config.RepPassword + if name == "" || password == "" { + logs.Error(fmt.Errorf("missing username or password")) continue } + data, err = getFromRepo(url, name, password) + if data != nil { + return + } + continue + } + if rep, err := http.Get(url); err != nil { + return nil, err } else { defer rep.Body.Close() - if rep.StatusCode != 200 { - if last { - return ioutil.ReadAll(rep.Body) - } else { - continue - } - } else { + if rep.StatusCode == 200 { return ioutil.ReadAll(rep.Body) } } @@ -132,6 +144,24 @@ func downloadPom(dep model.Dependency, repos ...string) (data []byte, err error) return nil, fmt.Errorf("download failure") } +// 从私服库获取pom文件 +func getFromRepo(url string, name string, password string) (data []byte, err error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } else { + req.SetBasicAuth(name, password) + if rep, err := (&http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}).Do(req); err == nil { + defer rep.Body.Close() + logs.Debug(fmt.Sprintf("status code: %d url: %s", rep.StatusCode, url)) + if rep.StatusCode == 200 { + return ioutil.ReadAll(rep.Body) + } + } + } + return nil, fmt.Errorf("download from repository failure") +} + // getpom is get pom from index func getpom(groupId, artifactId, version string) (p *Pom) { p = &Pom{Properties: PomProperties{}} diff --git a/util/args/args.go b/util/args/args.go index d0f5406..8fe5847 100644 --- a/util/args/args.go +++ b/util/args/args.go @@ -30,6 +30,10 @@ var ( Token string `json:"token"` // local vuldb VulnDB string `json:"db"` + // prvate repository + RepUrl []string `json:"repurl"` + RepName string `json:"name"` + RepPassword string `json:"password"` }{} ) @@ -44,6 +48,9 @@ func init() { flag.StringVar(&Config.VulnDB, "db", Config.VulnDB, "(可选) 指定本地漏洞库文件,希望使用自己漏洞库时可用,漏洞库文件为json格式,具体格式会在开源项目文档中给出;若同时使用云端漏洞库与本地漏洞库,漏洞查询结果取并集,例: -db db.json") flag.BoolVar(&Config.Bar, "progress", Config.Bar, "(可选) 显示进度条") flag.BoolVar(&Config.Dedup, "dedup", Config.Dedup, "(可选) 相同组件去重") + // flag.StringVar(&Config.RepUrl, "repurl", Config.RepUrl, "(可选) 自定义仓库地址,例: -repurl http://192.168.0.10:8081/repository/maven-central/") + // flag.StringVar(&Config.RepName, "name", Config.RepName, "(可选) 自定义仓库登录名") + // flag.StringVar(&Config.RepPassword, "password", Config.RepPassword, "(可选) 自定义仓库登录密码") } func Parse() { -- Gitee From 02c0bdd088da930bc387cd72ad4ecdda84ef8143 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Fri, 29 Jul 2022 17:50:29 +0800 Subject: [PATCH 03/13] =?UTF-8?q?=E5=88=A9=E7=94=A8pipenv=E7=8E=AF?= =?UTF-8?q?=E5=A2=83=E8=B0=83=E7=94=A8pip-compile=E5=91=BD=E4=BB=A4?= =?UTF-8?q?=E8=8E=B7=E5=8F=96python=E7=89=B9=E5=BE=81=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/python/analyzer.go | 12 ++- analyzer/python/pipfile.go | 26 +++++ analyzer/python/req.go | 194 ++++++++++++++++++++++++++++++++++++ analyzer/python/setup.go | 52 +++++++++- util/bar/bar.go | 17 ++-- util/ex/python.go | 91 +++++++++++++++++ util/filter/file.go | 12 ++- 7 files changed, 391 insertions(+), 13 deletions(-) create mode 100644 analyzer/python/req.go create mode 100644 util/ex/python.go diff --git a/analyzer/python/analyzer.go b/analyzer/python/analyzer.go index 047d352..8b7c32b 100644 --- a/analyzer/python/analyzer.go +++ b/analyzer/python/analyzer.go @@ -22,7 +22,9 @@ func (Analyzer) GetLanguage() language.Type { func (Analyzer) CheckFile(filename string) bool { return filter.PythonSetup(filename) || filter.PythonPipfile(filename) || - filter.PythonPipfileLock(filename) + filter.PythonPipfileLock(filename) || + filter.PythonRequirementsTxt(filename) || + filter.PythonSetupCfg(filename) } // ParseFiles parse dependency from file @@ -32,11 +34,17 @@ func (Analyzer) ParseFiles(files []*model.FileInfo) []*model.DepTree { dep := model.NewDepTree(nil) dep.Path = f.Name if filter.PythonSetup(f.Name) { - parseSetup(dep, f) + if !parseSetup(dep, f) { + parseSetupPy(dep, f) + } + } else if filter.PythonSetupCfg(f.Name) { + parseSetupCfg(dep, f) } else if filter.PythonPipfile(f.Name) { parsePipfile(dep, f) } else if filter.PythonPipfileLock(f.Name) { parsePipfileLock(dep, f) + } else if filter.PythonRequirementsTxt(f.Name) || filter.PythonRequirementsIn(f.Name) { + parseRequirementsin(dep, f) } deps = append(deps, dep) } diff --git a/analyzer/python/pipfile.go b/analyzer/python/pipfile.go index 7157522..924779f 100644 --- a/analyzer/python/pipfile.go +++ b/analyzer/python/pipfile.go @@ -2,6 +2,8 @@ package python import ( "encoding/json" + "os" + "path" "strings" "util/logs" "util/model" @@ -11,6 +13,12 @@ import ( // parsePipfile parse Pipfile file func parsePipfile(root *model.DepTree, file *model.FileInfo) { + dir := path.Dir(file.Name) + if fileExist(path.Join(dir, "setup.py")) || + fileExist(path.Join(dir, "setup.cfg")) || + fileExist(path.Join(dir, "requirements.txt")) { + return + } pip := struct { DevPackages map[string]string `toml:"dev-packages"` Packages map[string]string `toml:"packages"` @@ -30,8 +38,26 @@ func parsePipfile(root *model.DepTree, file *model.FileInfo) { } } +// Determine whether the file exists +func fileExist(path string) bool { + _, err := os.Stat(path) + if err == nil { + return true + } + if os.IsNotExist(err) { + return false + } + return false +} + // parsePipfileLock parse pipfile.lock file func parsePipfileLock(root *model.DepTree, file *model.FileInfo) { + dir := path.Dir(file.Name) + if fileExist(path.Join(dir, "setup.py")) || + fileExist(path.Join(dir, "setup.cfg")) || + fileExist(path.Join(dir, "requirements.txt")) { + return + } lock := struct { Default map[string]struct { Version string `json:"version"` diff --git a/analyzer/python/req.go b/analyzer/python/req.go new file mode 100644 index 0000000..7f5910c --- /dev/null +++ b/analyzer/python/req.go @@ -0,0 +1,194 @@ +package python + +import ( + "bufio" + "os" + "path" + "regexp" + "strings" + "util/bar" + "util/ex" + "util/logs" + "util/model" + "util/temp" +) + +var replacer *strings.Replacer +var reg *regexp.Regexp + +func init() { + replacers := []string{"#", "", " ", ""} + replacer = strings.NewReplacer(replacers...) + // requirments.txt文件内组件可能没有版本号 + reg = regexp.MustCompile(`^[\w-_]+[=<~>]{0,2}[\d\.\w]*$`) +} + +func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { + // 检查python环境 + if _, err := ex.CheckPython(ex.Python); err != nil { + return + } + temp.DoInTempDir(func(tempdir string) { + // 安装piptools + _, err := ex.Do(ex.PipinstallPiptoos, tempdir) + if err != nil { + return + } + // 从txt文件中提取组件信息放入新建的in文件 + if !createInfromTxt(file, tempdir) { + return + } + // 运行pip-compile获取依赖数据 + res, err := ex.Do(ex.PipCompilein, tempdir) + if err != nil { + return + } + // 解析依赖数据 + parseRes(root, res) + // 删除虚拟环境 + defer ex.Do(ex.RemoveVirtualCmd, tempdir) + }) +} + +func createInfromTxt(file *model.FileInfo, dir string) bool { + s := string(file.Data) + list := []string{} + reqpath := path.Join(dir, `requirements.in`) + out, err := os.Create(reqpath) + if err != nil { + logs.Error(err) + return false + } + out.Close() + for _, v := range strings.Split(s, "\n") { + if reg.MatchString(v) { + bar.PipCompile.Add(1) + // 为了避免单个组件失败影响整体解析,这里加一段判断组件合法性的逻辑,此步骤会使整体速度相对来说要慢一些。 + // 相当于pip-compile进行了两遍 + if !isValid(reqpath, v) { + continue + } + list = append(list, v) + } + } + f, err := os.OpenFile(reqpath, os.O_CREATE, 0666) + f.Seek(0, 0) + f.Truncate(0) + if err != nil { + logs.Error(err) + return false + } + defer f.Close() + for _, v := range list { + w := bufio.NewWriter(f) + _, err = w.WriteString(v + "\n") + if err != nil { + continue + } + w.Flush() + } + return true +} + +func isValid(reqpath string, elem string) bool { + f, err := os.OpenFile(reqpath, os.O_CREATE, 0666) + if err != nil { + return false + } + f.Seek(0, 0) + f.Truncate(0) + f.WriteString(elem) + f.Close() + if _, err := ex.Do(ex.PipCompilein, path.Dir(reqpath)); err != nil { + return false + } + return true +} + +type node struct { + name string + version string + dependFrom map[string][]string +} + +func parseRes(root *model.DepTree, s string) { + if len(s) == 0 { + return + } + lines := strings.Split(s, "\n") + for i, v := range lines { + if strings.Contains(v, "==") { + lines = lines[i:] + break + } + } + nodes := []node{} + dm := map[string]*model.DepTree{} + cur := node{name: "", version: "", dependFrom: make(map[string][]string)} + for _, v := range lines { + if strings.Contains(v, "==") { + nodes = append(nodes, cur) + cur = node{name: "", version: "", dependFrom: make(map[string][]string)} + v = strings.TrimSuffix(v, "\r") + nv := strings.Split(v, `==`) + if len(nv) == 2 { + cur.name = nv[0] + cur.version = nv[1] + } + } else if strings.Contains(v, "#") { + v = replacer.Replace(v) + v = strings.TrimPrefix(v, "via") + v = strings.TrimSuffix(v, "\r") + if v == "" { + continue + } + cur.dependFrom[cur.name] = append(cur.dependFrom[cur.name], v) + } + } + if len(nodes) == 0 { + return + } + nodes = nodes[1:] + for _, v := range nodes { + flag := false + for _, s := range v.dependFrom[v.name] { + if (strings.Contains(s, "requirements.in") || + strings.Contains(s, "(setup.py)") || + strings.Contains(s, "(setup.cfg)")) && len(v.dependFrom[v.name]) == 1 { + n := model.NewDepTree(root) + n.Name = v.name + n.Version = model.NewVersion(v.version) + flag = true + } + } + if flag { + continue + } + n := model.NewDepTree(nil) + n.Name = v.name + n.Version = model.NewVersion(v.version) + dm[v.name] = n + } + q := []*model.DepTree{} + q = append(q, root.Children...) + for len(q) > 0 { + treeNode := q[0] + for _, node := range nodes { + for _, parentName := range node.dependFrom[node.name] { + if parentName == treeNode.Name { + if d, ok := dm[node.name]; ok { + if d.Parent != nil { + nd := model.NewDepTree(nil) + nd.Dependency = d.Dependency + d = nd + } + treeNode.Children = append(treeNode.Children, d) + d.Parent = treeNode + break + } + } + } + } + q = append(q[1:], treeNode.Children...) + } +} diff --git a/analyzer/python/setup.go b/analyzer/python/setup.go index 4f79b56..d8a4286 100644 --- a/analyzer/python/setup.go +++ b/analyzer/python/setup.go @@ -7,6 +7,8 @@ import ( "os/exec" "path" "strings" + "util/bar" + "util/ex" "util/logs" "util/model" "util/temp" @@ -26,7 +28,8 @@ type setupDep struct { } // parseSetup 解析 setup.py 文件 -func parseSetup(root *model.DepTree, file *model.FileInfo) { +func parseSetup(root *model.DepTree, file *model.FileInfo) (suc bool) { + suc = false temp.DoInTempDir(func(tempdir string) { ossfile := path.Join(tempdir, "oss.py") setupfile := path.Join(tempdir, "setup.py") @@ -71,5 +74,52 @@ func parseSetup(root *model.DepTree, file *model.FileInfo) { } } }) + suc = true return } + +// 如果第一种方式解析setup.py不成功,则用另外一种方式 +func parseSetupPy(root *model.DepTree, file *model.FileInfo) { + bar.PipSetupPyCompile.Add(1) + // 检查python环境 + if _, err := ex.CheckPython(ex.Python); err != nil { + return + } + dir := path.Dir(file.Name) + _, err := ex.Do(ex.PipinstallPiptoos, dir) + if err != nil { + return + } + res, err := ex.Do(ex.PipcompileSetup, dir) + if err != nil { + return + } + os.Remove(path.Join(dir, "temp.txt")) + parseRes(root, res) + defer ex.Do(ex.RemoveVirtualCmd, dir) +} + +// 同目录下已经解析了setup.py则无需再解析setup.cfg,结果是一样的。 +func parseSetupCfg(root *model.DepTree, file *model.FileInfo) { + dir := path.Dir(file.Name) + if fileExist(path.Join(dir, "setup.py")) { + return + } + bar.PipSetupCfgCompile.Add(1) + // 检查python环境 + if _, err := ex.CheckPython(ex.Python); err != nil { + return + } + dir = path.Dir(file.Name) + _, err := ex.Do(ex.PipinstallPiptoos, dir) + if err != nil { + return + } + res, err := ex.Do(ex.PipCompileCfg, dir) + if err != nil { + return + } + os.Remove(path.Join(dir, "temp.txt")) + parseRes(root, res) + defer ex.Do(ex.RemoveVirtualCmd, dir) +} diff --git a/util/bar/bar.go b/util/bar/bar.go index 9085e4e..ff5b539 100644 --- a/util/bar/bar.go +++ b/util/bar/bar.go @@ -11,13 +11,16 @@ import ( ) var ( - id int = 0 - Dir *Bar = newBar("scan dir") - Archive *Bar = newBar("unarchive") - Maven *Bar = newBar("parse maven indirect dependency") - Npm *Bar = newBar("parse npm indirect dependency") - Composer *Bar = newBar("parse composer indirect dependency") - Dependency *Bar = newBar("parse project dependency") + id int = 0 + Dir *Bar = newBar("scan dir") + Archive *Bar = newBar("unarchive") + Maven *Bar = newBar("parse maven indirect dependency") + Npm *Bar = newBar("parse npm indirect dependency") + Composer *Bar = newBar("parse composer indirect dependency") + Dependency *Bar = newBar("parse project dependency") + PipCompile *Bar = newBar("parse requirements.txt module") + PipSetupPyCompile *Bar = newBar("parse pip setup.py") + PipSetupCfgCompile *Bar = newBar("parse pip setup.cfg") ) // mult pargress bar diff --git a/util/ex/python.go b/util/ex/python.go new file mode 100644 index 0000000..e1687d0 --- /dev/null +++ b/util/ex/python.go @@ -0,0 +1,91 @@ +package ex + +import ( + "bytes" + "os/exec" + "strings" + "util/logs" + + "github.com/axgle/mahonia" +) + +const ( + Python string = "python" + PipinstallPiptoos string = "pipenv install pip-tools --skip-lock" + PipCompilein string = "pipenv run pip-compile requirements.in" + PipCompileCfg string = "pipenv run pip-compile setup.cfg -o temp.txt" + PipcompileSetup string = "pipenv run pip-compile setup.py" + // PipShowCmd string = "pipenv graph" + RemoveVirtualCmd string = "pipenv --rm" +) + +type CmdOpts struct { + Name string + Args []string + Dir string +} + +func Do(c string, dir string) (out string, err error) { + cmd := GetCmdOpts(c, dir).BuildCmd() + out, err = Excute(cmd) + if err != nil { + logs.Error(err) + return + } + return +} + +func CheckPython(py string) (s string, err error) { + s, err = exec.LookPath(py) + if err != nil { + logs.Error(err) + } + return +} + +func GetCmdOpts(c string, dir string) *CmdOpts { + list := strings.Fields(string(c)) + if len(list) == 0 { + return &CmdOpts{} + } + return &CmdOpts{ + Name: list[0], + Args: list[1:], + Dir: dir, + } +} + +func (c *CmdOpts) BuildCmd() (ec *exec.Cmd) { + ec = exec.Command(c.Name, c.Args...) + ec.Dir = c.Dir + return +} + +// 执行 +func Excute(cmd *exec.Cmd) (s string, err error) { + var out bytes.Buffer + var errs bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &errs + err = cmd.Run() + if err != nil { + logs.Error(err) + return + } + if len(out.String()) == 0 { + s = Convert(errs.String(), "gbk", "utf-8") + return + } + s = Convert(out.String(), "gbk", "utf-8") + return +} + +// 编码转换 +func Convert(s string, source string, target string) string { + srcCoder := mahonia.NewDecoder(source) + res := srcCoder.ConvertString(s) + t := mahonia.NewDecoder(target) + _, cdata, _ := t.Translate([]byte(res), true) + result := string(cdata) + return result +} diff --git a/util/filter/file.go b/util/filter/file.go index 55f1b70..4826d52 100644 --- a/util/filter/file.go +++ b/util/filter/file.go @@ -91,9 +91,15 @@ var ( // python var ( - PythonSetup = filterFunc(strings.HasSuffix, "setup.py") - PythonPipfile = filterFunc(strings.HasSuffix, "Pipfile") - PythonPipfileLock = filterFunc(strings.HasSuffix, "Pipfile.lock") + PythonSetup = filterFunc(strings.HasSuffix, "setup.py") + PythonPipfile = filterFunc(strings.HasSuffix, "Pipfile") + PythonPipfileLock = filterFunc(strings.HasSuffix, "Pipfile.lock") + PythonRequirementsTxt = func(filename string) bool { + return filterFunc(strings.HasSuffix, ".txt")(filename) && + filterFunc(strings.Contains, "requirements")(path.Base(filename)) && !filterFunc(strings.Contains, "test")(path.Base(filename)) + } + PythonRequirementsIn = filterFunc(strings.HasSuffix, "requirements.in") + PythonSetupCfg = filterFunc(strings.HasSuffix, "setup.cfg") ) // 用于筛选可能有copyright信息的文件 -- Gitee From 92085406b335011f42aa08787b976c74e4a7c2a6 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Tue, 16 Aug 2022 15:53:37 +0800 Subject: [PATCH 04/13] =?UTF-8?q?=E4=BF=AE=E6=94=B9pipenv=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/python/analyzer.go | 8 +- analyzer/python/pipfile.go | 25 +--- analyzer/python/req.go | 233 +++++++++++++++--------------------- analyzer/python/setup.go | 57 +-------- util/args/args.go | 13 +- util/ex/python.go | 19 +-- util/filter/file.go | 2 +- 7 files changed, 121 insertions(+), 236 deletions(-) diff --git a/analyzer/python/analyzer.go b/analyzer/python/analyzer.go index 8b7c32b..807dacb 100644 --- a/analyzer/python/analyzer.go +++ b/analyzer/python/analyzer.go @@ -24,7 +24,7 @@ func (Analyzer) CheckFile(filename string) bool { filter.PythonPipfile(filename) || filter.PythonPipfileLock(filename) || filter.PythonRequirementsTxt(filename) || - filter.PythonSetupCfg(filename) + filter.PythonRequirementsIn(filename) } // ParseFiles parse dependency from file @@ -34,11 +34,7 @@ func (Analyzer) ParseFiles(files []*model.FileInfo) []*model.DepTree { dep := model.NewDepTree(nil) dep.Path = f.Name if filter.PythonSetup(f.Name) { - if !parseSetup(dep, f) { - parseSetupPy(dep, f) - } - } else if filter.PythonSetupCfg(f.Name) { - parseSetupCfg(dep, f) + parseSetup(dep, f) } else if filter.PythonPipfile(f.Name) { parsePipfile(dep, f) } else if filter.PythonPipfileLock(f.Name) { diff --git a/analyzer/python/pipfile.go b/analyzer/python/pipfile.go index 924779f..223bf0f 100644 --- a/analyzer/python/pipfile.go +++ b/analyzer/python/pipfile.go @@ -4,7 +4,6 @@ import ( "encoding/json" "os" "path" - "strings" "util/logs" "util/model" @@ -14,9 +13,7 @@ import ( // parsePipfile parse Pipfile file func parsePipfile(root *model.DepTree, file *model.FileInfo) { dir := path.Dir(file.Name) - if fileExist(path.Join(dir, "setup.py")) || - fileExist(path.Join(dir, "setup.cfg")) || - fileExist(path.Join(dir, "requirements.txt")) { + if fileExist(path.Join(dir, "requirements.txt")) { return } pip := struct { @@ -29,12 +26,12 @@ func parsePipfile(root *model.DepTree, file *model.FileInfo) { for name, version := range pip.Packages { dep := model.NewDepTree(root) dep.Name = name - dep.Version = model.NewVersion(formatVer(version)) + dep.Version = model.NewVersion(version) } for name, version := range pip.DevPackages { dep := model.NewDepTree(root) dep.Name = name - dep.Version = model.NewVersion(formatVer(version)) + dep.Version = model.NewVersion(version) } } @@ -53,9 +50,7 @@ func fileExist(path string) bool { // parsePipfileLock parse pipfile.lock file func parsePipfileLock(root *model.DepTree, file *model.FileInfo) { dir := path.Dir(file.Name) - if fileExist(path.Join(dir, "setup.py")) || - fileExist(path.Join(dir, "setup.cfg")) || - fileExist(path.Join(dir, "requirements.txt")) { + if fileExist(path.Join(dir, "requirements.txt")) { return } lock := struct { @@ -76,17 +71,7 @@ func parsePipfileLock(root *model.DepTree, file *model.FileInfo) { if v != "" { dep := model.NewDepTree(root) dep.Name = n - dep.Version = model.NewVersion(formatVer(v)) + dep.Version = model.NewVersion(v) } } - return -} - -// 后续使用其他办法确定版本号 -func formatVer(v string) string { - res := strings.ReplaceAll(v, "==", "") - res = strings.ReplaceAll(res, "~=", "") - res = strings.ReplaceAll(res, ">=", "") - res = strings.ReplaceAll(res, "<=", "") - return res } diff --git a/analyzer/python/req.go b/analyzer/python/req.go index 7f5910c..8e6cb92 100644 --- a/analyzer/python/req.go +++ b/analyzer/python/req.go @@ -1,10 +1,11 @@ package python import ( - "bufio" + "fmt" "os" "path" "regexp" + "sort" "strings" "util/bar" "util/ex" @@ -20,7 +21,7 @@ func init() { replacers := []string{"#", "", " ", ""} replacer = strings.NewReplacer(replacers...) // requirments.txt文件内组件可能没有版本号 - reg = regexp.MustCompile(`^[\w-_]+[=<~>]{0,2}[\d\.\w]*$`) + reg = regexp.MustCompile(`^\w[\w-_\.]+[!=<~>]{0,2}[\.\w]*`) } func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { @@ -28,167 +29,131 @@ func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { if _, err := ex.CheckPython(ex.Python); err != nil { return } + strArry := []string{} temp.DoInTempDir(func(tempdir string) { // 安装piptools - _, err := ex.Do(ex.PipinstallPiptoos, tempdir) - if err != nil { + if _, err := ex.Do(ex.PipinstallPiptoos, tempdir); err != nil { + logs.Error(err) return } - // 从txt文件中提取组件信息放入新建的in文件 - if !createInfromTxt(file, tempdir) { - return - } - // 运行pip-compile获取依赖数据 - res, err := ex.Do(ex.PipCompilein, tempdir) - if err != nil { - return - } - // 解析依赖数据 - parseRes(root, res) // 删除虚拟环境 defer ex.Do(ex.RemoveVirtualCmd, tempdir) + // 获取输出数据 + strArry = getOutData(file, tempdir) }) + // 解析输出数据构建依赖树 + parseOutData(root, strArry) } -func createInfromTxt(file *model.FileInfo, dir string) bool { +func parseOutData(root *model.DepTree, strs []string) { + directMap := map[string]*model.DepTree{} + for _, str := range strs { + lines := strings.Split(str, "\n") + for i, line := range lines { + if reg.MatchString(line) { + lines = lines[i:] + break + } + } + nodes := []*model.DepTree{} + var depFrom = make(map[string][]string) + cur := model.NewDepTree(nil) + for _, line := range lines { + if strings.Contains(line, "==") { + nodes = append(nodes, cur) + cur = model.NewDepTree(nil) + line = strings.TrimSuffix(line, "\r") + nv := strings.Split(line, `==`) + if len(nv) == 2 { + cur.Name = strings.TrimSpace(nv[0]) + cur.Version = model.NewVersion(strings.TrimSpace(nv[1])) + } + } else if strings.Contains(line, "#") { + line = replacer.Replace(line) + line = strings.TrimPrefix(line, "via") + line = strings.TrimSuffix(line, "\r") + if line == "" { + continue + } + depFrom[cur.Name] = append(depFrom[cur.Name], line) + } + } + nodes = append(nodes, cur) + if len(nodes) == 0 { + continue + } + nodes = nodes[1:] + depMap := map[string]*model.DepTree{} + for _, n := range nodes { + depMap[n.Name] = n + } + for _, n := range nodes { + if dep, ok := directMap[n.Name]; ok { + n.Dependency = dep.Dependency + n.Children = dep.Children + } + if v, ok := depFrom[n.Name]; ok { + for _, x := range v { + if len(v) == 1 && strings.Contains(v[0], "requirements") { + if dep, ok := depMap[n.Name]; ok { + directMap[dep.Name] = dep + } + continue + } + if parent, ok := depMap[x]; ok { + n.Parent = parent + parent.Children = append(parent.Children, n) + } + } + } + } + } + direct := []*model.DepTree{} + for _, n := range directMap { + direct = append(direct, n) + } + sort.Slice(direct, func(i, j int) bool { + return direct[i].Name < direct[j].Name + }) + for _, d := range direct { + root.Children = append(root.Children, d) + d.Parent = root + } +} + +func getOutData(file *model.FileInfo, dir string) []string { s := string(file.Data) - list := []string{} + strList := []string{} reqpath := path.Join(dir, `requirements.in`) out, err := os.Create(reqpath) if err != nil { logs.Error(err) - return false + return strList } out.Close() for _, v := range strings.Split(s, "\n") { if reg.MatchString(v) { bar.PipCompile.Add(1) - // 为了避免单个组件失败影响整体解析,这里加一段判断组件合法性的逻辑,此步骤会使整体速度相对来说要慢一些。 - // 相当于pip-compile进行了两遍 - if !isValid(reqpath, v) { - continue - } - list = append(list, v) - } - } - f, err := os.OpenFile(reqpath, os.O_CREATE, 0666) - f.Seek(0, 0) - f.Truncate(0) - if err != nil { - logs.Error(err) - return false - } - defer f.Close() - for _, v := range list { - w := bufio.NewWriter(f) - _, err = w.WriteString(v + "\n") - if err != nil { - continue + strList = append(strList, getSingleModStr(reqpath, v)) } - w.Flush() } - return true + return strList } -func isValid(reqpath string, elem string) bool { +func getSingleModStr(reqpath string, elem string) string { f, err := os.OpenFile(reqpath, os.O_CREATE, 0666) if err != nil { - return false + return "" } f.Seek(0, 0) f.Truncate(0) f.WriteString(elem) f.Close() - if _, err := ex.Do(ex.PipCompilein, path.Dir(reqpath)); err != nil { - return false - } - return true -} - -type node struct { - name string - version string - dependFrom map[string][]string -} - -func parseRes(root *model.DepTree, s string) { - if len(s) == 0 { - return - } - lines := strings.Split(s, "\n") - for i, v := range lines { - if strings.Contains(v, "==") { - lines = lines[i:] - break - } - } - nodes := []node{} - dm := map[string]*model.DepTree{} - cur := node{name: "", version: "", dependFrom: make(map[string][]string)} - for _, v := range lines { - if strings.Contains(v, "==") { - nodes = append(nodes, cur) - cur = node{name: "", version: "", dependFrom: make(map[string][]string)} - v = strings.TrimSuffix(v, "\r") - nv := strings.Split(v, `==`) - if len(nv) == 2 { - cur.name = nv[0] - cur.version = nv[1] - } - } else if strings.Contains(v, "#") { - v = replacer.Replace(v) - v = strings.TrimPrefix(v, "via") - v = strings.TrimSuffix(v, "\r") - if v == "" { - continue - } - cur.dependFrom[cur.name] = append(cur.dependFrom[cur.name], v) - } - } - if len(nodes) == 0 { - return - } - nodes = nodes[1:] - for _, v := range nodes { - flag := false - for _, s := range v.dependFrom[v.name] { - if (strings.Contains(s, "requirements.in") || - strings.Contains(s, "(setup.py)") || - strings.Contains(s, "(setup.cfg)")) && len(v.dependFrom[v.name]) == 1 { - n := model.NewDepTree(root) - n.Name = v.name - n.Version = model.NewVersion(v.version) - flag = true - } - } - if flag { - continue - } - n := model.NewDepTree(nil) - n.Name = v.name - n.Version = model.NewVersion(v.version) - dm[v.name] = n - } - q := []*model.DepTree{} - q = append(q, root.Children...) - for len(q) > 0 { - treeNode := q[0] - for _, node := range nodes { - for _, parentName := range node.dependFrom[node.name] { - if parentName == treeNode.Name { - if d, ok := dm[node.name]; ok { - if d.Parent != nil { - nd := model.NewDepTree(nil) - nd.Dependency = d.Dependency - d = nd - } - treeNode.Children = append(treeNode.Children, d) - d.Parent = treeNode - break - } - } - } - } - q = append(q[1:], treeNode.Children...) + if str, err := ex.Do(ex.PipCompilein, path.Dir(reqpath)); err != nil { + logs.Error(err) + logs.Error(fmt.Errorf("get info err:%s", elem)) + return "" + } else { + return str } } diff --git a/analyzer/python/setup.go b/analyzer/python/setup.go index d8a4286..e48dc73 100644 --- a/analyzer/python/setup.go +++ b/analyzer/python/setup.go @@ -7,8 +7,6 @@ import ( "os/exec" "path" "strings" - "util/bar" - "util/ex" "util/logs" "util/model" "util/temp" @@ -28,8 +26,7 @@ type setupDep struct { } // parseSetup 解析 setup.py 文件 -func parseSetup(root *model.DepTree, file *model.FileInfo) (suc bool) { - suc = false +func parseSetup(root *model.DepTree, file *model.FileInfo) { temp.DoInTempDir(func(tempdir string) { ossfile := path.Join(tempdir, "oss.py") setupfile := path.Join(tempdir, "setup.py") @@ -59,7 +56,7 @@ func parseSetup(root *model.DepTree, file *model.FileInfo) (suc bool) { logs.Warn(err) } root.Name = dep.Name - root.Version = model.NewVersion(formatVer(dep.Version)) + root.Version = model.NewVersion(dep.Version) root.Licenses = append(root.Licenses, dep.License) for _, pkg := range [][]string{dep.Packages, dep.InstallRequires, dep.Requires} { for _, p := range pkg { @@ -67,59 +64,11 @@ func parseSetup(root *model.DepTree, file *model.FileInfo) (suc bool) { sub := model.NewDepTree(root) if index > -1 { sub.Name = p[:index] - sub.Version = model.NewVersion(formatVer(p[index:])) + sub.Version = model.NewVersion(p[index:]) } else { sub.Name = p } } } }) - suc = true - return -} - -// 如果第一种方式解析setup.py不成功,则用另外一种方式 -func parseSetupPy(root *model.DepTree, file *model.FileInfo) { - bar.PipSetupPyCompile.Add(1) - // 检查python环境 - if _, err := ex.CheckPython(ex.Python); err != nil { - return - } - dir := path.Dir(file.Name) - _, err := ex.Do(ex.PipinstallPiptoos, dir) - if err != nil { - return - } - res, err := ex.Do(ex.PipcompileSetup, dir) - if err != nil { - return - } - os.Remove(path.Join(dir, "temp.txt")) - parseRes(root, res) - defer ex.Do(ex.RemoveVirtualCmd, dir) -} - -// 同目录下已经解析了setup.py则无需再解析setup.cfg,结果是一样的。 -func parseSetupCfg(root *model.DepTree, file *model.FileInfo) { - dir := path.Dir(file.Name) - if fileExist(path.Join(dir, "setup.py")) { - return - } - bar.PipSetupCfgCompile.Add(1) - // 检查python环境 - if _, err := ex.CheckPython(ex.Python); err != nil { - return - } - dir = path.Dir(file.Name) - _, err := ex.Do(ex.PipinstallPiptoos, dir) - if err != nil { - return - } - res, err := ex.Do(ex.PipCompileCfg, dir) - if err != nil { - return - } - os.Remove(path.Join(dir, "temp.txt")) - parseRes(root, res) - defer ex.Do(ex.RemoveVirtualCmd, dir) } diff --git a/util/args/args.go b/util/args/args.go index 8fe5847..1348a64 100644 --- a/util/args/args.go +++ b/util/args/args.go @@ -31,12 +31,16 @@ var ( // local vuldb VulnDB string `json:"db"` // prvate repository - RepUrl []string `json:"repurl"` - RepName string `json:"name"` - RepPassword string `json:"password"` + Maven []repos `json:"maven"` }{} ) +type repos struct { + Repo string `json:"repo"` + User string `json:"user"` + Password string `json:"password"` +} + func init() { flag.StringVar(&ConfigPath, "config", "", "(可选) 指定配置文件路径,指定后启动程序时将默认使用配置参数,配置参数与命令行输入参数冲突时优先使用输入参数") flag.StringVar(&Config.Path, "path", Config.Path, "(必须) 指定要检测的文件或目录路径,例: -path ./foo 或 -path ./foo.zip") @@ -48,9 +52,6 @@ func init() { flag.StringVar(&Config.VulnDB, "db", Config.VulnDB, "(可选) 指定本地漏洞库文件,希望使用自己漏洞库时可用,漏洞库文件为json格式,具体格式会在开源项目文档中给出;若同时使用云端漏洞库与本地漏洞库,漏洞查询结果取并集,例: -db db.json") flag.BoolVar(&Config.Bar, "progress", Config.Bar, "(可选) 显示进度条") flag.BoolVar(&Config.Dedup, "dedup", Config.Dedup, "(可选) 相同组件去重") - // flag.StringVar(&Config.RepUrl, "repurl", Config.RepUrl, "(可选) 自定义仓库地址,例: -repurl http://192.168.0.10:8081/repository/maven-central/") - // flag.StringVar(&Config.RepName, "name", Config.RepName, "(可选) 自定义仓库登录名") - // flag.StringVar(&Config.RepPassword, "password", Config.RepPassword, "(可选) 自定义仓库登录密码") } func Parse() { diff --git a/util/ex/python.go b/util/ex/python.go index e1687d0..797a9b4 100644 --- a/util/ex/python.go +++ b/util/ex/python.go @@ -1,7 +1,6 @@ package ex import ( - "bytes" "os/exec" "strings" "util/logs" @@ -15,8 +14,7 @@ const ( PipCompilein string = "pipenv run pip-compile requirements.in" PipCompileCfg string = "pipenv run pip-compile setup.cfg -o temp.txt" PipcompileSetup string = "pipenv run pip-compile setup.py" - // PipShowCmd string = "pipenv graph" - RemoveVirtualCmd string = "pipenv --rm" + RemoveVirtualCmd string = "pipenv --rm" ) type CmdOpts struct { @@ -29,7 +27,6 @@ func Do(c string, dir string) (out string, err error) { cmd := GetCmdOpts(c, dir).BuildCmd() out, err = Excute(cmd) if err != nil { - logs.Error(err) return } return @@ -45,7 +42,7 @@ func CheckPython(py string) (s string, err error) { func GetCmdOpts(c string, dir string) *CmdOpts { list := strings.Fields(string(c)) - if len(list) == 0 { + if len(list) <= 1 { return &CmdOpts{} } return &CmdOpts{ @@ -63,20 +60,12 @@ func (c *CmdOpts) BuildCmd() (ec *exec.Cmd) { // 执行 func Excute(cmd *exec.Cmd) (s string, err error) { - var out bytes.Buffer - var errs bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &errs - err = cmd.Run() + stdoutStderr, err := cmd.CombinedOutput() if err != nil { logs.Error(err) return } - if len(out.String()) == 0 { - s = Convert(errs.String(), "gbk", "utf-8") - return - } - s = Convert(out.String(), "gbk", "utf-8") + s = Convert(string(stdoutStderr), "gbk", "utf-8") return } diff --git a/util/filter/file.go b/util/filter/file.go index 4826d52..5160836 100644 --- a/util/filter/file.go +++ b/util/filter/file.go @@ -99,7 +99,7 @@ var ( filterFunc(strings.Contains, "requirements")(path.Base(filename)) && !filterFunc(strings.Contains, "test")(path.Base(filename)) } PythonRequirementsIn = filterFunc(strings.HasSuffix, "requirements.in") - PythonSetupCfg = filterFunc(strings.HasSuffix, "setup.cfg") + // PythonSetupCfg = filterFunc(strings.HasSuffix, "setup.cfg") ) // 用于筛选可能有copyright信息的文件 -- Gitee From 72a3b061163b3ba1732d80c47aeffea816a06977 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Tue, 16 Aug 2022 15:54:12 +0800 Subject: [PATCH 05/13] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=E5=90=8D=E5=B8=A6=E8=B7=AF=E5=BE=84=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/engine/engine.go | 1 + 1 file changed, 1 insertion(+) diff --git a/analyzer/engine/engine.go b/analyzer/engine/engine.go index bd07a32..4f6c7d4 100644 --- a/analyzer/engine/engine.go +++ b/analyzer/engine/engine.go @@ -56,6 +56,7 @@ func (e Engine) ParseFile(filepath string) (depRoot *model.DepTree, taskInfo rep // 目录树 dirRoot := model.NewDirTree() depRoot = model.NewDepTree(nil) + filepath = strings.ReplaceAll(filepath, `\`, `/`) taskInfo = report.TaskInfo{ AppName: strings.TrimSuffix(path.Base(filepath), path.Ext(path.Base(filepath))), StartTime: time.Now().Format("2006-01-02 15:04:05"), -- Gitee From 1242ef074c6fa2d2471830e2b615c2645274c5af Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Tue, 16 Aug 2022 15:54:44 +0800 Subject: [PATCH 06/13] =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=8C=87=E5=AE=9Amaven?= =?UTF-8?q?=E7=A7=81=E6=9C=8D=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/java/ext.go | 71 +++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/analyzer/java/ext.go b/analyzer/java/ext.go index 45d8cd4..9a82e38 100644 --- a/analyzer/java/ext.go +++ b/analyzer/java/ext.go @@ -15,6 +15,7 @@ import ( "os/exec" "regexp" "strings" + "time" "util/args" "util/cache" "util/enum/language" @@ -97,47 +98,32 @@ func buildMvnDepTree(root *model.DepTree, lines []string) { } // downloadPom 下载pom文件 -func downloadPom(dep model.Dependency, repos ...string) (data []byte, err error) { - if repos == nil { - repos = []string{} - } +func downloadPom(dep model.Dependency) (data []byte, err error) { tags := strings.Split(dep.Vendor, ".") tags = append(tags, dep.Name) tags = append(tags, dep.Version.Org) tags = append(tags, fmt.Sprintf("%s-%s.pom", dep.Name, dep.Version.Org)) - if len(args.Config.RepUrl) > 0 { - for _, url := range args.Config.RepUrl { - url = strings.TrimSuffix(url, `/`) + `/` - repos = append(repos, url) - } - } - repos = append(repos, `https://repo.maven.apache.org/maven2/`) - // 遍历仓库地址, 默认maven仓库,可以指定私有仓库 - for i, repo := range repos { - // 是否是最后一个仓库(默认的maven仓库) - last := i == len(repos)-1 - // 拼接完整的pom下载地址 - url := repo + strings.Join(tags, "/") - if !last { - name := args.Config.RepName - password := args.Config.RepPassword - if name == "" || password == "" { - logs.Error(fmt.Errorf("missing username or password")) - continue - } - data, err = getFromRepo(url, name, password) - if data != nil { - return - } + // 先扫描指定仓库 + for _, m := range args.Config.Maven { + url := strings.TrimSuffix(m.Repo, `/`) + `/` + url = url + strings.Join(tags, "/") + name := m.User + password := m.Password + data, err = getFromRepo(url, name, password) + if data == nil { continue } - if rep, err := http.Get(url); err != nil { - return nil, err - } else { - defer rep.Body.Close() - if rep.StatusCode == 200 { - return ioutil.ReadAll(rep.Body) - } + return + } + // 指定仓库都没有就去官方仓库查询 + d := `https://repo.maven.apache.org/maven2/` + url := d + strings.Join(tags, "/") + if rep, err := http.Get(url); err != nil { + return nil, err + } else { + defer rep.Body.Close() + if rep.StatusCode == 200 { + return ioutil.ReadAll(rep.Body) } } // 应该走不到这里 @@ -146,17 +132,16 @@ func downloadPom(dep model.Dependency, repos ...string) (data []byte, err error) // 从私服库获取pom文件 func getFromRepo(url string, name string, password string) (data []byte, err error) { - req, err := http.NewRequest("GET", url, nil) + c := http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, Timeout: time.Duration(1 * time.Second)} + resp, err := c.Get(url) if err != nil { return nil, err } else { - req.SetBasicAuth(name, password) - if rep, err := (&http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}).Do(req); err == nil { - defer rep.Body.Close() - logs.Debug(fmt.Sprintf("status code: %d url: %s", rep.StatusCode, url)) - if rep.StatusCode == 200 { - return ioutil.ReadAll(rep.Body) - } + resp.Request.SetBasicAuth(name, password) + defer resp.Body.Close() + logs.Debug(fmt.Sprintf("status code: %d url: %s", resp.StatusCode, url)) + if resp.StatusCode == 200 { + return ioutil.ReadAll(resp.Body) } } return nil, fmt.Errorf("download from repository failure") -- Gitee From eac4189f230cf8f6b32b4ba84099c912514822b7 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Tue, 16 Aug 2022 15:55:11 +0800 Subject: [PATCH 07/13] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E6=BC=8F=E6=B4=9E?= =?UTF-8?q?=E8=AF=AF=E6=8A=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- util/model/version.go | 254 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 205 insertions(+), 49 deletions(-) diff --git a/util/model/version.go b/util/model/version.go index 01df4e2..52bfebc 100644 --- a/util/model/version.go +++ b/util/model/version.go @@ -6,6 +6,7 @@ package model import ( + "regexp" "strconv" "strings" ) @@ -16,24 +17,205 @@ type Version struct { Nums []int `json:"nums,omitempty"` Suffix string `json:"suffix,omitempty"` } +type token struct { + // 连接符 + // true 代表 -, false 代表 . + link bool + // 值 整数 + num int + // 值 字符串 + str string + // 标记是否为值 + isnum bool +} + +var ( + // 后缀权重 + suffixs = map[string]int{"alpha": 1, "beta": 2, "milestone": 3, "rc": 4, "cr": 4, "snapshot": 5, "release": 6, "final": 6, "ga": 6, "sp": 7} + // 数字or字母匹配 + numStrReg = regexp.MustCompile(`((\d+)|([a-zA-Z]+))`) +) -// weight 获取当前版本的后缀权重 -func (ver *Version) weight() (weight int) { - if len(ver.Suffix) > 0 { - // 后缀权重 - suffixs := map[string]int{"alpha": 1, "beta": 2, "milestone": 3, "rc": 4, "cr": 4, "snapshot": 5, "release": 6, "final": 6, "ga": 6, "sp": 7} - if w, ok := suffixs[ver.Suffix]; ok { - // 后缀在后缀列表中取对应后缀权重 - weight = w +func (t token) compare(t2 token) int { + // 比较数字 + if t.isnum && !t2.isnum { + return 1 + } else if !t.isnum && t2.isnum { + return -1 + } else if t.isnum && t2.isnum { + if t.num == t2.num { + if !t.link && t2.link { + return 1 + } else if t.link && !t2.link { + return -1 + } else { + return 0 + } } else { - // 后缀不在后缀列表中 - weight = 8 + return t.num - t2.num + } + } + // 比较字符串 + if t.str != t2.str { + w, ok := suffixs[strings.ToLower(t.str)] + w2, ok2 := suffixs[strings.ToLower(t2.str)] + if ok && ok2 { + return w - w2 + } else if ok && !ok2 { + return -1 + } else if !ok && ok2 { + return 1 + } + if t.str > t2.str { + return 1 + } else { + return -1 + } + } + // 比较分隔符 + if t.link != t2.link { + if t.num != 0 { + // 数字.分隔符优先级高 + if !t.link { + return 1 + } else { + return -1 + } + } + if t.str != "" { + // 字符串-分隔符优先级高 + if t.link { + return 1 + } else { + return -1 + } } + } + return 0 +} + +// compareToken 比较两组token +// return a - b +func compareToken(a, b []token) int { + var min int + if len(a) > len(b) { + if a[len(b)].str != "" { + b = append(b, token{link: true, str: "ga"}) + } + min = len(b) + } else if len(a) < len(b) { + if b[len(a)].str != "" { + a = append(a, token{link: true, str: "ga"}) + } + min = len(a) } else { - // 不存在后缀 - weight = 6 + min = len(a) + } + // 依次比较token + for i := 0; i < min; i++ { + r := a[i].compare(b[i]) + if r != 0 { + return r + } + } + // 返回长的那个 + return len(a) - len(b) +} + +// parseToken 从版本号字符串中解析token +func parseToken(ver string) (tokens []token) { + ver = strings.ToLower(strings.TrimLeft(ver, "vV")) + tokens = []token{} + t := token{isnum: true} + for len(ver) > 0 { + // 按-和.分割 + index := strings.IndexAny(ver, `.-`) + for index == 0 { + next := strings.IndexAny(ver[1:], `.-`) + if next == -1 { + index = len(ver) + } else { + // 从ver[1:]开始搜索,所以需要下标+1 + index = next + 1 + } + } + if index == -1 { + index = len(ver) + } + word := ver[:index] + ver = ver[index:] + // 检测到分隔符重新创建新token + if word[0] == '.' || word[0] == '-' { + tokens = append(tokens, t) + t = token{link: word[0] == '-', isnum: word[0] == '.'} + word = word[1:] + } + // 尝试解析数字 + if n, err := strconv.Atoi(word); err == nil { + t.num = n + t.isnum = true + } else if !strings.ContainsAny(word, `1234567890`) { + // 不含数字则保存限定符 + t.str = word + } else { + // 标记下一个token是否是额外创建的'-'分隔符 + link := false + // 解析数字与字符串 + matchs := numStrReg.FindAllString(word, -1) + for i, match := range matchs { + if n, err := strconv.Atoi(match); err == nil { + t.num = n + t.isnum = true + } else { + // 为单个字母并后面存在数字 + if len(match) == 1 && i+1 < len(matchs) { + if match == "a" { + match = "alpha" + } else if match == "b" { + match = "beta" + } else if match == "m" { + match = "milestone" + } + } + t.str = match + } + tokens = append(tokens, t) + t = token{link: true} + link = true + } + if link { + t.link = false + } + } + } + tokens = append(tokens, t) + // 处理限定符 + for i := range tokens { + if tokens[i].str != "" { + s := tokens[i].str + if s == "final" || s == "ga" { + s = "" + } + tokens[i].str = s + tokens[i].isnum = false + } + } + isZero := true + for i := len(tokens) - 1; i >= 0; i-- { + t := tokens[i] + if t.num == 0 { + if t.str == "" { + if isZero || !t.isnum { + tokens = append(tokens[:i], tokens[i+1:]...) + } + } else if t.str != "" { + isZero = true + } + } else { + isZero = false + } } - return weight + return } // NewVersion 解析版本字符串 @@ -71,33 +253,11 @@ func NewVersion(verStr string) *Version { // Less 判断是否严格小于另一个版本号 func (ver *Version) Less(other *Version) bool { - length := len(ver.Nums) - if length > len(other.Nums) { - length = len(other.Nums) - } - // 比较数字大小 - for i := 0; i < length; i++ { - if ver.Nums[i] < other.Nums[i] { - return true - } else if ver.Nums[i] > other.Nums[i] { - return false - } - } - // 数字多时查看是否有非零值 - if len(ver.Nums) < len(other.Nums) { - for i := len(other.Nums) - 1; i >= len(ver.Nums); i-- { - if other.Nums[i] != 0 { - return true - } - } - } - // 比较后缀 - vw, ow := ver.weight(), other.weight() - if vw == ow { - return ver.Suffix < other.Suffix - } else { - return vw < ow - } + va := strings.TrimLeft(ver.Org, "vV^<>=~!, ") + vb := strings.TrimLeft(other.Org, "vV^<>=~!, ") + ta := parseToken(va) + tb := parseToken(vb) + return compareToken(ta, tb) < 0 } // Equal 判断是否等于另一个版本号 @@ -105,15 +265,11 @@ func (ver *Version) Equal(other *Version) bool { if len(ver.Nums) != len(other.Nums) { return false } - // 比较数字大小 - for i, n := range ver.Nums { - if other.Nums[i] != n { - return false - } - } - // 比较后缀 - vw, ow := ver.weight(), other.weight() - return vw == ow + va := strings.TrimLeft(ver.Org, "vV^<>=~!, ") + vb := strings.TrimLeft(other.Org, "vV^<>=~!, ") + ta := parseToken(va) + tb := parseToken(vb) + return compareToken(ta, tb) == 0 } // InRangeInterval 判断一个版本是否在一个版本区间内 -- Gitee From 741d9e305594a49fa79fba3515019fccaca6431e Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Thu, 18 Aug 2022 11:32:07 +0800 Subject: [PATCH 08/13] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=BF=9B=E5=BA=A6?= =?UTF-8?q?=E6=9D=A1=E6=8F=90=E7=A4=BA=E5=AD=97=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/python/pipfile.go | 27 +++------------------------ analyzer/python/req.go | 4 ++-- util/bar/bar.go | 18 ++++++++---------- 3 files changed, 13 insertions(+), 36 deletions(-) diff --git a/analyzer/python/pipfile.go b/analyzer/python/pipfile.go index 223bf0f..f73cad7 100644 --- a/analyzer/python/pipfile.go +++ b/analyzer/python/pipfile.go @@ -2,8 +2,7 @@ package python import ( "encoding/json" - "os" - "path" + "strings" "util/logs" "util/model" @@ -12,10 +11,6 @@ import ( // parsePipfile parse Pipfile file func parsePipfile(root *model.DepTree, file *model.FileInfo) { - dir := path.Dir(file.Name) - if fileExist(path.Join(dir, "requirements.txt")) { - return - } pip := struct { DevPackages map[string]string `toml:"dev-packages"` Packages map[string]string `toml:"packages"` @@ -26,7 +21,7 @@ func parsePipfile(root *model.DepTree, file *model.FileInfo) { for name, version := range pip.Packages { dep := model.NewDepTree(root) dep.Name = name - dep.Version = model.NewVersion(version) + dep.Version = model.NewVersion(strings.TrimLeft(version, "vV^=<~>")) } for name, version := range pip.DevPackages { dep := model.NewDepTree(root) @@ -35,24 +30,8 @@ func parsePipfile(root *model.DepTree, file *model.FileInfo) { } } -// Determine whether the file exists -func fileExist(path string) bool { - _, err := os.Stat(path) - if err == nil { - return true - } - if os.IsNotExist(err) { - return false - } - return false -} - // parsePipfileLock parse pipfile.lock file func parsePipfileLock(root *model.DepTree, file *model.FileInfo) { - dir := path.Dir(file.Name) - if fileExist(path.Join(dir, "requirements.txt")) { - return - } lock := struct { Default map[string]struct { Version string `json:"version"` @@ -71,7 +50,7 @@ func parsePipfileLock(root *model.DepTree, file *model.FileInfo) { if v != "" { dep := model.NewDepTree(root) dep.Name = n - dep.Version = model.NewVersion(v) + dep.Version = model.NewVersion(strings.TrimLeft(v, "vV^=<~>")) } } } diff --git a/analyzer/python/req.go b/analyzer/python/req.go index 8e6cb92..4aa8aa7 100644 --- a/analyzer/python/req.go +++ b/analyzer/python/req.go @@ -21,7 +21,7 @@ func init() { replacers := []string{"#", "", " ", ""} replacer = strings.NewReplacer(replacers...) // requirments.txt文件内组件可能没有版本号 - reg = regexp.MustCompile(`^\w[\w-_\.]+[!=<~>]{0,2}[\.\w]*`) + reg = regexp.MustCompile(`^\w`) } func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { @@ -141,7 +141,7 @@ func getOutData(file *model.FileInfo, dir string) []string { } func getSingleModStr(reqpath string, elem string) string { - f, err := os.OpenFile(reqpath, os.O_CREATE, 0666) + f, err := os.OpenFile(reqpath, os.O_CREATE, 0744) if err != nil { return "" } diff --git a/util/bar/bar.go b/util/bar/bar.go index ff5b539..13df825 100644 --- a/util/bar/bar.go +++ b/util/bar/bar.go @@ -11,16 +11,14 @@ import ( ) var ( - id int = 0 - Dir *Bar = newBar("scan dir") - Archive *Bar = newBar("unarchive") - Maven *Bar = newBar("parse maven indirect dependency") - Npm *Bar = newBar("parse npm indirect dependency") - Composer *Bar = newBar("parse composer indirect dependency") - Dependency *Bar = newBar("parse project dependency") - PipCompile *Bar = newBar("parse requirements.txt module") - PipSetupPyCompile *Bar = newBar("parse pip setup.py") - PipSetupCfgCompile *Bar = newBar("parse pip setup.cfg") + id int = 0 + Dir *Bar = newBar("scan dir") + Archive *Bar = newBar("unarchive") + Maven *Bar = newBar("parse maven indirect dependency") + Npm *Bar = newBar("parse npm indirect dependency") + Composer *Bar = newBar("parse composer indirect dependency") + Dependency *Bar = newBar("parse project dependency") + PipCompile *Bar = newBar("parse python module") ) // mult pargress bar -- Gitee From a95094dc50f52a9095f37815a96c05c8cb377866 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Thu, 18 Aug 2022 11:39:53 +0800 Subject: [PATCH 09/13] =?UTF-8?q?=E6=9B=B4=E6=96=B0readme=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 727538b..20710bd 100644 --- a/README.md +++ b/README.md @@ -16,17 +16,17 @@ ## 检测能力 `OpenSCA`现已支持以下编程语言相关的配置文件解析及对应的包管理器,后续会逐步支持更多的编程语言,丰富相关配置文件的解析。 -| 支持语言 | 包管理器 | 解析文件 | -| ------------ | ---------- | ---------------------------------------------- | -| `Java` | `Maven` | `pom.xml` | -| `Java` | `Gradle` | `.gradle` `.gradle.kts` | -| `JavaScript` | `Npm` | `package-lock.json` `package.json` `yarn.lock` | -| `PHP` | `Composer` | `composer.json` `composer.lock` | -| `Ruby` | `gem` | `gemfile.lock` | -| `Golang` | `gomod` | `go.mod` `go.sum` | -| `Rust` | `cargo` | `Cargo.lock` | -| `Erlang` | `Rebar` | `rebar.lock` | -| `Python` | `Pip` | `Pipfile` `Pipfile.lock` `setup.py` | +| 支持语言 | 包管理器 | 解析文件 | +| ------------ | ---------- | ------------------------------------------------------------ | +| `Java` | `Maven` | `pom.xml` | +| `Java` | `Gradle` | `.gradle` `.gradle.kts` | +| `JavaScript` | `Npm` | `package-lock.json` `package.json` `yarn.lock` | +| `PHP` | `Composer` | `composer.json` `composer.lock` | +| `Ruby` | `gem` | `gemfile.lock` | +| `Golang` | `gomod` | `go.mod` `go.sum` | +| `Rust` | `cargo` | `Cargo.lock` | +| `Erlang` | `Rebar` | `rebar.lock` | +| `Python` | `Pip` | `Pipfile` `Pipfile.lock` `setup.py` `requirements.txt` `requirements.in` (后两者的解析需要具备pipenv环境,需要联网。) | ## 下载安装 @@ -82,7 +82,7 @@ opensca-cli -db db.json -path ${project_path} | `token` | `string` | 云服务验证 `token`,需要在云服务平台申请,与 `url` 参数一起使用 | `-token xxxxxxx` | | `cache` | `bool` | 建议开启,缓存下载的文件(例如 `.pom` 文件),重复检测相同组件时会节省时间,下载的文件会保存到工具所在目录的.cache 目录下 | `-cache` | | `vuln` | `bool` | 结果仅保留有漏洞信息的组件,使用该参数将不会保留组件层级结构 | `-vuln` | -| `out` | `string` | 将检测结果保存到指定文件,根据后缀生成不同格式的文件,默认为 `json` 格式;支持以`spdx`格式展示`sbom`清单只需更换相应输出文件后缀即可 | `-out output.json` | +| `out` | `string` | 将检测结果保存到指定文件,根据后缀生成不同格式的文件,默认为 `json` 格式;支持以`spdx`格式展示`sbom`清单,只需更换相应输出文件后缀即可 | `-out output.json` | | `db` | `string` | 指定本地漏洞库文件,希望使用自己漏洞库时可用,漏洞库文件为 `json` 格式,具体格式会在之后给出;若同时使用云端漏洞库与本地漏洞库,漏洞查询结果取并集 | `-db db.json` | | `progress` | `bool` | 显示进度条 | `-progress` | | `dedup` | `bool` | 相同组件去重 | `-dedup` | -- Gitee From 73d3cb0d9edcaadd5e2aedfeaf59baaea8f1922e Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Fri, 19 Aug 2022 16:11:24 +0800 Subject: [PATCH 10/13] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/python/req.go | 87 +++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/analyzer/python/req.go b/analyzer/python/req.go index 4aa8aa7..de989ca 100644 --- a/analyzer/python/req.go +++ b/analyzer/python/req.go @@ -14,13 +14,9 @@ import ( "util/temp" ) -var replacer *strings.Replacer var reg *regexp.Regexp func init() { - replacers := []string{"#", "", " ", ""} - replacer = strings.NewReplacer(replacers...) - // requirments.txt文件内组件可能没有版本号 reg = regexp.MustCompile(`^\w`) } @@ -45,7 +41,9 @@ func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { parseOutData(root, strArry) } +// 解析各组件所打印的信息 func parseOutData(root *model.DepTree, strs []string) { + // 直接依赖 directMap := map[string]*model.DepTree{} for _, str := range strs { lines := strings.Split(str, "\n") @@ -55,54 +53,51 @@ func parseOutData(root *model.DepTree, strs []string) { break } } - nodes := []*model.DepTree{} - var depFrom = make(map[string][]string) + // parentsMap一个组件名对应其所有父组件名 + var parentsMap = make(map[string][]string) cur := model.NewDepTree(nil) + nodes := []string{} + depMap := map[string]*model.DepTree{} for _, line := range lines { if strings.Contains(line, "==") { - nodes = append(nodes, cur) + // 在输出内容"=="符号左右对应名字与版本号 cur = model.NewDepTree(nil) line = strings.TrimSuffix(line, "\r") nv := strings.Split(line, `==`) if len(nv) == 2 { cur.Name = strings.TrimSpace(nv[0]) cur.Version = model.NewVersion(strings.TrimSpace(nv[1])) + depMap[cur.Name] = cur + nodes = append(nodes, cur.Name) } } else if strings.Contains(line, "#") { - line = replacer.Replace(line) - line = strings.TrimPrefix(line, "via") + // "#"符号后有父组件名字信息 line = strings.TrimSuffix(line, "\r") + line = strings.TrimLeft(line, " #via") if line == "" { continue } - depFrom[cur.Name] = append(depFrom[cur.Name], line) + parentsMap[cur.Name] = append(parentsMap[cur.Name], line) } } - nodes = append(nodes, cur) - if len(nodes) == 0 { - continue - } - nodes = nodes[1:] - depMap := map[string]*model.DepTree{} - for _, n := range nodes { - depMap[n.Name] = n - } - for _, n := range nodes { - if dep, ok := directMap[n.Name]; ok { - n.Dependency = dep.Dependency - n.Children = dep.Children - } - if v, ok := depFrom[n.Name]; ok { - for _, x := range v { - if len(v) == 1 && strings.Contains(v[0], "requirements") { - if dep, ok := depMap[n.Name]; ok { - directMap[dep.Name] = dep - } - continue + depMap[cur.Name] = cur + nodes = append(nodes, cur.Name) + for _, name := range nodes { + parNames := parentsMap[name] + for _, parName := range parNames { + if len(parNames) == 1 && strings.Contains(parNames[0], "requirements") { + if dep, ok := depMap[name]; ok { + directMap[dep.Name] = dep } - if parent, ok := depMap[x]; ok { - n.Parent = parent - parent.Children = append(parent.Children, n) + } + if parent, ok := depMap[parName]; ok { + if dep, ok := depMap[name]; ok { + parent.Children = append(parent.Children, dep) + dep.Parent = parent + if d, ok := directMap[name]; ok { + d.Move(depMap[name]) + } + delete(depMap, name) } } } @@ -119,8 +114,31 @@ func parseOutData(root *model.DepTree, strs []string) { root.Children = append(root.Children, d) d.Parent = root } + removeRepetition(root) +} + +// 去除每个节点children列表中重复的节点 +func removeRepetition(root *model.DepTree) { + q := []*model.DepTree{root} + for len(q) > 0 { + n := q[0] + Rep := map[string]struct{}{} + l := len(n.Children) + for i, c := range n.Children { + if _, ok := Rep[c.Name]; ok { + if i == l-1 { + c.Parent.Children = c.Parent.Children[:i] + } else { + c.Parent.Children = append(c.Parent.Children[:i], c.Parent.Children[i+1:]...) + } + } + Rep[c.Name] = struct{}{} + } + q = append(q[1:], n.Children...) + } } +// 获取打印数据 func getOutData(file *model.FileInfo, dir string) []string { s := string(file.Data) strList := []string{} @@ -140,6 +158,7 @@ func getOutData(file *model.FileInfo, dir string) []string { return strList } +// 将组件名与版本号写入requirements.in文件单独调用pip-compile,获取打印数据 func getSingleModStr(reqpath string, elem string) string { f, err := os.OpenFile(reqpath, os.O_CREATE, 0744) if err != nil { -- Gitee From bafe1cd0c1113aa1d5e524d10d8c72cba68b27cc Mon Sep 17 00:00:00 2001 From: huyongfeng <578209081@qq.com> Date: Fri, 19 Aug 2022 11:15:39 +0000 Subject: [PATCH 11/13] update analyzer/python/req.go. Signed-off-by: huyongfeng <578209081@qq.com> --- analyzer/python/req.go | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/analyzer/python/req.go b/analyzer/python/req.go index de989ca..17574c6 100644 --- a/analyzer/python/req.go +++ b/analyzer/python/req.go @@ -94,9 +94,6 @@ func parseOutData(root *model.DepTree, strs []string) { if dep, ok := depMap[name]; ok { parent.Children = append(parent.Children, dep) dep.Parent = parent - if d, ok := directMap[name]; ok { - d.Move(depMap[name]) - } delete(depMap, name) } } @@ -114,28 +111,6 @@ func parseOutData(root *model.DepTree, strs []string) { root.Children = append(root.Children, d) d.Parent = root } - removeRepetition(root) -} - -// 去除每个节点children列表中重复的节点 -func removeRepetition(root *model.DepTree) { - q := []*model.DepTree{root} - for len(q) > 0 { - n := q[0] - Rep := map[string]struct{}{} - l := len(n.Children) - for i, c := range n.Children { - if _, ok := Rep[c.Name]; ok { - if i == l-1 { - c.Parent.Children = c.Parent.Children[:i] - } else { - c.Parent.Children = append(c.Parent.Children[:i], c.Parent.Children[i+1:]...) - } - } - Rep[c.Name] = struct{}{} - } - q = append(q[1:], n.Children...) - } } // 获取打印数据 @@ -175,4 +150,4 @@ func getSingleModStr(reqpath string, elem string) string { } else { return str } -} +} \ No newline at end of file -- Gitee From 1b869f2518365cb8923fc07187497d2c51dcd33c Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Fri, 19 Aug 2022 19:22:39 +0800 Subject: [PATCH 12/13] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/python/req.go | 62 +++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/analyzer/python/req.go b/analyzer/python/req.go index 4aa8aa7..bbf21c7 100644 --- a/analyzer/python/req.go +++ b/analyzer/python/req.go @@ -14,13 +14,9 @@ import ( "util/temp" ) -var replacer *strings.Replacer var reg *regexp.Regexp func init() { - replacers := []string{"#", "", " ", ""} - replacer = strings.NewReplacer(replacers...) - // requirments.txt文件内组件可能没有版本号 reg = regexp.MustCompile(`^\w`) } @@ -45,7 +41,9 @@ func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { parseOutData(root, strArry) } +// 解析各组件所打印的信息 func parseOutData(root *model.DepTree, strs []string) { + // 直接依赖 directMap := map[string]*model.DepTree{} for _, str := range strs { lines := strings.Split(str, "\n") @@ -55,54 +53,48 @@ func parseOutData(root *model.DepTree, strs []string) { break } } - nodes := []*model.DepTree{} - var depFrom = make(map[string][]string) + // parentsMap一个组件名对应其所有父组件名 + var parentsMap = make(map[string][]string) cur := model.NewDepTree(nil) + nodes := []string{} + depMap := map[string]*model.DepTree{} for _, line := range lines { if strings.Contains(line, "==") { - nodes = append(nodes, cur) + // 在输出内容"=="符号左右对应名字与版本号 cur = model.NewDepTree(nil) line = strings.TrimSuffix(line, "\r") nv := strings.Split(line, `==`) if len(nv) == 2 { cur.Name = strings.TrimSpace(nv[0]) cur.Version = model.NewVersion(strings.TrimSpace(nv[1])) + depMap[cur.Name] = cur + nodes = append(nodes, cur.Name) } } else if strings.Contains(line, "#") { - line = replacer.Replace(line) - line = strings.TrimPrefix(line, "via") + // "#"符号后有父组件名字信息 line = strings.TrimSuffix(line, "\r") + line = strings.TrimLeft(line, " #via") if line == "" { continue } - depFrom[cur.Name] = append(depFrom[cur.Name], line) + parentsMap[cur.Name] = append(parentsMap[cur.Name], line) } } - nodes = append(nodes, cur) - if len(nodes) == 0 { - continue - } - nodes = nodes[1:] - depMap := map[string]*model.DepTree{} - for _, n := range nodes { - depMap[n.Name] = n - } - for _, n := range nodes { - if dep, ok := directMap[n.Name]; ok { - n.Dependency = dep.Dependency - n.Children = dep.Children - } - if v, ok := depFrom[n.Name]; ok { - for _, x := range v { - if len(v) == 1 && strings.Contains(v[0], "requirements") { - if dep, ok := depMap[n.Name]; ok { - directMap[dep.Name] = dep - } - continue + depMap[cur.Name] = cur + nodes = append(nodes, cur.Name) + for _, name := range nodes { + parNames := parentsMap[name] + for _, parName := range parNames { + if len(parNames) == 1 && strings.Contains(parNames[0], "requirements") { + if dep, ok := depMap[name]; ok { + directMap[dep.Name] = dep } - if parent, ok := depMap[x]; ok { - n.Parent = parent - parent.Children = append(parent.Children, n) + } + if parent, ok := depMap[parName]; ok { + if dep, ok := depMap[name]; ok { + parent.Children = append(parent.Children, dep) + dep.Parent = parent + delete(depMap, name) } } } @@ -121,6 +113,7 @@ func parseOutData(root *model.DepTree, strs []string) { } } +// 获取打印数据 func getOutData(file *model.FileInfo, dir string) []string { s := string(file.Data) strList := []string{} @@ -140,6 +133,7 @@ func getOutData(file *model.FileInfo, dir string) []string { return strList } +// 将组件名与版本号写入requirements.in文件单独调用pip-compile,获取打印数据 func getSingleModStr(reqpath string, elem string) string { f, err := os.OpenFile(reqpath, os.O_CREATE, 0744) if err != nil { -- Gitee From 6de8888eb804be5ea746d0991b048e18bb93d258 Mon Sep 17 00:00:00 2001 From: huyongfeng Date: Sat, 20 Aug 2022 18:02:01 +0800 Subject: [PATCH 13/13] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E7=BB=93=E6=9E=84=EF=BC=8C=E6=B7=BB=E5=8A=A0git=E8=BF=9E?= =?UTF-8?q?=E6=8E=A5=E5=88=A4=E6=96=AD=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzer/python/req.go | 54 ++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/analyzer/python/req.go b/analyzer/python/req.go index 17574c6..87b7f23 100644 --- a/analyzer/python/req.go +++ b/analyzer/python/req.go @@ -14,10 +14,14 @@ import ( "util/temp" ) -var reg *regexp.Regexp +var reg1 *regexp.Regexp +var regGit *regexp.Regexp +var replacer *strings.Replacer func init() { - reg = regexp.MustCompile(`^\w`) + reg1 = regexp.MustCompile(`^\w`) + regGit = regexp.MustCompile(`\/([\w-]+)\.git`) + replacer = strings.NewReplacer("# via","","\r",""," ","","#","") } func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { @@ -45,10 +49,11 @@ func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { func parseOutData(root *model.DepTree, strs []string) { // 直接依赖 directMap := map[string]*model.DepTree{} + childMap := map[*model.DepTree]map[string]struct{}{} for _, str := range strs { lines := strings.Split(str, "\n") for i, line := range lines { - if reg.MatchString(line) { + if reg1.MatchString(line) { lines = lines[i:] break } @@ -68,12 +73,13 @@ func parseOutData(root *model.DepTree, strs []string) { cur.Name = strings.TrimSpace(nv[0]) cur.Version = model.NewVersion(strings.TrimSpace(nv[1])) depMap[cur.Name] = cur + m := make(map[string]struct{}) + childMap[cur] = m nodes = append(nodes, cur.Name) } } else if strings.Contains(line, "#") { // "#"符号后有父组件名字信息 - line = strings.TrimSuffix(line, "\r") - line = strings.TrimLeft(line, " #via") + line = replacer.Replace(line) if line == "" { continue } @@ -83,23 +89,37 @@ func parseOutData(root *model.DepTree, strs []string) { depMap[cur.Name] = cur nodes = append(nodes, cur.Name) for _, name := range nodes { + if _,ok := depMap[name]; !ok { + continue + } parNames := parentsMap[name] for _, parName := range parNames { - if len(parNames) == 1 && strings.Contains(parNames[0], "requirements") { + if len(parNames) == 1 && strings.Contains(parName, "requirements") { if dep, ok := depMap[name]; ok { directMap[dep.Name] = dep } } - if parent, ok := depMap[parName]; ok { - if dep, ok := depMap[name]; ok { - parent.Children = append(parent.Children, dep) - dep.Parent = parent - delete(depMap, name) + if _,ok := depMap[parName]; !ok { + continue + } + parent := depMap[parName] + dep := depMap[name] + if m,ok := childMap[dep]; ok { + if _,ok := m[dep.Name];ok { + continue } + m[dep.Name] = struct{}{} } + parent.Children = append(parent.Children, dep) + dep.Parent = parent } } } + withRoot(root,directMap) +} + +// 所有直接依赖连接至root +func withRoot(root *model.DepTree,directMap map[string]*model.DepTree) { direct := []*model.DepTree{} for _, n := range directMap { direct = append(direct, n) @@ -125,7 +145,17 @@ func getOutData(file *model.FileInfo, dir string) []string { } out.Close() for _, v := range strings.Split(s, "\n") { - if reg.MatchString(v) { + // 少部分情况会有git连接 + if regGit.MatchString(v) { + res := regGit.FindStringSubmatch(v) + if len(res) == 2 { + bar.PipCompile.Add(1) + strList = append(strList, getSingleModStr(reqpath, res[1])) + continue + } + } + // 一般情况下字母开头的行内容都是组件名 + if reg1.MatchString(v) { bar.PipCompile.Add(1) strList = append(strList, getSingleModStr(reqpath, v)) } -- Gitee