10 Star 49 Fork 6

Lione / 妹图狩猎者

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
single_task.go 3.06 KB
一键复制 编辑 原始数据 按行查看 历史
Lione 提交于 2018-12-21 10:57 . 去除GOMAXPROCS参数
package main
import (
"fmt"
"io/ioutil"
"net/http"
"path/filepath"
"time"
"gitee.com/Lione/sister_hunter/core"
)
// SingleTasks is the top-level container that RunSingleTask populates from the
// task JSON file by passing a pointer to it to core.DecodeJsonFile.
type SingleTasks struct {
// Tasks holds the individual tasks; RunSingleTask ranges over them and
// handles one task at a time, in slice order.
Tasks []SingleTask
}
// SingleTask describes one job handled by RunSingleTask.
//
// Field usage, as seen in RunSingleTask:
//   - Log: when true, the page list and the per-page analysis results are
//     passed to core.WriteLogs for the file "./logs/<Dir>.log".
//   - Save: when true, every analysed result URL is downloaded into
//     "./save/<Dir>".
//   - SetHeader: when true, download requests carry a browser-like
//     User-Agent header and a Referer header set to the requested URL.
//   - RequestTimeout: HTTP client timeout for downloads, in seconds.
//   - Dir: name used for both the save directory and the log file.
//   - Sources: values handed one by one to core.PaginationUrlAnalysis.
//   - Prey: passed to core.UrlAnalysis together with each page.
//   - Pagination: passed to core.PaginationUrlAnalysis together with each
//     source.
type SingleTask struct {
	Log, Save, SetHeader bool

	RequestTimeout int
	Dir            string
	Sources        []string

	Prey       core.Prey
	Pagination core.Pagination
}
// RunSingleTask loads the task list from the JSON file at jsonPath and runs
// every task in order. For each task it:
//
//  1. creates the directory "./save/<Dir>",
//  2. collects page URLs by calling core.PaginationUrlAnalysis on each source,
//  3. analyses every page concurrently with core.UrlAnalysis and gathers the
//     results ("preys") in page order,
//  4. if Save is set, downloads every prey concurrently into the save
//     directory as "pid_<index>_<base name of the URL>".
//
// When Log is set, the page list and the per-page results are also handed to
// core.WriteLogs for "./logs/<Dir>.log".
//
// Errors are reported on standard output. A failure to load the task file
// aborts the run; all other failures are best-effort and only skip the
// affected item. The function returns nothing.
func RunSingleTask(jsonPath string) {
	// Load the task list; without it there is nothing meaningful to run.
	var singleTasks SingleTasks
	if err := core.DecodeJsonFile(jsonPath, &singleTasks); err != nil {
		fmt.Println(err)
		return
	}

	for _, singleTask := range singleTasks.Tasks {
		// Create the output directory. A failure is only reported here: the
		// analysis (and logging) can still be useful, and each later save
		// reports its own error.
		saveDir := "./save/" + singleTask.Dir
		if err := core.MkDirAll(saveDir); err != nil {
			fmt.Println(err)
		}

		// Pagination analysis: gather the page URLs of every source.
		var pages []string
		for _, source := range singleTask.Sources {
			found, err := core.PaginationUrlAnalysis(source, singleTask.Pagination)
			if err != nil {
				fmt.Println(err)
			}
			pages = append(pages, found...)
		}
		fmt.Println("页面分析完毕")

		// Record the page list.
		logPath := "./logs/" + singleTask.Dir + ".log"
		if singleTask.Log {
			if err := core.WriteLogs(logPath, pages, true); err != nil {
				fmt.Println(err)
			}
		}

		// Prey analysis: one goroutine per page. Each goroutine only sends
		// its result on its own buffered channel; it never touches shared
		// state.
		preyRule := singleTask.Prey
		resultChs := make([]chan []string, len(pages))
		for i, page := range pages {
			ch := make(chan []string, 1)
			resultChs[i] = ch
			go func(page string) {
				results, err := core.UrlAnalysis(page, preyRule)
				if err != nil {
					fmt.Println(err)
				}
				ch <- results
			}(page)
		}

		// Wait for the analysis. Appending and logging happen only in this
		// goroutine, so the slice is not raced on, log writes are serialized,
		// and preys end up in page order (which keeps the pid in the saved
		// file names stable between runs).
		var preys []string
		for _, ch := range resultChs {
			results := <-ch
			if singleTask.Log {
				if err := core.WriteLogs(logPath, results, true); err != nil {
					fmt.Println(err)
				}
			}
			preys = append(preys, results...)
		}
		fmt.Println("猎物分析完毕")

		// Prey capture.
		if singleTask.Save {
			// A RequestTimeout of 0 yields a zero Timeout, which net/http
			// treats as "no timeout".
			client := &http.Client{
				Timeout: time.Second * time.Duration(singleTask.RequestTimeout),
			}
			setHeader := singleTask.SetHeader

			// fetch downloads one prey and returns its body. Every error is
			// local to the call, so a failed request can never be mistaken
			// for a successful one and no variable is shared between
			// goroutines.
			fetch := func(prey string) ([]byte, error) {
				req, err := http.NewRequest("GET", prey, nil)
				if err != nil {
					return nil, err
				}
				if setHeader {
					req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
					req.Header.Set("Referer", prey)
				}
				resp, err := client.Do(req)
				if err != nil {
					return nil, err
				}
				defer resp.Body.Close()
				return ioutil.ReadAll(resp.Body)
			}

			// One goroutine per prey; each signals completion exactly once,
			// whether it succeeded or not.
			done := make(chan struct{}, len(preys))
			for pid, prey := range preys {
				go func(pid int, prey string) {
					defer func() { done <- struct{}{} }()

					data, err := fetch(prey)
					if err != nil {
						fmt.Println(err)
						return
					}
					name := fmt.Sprintf("pid_%d_%s", pid, filepath.Base(prey))
					if err := core.SaveFile(filepath.Join(saveDir, name), data); err != nil {
						fmt.Println(err)
						return
					}
					fmt.Println("已捕捉: " + prey)
				}(pid, prey)
			}

			// Wait for every capture to finish.
			for range preys {
				<-done
			}
		}

		// Done with this task.
		fmt.Println("任务已处理完毕")
	}
}
Go
1
https://gitee.com/Lione/sister_hunter.git
git@gitee.com:Lione/sister_hunter.git
Lione
sister_hunter
妹图狩猎者
master

搜索帮助