62 Star 377 Fork 123

admpub / nging

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
collector.go 16.28 KB
一键复制 编辑 原始数据 按行查看 历史
shen_wen_hui 提交于 2019-07-19 10:38 . improved
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
/*
Nging is a toolbox for webmasters
Copyright (C) 2018-present Wenhui Shen <swh@admpub.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package collector
import (
"strings"
"github.com/admpub/gopiper"
"github.com/admpub/nging/application/dbschema"
"github.com/admpub/nging/application/handler"
"github.com/admpub/nging/application/library/collector"
"github.com/admpub/nging/application/library/collector/exec"
"github.com/admpub/nging/application/library/collector/export"
"github.com/admpub/nging/application/library/collector/sender"
"github.com/admpub/nging/application/library/cron"
"github.com/admpub/nging/application/library/notice"
"github.com/admpub/nging/application/model"
"github.com/webx-top/com"
"github.com/webx-top/db"
"github.com/webx-top/echo"
)
// init registers the collector handlers under the `/collector` route group
// and adds CollectPageJob as the `collect_page` job via cron.AddSYSJob.
// Every route accepts both GET and POST; the `name` metadata is the label
// attached to the route.
func init() {
	handler.RegisterToGroup(`/collector`, func(g echo.RouteRegister) {
		e := handler.Echo()

		// Export rules and export logs.
		g.Route(`GET,POST`, `/export`, e.MetaHandler(echo.H{`name`: `导出管理`}, Export))
		// The label used to read `日子列表`, a typo for `日志列表` ("log list").
		g.Route(`GET,POST`, `/export_log`, e.MetaHandler(echo.H{`name`: `日志列表`}, ExportLog))
		g.Route(`GET,POST`, `/export_log_view/:id`, e.MetaHandler(echo.H{`name`: `日志详情`}, ExportLogView))
		g.Route(`GET,POST`, `/export_log_delete`, e.MetaHandler(echo.H{`name`: `删除日志`}, ExportLogDelete))
		g.Route(`GET,POST`, `/export_add`, e.MetaHandler(echo.H{`name`: `添加导出规则`}, ExportAdd))
		g.Route(`GET,POST`, `/export_edit`, e.MetaHandler(echo.H{`name`: `修改导出规则`}, ExportEdit))
		g.Route(`GET,POST`, `/export_edit_status`, e.MetaHandler(echo.H{`name`: `修改导出规则`}, ExportEditStatus))
		g.Route(`GET,POST`, `/export_delete`, e.MetaHandler(echo.H{`name`: `删除导出规则`}, ExportDelete))

		// Collection history.
		g.Route(`GET,POST`, `/history`, e.MetaHandler(echo.H{`name`: `历史记录`}, History))
		g.Route(`GET,POST`, `/history_view`, e.MetaHandler(echo.H{`name`: `查看历史内容`}, HistoryView))
		g.Route(`GET,POST`, `/history_delete`, e.MetaHandler(echo.H{`name`: `删除历史记录`}, HistoryDelete))

		// Collection rules.
		g.Route(`GET,POST`, `/rule`, e.MetaHandler(echo.H{`name`: `规则列表`}, Rule))
		g.Route(`GET,POST`, `/rule_add`, e.MetaHandler(echo.H{`name`: `添加规则`}, RuleAdd))
		g.Route(`GET,POST`, `/rule_edit`, e.MetaHandler(echo.H{`name`: `修改规则`}, RuleEdit))
		g.Route(`GET,POST`, `/rule_delete`, e.MetaHandler(echo.H{`name`: `删除规则`}, RuleDelete))
		g.Route(`GET,POST`, `/rule_collect`, e.MetaHandler(echo.H{`name`: `采集`}, RuleCollect))

		// Rule groups.
		g.Route(`GET,POST`, `/group`, e.MetaHandler(echo.H{`name`: `任务分组列表`}, Group))
		g.Route(`GET,POST`, `/group_add`, e.MetaHandler(echo.H{`name`: `添加分组`}, GroupAdd))
		g.Route(`GET,POST`, `/group_edit`, e.MetaHandler(echo.H{`name`: `修改分组`}, GroupEdit))
		g.Route(`GET,POST`, `/group_delete`, e.MetaHandler(echo.H{`name`: `删除分组`}, GroupDelete))

		// Utilities.
		g.Route(`GET,POST`, `/regexp_test`, e.MetaHandler(echo.H{`name`: `测试正则表达式`}, RegexpTest))
	})
	cron.AddSYSJob(`collect_page`, CollectPageJob, `>collect_page:1`, `网页采集`)
}
// Rule renders the `collector/rule` list page.
//
// It lists collector pages whose parent_id is 0, newest id first, optionally
// narrowed by the `groupId` form value (exact group) and the `q` form value
// (substring match on name). Each listed row is paired with its group record
// when the row has a group id and a matching group of type `page` exists.
// All groups of type `page` are also exposed to the template as `groupList`.
//
// The first error encountered while loading data is passed to the template
// as the render payload; later errors do not replace it.
func Rule(c echo.Context) error {
	m := model.NewCollectorPage(c)
	groupID := c.Formx(`groupId`).Uint()
	cond := db.Compounds{db.Cond{`parent_id`: 0}}
	if groupID > 0 {
		cond.AddKV(`group_id`, groupID)
	}
	q := c.Formx(`q`).String()
	if len(q) > 0 {
		cond.AddKV(`name`, db.Like(`%`+q+`%`))
	}
	page, size, totalRows, p := handler.PagingWithPagination(c)
	cnt, err := m.List(nil, func(r db.Result) db.Result {
		return r.OrderBy(`-id`)
	}, page, size, cond.And())
	ret := handler.Err(c, err)
	// Only count rows when the pagination helper did not already supply a total.
	if totalRows <= 0 {
		totalRows = int(cnt())
		p.SetRows(totalRows)
	}
	rows := m.Objects()

	// Wrap every row and collect the distinct group ids referenced by them.
	var gIds []uint
	rowAndGroup := make([]*model.CollectorPageAndGroup, len(rows))
	for k, u := range rows {
		rowAndGroup[k] = &model.CollectorPageAndGroup{
			CollectorPage: u,
		}
		if u.GroupId < 1 {
			continue
		}
		if !com.InUintSlice(u.GroupId, gIds) {
			gIds = append(gIds, u.GroupId)
		}
	}

	mg := model.NewCollectorGroup(c)
	var groupList []*dbschema.CollectorGroup
	if len(gIds) > 0 {
		_, err = mg.List(&groupList, nil, 1, 1000, db.And(
			db.Cond{`id IN`: gIds},
			db.Cond{`type`: `page`},
		))
		if err != nil {
			if ret == nil {
				ret = err
			}
		} else {
			for k, v := range rowAndGroup {
				for _, g := range groupList {
					if g.Id == v.GroupId {
						rowAndGroup[k].Group = g
						break
					}
				}
			}
		}
	}
	c.Set(`pagination`, p)
	c.Set(`listData`, rowAndGroup)

	// Load every page-type group for the template. This error used to be
	// discarded; it is now reported like the lookup above so a failed query
	// does not silently render an empty group list.
	if _, err = mg.ListByOffset(&groupList, nil, 0, -1, db.Cond{`type`: `page`}); err != nil && ret == nil {
		ret = err
	}
	c.Set(`groupList`, groupList)
	c.Set(`groupId`, groupID)
	return c.Render(`collector/rule`, ret)
}
// RuleAdd serves the "add rule" form and, on POST, stores a new rule.
//
// POST: the top-level form fields are bound onto a new root collector page
// (keys starting with `rule[` or `extra[` are hidden from the binder and
// handed to parseFormToDb instead). The root page is saved with its rules,
// its own id is written to root_id, and then one child page per
// `extra[index][]` entry is saved, each chained to the previously saved page
// through ParentId. Any failure rolls the transaction back and is returned as
// a JSON error; success is returned as JSON with code 1.
//
// GET: renders `collector/rule_edit`. When `copyId` names an existing page,
// its data pre-fills the form and the form's `id` is reset to `0`.
func RuleAdd(c echo.Context) error {
	user := handler.User(c)
	var err error
	pageM := model.NewCollectorPage(c)
	if c.IsPost() {
		result := c.Data()
		// Nested rule[...] / extra[...] keys are not plain struct fields.
		skipNested := func(key string, values []string) (string, []string) {
			if strings.HasPrefix(key, `rule[`) || strings.HasPrefix(key, `extra[`) {
				return ``, nil
			}
			return key, values
		}
		if err = c.MustBind(pageM.CollectorPage, skipNested); err != nil {
			return c.JSON(result.SetError(err))
		}
		pageM.CollectorPage.Uid = user.Id
		c.Begin()
		// abort undoes the transaction and reports e to the client.
		abort := func(e error) error {
			c.Rollback()
			return c.JSON(result.SetError(e))
		}
		pageM.CollectorPage.Id = 0
		if _, err = parseFormToDb(c, pageM.CollectorPage, `rule`, false); err != nil {
			return abort(err)
		}

		// Parallel arrays describing the child pages; position i across all of
		// them belongs to the same child. (`extra[browser][]` is not read.)
		indexes := c.FormValues(`extra[index][]`)
		urls := c.FormValues(`extra[enterUrl][]`)
		types := c.FormValues(`extra[type][]`)
		scopeRules := c.FormValues(`extra[scopeRule][]`)
		contentTypes := c.FormValues(`extra[contentType][]`)
		charsets := c.FormValues(`extra[charset][]`)

		parentID := pageM.Id
		if err = pageM.CollectorPage.SetField(nil, `root_id`, pageM.Id, `id`, pageM.Id); err != nil {
			return abort(err)
		}
		for pos, index := range indexes {
			// Stop at the first position any of the arrays cannot supply.
			if pos >= len(urls) || pos >= len(types) || pos >= len(contentTypes) ||
				pos >= len(scopeRules) || pos >= len(charsets) {
				break
			}
			child := &dbschema.CollectorPage{
				Uid:         user.Id,
				ParentId:    parentID,
				RootId:      pageM.Id,
				GroupId:     pageM.GroupId,
				Sort:        pos,
				HasChild:    `N`,
				EnterUrl:    urls[pos],
				Type:        types[pos],
				ContentType: contentTypes[pos],
				ScopeRule:   scopeRules[pos],
				Charset:     charsets[pos],
			}
			child.Use(pageM.Trans())
			// The child's rules live under extra[rule][<index>].
			_, err = parseFormToDb(c, child, `extra[rule][`+index+`]`, false)
			if err == nil {
				// The page this child hangs off now has a child.
				err = pageM.CollectorPage.SetField(nil, `has_child`, `Y`, `id`, child.ParentId)
			}
			if err != nil {
				return abort(err)
			}
			// The next child is chained below this one.
			parentID = child.Id
		}
		c.End(err == nil)
		return c.JSON(result.SetInfo(c.T(`操作成功`), 1))
	}

	c.Set(`data`, exec.NewRules())
	if copyID := c.Formx(`copyId`).Uint(); copyID > 0 {
		if err = pageM.Get(nil, `id`, copyID); err == nil {
			setFormData(c, pageM)
			c.Request().Form().Set(`id`, `0`)
		}
	}
	mg := model.NewCollectorGroup(c)
	_, listErr := mg.ListByOffset(nil, nil, 0, -1, db.Cond{`type`: `page`})
	if listErr != nil {
		err = listErr
	}
	c.Set(`groupList`, mg.Objects())
	c.Set(`activeURL`, `/collector/rule`)
	c.Set(`dataTypes`, dataTypeList())
	c.Set(`browserList`, collector.BrowserKeys())
	c.Set(`allFilter`, gopiper.AllFilter())
	return c.Render(`collector/rule_edit`, handler.Err(c, err))
}
// RuleEdit serves the "edit rule" form and, on POST, updates the rule whose
// id is given by the `id` form value.
//
// POST: the top-level form fields are bound onto the root page (keys starting
// with `rule[` or `extra[` are hidden from the binder and handed to
// parseFormToDb). The root page and every child page listed in
// `extra[index][]` are saved together with their rules; rules of a saved page
// that were not part of the submission are deleted. Finally, child pages of
// this root that were not submitted are deleted along with their rules. Any
// failure rolls the transaction back and is returned as a JSON error; success
// is returned as JSON with code 1.
//
// GET: renders `collector/rule_edit` pre-filled with the stored page, or
// redirects to the rule list with a failure message when it cannot be loaded.
func RuleEdit(c echo.Context) error {
	user := handler.User(c)
	id := c.Formx(`id`).Uint()
	pageM := model.NewCollectorPage(c)
	err := pageM.Get(nil, `id`, id)
	if err == db.ErrNoMoreRows {
		err = c.E(`不存在id为%d的数据`, id)
	}
	if c.IsPost() {
		result := c.Data()
		if err != nil {
			return c.JSON(result.SetError(err))
		}
		err = c.MustBind(pageM.CollectorPage, func(key string, values []string) (string, []string) {
			// Nested rule[...] / extra[...] keys are not plain struct fields.
			if strings.HasPrefix(key, `rule[`) || strings.HasPrefix(key, `extra[`) {
				return ``, nil
			}
			return key, values
		})
		if err != nil {
			return c.JSON(result.SetError(err))
		}
		pageM.CollectorPage.Uid = user.Id
		pageM.CollectorPage.Id = id
		c.Begin()
		// abort undoes the transaction and reports e to the client.
		abort := func(e error) error {
			c.Rollback()
			return c.JSON(result.SetError(e))
		}

		var rules []*dbschema.CollectorRule
		// Save the root page configuration and its rules.
		rules, err = parseFormToDb(c, pageM.CollectorPage, `rule`, true)
		if err != nil {
			return abort(err)
		}
		ruleM := model.NewCollectorRule(c)
		// pruneRules deletes the rules of pageID that are not among kept.
		// With nothing kept, every rule of that page is deleted.
		pruneRules := func(pageID uint, kept []*dbschema.CollectorRule) error {
			conds := []db.Compound{
				db.Cond{`page_id`: pageID},
			}
			if len(kept) > 0 {
				keepIDs := make([]uint, 0, len(kept))
				for _, rule := range kept {
					keepIDs = append(keepIDs, rule.Id)
				}
				conds = append(conds, db.Cond{`id`: db.NotIn(keepIDs)})
			}
			return ruleM.Delete(nil, db.And(conds...))
		}
		// Delete root-page rules that are no longer in use.
		if err = pruneRules(id, rules); err != nil {
			return abort(err)
		}

		// Parallel arrays describing the child pages; position i across all of
		// them belongs to the same child. (`extra[browser][]` is not read.)
		pages := c.FormValues(`extra[index][]`)
		pageIds := c.FormxValues(`extra[id][]`).Uint()
		urls := c.FormValues(`extra[enterUrl][]`)
		types := c.FormValues(`extra[type][]`)
		scopeRules := c.FormValues(`extra[scopeRule][]`)
		contentTypes := c.FormValues(`extra[contentType][]`)
		charsets := c.FormValues(`extra[charset][]`)

		parentID := pageM.Id
		var postPageIds []uint
		for key, index := range pages {
			// Stop at the first position any of the arrays cannot supply.
			if key >= len(pageIds) || key >= len(urls) || key >= len(types) ||
				key >= len(contentTypes) || key >= len(scopeRules) || key >= len(charsets) {
				break
			}
			pageData := &dbschema.CollectorPage{
				Id:          pageIds[key],
				Uid:         user.Id,
				ParentId:    parentID,
				RootId:      pageM.Id,
				GroupId:     pageM.GroupId,
				Sort:        key,
				HasChild:    `N`,
				EnterUrl:    urls[key],
				Type:        types[key],
				ContentType: contentTypes[key],
				ScopeRule:   scopeRules[key],
				Charset:     charsets[key],
			}
			pageData.Use(pageM.Trans())
			// Save the child page configuration and its rules, which live
			// under extra[rule][<index>].
			rules, err = parseFormToDb(c, pageData, `extra[rule][`+index+`]`, true)
			if err == nil {
				// The page this child hangs off now has a child.
				err = pageM.CollectorPage.SetField(nil, `has_child`, `Y`, `id`, pageData.ParentId)
			}
			if err != nil {
				return abort(err)
			}
			// Delete child-page rules that are no longer in use.
			if err = pruneRules(pageData.Id, rules); err != nil {
				return abort(err)
			}
			// The next child is chained below this one.
			parentID = pageData.Id
			postPageIds = append(postPageIds, pageData.Id)
		}

		// Child pages of this root that were not part of the submission.
		stale := []db.Compound{
			db.Cond{`root_id`: id},
			db.Cond{`parent_id`: db.Gt(0)},
		}
		if len(postPageIds) > 0 {
			stale = append(stale, db.Cond{`id`: db.NotIn(postPageIds)})
		}
		var cnt func() int64
		cnt, err = pageM.ListByOffset(nil, nil, 0, -1, db.And(stale...))
		// This error used to go unchecked: a failed query with a zero count
		// fell through to the success response after the transaction had been
		// ended as failed.
		if err != nil {
			return abort(err)
		}
		if cnt() > 0 {
			var ids []uint
			for _, pageRow := range pageM.Objects() {
				ids = append(ids, pageRow.Id)
			}
			// Delete the rules of the stale pages first...
			err = ruleM.Delete(nil, db.Cond{`page_id`: db.In(ids)})
			if err == nil {
				// ...then the stale page configurations themselves.
				err = pageM.Delete(nil, db.And(stale...))
			}
			if err != nil {
				return abort(err)
			}
		}
		// Every failure path above has already returned.
		c.End(true)
		return c.JSON(result.SetInfo(c.T(`修改成功`), 1))
	}

	if err != nil {
		handler.SendFail(c, err.Error())
		return c.Redirect(handler.URLFor(`/collector/rule`))
	}
	setFormData(c, pageM)
	mg := model.NewCollectorGroup(c)
	if _, e := mg.ListByOffset(nil, nil, 0, -1, db.Cond{`type`: `page`}); e != nil {
		err = e
	}
	c.Set(`groupList`, mg.Objects())
	c.Set(`activeURL`, `/collector/rule`)
	c.Set(`dataTypes`, dataTypeList())
	c.Set(`browserList`, collector.BrowserKeys())
	c.Set(`allFilter`, gopiper.AllFilter())
	return c.Render(`collector/rule_edit`, handler.Err(c, err))
}
// RuleDelete removes the collector page identified by the `id` form value,
// every page whose root_id is that id, and all rules attached to any of those
// pages. The outcome is reported through handler.SendOk / handler.SendFail
// and the client is always redirected back to the rule list.
func RuleDelete(c echo.Context) error {
	id := c.Formx(`id`).Uint()
	m := model.NewCollectorPage(c)
	backToList := func() error {
		return c.Redirect(handler.URLFor(`/collector/rule`))
	}
	// failAndRollback undoes the transaction and reports e before redirecting.
	failAndRollback := func(e error) error {
		c.Rollback()
		handler.SendFail(c, e.Error())
		return backToList()
	}

	c.Begin()
	err := m.Delete(nil, db.Cond{`id`: id})
	if err != nil {
		handler.SendFail(c, err.Error())
		c.End(false)
		return backToList()
	}

	ruleM := model.NewCollectorRule(c)
	ruleM.Use(m.Trans())
	// Load the descendant pages so their rules can be removed as well.
	if _, err = m.ListByOffset(nil, nil, 0, -1, db.Cond{`root_id`: id}); err != nil {
		return failAndRollback(err)
	}
	ids := []uint{id}
	for _, row := range m.Objects() {
		ids = append(ids, row.Id)
	}
	// Rules first, then the descendant pages themselves.
	err = ruleM.Delete(nil, db.Cond{`page_id`: db.In(ids)})
	if err == nil {
		err = m.Delete(nil, db.Cond{`root_id`: id})
	}
	if err != nil {
		return failAndRollback(err)
	}

	handler.SendOk(c, c.T(`操作成功`))
	c.End(true)
	return backToList()
}
// RuleCollect runs the collection rule identified by the `id` form value.
//
// When the requested format is `json`:
//   - `op=stop` calls Exit for the rule and reports the outcome;
//   - otherwise the collection is handed to Go together with a callback that
//     performs the collection and reports progress through a notice sender.
//     If Go fails the error is returned as JSON, otherwise a "starting"
//     message is returned.
//
// For any other format the collection is run inline via Collect(true, nil,
// nil) and the result is rendered with the `/collector/rule_collect` template.
func RuleCollect(c echo.Context) error {
	id := c.Formx(`id`).Int()
	if id < 1 {
		return c.E(`id值不正确`)
	}
	// clientID is unsigned, so the former `clientID < 0` guard could never
	// fire and has been removed.
	clientID := c.Formx(`clientID`).Uint()
	m := model.NewCollectorPage(c)
	err := m.Get(nil, db.Cond{`id`: id})
	if err != nil {
		return err
	}
	collected, err := m.FullData()
	if err != nil {
		return err
	}
	collected.SetExportFn(export.Export)
	user := handler.User(c)
	if c.Format() == `json` {
		data := c.Data()
		op := c.Form(`op`)
		if op == `stop` {
			_, err = Exit(m.Id)
			if err != nil {
				data.SetError(err)
			} else {
				data.SetInfo(c.T(`采集已终止`))
			}
			return c.JSON(data)
		}
		err = Go(m.Id, collected, func() {
			var noticeSender sender.Notice
			progress := notice.NewProgress()
			if user != nil {
				notice.OpenMessage(user.Username, `collector`)
				defer notice.CloseMessage(user.Username, `collector`)
				noticeSender = func(message interface{}, statusCode int, progs ...*notice.Progress) error {
					msg := notice.NewMessageWithValue(
						`collector`,
						``,
						message,
						statusCode,
					).SetMode(`element`).SetID(id)
					// A progress passed by the caller replaces the tracked one.
					if len(progs) > 0 && progs[0] != nil {
						progress = progs[0]
					}
					msg.SetProgress(progress).CalcPercent().SetClientID(clientID)
					return notice.Send(user.Username, msg)
				}
			} else {
				noticeSender = sender.Default
			}
			// Keep the collection error local: this callback is handed to Go,
			// and writing the handler's captured err from here would clash
			// with the handler's own assignment and read of that variable.
			// NOTE(review): Go presumably runs this asynchronously — confirm.
			_, collectErr := collected.Collect(false, noticeSender, progress)
			if collectErr != nil {
				if exec.ErrForcedExit == collectErr {
					noticeSender(c.T(`[规则:%d] 采集结束`, id)+`: `+c.T(`强制退出`), 0)
				} else {
					noticeSender(c.T(`[规则:%d] 采集出错`, id)+`: `+collectErr.Error(), 0)
				}
				return
			}
			// Normalise the final progress so it reports 100% complete.
			if progress.Total < 0 {
				progress.Total = 0
			}
			progress.Percent = 100
			progress.Finish = progress.Total
			progress.Complete = true
			noticeSender(c.T(`[规则:%d] 采集完毕(%d/%d)`, id, progress.Finish, progress.Total), 1, progress)
		}, c)
		// Only announce the start when Go accepted the task; previously the
		// "starting" message was set unconditionally after the error.
		if err != nil {
			data.SetError(err)
		} else {
			data.SetInfo(c.T(`[规则:%d] 开始采集中...`, id))
		}
		return c.JSON(data)
	}
	result, err := collected.Collect(true, nil, nil)
	if err != nil {
		return err
	}
	c.Set(`data`, m)
	c.Set(`result`, result)
	c.Set(`activeURL`, `/collector/rule`)
	return c.Render(`/collector/rule_collect`, handler.Err(c, err))
}
Go
1
https://gitee.com/admpub/nging.git
git@gitee.com:admpub/nging.git
admpub
nging
nging
v2.1.2

搜索帮助