Ai
1 Star 0 Fork 1

egege/colly

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
scraper_server.go 1.25 KB
一键复制 编辑 原始数据 按行查看 历史
Adam Tauber 提交于 2018-04-05 19:08 +08:00 . [fix] use valid local address
package main
import (
"encoding/json"
"log"
"net/http"
"github.com/gocolly/colly"
)
type pageInfo struct {
StatusCode int
Links map[string]int
}
func handler(w http.ResponseWriter, r *http.Request) {
URL := r.URL.Query().Get("url")
if URL == "" {
log.Println("missing URL argument")
return
}
log.Println("visiting", URL)
c := colly.NewCollector()
p := &pageInfo{Links: make(map[string]int)}
// count links
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
link := e.Request.AbsoluteURL(e.Attr("href"))
if link != "" {
p.Links[link]++
}
})
// extract status code
c.OnResponse(func(r *colly.Response) {
log.Println("response received", r.StatusCode)
p.StatusCode = r.StatusCode
})
c.OnError(func(r *colly.Response, err error) {
log.Println("error:", r.StatusCode, err)
p.StatusCode = r.StatusCode
})
c.Visit(URL)
// dump results
b, err := json.Marshal(p)
if err != nil {
log.Println("failed to serialize response:", err)
return
}
w.Header().Add("Content-Type", "application/json")
w.Write(b)
}
func main() {
// example usage: curl -s 'http://127.0.0.1:7171/?url=http://go-colly.org/'
addr := ":7171"
http.HandleFunc("/", handler)
log.Println("listening on", addr)
log.Fatal(http.ListenAndServe(addr, nil))
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/egege_gitee/colly.git
git@gitee.com:egege_gitee/colly.git
egege_gitee
colly
colly
v1.2.0

搜索帮助