灵狐/go-web-crawler

HttpGet.go
package WebCrawler

import (
	"fmt"
	"io"
	"net/http"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// httpGet bundles simple HTTP GET helpers with retry support for the crawler.
type httpGet struct {
}
// tryGet performs a single GET request and parses the response body
// into a goquery document.
func (e *httpGet) tryGet(url string) (*goquery.Document, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("request failed, status code: %d", resp.StatusCode)
	}
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}
// HttpGet fetches url and parses it into a goquery document, making one
// initial attempt and then up to try retries before giving up.
func (e *httpGet) HttpGet(url string, try int) (*goquery.Document, error) {
	// First attempt.
	doc, err := e.tryGet(url)
	if err == nil {
		return doc, nil
	}
	// Retries.
	for i := 0; i < try; i++ {
		doc, err = e.tryGet(url)
		if err != nil {
			continue
		}
		return doc, nil
	}
	return nil, err
}
// tryGetBin performs a single GET request and returns the raw response body.
func (e *httpGet) tryGetBin(url string) ([]byte, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("request failed, status code: %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
// HttpGetBin fetches url as raw bytes, making one initial attempt and then
// up to try retries before giving up.
func (e *httpGet) HttpGetBin(url string, try int) ([]byte, error) {
	// First attempt.
	bin, err := e.tryGetBin(url)
	if err == nil {
		return bin, nil
	}
	// Retries.
	for i := 0; i < try; i++ {
		bin, err = e.tryGetBin(url)
		if err != nil {
			continue
		}
		return bin, nil
	}
	return nil, err
}
// MakeUrl resolves a possibly relative url against host: absolute URLs are
// returned unchanged, otherwise host and url are joined with a single slash.
func (e *httpGet) MakeUrl(host, url string) string {
	href := ""
	if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
		href = url
	} else if strings.HasPrefix(url, "/") {
		href = host + url
	} else {
		href = host + "/" + url
	}
	return href
}
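
A minimal usage sketch, not part of HttpGet.go: it assumes a caller in the same WebCrawler package (the httpGet type is unexported), and the example host and the printLinks name are illustrative only.

// printLinks is a hypothetical caller in the same package: it fetches a page
// with up to three retries, resolves each anchor's href against the host,
// and also downloads one binary resource with the same retry policy.
func printLinks() error {
	g := &httpGet{}
	host := "https://example.com" // illustrative host

	// One attempt plus up to 3 retries, parsed into a goquery document.
	doc, err := g.HttpGet(host, 3)
	if err != nil {
		return err
	}

	// Resolve relative links against the host and print them.
	doc.Find("a").Each(func(_ int, s *goquery.Selection) {
		if href, ok := s.Attr("href"); ok {
			fmt.Println(g.MakeUrl(host, href))
		}
	})

	// Fetch raw bytes (e.g. an image) with the same retry policy.
	bin, err := g.HttpGetBin(g.MakeUrl(host, "/favicon.ico"), 3)
	if err != nil {
		return err
	}
	fmt.Printf("downloaded %d bytes\n", len(bin))
	return nil
}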