代码拉取完成,页面将自动刷新
package WebCrawler
import (
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// httpGet is a field-less receiver type. Its methods issue HTTP GET requests
// (optionally with retries) and build absolute URLs.
type httpGet struct{}
// tryGet performs a single HTTP GET on url and parses the response body as
// an HTML document.
//
// It returns an error when the request fails, when the response status is
// not 200 OK, or when the body cannot be parsed. On error the returned
// document is always nil.
func (e *httpGet) tryGet(url string) (*goquery.Document, error) {
	// http.Get goes through http.DefaultClient, which has no timeout: a
	// stalled server would block this call forever and the retry logic in
	// HttpGet would never get a chance to run. The timeout covers the whole
	// exchange, including reading the body.
	const requestTimeout = 30 * time.Second

	// A Client with a nil Transport still uses http.DefaultTransport, so
	// connection pooling is shared across calls.
	client := http.Client{Timeout: requestTimeout}

	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("请求失败,状态码:%d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}
// HttpGet fetches url as a parsed HTML document, retrying on failure.
//
// One attempt is always made. After a failed attempt, up to try further
// attempts are made (none when try is zero or negative). The first
// successful document is returned; if every attempt fails, the result is
// nil together with the error from the last attempt.
func (e *httpGet) HttpGet(url string, try int) (*goquery.Document, error) {
	for retriesLeft := try; ; retriesLeft-- {
		page, fetchErr := e.tryGet(url)
		if fetchErr == nil {
			return page, nil
		}
		// Retry budget exhausted: report the most recent failure.
		if retriesLeft <= 0 {
			return nil, fetchErr
		}
	}
}
// tryGetBin performs a single HTTP GET on url and returns the raw response
// body.
//
// It returns an error when the request fails, when the response status is
// not 200 OK, or when reading the body fails. On error the returned slice
// is always nil.
func (e *httpGet) tryGetBin(url string) ([]byte, error) {
	// http.Get goes through http.DefaultClient, which has no timeout: a
	// stalled server would block this call forever and the retry logic in
	// HttpGetBin would never get a chance to run. The timeout covers the
	// whole exchange, including reading the body, so it is kept generous
	// for binary payloads.
	const requestTimeout = 60 * time.Second

	// A Client with a nil Transport still uses http.DefaultTransport, so
	// connection pooling is shared across calls.
	client := http.Client{Timeout: requestTimeout}

	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("请求失败,状态码:%d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		// io.ReadAll may return partial data alongside the error; drop it.
		return nil, err
	}
	return body, nil
}
// HttpGetBin fetches the raw body at url, retrying on failure.
//
// One attempt is always made. After a failed attempt, up to try further
// attempts are made (none when try is zero or negative). The first
// successful payload is returned; if every attempt fails, the result is
// nil together with the error from the last attempt.
func (e *httpGet) HttpGetBin(url string, try int) ([]byte, error) {
	for retriesLeft := try; ; retriesLeft-- {
		payload, fetchErr := e.tryGetBin(url)
		if fetchErr == nil {
			return payload, nil
		}
		// Retry budget exhausted: report the most recent failure.
		if retriesLeft <= 0 {
			return nil, fetchErr
		}
	}
}
// MakeUrl turns a link found on a page into an absolute URL.
//
//   - A url that already starts with "http://" or "https://" is returned
//     unchanged.
//   - A protocol-relative url ("//other.host/path") inherits the scheme of
//     host when host has one (e.g. "https://site" gives "https://other.host/path").
//   - Anything else is appended to host with exactly one "/" between them;
//     trailing slashes on host are ignored so "https://site/" + "/a" does not
//     become "https://site//a".
//
// Relative urls are resolved against host only, not against any page path.
func (e *httpGet) MakeUrl(host, url string) string {
	switch {
	case strings.HasPrefix(url, "http://"), strings.HasPrefix(url, "https://"):
		return url
	case strings.HasPrefix(url, "//"):
		// Protocol-relative link: reuse the scheme of host. If host has no
		// scheme there is nothing to inherit, so fall through to the plain
		// join below.
		if i := strings.Index(host, "://"); i > 0 {
			return host[:i] + ":" + url
		}
	}

	base := strings.TrimRight(host, "/")
	if strings.HasPrefix(url, "/") {
		return base + url
	}
	return base + "/" + url
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。