// (Hosting-page residue, not part of the program:) Code pull complete; the page will refresh automatically.
package main
import (
"fmt"
"github.com/doumadou/mahonia"
"io/ioutil"
"net/http"
"regexp"
)
// Get fetches url with an HTTP GET and returns the response body together
// with a status code.
//
// When the request and the body read both succeed, statusCode is the HTTP
// status code reported by the server (200, 404, 500, ...), so callers that
// compare against 200 do not mistake an error page for real content.
// Two negative sentinel values report local failures:
//
//	-100  the request could not be performed (bad URL, connection error, ...)
//	-200  the response body could not be read
//
// content is nil whenever one of the sentinel values is returned.
func Get(url string) (content []byte, statusCode int) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, -100
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, -200
	}

	// Pass the server's own status through instead of assuming success.
	return body, resp.StatusCode
}
// charsetPattern locates a charset declaration inside a page, e.g.
// `charset=gb2312`, `charset="utf-8"` or `CHARSET='GBK'`. The first submatch
// is the charset name; hyphens are allowed so names such as "utf-8" are not
// cut short.
var charsetPattern = regexp.MustCompile(`(?i)charset=["']?([\w-]+)`)

// GetUtf8 downloads url with Get and returns the page converted to UTF-8.
//
// The source encoding is taken from the first charset declaration found in
// the downloaded bytes; when none is present the page is treated as "utf8".
// statusCode is whatever Get returned; content is empty unless it is 200.
func GetUtf8(url string) (content string, statusCode int) {
	raw, code := Get(url)
	if code != 200 {
		return "", code
	}

	charset := "utf8"
	if m := charsetPattern.FindSubmatch(raw); m != nil {
		charset = string(m[1])
	}

	decoder := mahonia.NewDecoder(charset)
	if decoder == nil {
		// NOTE(review): mahonia appears to return a nil decoder for charset
		// names it does not know; calling it would panic. Hand back the
		// bytes unconverted rather than crash.
		return string(raw), code
	}
	return decoder.ConvertString(string(raw)), code
}
// GetName pulls the name enclosed in 《 》 out of the page title.
//
// It takes the first `title>...<` span in content, then the widest 《...》
// span inside that, and finally removes every 《, 》 and whitespace
// character from it. The result is "" when either span is missing.
func GetName(content string) (name string) {
	titleRe := regexp.MustCompile(`title>.*<`)
	bracketRe := regexp.MustCompile(`《.*》`)
	stripRe := regexp.MustCompile(`[《》\s]`)

	bracketed := bracketRe.FindString(titleRe.FindString(content))
	return stripRe.ReplaceAllString(bracketed, "")
}
// linkPattern matches the opening of an anchor tag, `<a href="URL"`, and
// captures URL. `[^"]*` stops at the quote that closes the href value, so
// later attributes or further links on the same line are not swallowed.
var linkPattern = regexp.MustCompile(`<a href="([^"]*)"`)

// GetAllLinks returns the href value of every `<a href="...">` anchor in
// content whose `<a href="URL"` text matches keyWord, in document order.
//
// keyWord is used as a regular expression. If it is not a valid pattern
// (for example it contains an unbalanced parenthesis) it is matched as
// literal text instead of panicking. links is nil when nothing matches.
func GetAllLinks(content string, keyWord string) (links []string) {
	keyRe, err := regexp.Compile(keyWord)
	if err != nil {
		// QuoteMeta output is always a valid pattern, so MustCompile is safe.
		keyRe = regexp.MustCompile(regexp.QuoteMeta(keyWord))
	}

	for _, m := range linkPattern.FindAllStringSubmatch(content, -1) {
		// m[0] is the whole `<a href="URL"` text, m[1] is just URL.
		if keyRe.MatchString(m[0]) {
			links = append(links, m[1])
		}
	}
	return links
}
// main downloads one hard-coded page, prints the name found in its title,
// and then prints every link on the page that matches that name.
func main() {
	content, statusCode := GetUtf8("http://www.dy2018.com/i/95808.html")
	if statusCode != 200 {
		// fmt.Errorf only builds an error value and prints nothing, so the
		// failure has to be written out explicitly to be visible.
		fmt.Printf("status code: %d\n", statusCode)
		return
	}

	name := GetName(content)
	fmt.Println(name)

	links := GetAllLinks(content, name)
	fmt.Println(links)
}
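// (Hosting-page residue, not part of the program:) This location may contain content unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing features.
// If you confirm that the content involves no inappropriate language, pure advertising or traffic diversion, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or content that violates relevant national laws and regulations, you can click Submit to appeal, and we will handle it as soon as possible.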