1 Star 0 Fork 0

yinqiang / follow

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
follow.go 1.74 KB
一键复制 编辑 原始数据 按行查看 历史
yinqiang 提交于 2015-11-25 16:07 . get links
package main
import (
"fmt"
"github.com/doumadou/mahonia"
"io/ioutil"
"net/http"
"regexp"
)
// Get performs an HTTP GET on url and returns the raw response body together
// with a status indicator.
//
// statusCode is:
//   - -100 when the request itself fails (http.Get returned an error);
//   - -200 when the response body cannot be read completely;
//   - otherwise the HTTP status code reported by the server.
//
// Callers that compare the result against 200 therefore no longer mistake
// error pages (404, 500, ...) for a successful download.
func Get(url string) (content []byte, statusCode int) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, -100
	}
	defer resp.Body.Close()

	content, err = ioutil.ReadAll(resp.Body)
	if err != nil {
		// Whatever was read before the failure is still handed back, as before.
		return content, -200
	}
	return content, resp.StatusCode
}
// charsetRe captures the encoding name from the first `charset=...`
// declaration in a page. An optional opening quote is skipped and hyphens are
// accepted, so `charset=gb2312`, `charset=utf-8` and `charset="utf-8"` all
// yield a usable name.
var charsetRe = regexp.MustCompile(`charset=["']?([\w-]+)`)

// GetUtf8 downloads url with Get and returns the body converted to a UTF-8
// string.
//
// The source encoding is taken from the first `charset=` declaration found in
// the body; when none is present "utf8" is assumed. If the status returned by
// Get is not 200, content is left empty and that status is passed through.
func GetUtf8(url string) (content string, statusCode int) {
	originContent, statusCode := Get(url)
	if statusCode != 200 {
		return "", statusCode
	}

	charset := "utf8"
	if m := charsetRe.FindSubmatch(originContent); m != nil {
		charset = string(m[1])
	}

	charsetDecoder := mahonia.NewDecoder(charset)
	if charsetDecoder == nil {
		// NOTE(review): assumes mahonia.NewDecoder yields nil for a charset it
		// does not know — confirm against the library. Passing the bytes
		// through unchanged avoids calling a method on a nil decoder.
		return string(originContent), statusCode
	}
	return charsetDecoder.ConvertString(string(originContent)), statusCode
}
// GetName extracts a work's name from an HTML page.
//
// It takes the first `title>...<` span in content, picks the text wrapped in
// 《》 inside that span, and returns it with the 《》 marks and all whitespace
// removed. An empty string is returned when either pattern is not found.
func GetName(content string) (name string) {
	titleRe := regexp.MustCompile(`title>.*<`)
	quotedRe := regexp.MustCompile(`《.*》`)
	stripRe := regexp.MustCompile(`[《》\s]`)

	quoted := quotedRe.FindString(titleRe.FindString(content))
	return stripRe.ReplaceAllString(quoted, "")
}
// anchorHrefRe matches the opening `<a href="..."` of an anchor tag and
// captures the URL. The capture stops at the first closing quote, so further
// attributes or other anchors on the same line are not swallowed into it.
var anchorHrefRe = regexp.MustCompile(`<a href="([^"\n]*)"`)

// GetAllLinks returns the href value of every `<a href="...">` tag in content
// whose `<a href="..."` text matches keyWord.
//
// keyWord is interpreted as a regular expression. If it does not compile
// (for example a scraped title containing an unbalanced parenthesis) it is
// matched as literal text instead of panicking. links is nil when nothing
// matches.
func GetAllLinks(content string, keyWord string) (links []string) {
	regKeyWord, err := regexp.Compile(keyWord)
	if err != nil {
		// QuoteMeta output is always a valid pattern, so MustCompile is safe.
		regKeyWord = regexp.MustCompile(regexp.QuoteMeta(keyWord))
	}

	for _, m := range anchorHrefRe.FindAllStringSubmatch(content, -1) {
		// m[0] is the whole `<a href="..."` text, m[1] the URL alone.
		if regKeyWord.MatchString(m[0]) {
			links = append(links, m[1])
		}
	}
	return links
}
// main downloads a fixed movie page, prints the name extracted from its
// title, and then prints every link on the page whose anchor tag matches that
// name. When the download does not report status 200, the status is printed
// and the program stops.
func main() {
	content, statusCode := GetUtf8("http://www.dy2018.com/i/95808.html")
	if statusCode != 200 {
		// fmt.Errorf only builds an error value; printing is what makes the
		// failure visible to the user.
		fmt.Printf("status code: %d\n", statusCode)
		return
	}

	name := GetName(content)
	fmt.Println(name)
	fmt.Println(GetAllLinks(content, name))
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/yinqiang/follow.git
git@gitee.com:yinqiang/follow.git
yinqiang
follow
follow
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891