Fetch the repository succeeded.
package main
import (
"fmt"
"os"
"time"
"github.com/gocolly/colly"
)
// item is one story scraped from a subreddit listing page.
type item struct {
	StoryURL  string    // href of the story's title link
	Source    string    // listing the story is attributed to
	CrawledAt time.Time // time at which the story was scraped
	Comments  string    // href of the story's comments link
	Title     string    // text of the story's title link
}
func main() {
stories := []item{}
// Instantiate default collector
c := colly.NewCollector(
// Visit only domains: old.reddit.com
colly.AllowedDomains("old.reddit.com"),
colly.Async(true),
)
// On every a element which has .top-matter attribute call callback
// This class is unique to the div that holds all information about a story
c.OnHTML(".top-matter", func(e *colly.HTMLElement) {
temp := item{}
temp.StoryURL = e.ChildAttr("a[data-event-action=title]", "href")
temp.Source = "https://old.reddit.com/r/programming/"
temp.Title = e.ChildText("a[data-event-action=title]")
temp.Comments = e.ChildAttr("a[data-event-action=comments]", "href")
temp.CrawledAt = time.Now()
stories = append(stories, temp)
})
// On every span tag with the class next-button
c.OnHTML("span.next-button", func(h *colly.HTMLElement) {
t := h.ChildAttr("a", "href")
c.Visit(t)
})
// Set max Parallelism and introduce a Random Delay
c.Limit(&colly.LimitRule{
Parallelism: 2,
RandomDelay: 5 * time.Second,
})
// Before making a request print "Visiting ..."
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL.String())
})
// Crawl all reddits the user passes in
reddits := os.Args[1:]
for _, reddit := range reddits {
c.Visit(reddit)
}
c.Wait()
fmt.Println(stories)
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。