1 Star 0 Fork 0

cajan2/SW-Crawler

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.go 14.38 KB
一键复制 编辑 原始数据 按行查看 历史
liuyt 提交于 2018-12-21 22:33 . modified
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
package main
import (
"database/sql"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"regexp"
"strings"
"github.com/Go-GraphQL-Group/SW-Crawler/model"
mySql "github.com/Go-GraphQL-Group/SW-Crawler/sql"
"github.com/boltdb/bolt"
_ "github.com/go-sql-driver/mysql"
)
// Names of the bolt buckets used by storeData and main, one per resource type.
const peopleBucket = "People"
const filmsBucket = "Film"
const planetsBucket = "Planet"
const speciesBucket = "Specie"
const starshipsBucket = "Starship"
const vehiclesBucket = "Vehicle"
// MySQL connection parameters; main joins them into the data source name
// handed to sql.Open.
const (
userName = "root"
password = ""
ip = "127.0.0.1"
port = "3306"
dbName = "data"
)
// origin is the URL prefix of the remote API. The *Get functions replace every
// occurrence of it in a response body with replace.
const origin = "https://swapi.co/api/"
// origin2 is a regular expression source string (compiled in storeData and
// main) matching the local query prefix followed by one resource path segment.
const origin2 = "http://localhost:8080/query/+[a-zA-Z_]+/"
// replace is the local URL prefix substituted for origin.
const replace = "http://localhost:8080/query/"
// NoneRe is the empty replacement main uses to strip origin2 matches.
const NoneRe = ""
// "people": "https://swapi.co/api/people/?format=json"
// "planets": "https://swapi.co/api/planets/?format=json"
// "films": "https://swapi.co/api/films/?format=json"
// "species": "https://swapi.co/api/species/?format=json"
// "vehicles": "https://swapi.co/api/vehicles/?format=json"
// "starships": "https://swapi.co/api/starships/?format=json"
// peopleGet downloads one page of the people listing from url and decodes it.
//
// It returns the "next" link exactly as the remote API sent it (empty when the
// page carries none), together with the page decoded from a copy of the body
// in which every occurrence of origin has been replaced by replace, so the
// URLs held in the returned page use the local prefix.
//
// Any failure (request, non-200 status, read, decode) goes through CheckErr,
// which terminates the program.
func peopleGet(url string) (string, *model.PeopleRes) {
	res, err := http.Get(url)
	CheckErr(err)
	defer res.Body.Close()
	// An error payload could otherwise decode "successfully" into an empty page.
	if res.StatusCode != http.StatusOK {
		CheckErr(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}
	body, err := ioutil.ReadAll(res.Body)
	CheckErr(err)

	// Decode the untouched body first, only to capture the real upstream
	// "next" link before the prefix rewrite below changes it.
	page := &model.PeopleRes{}
	CheckErr(json.Unmarshal(body, page))
	next := page.Next

	// origin is a literal URL, not a pattern: quote it so "." matches only a
	// dot, and replace literally so "$" in the replacement is never expanded.
	re := regexp.MustCompile(regexp.QuoteMeta(origin))
	rewritten := re.ReplaceAllLiteralString(string(body), replace)
	CheckErr(json.Unmarshal([]byte(rewritten), page))
	return next, page
}
// getAllPeople walks the paginated people listing, starting at the first page
// and following each page's "next" link until it is empty, and returns every
// record collected along the way.
func getAllPeople() []model.People {
	var collected []model.People
	for url := "https://swapi.co/api/people/?format=json"; url != ""; {
		following, page := peopleGet(url)
		collected = append(collected, page.Result...)
		url = following
	}
	return collected
}
// planetsGet downloads one page of the planets listing from url and decodes it.
//
// It returns the "next" link exactly as the remote API sent it (empty when the
// page carries none), together with the page decoded from a copy of the body
// in which every occurrence of origin has been replaced by replace, so the
// URLs held in the returned page use the local prefix.
//
// Any failure (request, non-200 status, read, decode) goes through CheckErr,
// which terminates the program.
func planetsGet(url string) (string, *model.PlanetsRes) {
	res, err := http.Get(url)
	CheckErr(err)
	defer res.Body.Close()
	// An error payload could otherwise decode "successfully" into an empty page.
	if res.StatusCode != http.StatusOK {
		CheckErr(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}
	body, err := ioutil.ReadAll(res.Body)
	CheckErr(err)

	// Decode the untouched body first, only to capture the real upstream
	// "next" link before the prefix rewrite below changes it.
	page := &model.PlanetsRes{}
	CheckErr(json.Unmarshal(body, page))
	next := page.Next

	// origin is a literal URL, not a pattern: quote it so "." matches only a
	// dot, and replace literally so "$" in the replacement is never expanded.
	re := regexp.MustCompile(regexp.QuoteMeta(origin))
	rewritten := re.ReplaceAllLiteralString(string(body), replace)
	CheckErr(json.Unmarshal([]byte(rewritten), page))
	return next, page
}
// getAllPlanets walks the paginated planets listing, starting at the first
// page and following each page's "next" link until it is empty, and returns
// every record collected along the way.
func getAllPlanets() []model.Planet {
	var collected []model.Planet
	for url := "https://swapi.co/api/planets/?format=json"; url != ""; {
		following, page := planetsGet(url)
		collected = append(collected, page.Result...)
		url = following
	}
	return collected
}
// filmsGet downloads one page of the films listing from url and decodes it.
//
// It returns the "next" link exactly as the remote API sent it (empty when the
// page carries none), together with the page decoded from a copy of the body
// in which every occurrence of origin has been replaced by replace, so the
// URLs held in the returned page use the local prefix.
//
// Any failure (request, non-200 status, read, decode) goes through CheckErr,
// which terminates the program.
func filmsGet(url string) (string, *model.FilmsRes) {
	res, err := http.Get(url)
	CheckErr(err)
	defer res.Body.Close()
	// An error payload could otherwise decode "successfully" into an empty page.
	if res.StatusCode != http.StatusOK {
		CheckErr(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}
	body, err := ioutil.ReadAll(res.Body)
	CheckErr(err)

	// Decode the untouched body first, only to capture the real upstream
	// "next" link before the prefix rewrite below changes it.
	page := &model.FilmsRes{}
	CheckErr(json.Unmarshal(body, page))
	next := page.Next

	// origin is a literal URL, not a pattern: quote it so "." matches only a
	// dot, and replace literally so "$" in the replacement is never expanded.
	re := regexp.MustCompile(regexp.QuoteMeta(origin))
	rewritten := re.ReplaceAllLiteralString(string(body), replace)
	CheckErr(json.Unmarshal([]byte(rewritten), page))
	return next, page
}
// getAllFilms walks the paginated films listing, starting at the first page
// and following each page's "next" link until it is empty, and returns every
// record collected along the way.
//
// The loop is driven by the upstream "next" link returned by filmsGet (the
// same value that is fetched on the following iteration), matching the other
// getAll* functions, rather than by the rewritten link stored in the page.
func getAllFilms() []model.Film {
	var allFilms []model.Film
	for url := "https://swapi.co/api/films/?format=json"; url != ""; {
		next, res := filmsGet(url)
		allFilms = append(allFilms, res.Result...)
		url = next
	}
	return allFilms
}
// speciesGet downloads one page of the species listing from url and decodes it.
//
// It returns the "next" link exactly as the remote API sent it (empty when the
// page carries none), together with the page decoded from a copy of the body
// in which every occurrence of origin has been replaced by replace, so the
// URLs held in the returned page use the local prefix.
//
// Any failure (request, non-200 status, read, decode) goes through CheckErr,
// which terminates the program.
func speciesGet(url string) (string, *model.SpeciesRes) {
	res, err := http.Get(url)
	CheckErr(err)
	defer res.Body.Close()
	// An error payload could otherwise decode "successfully" into an empty page.
	if res.StatusCode != http.StatusOK {
		CheckErr(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}
	body, err := ioutil.ReadAll(res.Body)
	CheckErr(err)

	// Decode the untouched body first, only to capture the real upstream
	// "next" link before the prefix rewrite below changes it.
	page := &model.SpeciesRes{}
	CheckErr(json.Unmarshal(body, page))
	next := page.Next

	// origin is a literal URL, not a pattern: quote it so "." matches only a
	// dot, and replace literally so "$" in the replacement is never expanded.
	re := regexp.MustCompile(regexp.QuoteMeta(origin))
	rewritten := re.ReplaceAllLiteralString(string(body), replace)
	CheckErr(json.Unmarshal([]byte(rewritten), page))
	return next, page
}
// getAllSpecies walks the paginated species listing, starting at the first
// page and following each page's "next" link until it is empty, and returns
// every record collected along the way.
func getAllSpecies() []model.Species {
	var collected []model.Species
	for url := "https://swapi.co/api/species/?format=json"; url != ""; {
		following, page := speciesGet(url)
		collected = append(collected, page.Result...)
		url = following
	}
	return collected
}
// vehiclesGet downloads one page of the vehicles listing from url and decodes
// it.
//
// It returns the "next" link exactly as the remote API sent it (empty when the
// page carries none), together with the page decoded from a copy of the body
// in which every occurrence of origin has been replaced by replace, so the
// URLs held in the returned page use the local prefix.
//
// Any failure (request, non-200 status, read, decode) goes through CheckErr,
// which terminates the program.
func vehiclesGet(url string) (string, *model.VehiclesRes) {
	res, err := http.Get(url)
	CheckErr(err)
	defer res.Body.Close()
	// An error payload could otherwise decode "successfully" into an empty page.
	if res.StatusCode != http.StatusOK {
		CheckErr(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}
	body, err := ioutil.ReadAll(res.Body)
	CheckErr(err)

	// Decode the untouched body first, only to capture the real upstream
	// "next" link before the prefix rewrite below changes it.
	page := &model.VehiclesRes{}
	CheckErr(json.Unmarshal(body, page))
	next := page.Next

	// origin is a literal URL, not a pattern: quote it so "." matches only a
	// dot, and replace literally so "$" in the replacement is never expanded.
	re := regexp.MustCompile(regexp.QuoteMeta(origin))
	rewritten := re.ReplaceAllLiteralString(string(body), replace)
	CheckErr(json.Unmarshal([]byte(rewritten), page))
	return next, page
}
// getAllVehicles walks the paginated vehicles listing, starting at the first
// page and following each page's "next" link until it is empty, and returns
// every record collected along the way.
func getAllVehicles() []model.Vehicle {
	var collected []model.Vehicle
	for url := "https://swapi.co/api/vehicles/?format=json"; url != ""; {
		following, page := vehiclesGet(url)
		collected = append(collected, page.Result...)
		url = following
	}
	return collected
}
// starshipsGet downloads one page of the starships listing from url and
// decodes it.
//
// It returns the "next" link exactly as the remote API sent it (empty when the
// page carries none), together with the page decoded from a copy of the body
// in which every occurrence of origin has been replaced by replace, so the
// URLs held in the returned page use the local prefix.
//
// Any failure (request, non-200 status, read, decode) goes through CheckErr,
// which terminates the program.
func starshipsGet(url string) (string, *model.StarshipsRes) {
	res, err := http.Get(url)
	CheckErr(err)
	defer res.Body.Close()
	// An error payload could otherwise decode "successfully" into an empty page.
	if res.StatusCode != http.StatusOK {
		CheckErr(fmt.Errorf("GET %s: unexpected status %s", url, res.Status))
	}
	body, err := ioutil.ReadAll(res.Body)
	CheckErr(err)

	// Decode the untouched body first, only to capture the real upstream
	// "next" link before the prefix rewrite below changes it.
	page := &model.StarshipsRes{}
	CheckErr(json.Unmarshal(body, page))
	next := page.Next

	// origin is a literal URL, not a pattern: quote it so "." matches only a
	// dot, and replace literally so "$" in the replacement is never expanded.
	re := regexp.MustCompile(regexp.QuoteMeta(origin))
	rewritten := re.ReplaceAllLiteralString(string(body), replace)
	CheckErr(json.Unmarshal([]byte(rewritten), page))
	return next, page
}
// getAllStarships walks the paginated starships listing, starting at the
// first page and following each page's "next" link until it is empty, and
// returns every record collected along the way.
func getAllStarships() []model.Starship {
	var collected []model.Starship
	for url := "https://swapi.co/api/starships/?format=json"; url != ""; {
		following, page := starshipsGet(url)
		collected = append(collected, page.Result...)
		url = following
	}
	return collected
}
// CheckErr aborts the program when err is non-nil: it prints the error to
// standard output after the prefix "Error occur: " and exits with status 1.
// A nil err is a no-op.
func CheckErr(err error) {
	if err == nil {
		return
	}
	fmt.Println("Error occur: ", err)
	os.Exit(1)
}
// storeData crawls every resource type from the remote API and persists each
// record as JSON in the bolt database at ./data/bolt/data.db.
//
// Records go into one bucket per resource type, keyed by the ID derived from
// the record's Url field (see resourceID). After each write the stored value
// is read back and printed to standard output.
//
// Any failure terminates the program: log.Fatal when the database cannot be
// opened, CheckErr for everything else. Errors are returned out of the bolt
// transactions before CheckErr is called, so the process never exits from
// inside an open transaction.
func storeData() {
	db, err := bolt.Open("./data/bolt/data.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Make sure every bucket exists before any record is written.
	buckets := []string{
		peopleBucket, planetsBucket, filmsBucket,
		speciesBucket, vehiclesBucket, starshipsBucket,
	}
	CheckErr(db.Update(func(tx *bolt.Tx) error {
		for _, name := range buckets {
			if _, err := tx.CreateBucketIfNotExists([]byte(name)); err != nil {
				return err
			}
		}
		return nil
	}))

	// Compiled once and shared by every record instead of once per record.
	idRe := regexp.MustCompile(origin2)

	// store people data
	fmt.Println("people")
	for _, it := range getAllPeople() {
		it.ID = resourceID(idRe, it.Url)
		CheckErr(storeRecord(db, peopleBucket, it.ID, it))
	}

	// store planets data
	fmt.Println("planets")
	for _, it := range getAllPlanets() {
		it.ID = resourceID(idRe, it.Url)
		CheckErr(storeRecord(db, planetsBucket, it.ID, it))
	}

	// store films data
	fmt.Println("films")
	for _, it := range getAllFilms() {
		it.ID = resourceID(idRe, it.Url)
		CheckErr(storeRecord(db, filmsBucket, it.ID, it))
	}

	// store species data
	fmt.Println("species")
	for _, it := range getAllSpecies() {
		it.ID = resourceID(idRe, it.Url)
		CheckErr(storeRecord(db, speciesBucket, it.ID, it))
	}

	// store vehicles data
	fmt.Println("vehicles")
	for _, it := range getAllVehicles() {
		it.ID = resourceID(idRe, it.Url)
		CheckErr(storeRecord(db, vehiclesBucket, it.ID, it))
	}

	// store starships data
	fmt.Println("starships")
	for _, it := range getAllStarships() {
		it.ID = resourceID(idRe, it.Url)
		CheckErr(storeRecord(db, starshipsBucket, it.ID, it))
	}
}

// resourceID derives a record ID from its URL by removing every match of re
// and then one trailing "/", if present. Unlike slicing off the last byte
// unconditionally, this cannot panic on an empty result and does not eat a
// real character when the URL has no trailing slash.
func resourceID(re *regexp.Regexp, url string) string {
	return strings.TrimSuffix(re.ReplaceAllString(url, ""), "/")
}

// storeRecord marshals record to JSON, writes it under key id in the named
// bucket, then reads the stored value back and prints it on its own line.
// It returns the first error encountered.
func storeRecord(db *bolt.DB, bucket, id string, record interface{}) error {
	data, err := json.Marshal(record)
	if err != nil {
		return err
	}
	err = db.Update(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte(bucket))
		if b == nil {
			return fmt.Errorf("bucket %q does not exist", bucket)
		}
		return b.Put([]byte(id), data)
	})
	if err != nil {
		return err
	}
	return db.View(func(tx *bolt.Tx) error {
		// The bucket is known to exist: the Update above just wrote to it.
		fmt.Printf("%s\n", tx.Bucket([]byte(bucket)).Get([]byte(id)))
		return nil
	})
}
// DB is the package-level MySQL connection pool; main assigns it.
var DB *sql.DB

// main copies every record stored in the bolt database at ./data/bolt/data.db
// into MySQL.
//
// It opens the bolt file, opens and pings the MySQL connection described by
// the userName/password/ip/port/dbName constants, then walks the people,
// films, planets, species, starships and vehicles buckets in that order. For
// each stored value it strips every match of origin2, decodes the result into
// the matching model type and passes it to the corresponding mySql.Insert*
// function.
//
// A record that fails to decode is logged and skipped rather than inserted as
// a zero value; a missing bucket is logged and the remaining buckets are
// still processed.
func main() {
	// The bolt buckets read below are filled by storeData; uncomment to
	// re-crawl before importing.
	// storeData()
	/* test get people */
	// people := dbOp.GetPeopleByID("1")
	// fmt.Println(people)
	/* test get film */
	// film := dbOp.GetFilmByID("1")
	// fmt.Println(film)
	/* test get planet */
	// planet := dbOp.GetPlanetByID("1")
	// fmt.Println(planet)
	/* test get specie */
	// specie := dbOp.GetSpeciesByID("1")
	// fmt.Println(specie)
	/* test get starship */
	// starship := dbOp.GetStarshipByID("10")
	// fmt.Println(starship)
	/* test get vehicle */
	// _, vehicle := dbOp.GetVehicleByID("14")
	// fmt.Println(vehicle)

	db, err := bolt.Open("./data/bolt/data.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Build the DSN: "user:password@tcp(ip:port)/database?charset=utf8".
	path := strings.Join([]string{userName, ":", password, "@tcp(", ip, ":", port, ")/", dbName, "?charset=utf8"}, "")

	// Open the database. The first argument is the driver name, which is why
	// _ "github.com/go-sql-driver/mysql" is imported.
	// Plain assignment (not :=) so the package-level DB is set instead of
	// being shadowed by a local variable.
	DB, err = sql.Open("mysql", path)
	if err != nil {
		fmt.Println("opon database fail")
		return
	}
	defer DB.Close()

	// Cap the number of open connections. (The previous call,
	// SetConnMaxLifetime(100), set a 100ns connection lifetime instead.)
	DB.SetMaxOpenConns(100)
	// Cap the number of idle connections kept in the pool.
	DB.SetMaxIdleConns(10)

	// Verify the connection.
	if err := DB.Ping(); err != nil {
		fmt.Println("opon database fail")
		return
	}
	fmt.Println("connnect success")

	/* insert */
	// Compiled once and shared by every record instead of once per record.
	re := regexp.MustCompile(origin2)

	steps := []struct {
		bucket string
		insert func(data []byte) error
	}{
		{peopleBucket, func(data []byte) error {
			people := &model.People{}
			if err := json.Unmarshal(data, people); err != nil {
				return err
			}
			mySql.InsertPeople(DB, people)
			return nil
		}},
		{filmsBucket, func(data []byte) error {
			film := &model.Film{}
			if err := json.Unmarshal(data, film); err != nil {
				return err
			}
			mySql.InsertFilm(DB, film)
			return nil
		}},
		{planetsBucket, func(data []byte) error {
			planet := &model.Planet{}
			if err := json.Unmarshal(data, planet); err != nil {
				return err
			}
			mySql.InsertPlanet(DB, planet)
			return nil
		}},
		{speciesBucket, func(data []byte) error {
			specie := &model.Species{}
			if err := json.Unmarshal(data, specie); err != nil {
				return err
			}
			mySql.InsertSpecie(DB, specie)
			return nil
		}},
		{starshipsBucket, func(data []byte) error {
			starship := &model.Starship{}
			if err := json.Unmarshal(data, starship); err != nil {
				return err
			}
			mySql.InsertStarship(DB, starship)
			return nil
		}},
		{vehiclesBucket, func(data []byte) error {
			vehicle := &model.Vehicle{}
			if err := json.Unmarshal(data, vehicle); err != nil {
				return err
			}
			mySql.InsertVehicle(DB, vehicle)
			return nil
		}},
	}
	for _, step := range steps {
		if err := importBucket(db, step.bucket, re, step.insert); err != nil {
			log.Println(err)
		}
	}
}

// importBucket calls insert once for every value stored in the named bucket,
// after removing every match of re from the value. A record for which insert
// returns an error is logged and skipped. The returned error is non-nil when
// the bucket does not exist or the read transaction itself fails.
func importBucket(db *bolt.DB, bucket string, re *regexp.Regexp, insert func(data []byte) error) error {
	return db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte(bucket))
		if b == nil {
			return fmt.Errorf("bucket %q does not exist", bucket)
		}
		return b.ForEach(func(k, v []byte) error {
			if err := insert(re.ReplaceAll(v, []byte(NoneRe))); err != nil {
				log.Printf("skipping %s/%s: %v", bucket, k, err)
			}
			return nil
		})
	})
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/cajan2/SW-Crawler.git
git@gitee.com:cajan2/SW-Crawler.git
cajan2
SW-Crawler
SW-Crawler
master

搜索帮助

D67c1975 1850385 1daf7b77 1850385