
Golang 多线程图片爬取
package main
import (
"io/ioutil"
"log"
"net/http"
"strconv"
"sync"
"time"
"github.com/anaskhan96/soup"
)
// Package-level state shared by the worker goroutines.
var (
	// photoNum is the sequence number used to name the next saved image.
	// It starts at zero and must only be read or advanced while holding lock.
	photoNum int

	// wg is waited on by main; getPhoto calls Done on it when it returns.
	wg sync.WaitGroup

	// lock serialises access to photoNum.
	lock sync.Mutex
)
// getPhotoURL consumes listing-page URLs from pageURL, fetches each page and
// forwards one link per post to photoURL.
//
// For every <li> inside the <ul id="post-list-posts"> element it sends the
// href of the second <a> element (presumably the link to the image itself;
// verify against the site's markup). Pages that cannot be fetched, pages
// without the list, and items without a usable second link are skipped.
//
// When pageURL has been closed and drained, a single value is sent on done so
// that the caller can tell this producer will no longer write to photoURL.
func getPhotoURL(pageURL <-chan string, photoURL chan<- string, done chan int) {
	for page := range pageURL {
		source, err := soup.Get(page)
		if err != nil {
			// Report the failure instead of silently parsing an empty document.
			log.Printf("fetching page %q: %v", page, err)
			continue
		}

		// Parse once and reuse the node for both the existence check and the walk.
		list := soup.HTMLParse(source).Find("ul", "id", "post-list-posts")
		if list.Pointer == nil {
			continue
		}

		for _, item := range list.FindAll("li") {
			links := item.FindAll("a")
			if len(links) < 2 {
				// Indexing links[1] unconditionally would panic on such items.
				continue
			}
			href := links[1].Attrs()["href"]
			if href == "" {
				// An empty URL can never be downloaded; do not pass it on.
				continue
			}
			photoURL <- href
		}
	}
	done <- 1
}
func getPhoto(photoURL <-chan string) {
for i := range photoURL {
resp, err := http.Get(i)
if err != nil {
log.Fatalln(err)
}
photo, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Fatalln(err)
}
lock.Lock()
j := photoNum
photoNum++
lock.Unlock()
ioutil.WriteFile("/home/tsubasa/图片/photo/"+strconv.Itoa(j)+".jpg", photo, 0755)
}
wg.Done()
}
func main() {
var (
pageURL chan string = make(chan string, 10)
done chan int = make(chan int)
photoURL chan string = make(chan string, 10)
pageNum int = 101
)
for i := 0; i < 8; i++ {
wg.Add(1)
go getPhotoURL(pageURL, photoURL, done)
wg.Add(1)
go getPhoto(photoURL)
}
go func() {
for i := 0; i < 4; i++ {
<-done
}
close(photoURL)
}()
for i := 1; i < pageNum; i++ {
pageURL <- "https://konachan.net/post?page=" + strconv.Itoa(i) + "&tags="
time.Sleep(time.Second * 1)
}
close(pageURL)
wg.Wait()
}