package main
import (
	"bytes"
	"context"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"sync"
	"time"
	"unsafe"

	"github.com/antchfx/htmlquery"
	"github.com/chromedp/chromedp"
)
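// wg waits for the page-fetching goroutine plus one parser goroutine per gallery page.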
var wg sync.WaitGroup
func main() {
	htmls = make(chan string, 33)
	// Make sure the output directory exists before any downloads start.
	if err := os.MkdirAll("./images", 0755); err != nil {
		log.Fatal(err)
	}
	url := "https://pvp.qq.com/web201605/wallpaper.shtml"
	sel := `#Work_List_Container_267733`
	jsPath := `document.querySelector("body")`
	wg.Add(1)
	go GetHttpHtmlContent(url, sel, jsPath)
	// Receive the 33 rendered gallery pages and parse each one concurrently.
	for i := 0; i < 33; i++ {
		data, ok := <-htmls
		fmt.Println(i, ok)
		if !ok {
			break
		}
		wg.Add(1)
		go GetSpecialImages(data)
	}
	wg.Wait()
}
// toBytes converts a string to a byte slice without copying.
// The result aliases the string's memory and must be treated as read-only.
func toBytes(s string) []byte {
	return unsafe.Slice(unsafe.StringData(s), len(s))
}
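// htmls carries the rendered HTML of each gallery page from the chromedp goroutine to the parsers.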
var htmls chan string
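// GetSpecialImages parses one page of gallery HTML, pairs each wallpaper's
// download link with its title, and downloads the images one by one.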
func GetSpecialImages(htmlContent string) error {
	defer wg.Done()
	body := toBytes(htmlContent)
	doc, err := htmlquery.Parse(bytes.NewReader(body))
	if err != nil {
		fmt.Println(err)
		return err
	}
	// Collect one download link per wallpaper (the href of the li[7] anchor).
	urls := []string{}
	nodes := htmlquery.Find(doc, `//*[@id="Work_List_Container_267733"]/div/ul/li[7]/a/@href`)
	for _, node := range nodes {
		fmt.Println("fetch ", node.FirstChild.Data)
		urls = append(urls, node.FirstChild.Data)
	}
	// Collect each wallpaper's title from its <h4> heading; it becomes the file name.
	names := []string{}
	nodes1 := htmlquery.Find(doc, `//*[@id="Work_List_Container_267733"]/div/h4/a`)
	for _, node := range nodes1 {
		fmt.Println("fetch ", node.FirstChild.Data)
		names = append(names, node.FirstChild.Data)
	}
	for i, v := range urls {
		if i >= len(names) {
			break
		}
		name := names[i]
		fmt.Println(v, "->: ", name)
		httpDownLoad(v, name)
	}
	return nil
}
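// GetHttpHtmlContent drives a Chrome instance via chromedp: it opens the wallpaper
// gallery, waits for the work list to appear, then clicks through all 33 pages,
// sending each page's rendered HTML on the htmls channel.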
func GetHttpHtmlContent(url string, selector string, jsPath interface{}) {
	defer wg.Done()
	options := []chromedp.ExecAllocatorOption{
		// Run a visible (non-headless) browser with image loading enabled.
		chromedp.Flag("headless", false),
		chromedp.Flag("blink-settings", "imagesEnabled=true"),
		chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36`),
	}
	options = append(chromedp.DefaultExecAllocatorOptions[:], options...)
	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), options...)
	defer cancelAlloc()
	chromeCtx, cancelChrome := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer cancelChrome()
	// Run with no actions so the browser is launched before the timeout starts.
	if err := chromedp.Run(chromeCtx); err != nil {
		log.Fatal(err)
	}
	timeoutCtx, cancel := context.WithTimeout(chromeCtx, 120*time.Second)
	defer cancel()
	var htmlContent string
	err := chromedp.Run(timeoutCtx,
		chromedp.Navigate(url),
		chromedp.WaitVisible(selector),
		chromedp.OuterHTML(jsPath, &htmlContent, chromedp.ByJSPath),
	)
	if err != nil {
		fmt.Println(err)
	}
	htmls <- htmlContent
	// Page through the remaining 32 gallery pages.
	for i := 0; i < 32; i++ {
		htmlContent = ""
		err := chromedp.Run(timeoutCtx,
			chromedp.Click(`#Page_Container_267733 > a.downpage`),
			// Give the next page a moment to render before capturing its HTML.
			chromedp.Sleep(time.Second),
			chromedp.OuterHTML(jsPath, &htmlContent, chromedp.ByJSPath),
		)
		if err != nil {
			panic(fmt.Errorf("get html failed: %w", err))
		}
		htmls <- htmlContent
		fmt.Println(len(htmls), i)
	}
	close(htmls)
}
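// httpDownLoad fetches one image over HTTP and saves it as ./images/<name>.jpg.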
func httpDownLoad(url string, name string) error {
	resp, err := http.Get(url)
	if err != nil {
		fmt.Printf("HTTP get [%v] failed! %v\n", url, err)
		return err
	}
	defer resp.Body.Close()
	content, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("Read HTTP response failed! %v\n", err)
		return err
	}
	// Save the wallpaper under ./images using the scraped title as the file name.
	filename := "./images/" + name + ".jpg"
	err = os.WriteFile(filename, content, 0666)
	if err != nil {
		fmt.Printf("Save to file failed! %v\n", err)
		return err
	}
	return nil
}