欢迎来到 跌倒的小黄瓜 的博客

♪(^∇^*)我要当大佬,(#^.^#)哈哈哈哈,(。-ω-)zzz我要成为优秀的人,(*^▽^*)٩(๑>◡<๑)۶O(∩_∩)O哈哈~~~~~~~~欢迎━(*`∀´*)ノ亻!

go-爬图片

go语言爬取图片

注:动态加载的图片爬取不到,或者爬取出来的图片会出错;代码中的网页是可以正常爬取的。

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"strconv"
	_ "strings"
)

// SaveImg downloads the image at url and stores it as "<idx+1>.jpg" in the
// hard-coded images directory.
//
// idx is the zero-based position of the image in the scraped list; it is used
// for the file name and is sent on page when the attempt is finished.
//
// SaveImg always sends idx on page exactly once before returning, whether the
// download succeeded or not, so a caller that waits for one message per
// started download can never block forever. Failures are reported on standard
// output.
func SaveImg(idx int, url string, page chan int) {
	// Registered first so it runs last: the caller is notified only after the
	// response body and the file have been released.
	defer func() { page <- idx }()

	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("err:", err)
		return
	}
	defer resp.Body.Close()

	// Do not store an HTML error page under a .jpg name.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("err:", url, "returned status", resp.Status)
		return
	}

	// The file is created only once the request succeeded, so a failed
	// request does not leave an empty file behind.
	path := "D:/gogogo/src/go-爬虫/图片爬取/main/images/" + strconv.Itoa(idx+1) + ".jpg"
	f, err := os.Create(path)
	if err != nil {
		fmt.Println("err:", err)
		return
	}

	_, copyErr := io.Copy(f, resp.Body)
	// Close explicitly (not deferred) so a failed flush to disk is noticed.
	closeErr := f.Close()
	if copyErr != nil {
		fmt.Println("err:", copyErr)
		return
	}
	if closeErr != nil {
		fmt.Println("err:", closeErr)
	}
}

// HttpGet fetches url with an HTTP GET request and returns the whole response
// body as a string.
//
// It returns a non-nil error when the request fails, when the server answers
// with a status other than 200 OK, or when reading the body fails. On error
// the returned string is always empty; a partially read body is never
// returned.
func HttpGet(url string) (result string, err error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	// Accumulate raw bytes and convert once at the end; appending to a string
	// on every chunk would copy the whole page each time.
	var body []byte
	buf := make([]byte, 4096)
	for {
		n, readErr := resp.Body.Read(buf)
		// A Read may return data together with an error, so keep the bytes
		// before looking at the error.
		body = append(body, buf[:n]...)
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return "", readErr
		}
	}
	return string(body), nil
}

// main fetches the listing page, extracts every image address found in a
// `src2` attribute, starts one SaveImg goroutine per address, and then prints
// one line for each index received on the shared channel.
func main() {
	const listURL = "http://sc.chinaz.com/tupian/ribenmeinv.html"

	html, err := HttpGet(listURL)
	if err != nil {
		fmt.Println("err:", err)
		return
	}

	// (?s:...) lets "." match newlines too; the lazy group captures the
	// attribute value up to the next double quote.
	imgPattern := regexp.MustCompile(`<img src2="(?s:(.*?))"`)
	matches := imgPattern.FindAllStringSubmatch(html, -1)

	done := make(chan int)
	for i := range matches {
		// matches[i][1] is the first capture group: the image address.
		go SaveImg(i, matches[i][1], done)
	}

	// Receive exactly one message per goroutine started above.
	for remaining := len(matches); remaining > 0; remaining-- {
		fmt.Printf("第%d下载完成\n", <-done)
	}
}

posted @ 2019-12-07 12:58  跌倒的小黄瓜  阅读(463)  评论(1)  编辑  收藏  举报