Go: A Concurrent Web Crawler
package main

import (
	"fmt"
	"net/http"
	"os"
	"strconv"
)

var url = "https://tieba.baidu.com/f?kw=%E7%BB%9D%E5%9C%B0%E6%B1%82%E7%94%9F&ie=utf-8&pn="

// HttpGet issues a GET request and returns the response body as a string.
func HttpGet(url string) (result string) {
	// Send the network request.
	response, err := http.Get(url)
	if err != nil {
		fmt.Println(err.Error())
		return
	}
	// Close the body when the function returns.
	defer response.Body.Close()

	buf := make([]byte, 1024*6)
	for {
		n, _ := response.Body.Read(buf)
		if n == 0 {
			break
		}
		result += string(buf[:n])
	}
	return
}

// SaveFileToLocal fetches one page and saves it to a local HTML file.
func SaveFileToLocal(index int, c chan<- int) {
	// Tieba lists 50 posts per page, so page N starts at pn=(N-1)*50.
	indexStr := strconv.Itoa((index - 1) * 50)

	// Fetch the page content.
	result := HttpGet(url + indexStr)
	if result != "" {
		// Get the current working directory.
		path, _ := os.Getwd()
		// Create the output file (the ./page directory must already exist).
		f, err := os.Create(path + "/page/" + strconv.Itoa(index) + ".html")
		if err != nil {
			fmt.Println(err)
		} else {
			// Write the content to the file.
			f.WriteString(result)
			f.Close()
		}
	}
	// Report which page this goroutine has finished.
	c <- index
}

func doWork(start, end int) {
	// Create an unbuffered channel.
	page := make(chan int)
	for i := start; i <= end; i++ {
		// Spawn one goroutine per page.
		go SaveFileToLocal(i, page)
	}
	for i := start; i <= end; i++ {
		// Block until a goroutine reports completion.
		fmt.Printf("page %d crawled\n", <-page)
	}
}

func main() {
	var start, end int
	fmt.Print("Enter the start page: ")
	fmt.Scan(&start)
	if start <= 0 {
		fmt.Println("invalid start page")
		return
	}
	fmt.Print("Enter the end page: ")
	fmt.Scan(&end)
	doWork(start, end)
}
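The unbuffered channel above doubles as a completion counter: main performs one blocking receive per page, so it cannot exit before every goroutine has reported in. A common alternative is to let sync.WaitGroup handle the synchronization and keep the channel purely for results. Below is a minimal sketch of that variant; fetchPage, crawl and the results channel name are illustrative stand-ins, not part of the original program.

package main

import (
	"fmt"
	"sync"
)

// fetchPage is a placeholder for the real fetch-and-save step (hypothetical helper).
func fetchPage(index int, results chan<- int, wg *sync.WaitGroup) {
	defer wg.Done()
	// ... fetch and save page `index` here ...
	results <- index
}

func crawl(start, end int) {
	var wg sync.WaitGroup
	results := make(chan int)

	for i := start; i <= end; i++ {
		wg.Add(1)
		go fetchPage(i, results, &wg)
	}

	// Close the results channel once all workers are done,
	// so the range loop below terminates on its own.
	go func() {
		wg.Wait()
		close(results)
	}()

	for page := range results {
		fmt.Printf("page %d crawled\n", page)
	}
}

func main() {
	crawl(1, 5)
}

With this structure the number of receives no longer has to match the loop bounds by hand; the WaitGroup decides when the result stream is finished, which is a little more robust if a worker is ever skipped or added.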