gorapy使用方式

1.目前计划

目标是实现类似 Scrapy 的爬虫编写流程。但由于 Go 是静态类型语言,函数参数不支持默认值,因此暂定在发起请求时显式传入配置对象(gorapy.Request 和 gorapy.Settings),与请求相关的选项都在这些配置对象中设置。示例如下:

package main

import (
	"fmt"
	"gorapy"
	"gorapy/spiders"
)

// DemoSpider is the example spider used in this walkthrough. It embeds
// spiders.Spider, which is what lets its methods call d.Request directly.
type DemoSpider struct {
	spiders.Spider
}

// start kicks off the crawl: it takes the library's default settings,
// registers d.parse as the callback, sets the concurrency to 1, overrides the
// default User-Agent header with an empty string, and then requests the start
// URL with method "get" and an empty gorapy.Request.
func (d DemoSpider) start() {
	settings := gorapy.DefaultSettings()
	settings.ConCurrency = 1
	settings.Callback = d.parse
	settings.DefaultHeaders["User-Agent"] = ""

	startURL := "https://www.baidu.com"
	d.Request(startURL, "get", gorapy.Request{}, &settings)
}

// parse is the callback registered by start. It prints the response text and
// then issues a follow-up request whose Meta map carries "liangSir": "nb",
// with d.parse2 registered as the callback in a fresh gorapy.Settings value.
func (d DemoSpider) parse(response gorapy.Response) {
	fmt.Println("parse", response.Text)

	next := gorapy.Request{
		Meta: map[string]interface{}{"liangSir": "nb"},
	}
	settings := gorapy.Settings{Callback: d.parse2}
	d.Request("nextUrl", "get", next, &settings)
}

// parse2 is the callback registered by parse. It prints a marker line and the
// response's Meta, then issues one more request that sets Params, Headers and
// Payload on the gorapy.Request. nil is passed for the settings argument.
func (d DemoSpider) parse2(response gorapy.Response) {
	fmt.Println("parse2")
	fmt.Println("parse2 meta", response.Meta)

	next := gorapy.Request{
		Params:  gorapy.Params{"c": "d"},
		Headers: gorapy.Headers{"Accept": "application/json"},
		Payload: gorapy.Payload{"a": "b"},
	}
	d.Request("nextUrl", "get", next, nil)
}

// main runs the demo by calling start on a zero-value DemoSpider.
func main() {
	var spider DemoSpider
	spider.start()
}

输出结果为:


parse 
parse2
parse2 meta map[liangSir:nb]

每次请求都通过 spiders.Spider 的 Request 方法发起。Go 没有继承,自己实现的爬虫结构体只需嵌入(匿名组合)spiders.Spider,即可直接调用 .Request 方法。

其中,gorapy.Request对象内的主要参数有:

// Request holds the per-request options passed to Spider.Request. It embeds
// *http.Request and adds the fields below.
//
// NOTE(review): the field semantics are not shown in this listing; the names
// suggest the usual HTTP client options (headers, cookies, query params, form
// and payload bodies, proxy, timeout, TLS and redirect switches) — confirm
// against the library source before relying on any of them.
type Request struct {
	*http.Request
	Headers            Headers
	Cookies            SimpleCookie
	Auth               Auth
	Params             Params
	Form               Form
	Payload            Payload
	Binary             []byte
	Files              []*File
	// Meta carries caller-defined values. In the demo, the Meta set on the
	// request in parse is printed from response.Meta in parse2.
	Meta               map[string]interface{}
	Method             string
	Proxy              string
	Timeout            time.Duration
	SkipVerifyTLS      bool
	Chunked            bool
	AllowRedirects     bool
	DisableKeepAlive   bool
	DisableCompression bool
}

gorapy.Settings 主要包含的参数有:


// Settings holds the crawl options passed alongside a Request; the demo
// obtains a pre-filled value from gorapy.DefaultSettings().
//
// NOTE(review): Callback is a func-typed field with a json tag, but
// encoding/json cannot marshal func values, so json.Marshal on a Settings
// value would return an error — confirm whether `json:"-"` is intended.
type Settings struct {
	DownloadDelay        time.Duration           `json:"downloadDelay"`        // download delay
	DefaultHeaders       Headers                 `json:"defaultHeaders"`       // default request headers
	Callback             func(response Response) `json:"callback"`             // callback function
	RandomJA3Fingerprint bool                    `json:"randomJA3Fingerprint"` // whether to enable a random JA3 fingerprint
	AllowDomains         []string                `json:"allowDomains"`         // default nil slice to allow any domains
	ConCurrency          int                     `json:"conCurrency"`          // number of concurrent requests
}

posted @ 2023-01-12 12:55  ykallan  阅读(30)  评论(0)    收藏  举报