pu369com

纯golang爬虫实战-(五)-登录并带cookie访问

之前写的代码访问内网网站,在实践中发现以下现象:

1、访问网站时如不设置headers 会返回包含xss字样的提示

2、fiddler截获后,只有在IE浏览器仍处于登录状态时,才能补发成功。当在浏览器中退出登录后,fiddler补发不成功。

3、我将浏览器登录成功后的http headers复制到代码中,其中包含了cookies,此时运行代码可以成功。但是在浏览器中退出登录后,代码运行也是未登录状态。

说明浏览器登录后,服务器上才保持了有效的sessionID,起初怀疑是不是因为httponly设置的影响?

对照现象2和3,说明之前的go代码虽然用了cookiejar,但似乎没能携带cookies去访问,用类似以下代码也不起作用:

    jar.SetCookies(req.URL, []*http.Cookie{
        &http.Cookie{Name: "PHPSESSID", Value: "26c2tkqumv2a2l4o34qtdcbs80", HttpOnly: false},
        &http.Cookie{Name: "security", Value: "impossible", HttpOnly: false},
    })
    client.Jar = jar

用  req.AddCookie手工设置cookies;试了也不行。

    u, err := url.Parse("http://192.168.132.80/login/login.jsp")
    for _, v := range jar.Cookies(u) {
        req.AddCookie(v)
    }

后来参考了这里:https://www.oschina.net/question/593413_139087 ,可以把Transport包装一下,在RoundTrip开始和结束的位置进行cookie的管理。但我不太会用Transport(http.RoundTripper)。

最终还是用fiddler拦截请求,发现原来在chrome console中看不到正确的POST地址和http header,当然没法成功登录!注意:

1、POST提交表单时,要设置Content-Type: application/x-www-form-urlencoded

2、在chrome console中可看到:post提交的body(或叫form data)与header部分是有一个空行分隔开的。并且在header中有类似Content-Length: 258的字样。258表示body(或叫form data)的字节数。

 最终的测试代码如下:

 

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "net/http/cookiejar"
    "strings"
)

// Package-level cookie state shared by the request helpers in this file.
var (
    // gCurCookieJar is the cookie jar handed to every http.Client the
    // helpers build; it is created in initAll.
    gCurCookieJar *cookiejar.Jar

    // gCurCookies is a snapshot of the jar's cookies for the most recently
    // requested URL; dbgPrintCurCookies dumps it.
    gCurCookies []*http.Cookie
)

func initAll() {
    gCurCookies = nil
    //var err error;
    gCurCookieJar, _ = cookiejar.New(nil)

}

// getUrlRespHtml sends a GET request to url using the shared cookie jar and
// returns the response body as a string.
//
// The response headers and status are printed for debugging. After the
// request, gCurCookies is refreshed with the jar's cookies for the request
// URL. If the request cannot be built or sent, the error is printed and ""
// is returned. A read error on the body is printed and whatever was read so
// far is returned.
func getUrlRespHtml(url string) string {
    fmt.Printf("\ngetUrlRespHtml, url=%s", url)

    httpClient := &http.Client{Jar: gCurCookieJar}

    httpReq, err := http.NewRequest("GET", url, nil)
    if err != nil {
        fmt.Printf("\nhttp get url=%s response error=%s\n", url, err.Error())
        return ""
    }

    httpResp, err := httpClient.Do(httpReq)
    if err != nil {
        // httpResp is nil on error, so it must not be touched below.
        fmt.Printf("\nhttp get url=%s response error=%s\n", url, err.Error())
        return ""
    }
    defer httpResp.Body.Close()

    fmt.Printf("\nhttpResp.Header=%s", httpResp.Header)
    fmt.Printf("\nhttpResp.Status=%s", httpResp.Status)

    body, errReadAll := ioutil.ReadAll(httpResp.Body)
    if errReadAll != nil {
        fmt.Printf("\nget response for url=%s got error=%s\n", url, errReadAll.Error())
    }

    // Keep a global snapshot of the cookies that now apply to this URL.
    gCurCookies = gCurCookieJar.Cookies(httpReq.URL)

    return string(body)
}

// getUrlRespHtmlWithHeader sends a GET request to url with the given raw
// header block applied (one "Name: value" per line, parsed by AddHeaders)
// and returns the response body as a string.
//
// The response headers, status and Set-Cookie cookies are printed for
// debugging. After the request, gCurCookies is refreshed with the shared
// jar's cookies for the request URL. If the request cannot be built or sent,
// the error is printed and "" is returned. A read error on the body is
// printed and whatever was read so far is returned.
func getUrlRespHtmlWithHeader(url, headers string) string {
    fmt.Printf("\ngetUrlRespHtml, url=%s", url)

    httpClient := &http.Client{Jar: gCurCookieJar}

    httpReq, err := http.NewRequest("GET", url, nil)
    if err != nil {
        fmt.Printf("\nhttp get url=%s response error=%s\n", url, err.Error())
        return ""
    }
    AddHeaders(httpReq, headers)

    httpResp, err := httpClient.Do(httpReq)
    if err != nil {
        // httpResp is nil on error, so it must not be touched below.
        fmt.Printf("\nhttp get url=%s response error=%s\n", url, err.Error())
        return ""
    }
    defer httpResp.Body.Close()

    fmt.Printf("\nhttpResp.Header=%s", httpResp.Header)
    fmt.Printf("\nhttpResp.Status=%s", httpResp.Status)
    fmt.Printf("\nhttpResp.cookies=%s", httpResp.Cookies())

    body, errReadAll := ioutil.ReadAll(httpResp.Body)
    if errReadAll != nil {
        fmt.Printf("\nget response for url=%s got error=%s\n", url, errReadAll.Error())
    }

    // Keep a global snapshot of the cookies that now apply to this URL.
    gCurCookies = gCurCookieJar.Cookies(httpReq.URL)

    return string(body)
}

// PostUrlRespHtmlWithHeader sends formdata as the body of a POST request to
// url, with the given raw header block applied (one "Name: value" per line,
// parsed by AddHeaders), and returns the response body as a string.
//
// formdata is expected to be already URL-encoded. If headers does not supply
// a Content-Type, "application/x-www-form-urlencoded" is used. The response
// headers and status are printed for debugging. After the request,
// gCurCookies is refreshed with the shared jar's cookies for the request
// URL. If the request cannot be built or sent, the error is printed and ""
// is returned. A read error on the body is printed and whatever was read so
// far is returned.
func PostUrlRespHtmlWithHeader(url, headers, formdata string) string {
    fmt.Printf("\nPostUrlRespHtmlWithHeader, url=%s", url)

    httpClient := &http.Client{Jar: gCurCookieJar}

    // Pass the *strings.Reader directly: http.NewRequest recognises it and
    // fills in ContentLength and GetBody, which an io.NopCloser wrapper hides.
    httpReq, err := http.NewRequest("POST", url, strings.NewReader(formdata))
    if err != nil {
        fmt.Printf("\nhttp post url=%s response error=%s\n", url, err.Error())
        return ""
    }
    AddHeaders(httpReq, headers)
    // The canonical header name is "Content-Type"; only default it so a
    // caller-supplied value in headers is respected.
    if httpReq.Header.Get("Content-Type") == "" {
        httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
    }

    httpResp, err := httpClient.Do(httpReq)
    if err != nil {
        // httpResp is nil on error, so it must not be touched below.
        fmt.Printf("\nhttp post url=%s response error=%s\n", url, err.Error())
        return ""
    }
    defer httpResp.Body.Close()

    fmt.Printf("\nhttpResp.Header=%s", httpResp.Header)
    fmt.Printf("\nhttpResp.Status=%s", httpResp.Status)

    body, errReadAll := ioutil.ReadAll(httpResp.Body)
    if errReadAll != nil {
        fmt.Printf("\nget response for url=%s got error=%s\n", url, errReadAll.Error())
    }

    // Keep a global snapshot of the cookies that now apply to this URL.
    gCurCookies = gCurCookieJar.Cookies(httpReq.URL)

    return string(body)
}

// dbgPrintCurCookies prints the number of cookies cached in gCurCookies,
// followed by every field of each cookie, to standard output.
func dbgPrintCurCookies() {
    fmt.Printf("cookieNum=%d", len(gCurCookies))
    for idx, ck := range gCurCookies {
        fmt.Printf("\n\n\n\n------ Cookie [%d]------", idx)
        fmt.Printf("\n\tName=%s", ck.Name)
        fmt.Printf("\n\tValue=%s", ck.Value)
        fmt.Printf("\n\tPath=%s", ck.Path)
        fmt.Printf("\n\tDomain=%s", ck.Domain)
        fmt.Printf("\n\tExpires=%s", ck.Expires)
        fmt.Printf("\n\tRawExpires=%s", ck.RawExpires)
        fmt.Printf("\n\tMaxAge=%d", ck.MaxAge)
        fmt.Printf("\n\tSecure=%t", ck.Secure)
        fmt.Printf("\n\tHttpOnly=%t", ck.HttpOnly)
        fmt.Printf("\n\tRaw=%s", ck.Raw)
        fmt.Printf("\n\tUnparsed=%s", ck.Unparsed)
    }
}

// AddHeaders parses a raw header block, as copied from a browser or proxy
// capture, and sets each header on req.
//
// headers holds one "Name: value" pair per line. Each line is split at its
// first ':' so values that contain colons (URLs, for example) stay intact.
// Whitespace around the name and value, including a trailing '\r' from CRLF
// input, is trimmed. Lines that are blank, have no ':' or have an empty name
// are skipped instead of causing a panic. A later line with the same name
// replaces an earlier one.
//
// A "Host" line is also copied to req.Host, because the net/http client
// takes the Host to send from that field and ignores a Host entry in
// req.Header.
//
// It returns req so calls can be chained.
func AddHeaders(req *http.Request, headers string) *http.Request {
    for _, line := range strings.Split(headers, "\n") {
        sep := strings.Index(line, ":")
        if sep <= 0 {
            // No separator, or nothing before it: not a usable header line.
            continue
        }
        name := strings.TrimSpace(line[:sep])
        if name == "" {
            continue
        }
        value := strings.TrimSpace(line[sep+1:])

        req.Header.Set(name, value)
        if strings.EqualFold(name, "Host") {
            req.Host = value
        }
    }
    return req
}

// main logs in by POSTing the captured login form with the captured request
// headers, then fetches a page with the same cookie jar to show that the
// session cookies are carried over. The jar's cookies are dumped after each
// request.
func main() {
    initAll()
    /*
        fmt.Printf("====== step 1:get Cookie ======")
        var MainUrl string = "http://192.168.132.80/login/login.jsp"
        fmt.Printf("\nMainUrl=%s", MainUrl)
        getUrlRespHtmlWithHeader(MainUrl, headers2)
        dbgPrintCurCookies()
    */

    fmt.Printf("\n\n\n====== step 2:get Cookie ======")
    // Request headers captured from the browser's login POST.
    // NOTE(review): because Accept-Encoding is set explicitly here, net/http
    // does not transparently decompress a gzip response body.
    var headers2 = `Accept: text/html, application/xhtml+xml, */*
Referer: http://192.168.132.80/login/login.jsp
Accept-Language: zh-CN
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko
Content-Type: application/x-www-form-urlencoded
Accept-Encoding: gzip, deflate
Host: 192.168.132.80
Content-Length: 258
Connection: Keep-Alive
Pragma: no-cache
Cookie: logincookiecheck=1581819550262+C1D3FCB434C8223BE9C4CE5AD9497183; testBanCookie=test; JSESSIONID=abcrJrk4lxqzZccwgDUax; loginfileweaver=%2Fwui%2Ftheme%2Fecology7%2Fpage%2Flogin.jsp%3FtemplateId%3D6%26logintype%3D1%26gopage%3D; loginidweaver=114; languageidweaver=7`
    // Already URL-encoded login form body.
    var formdata = `loginfile=%2Fwui%2Ftheme%2Fecology7%2Fpage%2Flogin.jsp%3FtemplateId%3D6%26logintype%3D1%26gopage%3D&logintype=1&fontName=%CE%A2%C8%ED%D1%C5%BA%DA&message=&gopage=&formmethod=post&rnd=&serial=&username=&isie=true&loginid=admin&userpassword=1234&submit=`
    // No trailing space: a space at the end of the literal would become part
    // of the request path.
    var getapiUrl string = "http://192.168.132.80/login/VerifyLogin.jsp"
    PostUrlRespHtmlWithHeader(getapiUrl, headers2, formdata)
    dbgPrintCurCookies()

    fmt.Printf("\n\n\n====== step 3:use the Cookie ======")
    // No Cookie header here: the cookies come from the shared jar.
    var headers3 = `Host: 192.168.132.80
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9`
    var getapiUrl3 string = "http://192.168.132.80/docs/docs/DocMoreForHp.jsp?eid=660&date2during=0&tabid=2"
    getUrlRespHtmlWithHeader(getapiUrl3, headers3)
    dbgPrintCurCookies()
}

 

 

 

 

 

posted on 2020-02-14 13:50  pu369com  阅读(5182)  评论(0)  编辑  收藏  举报

导航