油猴脚本爬虫

脚本内容

// ==UserScript==
// @name         大众点评评论爬虫
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  crawl is great
// @author       陈祥安
// @include      http://www.dianping.com/shop*
// @match        http://www.dianping.com/ajax/json/shopDynamic/allReview*
// @require      http://cdn.bootcss.com/jquery/1.11.2/jquery.js
// @grant        GM_xmlhttpRequest
// @connect      127.0.0.1
// ==/UserScript==

(function() {
    /**
     * Evaluate an XPath expression and collect every node it yields.
     *
     * @param {string} xpath - XPath expression to evaluate.
     * @param {Node} [context] - Node to evaluate against. When omitted (or
     *     when it has no `ownerDocument`), `window.document` is used as the
     *     evaluating document.
     * @returns {Node[]} The nodes produced by the result iterator, in
     *     iteration order. Any error raised during evaluation propagates.
     */
    var $x = function (xpath, context) {
        var ownerDoc = context ? context.ownerDocument : null;
        var targetDoc = ownerDoc || window.document;
        var root = context || targetDoc;
        var iterator = targetDoc.evaluate(xpath, root, null, XPathResult.ANY_TYPE, null);

        var matches = [];
        // iterateNext() returns null once the result set is exhausted.
        for (var hit = iterator.iterateNext(); hit; hit = iterator.iterateNext()) {
            matches.push(hit);
        }
        return matches;
    }

    // Endpoint of the local collector service that receives the scraped comments.
    var server_url = 'http://127.0.0.1:9090/comment/'

    /**
     * Dismiss the bonus pop-up if present, collect the text of every review
     * paragraph on the page and POST the result as JSON to `server_url`.
     */
    var crawlComments = function () {
        // Close the pop-up. A jQuery object is always truthy, so test
        // `.length` to find out whether the button actually exists.
        let close_btn = $(".J-bonus-close");
        console.log("准备关闭", close_btn);
        if (close_btn.length) {
            close_btn.click();
        }

        let li_item_list = $x("//ul[@class='comment-list J-list']/li[@class='comment-item']/div[@class='content']//p[@class='desc']");
        var dataList = [];
        li_item_list.forEach(v => {
            console.log(v);
            dataList.push({"data": v.innerText});
        });

        GM_xmlhttpRequest({
            method: "POST",
            url: server_url,
            // Declare the body type explicitly; the payload is a JSON document.
            headers: {"Content-Type": "application/json"},
            data: JSON.stringify({'name': "爬虫", "dataList": dataList}),
            onload: function (response) {
                // Response handling goes here.
                console.log(response);
                console.log("dataList", dataList);
            },
            onerror: function (response) {
                // Surface network/permission failures instead of dropping them silently.
                console.error("comment upload failed", response);
            }
        });
    };

    // A userscript may be injected after the page's `load` event has already
    // fired, in which case a `load` listener would never run. Start right away
    // when the document is already complete; otherwise wait for `load`.
    if (document.readyState === 'complete') {
        crawlComments();
    } else {
        window.addEventListener('load', crawlComments);
    }
})();

python代码

# @Author : cxa
# @File : server.py
# @Software: PyCharm
import json

from flask import Flask, request, render_template

# Flask application object; the route and error-handler decorators below register on it.
app = Flask(__name__)


@app.route('/')
def index():
    """Serve the root URL: a single HTML heading naming the API."""
    banner = "<h1>大众点评API</h1>"
    return banner


@app.route('/comment/', methods=['GET', 'POST'])
def login():
    """Receive scraped comments as a JSON request body and echo them back.

    POST: the raw body is decoded as UTF-8 JSON, printed to stdout and
    returned as a JSON response. A body that is not valid UTF-8 JSON yields
    a 400 response with an ``error`` message.

    GET: carries no payload, so a short usage hint is returned. (Previously
    ``result`` was never assigned on this path and the view raised
    ``UnboundLocalError``, producing a 500.)
    """
    # Local import so this view does not depend on a change to the file-level imports.
    from flask import jsonify

    if request.method != 'POST':
        return jsonify({"message": "POST a JSON body to this endpoint"})

    form_data = request.get_data()
    try:
        result = json.loads(form_data.decode("utf-8"))
    except ValueError:
        # Covers both UnicodeDecodeError and json.JSONDecodeError
        # (each is a ValueError subclass).
        return jsonify({"error": "request body must be UTF-8 encoded JSON"}), 400

    print(result)
    # jsonify serialises any JSON value, not only dicts.
    return jsonify(result)


@app.errorhandler(404)
def miss(e):
    """Handle 404 errors by rendering the ``404.html`` template.

    ``e`` is the error object handed to the handler; it is not used.
    Returns a ``(body, status)`` pair with status 404.
    """
    page = render_template('404.html')
    return page, 404


@app.errorhandler(500)
def error(e):
    """Handle 500 errors by rendering the ``500.html`` template.

    ``e`` is the error object handed to the handler; it is not used.
    Returns a ``(body, status)`` pair with status 500.
    """
    page = render_template('500.html')
    return page, 500


if __name__ == '__main__':
    # Bind to loopback only. The userscript posts to 127.0.0.1, and
    # debug=True turns on the interactive debugger, which lets anyone who can
    # reach the port execute code -- so it must not listen on all interfaces.
    app.run(host='127.0.0.1', port=9090, debug=True)

posted @ 2019-12-10 15:15  公众号python学习开发  阅读(2046)  评论(0)  编辑  收藏  举报