在GAE中用Python编写webapp进行Post数据采集

#!/usr/bin/env python
# -*- coding: cp936 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import webapp2
#from lxml import etree
from google.appengine.api import urlfetch
import re


class MainHandler(webapp2.RequestHandler):
    """Demo handler: send a form POST with urlfetch, then extract IDs by regex.

    The POST response is not parsed yet; the ID extraction runs on a
    hard-coded HTML sample that stands in for a fetched page.
    """

    # Matches opennewpage('<digits>') and captures the digits.
    _PAGE_ID_PATTERN = re.compile(r"opennewpage\('(\d+)'\)")

    @staticmethod
    def _unique_in_order(items):
        """Return the distinct values of `items`, keeping first-seen order."""
        seen = set()
        ordered = []
        for item in items:
            if item not in seen:
                seen.add(item)
                ordered.append(item)
        return ordered

    def get(self):
        """Issue the sample POST and write the extracted page IDs as HTML.

        The response body is 'Result:<br />' followed by each distinct ID
        and a '<br />', in the order the IDs first appear in the sample.
        """
        form_data = 'ssss=aaa&bb=ccc'
        # The response is deliberately not bound to a name: the original code
        # stored it in `result` and then overwrote that same variable with the
        # output string, so it was never used. To scrape the real page, bind
        # the return value here and feed its `.content` to the pattern below
        # instead of `sample_html`.
        urlfetch.fetch(url='https://www.cnblogs.com',
                       payload=form_data,
                       method=urlfetch.POST,
                       follow_redirects=False,
                       headers={'Content-Type': 'application/x-www-form-urlencoded'})

        # Sample markup; note the second anchor uses a misspelled attribute
        # name, but the pattern only looks at the opennewpage(...) call text,
        # so both IDs are matched.
        sample_html = "<a href='javascript:void(0)' onclick='opennewpage('54999')'>ddd</a><a onclickk='opennewpage('123456')'>aaa</a>"

        # De-duplicate while preserving document order; iterating a plain set
        # would emit the IDs in an arbitrary order.
        page_ids = self._unique_in_order(
            self._PAGE_ID_PATTERN.findall(sample_html))

        body = 'Result:<br />' + ''.join(
            page_id + '<br />' for page_id in page_ids)
        self.response.write(body)

# URL routing table: requests for the site root are handled by MainHandler.
_ROUTES = [('/', MainHandler)]

# WSGI application object for this module.
# NOTE(review): debug=True is passed through unchanged; confirm it is meant
# to stay enabled outside local development.
app = webapp2.WSGIApplication(_ROUTES, debug=True)

 

posted @ 2018-06-22 17:35  随便取个名字算了  阅读(302)  评论(0)  编辑  收藏  举报