摘要:
import requests,re,time,pymongofrom bs4 import BeautifulSoup as bs#计数用num = 0str_time = time.time()#连接mongodbclient = pymongo.MongoClient(host='localh 阅读全文
摘要:
import requestsimport refrom bs4 import BeautifulSoup as bsimport tracebackdef getHTMLtext(url,code = "utf-8 "): try: r = requests.get(url) r.raise_fo 阅读全文
摘要:
import requestsimport re headers = {'cookie': 'l=Aj8/z1CVFeqHt7/Nk9kSI9v3TxnJEZPG; miid=5178119511105888855; cna=cDBEEgUJsxMCARsRgoXUNkvN; x=e%3D1%26p 阅读全文
摘要:
import scrapyimport refrom collections import Counterfrom lianjia.items import LianjiaItemclass LianjiaSpiderSpider(scrapy.Spider): name = 'lianjia_sp 阅读全文
摘要:
import scrapyimport json,time,refrom zhihuinfo.items import ZhihuinfoItemclass ZhihuSpider(scrapy.Spider): name = 'zhihu' allowed_domains = ['www.zhih 阅读全文
摘要:
import requests,time,re,json,pymongofrom urllib.parse import urlencodefrom requests.exceptions import RequestExceptionfrom bs4 import BeautifulSoup as 阅读全文
摘要:
import requestsfrom bs4 import BeautifulSoup as bsimport reimport timeimport pandas as pdheaders ={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) 阅读全文
摘要:
import requests,re,timeheader ={ "Cookie":"登陆过账号后的cookie 必须填写", "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Geck 阅读全文