摘要: 所有城市列表 CITY_CODE = ['quanguo', 'shijiazhuang', 'tangshan', 'qinhuangdao', 'handan', 'xingtai', 'baoding', 'zhangjiakou', 'chengde', 'cangzhou', 'langfa 阅读全文
posted @ 2019-02-18 15:58 Bob__Zhang 阅读(264) 评论(0) 推荐(0) 编辑
摘要: import requests from lxml import etree import re # 爬取糗事百科所有列表页信息 class Qiushi(): def __init__(self, base_url): self.base_url = base_url self.max_page = self.get_max_page() ... 阅读全文
posted @ 2018-12-15 01:24 Bob__Zhang 阅读(301) 评论(0) 推荐(0) 编辑
摘要: 新华书店 Harry Potter 29.99 Learning XML 39.95 python 大全 99.95 阅读全文
posted @ 2018-12-13 16:31 Bob__Zhang 阅读(265) 评论(0) 推荐(0) 编辑
摘要: <html> <body> <div> <ul class="bold"> <li class="item-0"> <span class="bold">这是一个span标签</span> <a href="link1.html">first item</a> </li> <li class="it 阅读全文
posted @ 2018-12-13 16:21 Bob__Zhang 阅读(561) 评论(0) 推荐(0) 编辑
摘要: # 导入需要使用的包 from urllib import request, parse import os # 基础知识 # 变量赋值 # 字符串赋值 爬取的关键字 kw = 'lol' # 数值赋值 爬取的页数范围 start = 1 end = 4 # 输出 # print(kw, start, end) # 声明需要爬取的链接 base_url = 'https://tieba.b... 阅读全文
posted @ 2018-12-12 16:23 Bob__Zhang 阅读(314) 评论(0) 推荐(0) 编辑
摘要: # 导入需要使用的包 from urllib import request, parse import os # 基础知识 # 变量赋值 # 字符串赋值 爬取的关键字 kw = 'lol' # 数值赋值 爬取的页数范围 start = 1 end = 4 # 输出 # print(kw, start, end) # 声明需要爬取的链接 base_url = 'https://tieba.baidu.com/f?'... 阅读全文
posted @ 2018-12-12 15:56 Bob__Zhang 阅读(230) 评论(0) 推荐(0) 编辑
摘要: import itchat from itchat.content import * # 微信自动回复 @itchat.msg_register([TEXT]) def text_reply(msg): print(msg) text = msg['Text'] print(text) to_user = msg["ToUserName"] if "年" ... 阅读全文
posted @ 2018-12-12 15:23 Bob__Zhang 阅读(962) 评论(0) 推荐(0) 编辑
摘要: import itchat from itchat.content import * import re msg_infomation = {} # 监听发送消息 @itchat.msg_register([TEXT]) def handle_receive_msg(msg): print(msg) msg_from = itchat.search_friends(userNam... 阅读全文
posted @ 2018-12-12 15:20 Bob__Zhang 阅读(440) 评论(0) 推荐(0) 编辑
摘要: # 使用itchat获取微信好友的男女比例 import itchat itchat.auto_login(hotReload=True) friends = itchat.get_friends(update=True)[0:] # 初始化 男和女的数量 male = female = other = 0 for i in friends[1:]: print(i) sex =... 阅读全文
posted @ 2018-12-12 15:19 Bob__Zhang 阅读(713) 评论(0) 推荐(0) 编辑
摘要: # 导入我们需要的模块 import re import requests # 一、获取网页内容 # (1)声明目标url,就是爬取的网站地址 base_url = "http://maoyan.com/board" # (2)模仿浏览器 headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537... 阅读全文
posted @ 2018-12-12 15:05 Bob__Zhang 阅读(313) 评论(0) 推荐(0) 编辑