# Before starting, make sure the MongoDB server is running, then create a file named mongo_cache.py
import pickle
import zlib
from datetime import datetime, timedelta
import requests
from pymongo import MongoClient
from bson.binary import Binary
class MongoCache(object):
    """
    Database-backed cache
    """
    def __init__(self, client=None, expires=timedelta(days=30)):
        # Use the caller-supplied client if given, otherwise connect to a local MongoDB
        self.client = client or MongoClient("localhost", 27017)
        self.db = self.client.cache
        # Index timestamp to speed up lookups and attach a TTL: once a record is
        # older than expireAfterSeconds, MongoDB deletes it automatically
        self.db.webpage.create_index('timestamp', expireAfterSeconds=expires.total_seconds())
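        # Note: MongoDB's TTL monitor only sweeps expired documents roughly once
        # a minute, so records may linger briefly past their expiry time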
    def __setitem__(self, key, value):
        # Pickle and compress the value, then attach a UTC timestamp
        record = {"result": Binary(zlib.compress(pickle.dumps(value))), "timestamp": datetime.utcnow()}
        # upsert=True makes update_one insert when _id is missing and update it
        # otherwise; the $set operator overwrites the stored fields
        self.db.webpage.update_one({"_id": key}, {'$set': record}, upsert=True)
    def __getitem__(self, item):
        # Look up the cached page, using item (the URL) as the _id
        record = self.db.webpage.find_one({"_id": item})
        if record:
            # Found: decompress and unpickle the stored result
            return pickle.loads(zlib.decompress(record["result"]))
        else:
            raise KeyError(item + " does not exist")  # not cached, raise KeyError
    def __contains__(self, item):
        try:
            self[item]  # delegates to __getitem__
        except KeyError:
            return False  # a KeyError means the page is not cached
        else:
            return True  # the lookup succeeded, so the database holds the downloaded content
    def clear(self):
        # Drop the whole collection, discarding all cached pages
        self.db.webpage.drop()
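# The helper below is a minimal sketch of how the two dunder methods support a
# cache-aside download flow (the name cached_get is illustrative, not part of
# the original code): return the cached page on a hit, otherwise download it,
# store it, and return it.
def cached_get(cache, url):
    # __contains__ probes the cache; a hit skips the network entirely
    if url in cache:
        return cache[url]
    text = requests.get(url).text
    cache[url] = text  # __setitem__ compresses and timestamps the page
    return text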
if __name__ == '__main__':
    mongo_cache = MongoCache()
    url = 'http://tieba.baidu.com/f?kw=猫&red_tag=1'
    response = requests.get(url)
    mongo_cache[url] = response.text
    print(mongo_cache[url])
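    # Usage sketch for the cached_get helper above: since the page was just
    # cached, this call is served straight from MongoDB, not the network
    print(len(cached_get(mongo_cache, url)))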
# Now create a second file that exercises the cache
import requests
import mongo_cache
download_url = "http://tieba.baidu.com/f?kw=猫&red_tag=2"
download_response = requests.get(download_url)
m_cache = mongo_cache.MongoCache()
m_cache[download_url] = download_response.content  # bytes this time; pickle preserves the type
print(m_cache[download_url].decode('utf-8'))  # so the cached value decodes back to text
print(download_url in m_cache)
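# One caveat: the TTL is fixed when the index is created, and MongoDB refuses to
# recreate an existing index with different options, so to test expiry quickly
# the collection must be dropped first. A hedged sketch (the 60-second expiry
# and the variable names are illustrative, not part of the original code):
from datetime import timedelta
import mongo_cache
base_cache = mongo_cache.MongoCache()  # same 30-day TTL as before, so a harmless no-op
base_cache.clear()                     # drop the collection along with its old TTL index
short_cache = mongo_cache.MongoCache(expires=timedelta(seconds=60))
# index_information() lists the timestamp index with expireAfterSeconds=60.0
print(short_cache.db.webpage.index_information())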