1.安装:
pip install pybloom
或者从 PyPI 页面下载安装包:
https://pypi.python.org/pypi/pybloom/1.0.2
2.使用:
from pybloom import BloomFilter
# Minimal usage example: seed a Bloom filter with known data, then check new
# data against it. `datalist` and `newdata` are iterables supplied by the
# caller; they are not defined in this snippet.
# Capacity of 10000 items, error rate of 0.001.
bl = BloomFilter(capacity=10000, error_rate=0.001)

# Load everything that is already known into the filter.
for item in datalist:
    bl.add(item)

# Report entries the filter says it has seen; remember the ones it has not.
for item in newdata:
    if item in bl:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print('has this data')
    else:
        bl.add(item)
-----------
# Collect the host part (netloc) of every URL listed in the files under `path`
# and append each host that has not been seen before to 'allfile', so that
# later runs skip it as well.
# `path`, `os`, `urlparse` and `BloomFilter` are expected to be in scope.
try:
    # NOTE(review): pybloom appears to raise once more than `capacity` items
    # are added -- confirm that 1000 is large enough for the real data set.
    bl = BloomFilter(capacity=1000, error_rate=0.001)
    with open('allfile', 'a+') as fd:
        # 'a+' may leave the read position at the end of the file, so rewind
        # before loading the hosts recorded by earlier runs.
        fd.seek(0)
        for saved in fd:
            # Hosts are stored one per line; drop the newline so the stored
            # value compares equal to url.netloc in the membership test below.
            bl.add(saved.strip('\n'))
        # Reposition explicitly before switching from reading to writing.
        fd.seek(0, os.SEEK_END)

        if os.path.isdir(path):
            for name in os.listdir(path):
                with open(os.path.join(path, name), 'r') as fdd:
                    for line in fdd:
                        url = urlparse(line.strip('\n'))
                        print(url.netloc)
                        if url.netloc not in bl:
                            bl.add(url.netloc)
                            fd.write(url.netloc + '\n')
                            # Flush per host so progress survives a crash.
                            fd.flush()
        elif os.path.isfile(path):
            # A single-file `path` is only reported, not processed.
            print('file..')
except Exception as e:
    # Top-level boundary of the script: report the failure and stop.
    print(str(e))
------------
浙公网安备 33010602011771号