class UrlManager:
    """URL manager for a crawler.

    Tracks two disjoint sets of URLs: ``new_urls`` (not yet crawled) and
    ``old_urls`` (already handed out for crawling).
    """

    def __init__(self):
        """Create an empty manager with no pending and no crawled URLs."""
        self.new_urls = set()  # URLs waiting to be crawled
        self.old_urls = set()  # URLs already returned by get_url()

    def add_new_url(self, url):
        """Queue a single URL for crawling.

        ``None``, empty values, and URLs that are already pending or
        already crawled are silently ignored.
        """
        if not url:
            return
        if url in self.new_urls or url in self.old_urls:
            return
        self.new_urls.add(url)

    def add_new_urls(self, urls):
        """Queue every URL from an iterable of URLs.

        ``None`` and empty collections are ignored. Any iterable is
        accepted (including generators, which have no ``len()``); each
        item goes through the same filtering as ``add_new_url``.
        """
        if not urls:
            return
        for url in urls:
            self.add_new_url(url)

    def get_url(self):
        """Take one pending URL and mark it as crawled.

        Returns:
            A URL removed from ``new_urls`` (and added to ``old_urls``),
            or ``None`` when nothing is pending. Because the pending URLs
            are stored in a set, which URL is returned is arbitrary.
        """
        if not self.has_new_url():
            return None
        url = self.new_urls.pop()
        self.old_urls.add(url)
        return url

    def has_new_url(self):
        """Return ``True`` if at least one URL is still waiting to be crawled."""
        return len(self.new_urls) > 0
# Script entry point: instantiate a manager only when this file is run directly.
if __name__ == "__main__":
    url_manger = UrlManager()