孤独的猫

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::

实现抓图的工具

#encoding:UTF-8

import urllib
import urllib2
import re
import os
from BeautifulSoup import BeautifulSoup

def GetUrlContent(url,path):
#url = "http://www.2cto.com/meinv/sexmv/"
req = urllib2.urlopen(url)
content = req.read()
soup = BeautifulSoup(content)
# print soup.pret()
#查找左右链接,并且不含title属性
alinks = soup.findAll('a', attrs={"target": "_blank"}, title=None)
i = 0
for a in alinks:
surl = a['href']
print surl
GetUrl(surl,path)
print " "
print " "
#print surl

def createFileWithFileName(localPathParam, fileName):
    """Ensure a file named `fileName` exists under `localPathParam` and
    return its full path.

    Fixes two defects of the original:
    - os.path.join replaces the hard-coded '\\' separator, so the
      script also works on non-Windows platforms;
    - the file handle is opened via `with` (always closed) and no
      longer shadows the `file` builtin.
    """
    totalPath = os.path.join(localPathParam, fileName)
    if not os.path.exists(totalPath):
        # 'a+' creates the file without truncating an existing one.
        with open(totalPath, 'a+') as f:
            pass
    return totalPath

def GetFileName(url):
    """Return the file-name component (basename) of `url`."""
    return os.path.basename(url)

def GetUrl(myUrl,localPath):
#url = "http://www.2cto.com/meinv/sexmv/1819.html"
try:
req = urllib2.urlopen(myUrl,None,5)
content = req.read()
soup = BeautifulSoup(content)
alinks =soup.findAll("img",attrs={"src": re.compile("(.*)uploads/allimg(.*)")})

for d in alinks:
imgUrl=d["src"]
print imgUrl
fileName=GetFileName(imgUrl)
print fileName
urllib.urlretrieve(imgUrl,createFileWithFileName(localPath,fileName))
except Exception,e:
print "Error"

if __name__=='__main__':
#GetUrl("http://www.2cto.com/meinv/sexmv/1810.html")
print GetFileName("http://www.2cto.com/meinv/sexmv/1810.jpg")
posted on 2016-04-10 15:22  孤独的猫  阅读(680)  评论(0编辑  收藏  举报