使用Python抓取网页图片

今天写了一个Python小程序，用来抓取网页图片。

import win32com.client,time,win32inet,win32file,os
2

class ImgDownloader:
    """Save the images of a web page by copying them out of the IE cache.

    The page is opened in an Internet Explorer instance driven through COM.
    Once it has loaded, the URL of every ``<img>`` element is looked up in
    the URL cache and the cached file is copied into the target directory.

    Requires Windows with pywin32 (``win32com``, ``win32inet``,
    ``win32file``).
    """

    # Value of the IE ``ReadyState`` property once the document is fully
    # loaded (READYSTATE_COMPLETE).
    _READYSTATE_COMPLETE = 4

    def __init__(self, url, dir):
        """Launch Internet Explorer and load *url*.

        url -- address of the page whose images should be saved.
        dir -- directory that receives the copied image files.
        """
        self.__dir = dir
        self.__ie = win32com.client.Dispatch('InternetExplorer.Application')
        self.__ie.Navigate(url)
        self.__wait__()

    def __wait__(self):
        """Block until Internet Explorer has finished loading the page."""
        # Busy on its own is not a reliable "page is loaded" signal, so also
        # wait until ReadyState reports READYSTATE_COMPLETE before the
        # document is queried.
        while (self.__ie.Busy
               or self.__ie.ReadyState != self._READYSTATE_COMPLETE):
            time.sleep(0.1)

    def start(self):
        """Copy every cached image of the loaded page to the target directory.

        Each copy is named ``[<index>]<cache file name>``, where ``<index>``
        is the position of the ``<img>`` element in the document. Images
        that are not in the cache, or whose copy fails (for example because
        the destination file already exists), are skipped.

        Raises OSError if the target directory is missing and cannot be
        created.
        """
        self.__wait__()

        # Without the target directory every single copy would fail and be
        # skipped, so the run would silently produce nothing.
        if self.__dir and not os.path.isdir(self.__dir):
            os.makedirs(self.__dir)

        imgs = self.__ie.Document.getElementsByTagName('img')
        for i in range(imgs.length):
            try:
                cache_info = win32inet.GetUrlCacheEntryInfo(imgs[i].src)
                if not cache_info:
                    continue
                path = cache_info['LocalFileName']
                # The index prefix keeps names unique when two images share
                # the same cache file name.
                filename = '[%d]%s' % (i, os.path.basename(path))
                # Third argument is bFailIfExists: an existing file is never
                # overwritten.
                win32file.CopyFile(
                    path, os.path.join(self.__dir, filename), True)
            except Exception:
                # Best effort: an image that cannot be resolved or copied
                # must not abort the remaining ones. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt and SystemExit
                # propagate.
                continue

    def close(self):
        """Quit the Internet Explorer instance started by the constructor."""
        self.__ie.Quit()
31

if __name__=='__main__':
    # Example run: save the images of a Baidu image search result page.
    d=ImgDownloader('http://image.baidu.com/i?ct=201326592&cl=2&lm=-1&tn=baiduimage&pv=&word=boy&z=0','c:\\temp\\')
    try:
        d.start()
    finally:
        # Always quit Internet Explorer, even if start() raises; otherwise
        # the browser process is left running.
        d.close()

原理：在Python中通过COM接口启动IE浏览器并打开网页，获取网页中所有图片的URL，最后利用win32inet模块提供的GetUrlCacheEntryInfo函数找出每张图片对应的本地缓存文件，并将其复制到指定目录。

posted @ 2009-06-16 13:51 flysun 阅读(14854) 评论(15) 编辑收藏举报

刷新页面返回顶部