import urllib2
class UseProxy(object):
    """Factory for urllib2 openers that send HTTP traffic through a proxy.

    The proxy URL is assembled as ``http://user:password@proxyserver``.
    Only the ``http`` scheme is mapped to the proxy.
    """

    def __init__(self, user='aaaa', password='bbbb',
                 proxyserver='xxx.yyy.zzz:8080'):
        """Store the proxy settings.

        The defaults are the placeholder values this class used to hard-code,
        so ``UseProxy()`` behaves exactly as before.

        Args:
            user: account name sent to the proxy.
            password: password sent to the proxy.
            proxyserver: proxy address in ``host:port`` form.
        """
        self.user = user
        self.password = password
        self.proxyserver = proxyserver
        # Never written by this class; kept so existing readers of the
        # attribute keep working.
        self.content = ''

    def getproxy(self):
        """Return a new opener whose HTTP requests go through the proxy.

        Returns:
            The object produced by ``urllib2.build_opener``; call its
            ``open(url)`` method to fetch a page.
        """
        # NOTE: user and password are interpolated verbatim (no
        # percent-encoding), so they must not contain characters such as
        # '@', ':' or '/' that have a meaning inside a URL.
        proxy = 'http://{}:{}@{}'.format(
            self.user, self.password, self.proxyserver)
        proxy_handler = urllib2.ProxyHandler({'http': proxy})
        return urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
UseProxy
from urlparse import urljoin
import re
from UseProxy import *
from bs4 import BeautifulSoup
class GetZealerVideo(object):
    """Scrape post links and their captions from http://www.zealer.com."""

    # Site-relative post paths such as "/post/123".
    _POST_PATH_RE = re.compile(r'/post/\d+')

    def __init__(self):
        self.url = 'http://www.zealer.com'
        self.content = ''   # decoded HTML of the last fetched page
        self.lists = []     # result of the last splitcontent() call

    @classmethod
    def _post_path(cls, markup):
        """Return the second ``/post/<digits>`` path found in *markup*.

        The second match (index 1) is used, as in the original code;
        presumably the first occurrence in each layer is not the link that
        is wanted -- confirm against the live page markup.

        Raises:
            IndexError: if *markup* contains fewer than two such paths.
        """
        return cls._POST_PATH_RE.findall(markup)[1]

    def splitcontent(self, proxyset):
        """Fetch the page through *proxyset* and extract links and captions.

        Args:
            proxyset: object whose ``getproxy()`` returns an opener with an
                ``open(url)`` method (for example a ``UseProxy`` instance).

        Returns:
            A flat list that alternates an absolute post URL with the
            UTF-8 encoded first child of the matching ``div.subject``
            element. The list is empty when the number of ``div.subject``
            elements differs from the number of ``li_layer*`` elements.
        """
        response = proxyset.getproxy().open(self.url)
        try:
            self.content = response.read().decode('utf-8')
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()

        soup = BeautifulSoup(self.content, "html.parser")
        founddiv = soup.findAll('div', {'class': 'subject'})
        foundli = soup.findAll('div', {'id': re.compile("^li_layer")})

        self.lists = []
        # Captions and link layers are paired by position, which is only
        # meaningful when both searches found the same number of elements.
        if len(founddiv) == len(foundli):
            for subject, layer in zip(founddiv, foundli):
                self.lists.append(
                    urljoin(self.url, self._post_path(str(layer))))
                self.lists.append(subject.contents[0].encode('utf-8'))
        return self.lists
if __name__ == '__main__':
    # Manual smoke test: fetch the Zealer page through the proxy and print
    # every extracted entry, joined with dots, as one unicode string.
    proxyset = UseProxy()
    gvideo = GetZealerVideo()
    entries = gvideo.splitcontent(proxyset)
    joined = '.'.join(entries)
    print(joined.decode('utf-8'))
GetZealerVideo
from UseProxy import *
from bs4 import BeautifulSoup
class GetMydrivers(object):
    """Scrape the ``span.titl`` headline elements from http://www.mydrivers.com."""

    def __init__(self):
        self.url = 'http://www.mydrivers.com'
        self.content = ''   # raw bytes of the last fetched page
        self.lists = []     # result of the last splitcontent() call

    def splitcontent(self, proxyset):
        """Fetch the page through *proxyset* and collect the headline nodes.

        Args:
            proxyset: object whose ``getproxy()`` returns an opener with an
                ``open(url)`` method (for example a ``UseProxy`` instance).

        Returns:
            A list holding the first child node of every non-empty
            ``<span class="titl">`` element. The list is rebuilt on every
            call, so repeated calls do not accumulate duplicates.
        """
        response = proxyset.getproxy().open(self.url)
        try:
            self.content = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        # The raw bytes are handed to BeautifulSoup, which is told to decode
        # them as gb18030.
        soup = BeautifulSoup(self.content, "html.parser", from_encoding="gb18030")
        print(soup.original_encoding)

        self.lists = []
        for span in soup.findAll('span', {'class': 'titl'}):
            # An empty span has no child to report; skip it instead of
            # failing with IndexError.
            if span.contents:
                self.lists.append(span.contents[0])
        return self.lists
if __name__ == '__main__':
    # Manual smoke test: fetch the Mydrivers headlines through the proxy and
    # print each one re-encoded as gb18030.
    gnews = GetMydrivers()
    proxyset = UseProxy()
    lists = gnews.splitcontent(proxyset)
    for entry in lists:
        as_unicode = str(entry).decode('utf-8')
        print(as_unicode.encode('gb18030'))
GetMydrivers
# -*- coding: utf-8 -*-
from Tkinter import *
from time import ctime
import os
import re
import GetZealerVideo as soup
import GetMydrivers as mnews
from UseProxy import *
class GetResource(object):
    """Tkinter window that lists entries scraped from Zealer and Mydrivers.

    Two buttons trigger the scrapers and fill the listbox with the results.
    Double-clicking an entry that starts with ``http`` opens it in the
    default web browser.
    """

    def __init__(self):
        """Create the root window, the scrollable listbox and the buttons."""
        self.win = Tk()
        self.l1 = StringVar(self.win)
        self.msg = ""

        # Fixed-size container for the list: propagate(False) keeps the
        # frame at the requested size regardless of its children.
        self.frame = Frame(self.win, width=800, height=600, bg='white')
        self.frame.propagate(False)
        self.frame.pack()

        self.scroll = Scrollbar(self.frame)
        self.scroll.pack(side=RIGHT, fill=Y)

        # The scrollbar is vertical, so it is linked to the listbox's y view
        # only; also feeding it the x view would overwrite the slider
        # position with the horizontal scroll fraction.
        self.listbox = Listbox(self.frame, selectbackground='blue', font='12',
                               height=550, width=750,
                               yscrollcommand=self.scroll.set)
        self.listbox.pack(side=TOP, fill=BOTH)
        # Second half of the link: dragging the scrollbar moves the list.
        self.scroll.config(command=self.listbox.yview)
        self.listbox.bind('<Double-1>', self.get_select)

        # Fixed-size strip below the list that holds the two buttons.
        self.frame2 = Frame(self.win, width=800, height=50, bg='white')
        self.frame2.propagate(False)
        self.frame2.pack()
        Button(self.frame2, text=u'Get Zealer',
               command=self.zealer_video).pack(expand=YES)
        Button(self.frame2, text=u'Get Mydrivers',
               command=self.my_drivers).pack(expand=YES)

    @staticmethod
    def _parse_link(markup):
        """Return ``(href, text)`` extracted from anchor *markup*, or None.

        None is returned when either the href or the link text cannot be
        found, so callers never receive half of a pair.
        """
        hrefs = re.findall(r'(?<=href=").+?(?=">)', markup)
        texts = re.findall(r'(?<=>).+?(?=<)', markup)
        if not hrefs or not texts:
            return None
        return hrefs[0], texts[0]

    def my_drivers(self):
        """Refill the listbox with Mydrivers headlines (URL row, then title row)."""
        print('start get at: %s' % ctime())
        self.listbox.delete(0, END)
        self.getm = mnews.GetMydrivers()
        proxyset = UseProxy()
        for item in self.getm.splitcontent(proxyset):
            link = self._parse_link(str(item).decode('utf-8'))
            if link is None:
                # Best effort: entries that are not a plain
                # <a href="...">text</a> element are skipped.
                continue
            href, title = link
            self.listbox.insert(END, href + "\r\n")
            self.listbox.insert(END, title + "\r\n")
            # Repaint after each entry so the list fills in progressively.
            self.listbox.update()
        print('get done at: %s' % ctime())

    def zealer_video(self):
        """Refill the listbox with the entries returned by the Zealer scraper."""
        print('start get at: %s' % ctime())
        self.listbox.delete(0, END)
        self.getz = soup.GetZealerVideo()
        proxyset = UseProxy()
        for item in self.getz.splitcontent(proxyset):
            self.listbox.insert(END, item + "\r\n")
            self.listbox.update()
        print('get done at: %s' % ctime())

    def get_select(self, ev=None):
        """Double-click handler: open the selected entry if it is a URL.

        Args:
            ev: the Tk event object; unused.
        """
        import webbrowser

        self.listbox.config(selectbackground='red')
        selection = self.listbox.curselection()
        print(selection)
        if not selection:
            # Double-click with nothing selected (e.g. an empty list).
            return
        self.check = self.listbox.get(selection[0])
        # Entries are stored with a trailing "\r\n"; drop it so the browser
        # receives a clean URL.
        target = self.check.strip()
        if target.startswith('http'):
            webbrowser.open(target)
def main():
    """Build the application window and run the Tk event loop."""
    # The instance is bound to a name so it stays referenced while the
    # event loop runs.
    app = GetResource()
    mainloop()
if __name__ == '__main__':
    # Start the GUI only when this file is executed as a script.
    main()