import tornado.httpclient as httpclient
import urllib
from pyquery import PyQuery as pq
import torndb
import threading
# Page address prefix; the crawler appends a numeric page id to it.
url = 'http://gmgard.us/gm'

# Site root, prepended to the image path scraped from a fetched page.
img_url = 'http://gmgard.us'

# Directory prefix used when building a local file name for an image.
static_path = 'static/img/'

# NOTE(review): the database credentials are hard-coded and the connection is
# opened at import time -- consider loading them from configuration instead.
db = torndb.Connection(
    host='localhost:3306',
    user='ypc',
    password='ypc',
    database='ypc',
)

# Parameterized insert for one image path; only referenced by a
# commented-out call in the crawler at present.
InsertSQL = 'INSERT INTO yellow_pictures(path) VALUES(%s)'

# Thread budget used by the crawler (assigned again, to the same value,
# further down the file).
cnt = 10
class crawl_picture(threading.Thread):
    """Worker thread that fetches one page and prints its first image URL.

    The request address is the module-level ``url`` with the page number
    appended.  A response whose effective URL is the blog list page is
    treated as a failed fetch.

    NOTE(review): the surrounding file targets Python 2 (``urllib.urlretrieve``,
    torndb); the ``print`` calls here are written in the single-argument form
    that behaves the same on Python 2 and Python 3.
    """

    def __init__(self, i, slots=None):
        """Remember which page to fetch.

        Args:
            i: Page number appended to ``url`` to form the request address.
            slots: Optional semaphore-like object.  When given, its
                ``release()`` is called once ``run`` finishes so that the
                launcher can start another worker.
        """
        threading.Thread.__init__(self)
        self.i = i
        self.slots = slots

    def run(self):
        """Fetch the page, print the absolute URL of its first ``#blog`` image.

        HTTP errors are reported on stdout instead of killing the thread.
        The HTTP client is always closed and the concurrency slot (if any)
        is always released, even when the fetch fails.
        """
        # Use the page number bound to this thread, not the launcher's
        # loop variable, which may already have moved on.
        page_url = url + str(self.i)
        http_client = httpclient.HTTPClient()
        try:
            print('Getting ' + page_url)
            response = http_client.fetch(page_url)

            # A redirect to the list page is treated as a failed fetch.
            if response.effective_url == 'http://gmgard.us/Blog/List':
                print('Getting Failed.')
                return

            src = pq(response.body)('#blog').find('img').eq(0).attr('src')
            if not src:
                # No image inside #blog: nothing to report for this page.
                print('Getting Failed.')
                return

            img_path = src.encode('utf-8')
            url_img = img_url + img_path
            print(url_img)
            # Download and DB insert are disabled; kept for reference:
            #   filename = static_path + img_path.split('/')[2]
            #   urllib.urlretrieve(url_img, filename)
            #   db.execute(InsertSQL, img_path.split('/')[2])
        except httpclient.HTTPError as e:
            # fetch() raises HTTPError inside the worker thread, so it has
            # to be handled here rather than around the launch loop.
            print("Error: %s" % (e,))
        finally:
            http_client.close()
            if self.slots is not None:
                self.slots.release()


# Maximum number of crawler threads allowed to run at the same time.
cnt = 10


def main():
    """Crawl pages 1000-1009 with at most ``cnt`` worker threads at once."""
    slots = threading.Semaphore(cnt)
    for i in range(1000, 1010):
        print(i)
        # Block (without spinning) until a worker slot is free; the worker
        # gives the slot back when its run() finishes.
        slots.acquire()
        crawl_picture(i, slots).start()


if __name__ == '__main__':
    main()