黄聪

论SEO对人类的重要性,请看我的博客:hcsem.com

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::
#-*-coding:utf-8-*-
import urllib2, urllib, cookielib
import re
import getpass
import sqlite3
import random
import time

class Discuz(object):
    """Small client for a Discuz! forum: log in, list threads, post replies.

    Written for Python 2 (it relies on ``urllib2`` / ``cookielib``).  The
    constructor logs in immediately and then opens the SQLite database that
    records which threads have already been replied to.

    ``args`` is a dict of URLs / URL templates with these keys:
    ``loginurl``, ``loginsubmiturl``, ``fidurl`` (one ``%s`` for the fid),
    ``tidurl`` (one ``%s`` for the tid), ``replysubmiturl`` (two ``%s`` for
    fid and tid) and ``referer``.
    """

    def __init__(self, user, pwd, args):
        """Store credentials and URLs, then log in and open the database."""
        self.username = user
        self.password = pwd
        self.args = args
        # Raw strings keep the regex escapes intact; the patterns themselves
        # are unchanged.
        self.regex = {
            'loginreg': r'<input\s*type="hidden"\s*name="formhash"\s*value="([\w\W]+?)"\s*\/>',
            'replyreg': r'<input\s*type="hidden"\s*name="formhash"\s*value="([\w\W]+?)"\s*\/>',
            'tidreg': r'<tbody\s*id="normalthread_\d+">[\s\S]+?<span\s*id="thread_(\d+)">',
        }
        self.conn = None
        self.cur = None
        self.islogin = False
        self.login()
        self.InitDB()

    def _extract_formhash(self, regex_key, page):
        """Return the hidden ``formhash`` value found in ``page``.

        Raises ValueError when the page does not contain the field, so the
        caller reports a meaningful message instead of an AttributeError on
        ``None``.
        """
        match = re.search(self.regex[regex_key], page)
        if match is None:
            raise ValueError('formhash not found in page')
        return match.group(1)

    def _already_replied(self, fid, tid):
        """Return True if the ``post`` table already has a row for (fid, tid)."""
        # Cursor.rowcount is always -1 for SELECT statements in sqlite3, so
        # the presence of a row has to be checked by fetching it.
        self.cur.execute(
            "select 1 from post where fid=? and tid=? limit 1", (fid, tid))
        return self.cur.fetchone() is not None

    def login(self):
        """Log in with the stored credentials and set ``self.islogin``.

        Any failure is printed rather than raised; ``self.islogin`` then
        stays False.
        """
        try:
            login_page = urllib2.urlopen(self.args['loginurl']).read()
            formhash = self._extract_formhash('loginreg', login_page)
            print('start login...')
            # Install a cookie-aware opener globally so that every later
            # urllib2.urlopen() call carries the session cookies.
            cj = cookielib.CookieJar()
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            user_agent = (
                'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Mozilla/4.0 '
                '(compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.507'
            )
            opener.addheaders = [('User-agent', user_agent)]
            urllib2.install_opener(opener)
            logindata = urllib.urlencode({
                'cookietime': 2592000,
                'formhash': formhash,
                'loginfield': 'username',
                'username': self.username,
                'password': self.password,
                'questionid': 0,
                'referer': self.args['referer'],
            })
            request = urllib2.Request(self.args['loginsubmiturl'], logindata)
            urllib2.urlopen(request).close()
            # NOTE(review): the response body is not inspected, so a rejected
            # password is still treated as a successful login.
            self.islogin = True
            print('login success...')
        except Exception as e:
            print('loggin error: %s' % e)

    def PostReply(self, fid, tid, content):
        """Post ``content`` as a reply to thread ``tid`` in board ``fid``.

        Threads already recorded in the ``post`` table are skipped.  After a
        successful submit the (fid, tid) pair is recorded.  Errors are
        printed, not raised.
        """
        try:
            if self._already_replied(fid, tid):
                print('Skip! Thread:%s is already replied...' % tid)
                return
            tidurl = self.args['tidurl'] % tid
            replysubmiturl = self.args['replysubmiturl'] % (fid, tid)
            thread_page = urllib2.urlopen(tidurl).read()
            formhash = self._extract_formhash('replyreg', thread_page)
            print('start reply...')
            replydata = urllib.urlencode({
                'formhash': formhash,
                'message': content,
                'subject': '',
                'usesig': '1',
            })
            request = urllib2.Request(replysubmiturl, replydata)
            urllib2.urlopen(request).close()
            # Parameterized insert: values are never spliced into the SQL.
            self.cur.execute(
                "insert into post values (?, ?, ?)", (fid, tid, 1))
            self.conn.commit()
            print('reply success for [%s]' % tidurl)
        except Exception as e:
            print('reply error: %s' % e)

    def GetTids(self, fid):
        """Return the list of thread ids found on the page of board ``fid``.

        Returns an empty list (after printing an error) when not logged in,
        so callers can always iterate over the result.
        """
        if not self.islogin:
            print('Error Please Login...')
            return []
        fidurl = self.args['fidurl'] % fid
        content = urllib2.urlopen(fidurl).read()
        return re.findall(self.regex['tidreg'], content)

    def InitDB(self, dbpath='data.db'):
        """Open the SQLite database at ``dbpath`` and ensure ``post`` exists."""
        self.conn = sqlite3.connect(dbpath)
        self.cur = self.conn.cursor()
        sql = '''create table if not exists post (
            fid text,
            tid text,
            replied integer)
            '''
        self.cur.execute(sql)
        self.conn.commit()

if __name__ == '__main__':
    # Script entry point (Python 2): prompt for credentials, log in, then
    # reply to every thread listed on the first page of one board.
    username = raw_input('username:').strip()
    password = getpass.getpass('password:').strip()
    args = {
        'loginurl': 'http://www.xxx.com/logging.php?action=login',
        'loginsubmiturl': 'http://www.xxx.com/logging.php?action=login&loginsubmit=yes',
        'fidurl': 'http://www.xxx.com/forum-%s-1.html',
        'tidurl': 'http://www.xxx.com/thread-%s-1-1.html',
        'replysubmiturl': 'http://www.xxx.com/post.php?action=reply&replysubmit=yes&infloat=yes&handlekey=fastpost&fid=%s&tid=%s',
        'referer': 'http://www.xxx.com/index.php',
    }
    dz = Discuz(username, password, args)
    # Board id, used for both listing threads and posting replies.
    fid = '45'
    # GetTids can come back empty-handed (e.g. when login failed); fall back
    # to an empty list so the loop below is simply skipped.
    tids = dz.GetTids(fid) or []
    replylist = [
        u'不错,支持一下,呵呵',
        u'已阅,顶一下',
        u'看看,顶你,呵呵',
        u'多谢分享,顶一下',
        u'说的不错,支持一下',
        u'提着水桶到处转,哪里缺水哪里灌! ',
        u'你太油菜了!',
    ]
    for tid in tids:
        # The reply text is sent GBK-encoded.
        content = random.choice(replylist).encode('gbk')
        dz.PostReply(fid, tid, content)
        # Pause between replies.
        time.sleep(20)

下面简单说下过程:
首先是得到了login的post地址:http://www.xxx.com/logging.php?action=login&loginsubmit=yes
几个关键的parameter是

cookietime
formhash
loginfield
password
questionid
referer
username

  • cookietime 浏览器自动给的是 2592000
  • loginfield 默认的username
  • password 密码
  • questionid 这个貌似是登录时的回答问题,这个论坛没有强制回答所以用默认的0
  • referer 这个则是引用地址 http://www.xxx.com/index.php
  • username 用户名
  • formhash 这个值貌似是随机生成的,并不固定,但它也是个关键参数,所以直接用正则表达式从登录页面中提取
# URLs and URL templates used by the script; each %s is filled in with a
# board id (fid) or thread id (tid).  Query strings use a plain '&'
# separator, not the HTML entity '&amp;'.
args = {
    'loginurl': 'http://www.xxx.com/logging.php?action=login',
    'loginsubmiturl': 'http://www.xxx.com/logging.php?action=login&loginsubmit=yes',
    'fidurl': 'http://www.xxx.com/forum-%s-1.html',
    'tidurl': 'http://www.xxx.com/thread-%s-1-1.html',
    'replysubmiturl': 'http://www.xxx.com/post.php?action=reply&replysubmit=yes&infloat=yes&handlekey=fastpost&fid=%s&tid=%s',
    'referer': 'http://www.xxx.com/index.php',
}
  • loginurl为登录页面,用于获得formhash的值
  • loginsubmiturl为post登录参数的地址
  • fidurl是版块页面的url,其中%s处即为版块ID(fid),例如 http://www.xxx.com/forum-45-1.html 中fid即为45
  • tidurl是帖子页面的url,其中%s处即为帖子ID(tid),查找方法同上
  • replysubmiturl这个是回复帖子post参数的url,要定位一个帖子前提得知道fid和tid
  • referer这个是引用地址,用网站的首页即可

原创文章,转载请注明: 转载自LazyHack.Net

posted on 2011-09-03 21:57  黄聪  阅读(6723)  评论(3)  编辑  收藏  举报