python模拟163登陆获取邮件列表

利用cookielib和urllib2模块模拟登陆163的例子有很多，近期看了《python模拟登陆163邮箱并获取通讯录》一文，受到启发，试着对收件箱、发件箱等进行了分析，并列出了所有邮件列表及状态，包括发件人、收件人、主题、发信时间、已读未读等状态。

1、参考代码：http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html

  1 #-*- coding:UTF-8 -*-
  2 import urllib,urllib2,cookielib
  3 import xml.etree.ElementTree as etree #xml解析类
  4 
  5 class Login163:
  6    #伪装browser
  7     header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
  8     username = ''
  9     passwd = ''
 10     cookie = None #cookie对象
 11     cookiefile = './cookies.dat' #cookie临时存放地
 12     user = ''
 13     
 14     def __init__(self,username,passwd):
 15         self.username = username
 16         self.passwd = passwd
 17         #cookie设置
 18         self.cookie = cookielib.LWPCookieJar() #自定义cookie存放
 19         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
 20         urllib2.install_opener(opener)
 21 
 22    #登陆    
 23     def login(self):       
 24 
 25         #请求参数设置
 26         postdata = {
 27             'username':self.username,
 28             'password':self.passwd,
 29             'type':1
 30             }
 31         postdata = urllib.urlencode(postdata)
 32 
 33         #发起请求
 34         req = urllib2.Request(
 35                 url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
 36                 data= postdata,#请求数据
 37                 headers = self.header #请求头
 38             )
 39 
 40         result = urllib2.urlopen(req).read()
 41         result = str(result)
 42         self.user = self.username.split('@')[0]
 43 
 44         self.cookie.save(self.cookiefile)#保存cookie
 45         
 46         if '登录成功，正在跳转...' in result:
 47             #print("%s 你已成功登陆163邮箱。---------\n" %(user))
 48             flag = True
 49         else:
 50             flag = '%s 登陆163邮箱失败。'%(self.user)
 51            
 52         return flag
 53 
 54    #获取通讯录
 55     def address_list(self):
 56 
 57         #获取认证sid
 58         auth = urllib2.Request(
 59                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
 60                 headers = self.header
 61             )
 62         auth = urllib2.urlopen(auth).read()
 63         for i,sid in enumerate(self.cookie):#enumerate()用于同时返数字索引与数值，实际上是一个元组:((0,test[0]),(1,test[1]).......)这有点像php里的foreach 语句的作用
 64             sid = str(sid)
 65             if 'sid' in sid:
 66                 sid = sid.split()[1].split('=')[1]
 67                 break
 68         self.cookie.save(self.cookiefile)
 69         
 70         #请求地址
 71         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
 72         #参数设定(var 变量是必需要的,不然就只能看到:<code>S_OK</code><messages/>这类信息)
 73         #这里参数也是在firebug下查看的。
 74         postdata = {
 75             'func':'global:sequential',
 76             'showAd':'false',
 77             'sid':sid,
 78             'uid':self.username,
 79             'userType':'browser',
 80             'var':'<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>'
 81             }
 82         postdata = urllib.urlencode(postdata)
 83         
 84         #组装请求
 85         req = urllib2.Request(
 86             url = url,
 87             data = postdata,
 88             headers = self.header
 89             )
 90         res = urllib2.urlopen(req).read()
 91         
 92         #解析XML，转换成json
 93         #说明：由于这样请求后163给出的是xml格式的数据，
 94         #为了返回的数据能方便使用最好是转为JSON
 95         json = []
 96         tree = etree.fromstring(res)
 97         obj = None
 98         for child in tree:
 99             if child.tag == 'array':
100                 obj = child            
101                 break
102         #这里多参考一下，etree元素的方法属性等，包括attrib,text,tag,getchildren()等
103         obj = obj[0].getchildren().pop()
104         for child in obj:
105             for x in child:
106                 attr = x.attrib
107                 if attr['name']== 'EMAIL;PREF':
108                     value = {'email':x.text}
109                     json.append(value)
110         return json
111         
# Demo: log in, then print every address-book e-mail.
print("Requesting......\n\n")
client = Login163('xxxx@163.com','xxxxx')
status = client.login()
# login() yields the boolean True on success and a message string on failure.
if not isinstance(status, bool):
    print(status)
else:
    print("Successful landing,Resolved contacts......\n\n")
    contacts = client.address_list()
    for entry in contacts:
        print(entry['email'])

View Code

2、分析收件箱、发件箱等网址

在参考代码中，获取通讯录的url为

url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username

通过对邮箱地址的分析，发现收件箱、发件箱等的url为：

url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username

其中func=mbox:listMessages。收件箱、发件箱的具体区分在下面的postdata中，具体为：

（1）收件箱

# POST body for listing the inbox (fid = 1).
postdata = {
'func':'global:sequential',
'showAd':'false',
'sid':sid,  # session id obtained from the cookies, not a captured constant
'uid':self.username,
'userType':'browser',
'var':'<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
}

（2）发件箱

# POST body for listing the sent folder (fid = 3).
postdata = {
'func':'global:sequential',
'showAd':'false',
'sid':sid,  # session id obtained from the cookies, not a captured constant
'uid':self.username,
'userType':'browser',
'var':'<?xml version="1.0"?><object><int name="fid">3</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
}

可以看出，两段代码的不同之处就是fid的取值不同，其中收件箱为1，发件箱为3，草稿箱为2。

3、xml解析

利用ElementTree 类来进行xml到字典的转换。在获取通讯录的实例中，主要使用了这一方法。本例子（具体代码见后文）在收取邮件列表时，并没有用这一方法，仍然使用的是字符串的处理方法。但这里还是列一下ElementTree 类对xml的处理。如（参考地址：http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html）：

-<result>
   <code>S_OK</code>
 -<array name="var">
  -<object>
     <string name="code">S_OK</string>
    -<array name="var">
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     +<object></object>
     </array>
   </object>
  +<object></object>
  </array>
 </result>

解决方法：

 1 #-*- coding:UTF-8 -*-
 2 
 3 import xml.etree.ElementTree as etree #xml解析类
 4 def xml2json(xml):
 5     json = []
 6     tree = etree.fromstring(xml) #如果是文件可用parse(source)
 7     obj = None
 8     for child in tree:
 9         if child.tag == 'array':
10             obj = child            
11             break
12     #这里多参考一下，etree元素的方法属性等，包括attrib,text,tag,getchildren()等
13     obj = obj[0].getchildren().pop()
14     for child in obj:
15         for x in child:
16             attr = x.attrib
17             if attr['name']== 'EMAIL;PREF':
18                 value = {'email':x.text}
19                 json.append(value)
20     return json

4、收件箱邮件列表

本例子只列出了收件箱邮件列表，如果需要，可根据以上介绍调整fid值，列出发件箱、草稿箱等的邮件列表。程序在Windows XP、Python 2.6环境下调试通过，运行后，会在当前目录下生成三个文件：inboxlistfile.txt记录收件箱邮件列表，addfile.txt记录通讯录，cookies.dat记录cookies。（注意：下面列出的代码只是把通讯录打印到屏幕上，并没有写入addfile.txt。）具体代码如下：

  1 #-*- coding:UTF-8 -*-
  2 #@小五义 http://www.cnblogs.com/xiaowuyi
  3 #163邮件列表
  4 import urllib,urllib2,cookielib
  5 import xml.etree.ElementTree as etree #xml解析类
  6 
  7 class Login163:
  8    #伪装browser
  9     header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
 10     username = ''
 11     passwd = ''
 12     cookie = None #cookie对象
 13     cookiefile = './cookies.dat' #cookie临时存放地
 14     user = ''
 15     
 16     def __init__(self,username,passwd):
 17         self.username = username
 18         self.passwd = passwd
 19         #cookie设置
 20         self.cookie = cookielib.LWPCookieJar() #自定义cookie存放
 21         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
 22         urllib2.install_opener(opener)
 23 
 24    #登陆    
 25     def login(self):       
 26 
 27         #请求参数设置
 28         postdata = {
 29             'username':self.username,
 30             'password':self.passwd,
 31             'type':1
 32             }
 33         postdata = urllib.urlencode(postdata)
 34 
 35         #发起请求
 36         req = urllib2.Request(
 37                 url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
 38                 data= postdata,#请求数据
 39                 headers = self.header #请求头
 40             )
 41 
 42         result = urllib2.urlopen(req).read()
 43         result = str(result)
 44         #print result
 45         self.user = self.username.split('@')[0]
 46 
 47         self.cookie.save(self.cookiefile)#保存cookie
 48         
 49         if '登录成功，正在跳转...' in result:
 50             #print("%s 你已成功登陆163邮箱。---------n" %(user))
 51             flag = True
 52         else:
 53             flag = '%s 登陆163邮箱失败。'%(self.user)
 54            
 55         return flag
 56 
 57    #获取通讯录
 58     def address_list(self):
 59 
 60         #获取认证sid
 61         auth = urllib2.Request(
 62                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
 63                 headers = self.header
 64             )
 65         auth = urllib2.urlopen(auth).read()
 66 
 67         #authstr=str(auth)
 68         #print authstr
 69         
 70         for i,sid in enumerate(self.cookie):
 71             sid = str(sid)
 72             #print 'sid:%s' %sid
 73             if 'sid' in sid:
 74                 sid = sid.split()[1].split('=')[1]
 75                 break
 76         self.cookie.save(self.cookiefile)
 77         
 78         #请求地址
 79         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
 80         #参数设定(var 变量是必需要的,不然就只能看到:<code>S_OK</code><messages>这类信息)
 81         #这里参数也是在firebug下查看的。
 82         postdata = {
 83             'func':'global:sequential',
 84             'showAd':'false',
 85             'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
 86             'uid':self.username,
 87             'userType':'browser',
 88             'var':'<!--?xml version="1.0"?--><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>'
 89             }
 90         postdata = urllib.urlencode(postdata)
 91         
 92         #组装请求
 93         req = urllib2.Request(
 94             url = url,
 95             data = postdata,
 96             headers = self.header
 97             )
 98         res = urllib2.urlopen(req).read()
 99 
100         #print str(res)
101         
102         #解析XML，转换成json
103         #说明：由于这样请求后163给出的是xml格式的数据，
104         #为了返回的数据能方便使用最好是转为JSON
105         json = []
106         tree = etree.fromstring(res)
107 
108         
109         
110         obj = None
111         for child in tree:
112             if child.tag == 'array':
113                 obj = child            
114                 break
115         #这里多参考一下，etree元素的方法属性等，包括attrib,text,tag,getchildren()等
116         obj = obj[0].getchildren().pop()
117         for child in obj:
118             for x in child:
119                 attr = x.attrib
120                 if attr['name']== 'EMAIL;PREF':
121                     value = {'email':x.text}
122                     json.append(value)
123         return json
124 
125     def minbox(self):#收件箱，fid为1，发件箱为3，草稿箱为2
126         #获取认证sid
127         auth = urllib2.Request(
128                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
129                 headers = self.header
130             )
131         auth = urllib2.urlopen(auth).read()
132 
133         #authstr=str(auth)
134         #print authstr
135         
136         for i,sid in enumerate(self.cookie):
137             sid = str(sid)
138             #print 'sid:%s' %sid
139             if 'sid' in sid:
140                 sid = sid.split()[1].split('=')[1]
141                 break
142         self.cookie.save(self.cookiefile)
143         
144         #请求地址
145         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username
146         #参数设定(var 变量是必需要的,不然就只能看到:<code>S_OK</code><messages>这类信息)
147         #这里参数也是在firebug下查看的。
148         postdata = {
149             'func':'global:sequential',
150             'showAd':'false',
151             'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
152             'uid':self.username,
153             'userType':'browser',
154             'var':'<!--?xml version="1.0"?--><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
155             }
156         postdata = urllib.urlencode(postdata)
157         
158         #组装请求
159         req = urllib2.Request(
160             url = url,
161             data = postdata,
162             headers = self.header
163             )
164         res = urllib2.urlopen(req).read()
165 
166         liststr=str(res).split('<object>')#用object进行分割
167         inboxlistcount=len(liststr)-1#记录邮件封数
168         inboxlistfile=open('inboxlistfile.txt','a')
169         t=0  #记录当前第几封信
170         for i in liststr:
171             if 'xml' in i and ' version=' in i:
172                 inboxlistfile.write('inbox 共'+str(inboxlistcount)+'信')
173                 inboxlistfile.write('\n')
174             if 'name="id"' in i:
175                 t=t+1
176                 inboxlistfile.write('第'+str(t)+'封：')
177                 inboxlistfile.write('\n')
178                 #写入from
179                 beginnum=i.find('name="from"')
180                 endnum=i.find('</string>',beginnum)
181                 inboxlistfile.write('From:'+i[beginnum+12:endnum])
182                 inboxlistfile.write('\n')
183                 #写入to
184                 beginnum=i.find('name="to"')
185                 endnum=i.find('</string>',beginnum)
186                 inboxlistfile.write('TO:'+i[beginnum+10:endnum])
187                 inboxlistfile.write('\n')
188                 #写入subject
189                 beginnum=i.find('name="subject"')
190                 endnum=i.find('</string>',beginnum)
191                 inboxlistfile.write('Subject:'+i[beginnum+15:endnum])
192                 inboxlistfile.write('\n')
193                 #写入date：
194                 beginnum=i.find('name="sentDate"')
195                 endnum=i.find('</date>',beginnum)
196                 inboxlistfile.write('Date:'+i[beginnum+16:endnum])
197                 inboxlistfile.write('\n')
198                 if 'name="read">true' in i:
199                     inboxlistfile.write('邮件状态:已读')
200                     inboxlistfile.write('\n')
201                 else:
202                     inboxlistfile.write('邮件状态:未读')
203                     inboxlistfile.write('\n')
204                 #写用邮件尺寸
205                 beginnum=i.find('name="size"')
206                 endnum=i.find('</int>',beginnum)
207                 inboxlistfile.write('邮件尺寸:'+i[beginnum+12:endnum])
208                 inboxlistfile.write('\n')
209                 #写入邮件编号，用于下载邮件
210                 beginnum=i.find('name="id"')
211                 endnum=i.find('</string>',beginnum)
212                 inboxlistfile.write('邮件编号:'+i[beginnum+10:endnum])
213                 inboxlistfile.write('\n\n')
214                 
215         inboxlistfile.close()
216                 
217         
218         
# Demo: log in, dump the inbox list to inboxlistfile.txt, then print every
# address-book e-mail.
print("Requesting......\n\n")
login = Login163('AAAAA@163.com','AAAAA')
flag = login.login()
# login() returns True on success and a failure-message string otherwise.
if flag is True:
    login.minbox()
    print("Successful landing,Resolved contacts......\n\n")
    res = login.address_list()
    for x in res:
        print(x['email'])
else:
    print(flag)

posted @ 2014-10-24 16:01 神煌阅读(1199) 评论(0) 收藏举报

刷新页面返回顶部

飘渺之路

python模拟163登陆获取邮件列表

公告