Python边学边用--BT客户端实现之BitTorrent文件解析

BitTorrent文件解析:

BitTorrent文件使用bencode编码,其中包括了4种数据类型:

'd' 开头表示是dict类型,'e'表示结束

'l' (小写字母L)开头表示是list类型,'e'表示结束

'i'开头表示是integer类型,'e'表示结束,可以表示负数

以数字开头表示string类型,数字为string长度,长度与string内容以':'分隔

默认所有text类型的属性为utf-8编码,但是大多数BitTorrent文件包含codepage和encoding属性,用于指定text的编码格式

BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html

以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。

 

  1 import os
  2 from datetime import tzinfo
  3 from datetime import datetime
  4 
# Size argument handed to file.read(); a negative size makes read() return
# everything up to end of file.
_READ_MAX_LEN = -1
  6 
  7 class BTFormatError(BaseException):
  8     pass
  9     
 10 class TorrentFile(object):
 11     
 12     __metainfo = {}
 13     __file_name = ''
 14     
 15     def read_file(self, filename):
 16         
 17         torrent_file = open(filename, 'rb')
 18         data = torrent_file.read(_READ_MAX_LEN)
 19         torrent_file.close()
 20         
 21         data = list(data)
 22         metainfo = self.__read_chunk(data)
 23         if metainfo and type(metainfo) == type({}):
 24             self.__file_name = filename
 25             self.__metainfo = metainfo
 26         else:
 27             raise BTFormatError()
 28     
 29     def __read_chunk(self, data):
 30         
 31         chunk = None
 32         
 33         if len(data) == 0:
 34             return chunk
 35         
 36         leading_chr = data[0]
 37                          
 38         if leading_chr.isdigit():
 39             chunk = self.__read_string(data)
 40         elif leading_chr == 'd':
 41             chunk = self.__read_dict(data)
 42         elif leading_chr == 'i':
 43             chunk = self.__read_integer(data)
 44         elif leading_chr == 'l':
 45             chunk = self.__read_list(data)
 46 
 47         #print leading_chr, chunk
 48         return chunk
 49                                
 50     def __read_dict(self, data):
 51         
 52         if  len(data) == 0 or data.pop(0) != 'd': 
 53             return None
 54         
 55         chunk = {} 
 56         while len(data) > 0 and data[0] != 'e':
 57             
 58             key = self.__read_chunk(data)
 59             value = self.__read_chunk(data)
 60             
 61             if key and value and type(key) == type(''):
 62                 chunk[key] = value
 63             else:
 64                 return None
 65             
 66         if len(data) == 0 or data.pop(0) != 'e':
 67             return None
 68         
 69         return chunk
 70     
 71     def __read_list(self, data):
 72 
 73         if  len(data) == 0 or data.pop(0) != 'l': 
 74             return None
 75         
 76         chunk = []
 77         while len(data) > 0 and data[0] != 'e':
 78             value = self.__read_chunk(data)
 79             if value:
 80                 chunk.append(value)
 81             else:
 82                 return None
 83             
 84         if len(data) == 0 or data.pop(0) != 'e': 
 85             return None
 86 
 87         return chunk
 88 
 89     def __read_string(self, data):
 90         
 91         str_len = ''
 92         while len(data) > 0 and data[0].isdigit():
 93             str_len +=  data.pop(0)
 94         
 95         if len(data) == 0 or data.pop(0) != ':':
 96             return None
 97         
 98         str_len = int(str_len)
 99         if str_len > len(data):
100             return None
101         
102         value = data[0:str_len]
103         del data[0:str_len]
104         return ''.join(value)
105     
106     def __read_integer(self, data):
107        
108         integer = ''
109         if len(data) < len('i2e') or data.pop(0) != 'i': 
110             return None
111         
112         sign = data.pop(0)
113         if sign != '-' and not sign.isdigit():
114             return None
115         integer += sign
116         
117         while len(data) > 0 and data[0].isdigit():
118             integer += data.pop(0)
119         
120         if len(data) == 0 or data.pop(0) != 'e':
121             return None
122 
123         return  int(integer)
124     
125     def __is_singlefile(self):
126         return 'length' in self.__metainfo.keys()
127     
128     def __decode_text(self, text):
129         encoding = 'utf-8'
130         resultstr = ''
131         if self.get_encoding():
132             encoding = self.get_encoding()
133         elif self.get_codepage():
134             encoding = 'cp' + str(self.get_codepage())
135         if text:
136             try:
137                 resultstr = text.decode(encoding=encoding)
138             except ValueError:
139                 return text
140         else:
141             return None
142         return resultstr
143     
144     def __get_meta_top(self, key):
145         if key in self.__metainfo.keys():
146             return self.__metainfo[key]
147         else:
148             return None
149     def __get_meta_info(self,key):
150         meta_info = self.__get_meta_top('info')
151         if meta_info and key in meta_info.keys():
152                 return meta_info[key]
153         return None
154     
155     def get_codepage(self):
156         return self.__get_meta_top('codepage')
157     def get_encoding(self):
158         return self.__get_meta_top('encoding')
159     
160     def get_announces(self):
161         announces = []
162         ann = self.__get_meta_top('announce')
163         if ann:
164             ann_list = []
165             ann_list.append(ann)
166             announces.append(ann_list)
167         announces.append(self.__get_meta_top('announce-list'))
168         return announces
169     
170     def get_publisher(self):
171         return self.__decode_text(self.__get_meta_top('publisher'))
172     def get_publisher_url(self):
173         return self.__decode_text(self.__get_meta_top('publisher-url'))
174     
175     def get_creater(self):
176         return self.__decode_text(self.__get_meta_top('created by'))
177     def get_creation_date(self):
178         utc_date = self.__get_meta_top('creation date')
179         if utc_date is None:
180             return utc_date
181         creationdate = datetime.utcfromtimestamp(utc_date)
182         return creationdate
183     def get_comment(self):
184         return self.__get_meta_top('comment')
185           
186     def get_nodes(self):
187         return self.__get_meta_top('nodes')
188     
189     def get_piece_length(self):
190         return self.__get_meta_info('piece length')
191     
192     def get_files(self):
193         pieces = self.__get_meta_info('pieces')
194         name = self.__decode_text(self.__get_meta_info('name'))
195         
196         if self.__is_singlefile():
197             file_name = name
198             file_length = self.__get_meta_info('length')
199             
200             return [{'name':[file_name], 'length':file_length, 'peaces':pieces}]
201         
202         files = []
203         folder = name
204         i = 0
205         for one_file in self.__get_meta_info('files'):
206             file_info = {}
207             path_list = []
208             path_list.append(folder)
209             for path in one_file['path']:
210                 path_list.append(self.__decode_text(path))
211             file_info['name'] = path_list
212             file_info['length'] = one_file['length']
213             file_info['pieces'] = pieces[i:(i+20)]
214             i += 20
215             files.append(file_info)
216         return files
217     
def _main(argv):
    """Parse one torrent file and print its metadata.

    argv: command-line arguments without the program name.  The first one,
        if given, is the torrent path; otherwise the sample path below is used.
    Returns 0 on success and 1 when the file cannot be read or parsed.
    """

    def show(label, value):
        # Same output as the Python 2 statement "print label, value".
        print("%s %s" % (label, value))

    filename = argv[0] if argv else r".\huapi2.1.torrent"
    torrent = TorrentFile()

    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % reason)
        # Nothing was loaded, so there is nothing meaningful to print below.
        return 1
    print("end to read file")

    show("announces: ", torrent.get_announces())
    show("peace length:", torrent.get_piece_length())
    show("code page:", torrent.get_codepage())
    show("encoding:", torrent.get_encoding())
    show("publisher:", torrent.get_publisher())
    show("publisher url:", torrent.get_publisher_url())
    show("creater:", torrent.get_creater())
    show("creation date:", torrent.get_creation_date())
    show("commnent:", torrent.get_comment())
    show("nodes:", torrent.get_nodes())

    for one_file in torrent.get_files():
        show('file name:', '\\'.join(one_file['name']))
        show('file length:', one_file['length'])
        show('pieces:', list(one_file['pieces']))
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(_main(sys.argv[1:]))
247    

 

posted on 2012-09-29 23:09  duandetao  阅读(296)  评论(0)    收藏  举报