Python边学边用--BT客户端实现之BitTorrent文件解析
BitTorrent文件解析:
BitTorrent文件使用bencode编码,其中包括了4种数据类型:
'd' 开头表示是dict类型,'e'表示结束
'l' (小写字母L)开头表示是list类型,'e'表示结束
'i'开头表示是integer类型,'e'表示结束,可以表示负数
以数字开头表示string类型,数字为string长度,长度与string内容以':'分割
默认所有text类型的属性为utf-8编码,但是大多数BitTorrent包含codepage 和 encoding属性,指定了text的编码格式
BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html
以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。
# Parser for BitTorrent metainfo (.torrent) files, which are bencoded.
#
# The code runs unchanged on Python 2.7 and Python 3.  Everything decoded from
# the file (dictionary keys and string values) is kept as a byte string; only
# the getters that go through TorrentFile.__decode_text return text.

import numbers
import os
import sys
from datetime import tzinfo
from datetime import datetime
from datetime import timedelta

# Passed to file.read(); -1 means "read the whole file".
_READ_MAX_LEN = -1

# Size in bytes of one SHA-1 digest inside the 'pieces' string.
_SHA1_LEN = 20

# 'creation date' is stored as seconds since this instant (UTC).
_EPOCH = datetime(1970, 1, 1)


class BTFormatError(Exception):
    """Raised when data is not a well-formed bencoded torrent metainfo."""


def _to_native_str(value):
    """Return *value* as the interpreter's native ``str`` type.

    Codec names must be native strings, but values read from the torrent are
    byte strings (which are a different type on Python 3).
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode('ascii', 'replace')
    return str(value)


def _slice_pieces(pieces, piece_length, offset, length):
    """Return the SHA-1 digests of every piece overlapping one file.

    pieces       -- concatenated 20-byte digests for the whole torrent
    piece_length -- number of payload bytes covered by one piece
    offset       -- position of the file's first byte in the torrent payload
    length       -- size of the file in bytes

    An empty file overlaps no piece.  When *piece_length* is not a positive
    integer the range cannot be computed, so all digests are returned.
    """
    if length <= 0:
        return pieces[:0]
    if not isinstance(piece_length, numbers.Integral) or piece_length <= 0:
        return pieces
    first = offset // piece_length
    last = (offset + length - 1) // piece_length
    return pieces[first * _SHA1_LEN:(last + 1) * _SHA1_LEN]


class TorrentFile(object):
    """In-memory view of a decoded torrent metainfo dictionary.

    Load data with read_file() or read_data(), then query it through the
    get_* methods.  Getters return None when the requested field is absent.
    """

    def __init__(self):
        self.__metainfo = {}
        self.__file_name = ''

    def read_file(self, filename):
        """Load and decode the torrent stored at *filename*.

        Raises IOError/OSError when the file cannot be read and BTFormatError
        when its content is not a valid metainfo dictionary.  The previously
        loaded metainfo is left untouched on failure.
        """
        with open(filename, 'rb') as torrent_file:
            data = torrent_file.read(_READ_MAX_LEN)
        self.read_data(data)
        self.__file_name = filename

    def read_data(self, data):
        """Decode the byte string *data* as torrent metainfo.

        Raises BTFormatError when *data* is malformed or when its top-level
        value is not a non-empty dictionary.  Bytes following the top-level
        dictionary are ignored.
        """
        try:
            metainfo, _ = self.__read_chunk(data, 0)
        except RuntimeError:
            # Recursion limit hit: pathologically nested lists/dictionaries.
            raise BTFormatError('metainfo is nested too deeply')
        if not isinstance(metainfo, dict) or not metainfo:
            raise BTFormatError(
                'top-level value must be a non-empty dictionary')
        self.__metainfo = metainfo

    # ------------------------------------------------------------------
    # Bencode decoding.  Each __read_* method takes the complete buffer and
    # the offset of the value's first byte, and returns (value, next_offset).
    # One-byte slices are used instead of indexing because indexing a byte
    # string yields an int on Python 3 but a str on Python 2.
    # ------------------------------------------------------------------

    def __read_chunk(self, data, pos):
        """Decode the value starting at *pos*, whatever its type."""
        lead = data[pos:pos + 1]
        if lead.isdigit():
            return self.__read_string(data, pos)
        if lead == b'd':
            return self.__read_dict(data, pos)
        if lead == b'i':
            return self.__read_integer(data, pos)
        if lead == b'l':
            return self.__read_list(data, pos)
        if not lead:
            raise BTFormatError('unexpected end of data at offset %d' % pos)
        raise BTFormatError('unexpected byte %r at offset %d' % (lead, pos))

    def __read_dict(self, data, pos):
        """Decode ``d<key><value>...e``; keys must be strings."""
        result = {}
        pos += 1  # skip the leading 'd'
        while True:
            lead = data[pos:pos + 1]
            if lead == b'e':
                return result, pos + 1
            if not lead.isdigit():
                raise BTFormatError(
                    'expected a string key or "e" at offset %d' % pos)
            key, pos = self.__read_string(data, pos)
            value, pos = self.__read_chunk(data, pos)
            result[key] = value

    def __read_list(self, data, pos):
        """Decode ``l<value>...e``."""
        result = []
        pos += 1  # skip the leading 'l'
        while data[pos:pos + 1] != b'e':
            # __read_chunk raises if the data ends before the closing 'e'.
            value, pos = self.__read_chunk(data, pos)
            result.append(value)
        return result, pos + 1

    def __read_string(self, data, pos):
        """Decode ``<length>:<bytes>`` and return the raw bytes."""
        colon = data.find(b':', pos)
        digits = data[pos:colon]
        if colon < 0 or not digits.isdigit():
            raise BTFormatError('invalid string length at offset %d' % pos)
        start = colon + 1
        end = start + int(digits)
        if end > len(data):
            raise BTFormatError('string at offset %d is truncated' % pos)
        return data[start:end], end

    def __read_integer(self, data, pos):
        """Decode ``i<digits>e``; a single leading '-' is allowed."""
        end = data.find(b'e', pos + 1)
        if end < 0:
            raise BTFormatError('unterminated integer at offset %d' % pos)
        body = data[pos + 1:end]
        digits = body[1:] if body[:1] == b'-' else body
        if not digits.isdigit():
            raise BTFormatError('invalid integer at offset %d' % pos)
        return int(body), end + 1

    # ------------------------------------------------------------------
    # Metainfo access helpers.
    # ------------------------------------------------------------------

    def __is_singlefile(self):
        """Tell whether the torrent describes exactly one file.

        Single-file torrents carry 'length' directly inside the 'info'
        dictionary; multi-file torrents carry a 'files' list instead.
        """
        info = self.__get_meta_top(b'info')
        return isinstance(info, dict) and b'length' in info

    def __decode_text(self, text):
        """Decode the byte string *text* into text.

        The torrent's 'encoding' field is used when present, otherwise its
        'codepage' field (as ``cp<number>``), otherwise UTF-8.  Returns None
        for a missing or empty value, and the value unchanged when it is not
        a byte string or cannot be decoded with the selected codec.
        """
        if not text:
            return None
        if not isinstance(text, bytes):
            return text

        encoding = 'utf-8'
        declared = self.get_encoding()
        codepage = self.get_codepage()
        if declared:
            encoding = _to_native_str(declared)
        elif codepage:
            encoding = 'cp' + _to_native_str(codepage)

        try:
            return text.decode(encoding)
        except (LookupError, ValueError):
            # LookupError: unknown codec name; ValueError: undecodable bytes.
            return text

    def __get_meta_top(self, key):
        """Return the top-level metainfo entry for the byte-string *key*."""
        return self.__metainfo.get(key)

    def __get_meta_info(self, key):
        """Return the entry for byte-string *key* inside the 'info' dict."""
        meta_info = self.__get_meta_top(b'info')
        if isinstance(meta_info, dict):
            return meta_info.get(key)
        return None

    # ------------------------------------------------------------------
    # Public getters.
    # ------------------------------------------------------------------

    def get_codepage(self):
        """Return the raw 'codepage' field, or None."""
        return self.__get_meta_top(b'codepage')

    def get_encoding(self):
        """Return the raw 'encoding' field (a byte string), or None."""
        return self.__get_meta_top(b'encoding')

    def get_announces(self):
        """Return the tracker tiers as a list of lists of URL byte strings.

        The 'announce' URL, when present, forms the first tier; the tiers of
        'announce-list' follow.  The result is empty when neither exists.
        """
        announces = []
        ann = self.__get_meta_top(b'announce')
        if ann:
            announces.append([ann])
        ann_list = self.__get_meta_top(b'announce-list')
        if isinstance(ann_list, list):
            announces.extend(ann_list)
        return announces

    def get_publisher(self):
        """Return the decoded 'publisher' field, or None."""
        return self.__decode_text(self.__get_meta_top(b'publisher'))

    def get_publisher_url(self):
        """Return the decoded 'publisher-url' field, or None."""
        return self.__decode_text(self.__get_meta_top(b'publisher-url'))

    def get_creater(self):
        """Return the decoded 'created by' field, or None."""
        return self.__decode_text(self.__get_meta_top(b'created by'))

    def get_creation_date(self):
        """Return 'creation date' as a naive UTC datetime.

        Returns None when the field is absent, is not a number, or is out of
        the range datetime can represent.
        """
        utc_date = self.__get_meta_top(b'creation date')
        if utc_date is None:
            return None
        try:
            return _EPOCH + timedelta(seconds=utc_date)
        except (TypeError, OverflowError):
            return None

    def get_comment(self):
        """Return the decoded 'comment' field, or None."""
        return self.__decode_text(self.__get_meta_top(b'comment'))

    def get_nodes(self):
        """Return the raw 'nodes' field, or None."""
        return self.__get_meta_top(b'nodes')

    def get_piece_length(self):
        """Return the 'piece length' field of the 'info' dict, or None."""
        return self.__get_meta_info(b'piece length')

    def get_files(self):
        """Describe every file in the torrent.

        Returns a list of dictionaries with the keys:
          'name'   -- list of path components; for multi-file torrents the
                      first component is the torrent's top-level name
          'length' -- file size in bytes
          'pieces' -- concatenated SHA-1 digests of the pieces overlapping
                      the file (all digests for a single-file torrent)

        Returns an empty list when the torrent has no usable 'files' list.
        Raises BTFormatError when a 'files' entry is malformed.
        """
        pieces = self.__get_meta_info(b'pieces') or b''
        name = self.__decode_text(self.__get_meta_info(b'name'))

        if self.__is_singlefile():
            return [{'name': [name],
                     'length': self.__get_meta_info(b'length'),
                     'pieces': pieces}]

        entries = self.__get_meta_info(b'files')
        if not isinstance(entries, list):
            return []

        piece_length = self.get_piece_length()
        files = []
        # Files are laid out back to back in list order, so the running sum
        # of lengths is each file's offset inside the torrent payload.
        offset = 0
        for entry in entries:
            if not isinstance(entry, dict):
                raise BTFormatError('file entry is not a dictionary')
            length = entry.get(b'length')
            path = entry.get(b'path')
            if (not isinstance(length, numbers.Integral)
                    or not isinstance(path, list)):
                raise BTFormatError('file entry lacks a valid length/path')

            path_list = [name]
            path_list.extend(self.__decode_text(part) for part in path)
            files.append({
                'name': path_list,
                'length': length,
                'pieces': _slice_pieces(pieces, piece_length, offset, length),
            })
            offset += length
        return files


def main(argv=None):
    """Print a summary of a torrent file; return a process exit status.

    The path is taken from the first command-line argument and defaults to
    the sample file name used by the original script.
    """
    args = sys.argv[1:] if argv is None else argv
    filename = args[0] if args else r".\huapi2.1.torrent"
    torrent = TorrentFile()

    def show(label, value):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s %s" % (label, value))

    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % (reason,))
        # Nothing was loaded, so there is nothing meaningful to print below.
        return 1
    print("end to read file")

    show("announces: ", torrent.get_announces())
    show("peace length:", torrent.get_piece_length())
    show("code page:", torrent.get_codepage())
    show("encoding:", torrent.get_encoding())
    show("publisher:", torrent.get_publisher())
    show("publisher url:", torrent.get_publisher_url())
    show("creater:", torrent.get_creater())
    show("creation date:", torrent.get_creation_date())
    show("commnent:", torrent.get_comment())
    show("nodes:", torrent.get_nodes())

    for one_file in torrent.get_files():
        show('file name:', '\\'.join(one_file['name']))
        show('file length:', one_file['length'])
        show('pieces:', list(one_file['pieces']))
    return 0


if __name__ == '__main__':
    sys.exit(main())
 
                     
                    
                 
                    
                 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号