pandas 生成多个sheet(案例:爬取查博士信息)
案例
1 # coding=utf-8 2 import requests 3 import re 4 import pandas 5 from time import time 6 7 8 headers = { 9 'UserAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36', 10 'cookie': 'Hm_lvt_0773869eb6bae8eded582084b718d3f8=1657593020; Hm_lpvt_0773869eb6bae8eded582084b718d3f8=1657593020; partnerKey=eb4b548938014d58a8a1a95e55ee7713; JSESSIONID=4A4AFF5A91CC3C628CF01E024C15CFA3' 11 } 12 13 def get_brand(): # 根据查博士ID 返回查博士品牌ID信息 14 brandlist = [] 15 brandId = [] 16 url = 'https://www.chaboshi.cn/mfgj' 17 chaboshi_content = requests.get(url).text 18 chaboshi = chaboshi_content.replace("\n", "") 19 s = '"brandName":"(.*?)","id":([0-9]+)' 20 ss = re.findall(s, chaboshi) 21 for i in ss: 22 d={} 23 # print(i) 24 # print (type(i)) 25 brandname = i[0].split('"')[0] 26 d["brandname"]=brandname 27 brandid = i[1] 28 d["brandid"] = brandid 29 30 brandlist.append(brandname) 31 # brandId.append(brandid) 32 brandId.append(d) 33 # if (i = 660): 34 # brandId.remove(660) 35 del brandlist[0:10] 36 del brandlist[-1] 37 del brandId[-1] 38 del brandId[0:10] 39 return brandId 40 41 42 def get_series(car_id): 43 """ [{'seriesName': '爱驰U6', 'seriesId': 10034}, {'seriesName': '爱驰U5', 'seriesId': 6649}] """ 44 series_id = [] 45 data = {'source': 'pc', 46 'phone': '', 47 'ticket': '', 48 'supportbrandid': car_id 49 } 50 get_series_url = 'https://app.chaboshi.cn/app/brandModel/getSeries' 51 series_content = requests.post(url=get_series_url, data=data, headers=headers).json() 52 series_list = series_content['data'] 53 series_info = [] 54 # print ((series_content)) 55 # print (series_list['data']) 56 for z in series_list['data'].values(): 57 for zx in z: 58 d = {} 59 # print(zx) 60 d["seriesName"]=zx['series'] 61 d["seriesId"]=zx['id'] 62 series_id.append(d) 63 return series_id 64 65 # print(get_series(378)) 66 67 def get_model_name(model_id): # 获取车型 68 """ [{'modelId': 275130, 'modelName': '2019款U5ING'}, {'modelId': 275129, 'modelName': '2019款U5PRO+'} """ 69 ls=[] 70 errorid = [] 71 car_data = {'source': 'pc', 72 'phone': '', 73 'ticket': '', 74 'supportseriesid': model_id, 75 'type': '0'} 76 car_url = 'https://app.chaboshi.cn/app/brandModel/getModelsValuationAble' 77 model_content = requests.post(url=car_url, data=car_data, headers=headers).json() 78 79 if model_content['code'] == 1: # 判断是否可以估价,如果不可以估价打印异常 80 errorid.append(model_id) 81 pass 82 else: 83 model_list = model_content['data']['data'] 84 # print(type(model_list)) 85 for i in model_list.items(): 86 for j in iter(i): 87 # print (j) 88 # print (type(j)) 89 if type(j) is str: 90 continue 91 else: 92 for k in j: 93 d={} 94 # model_list.append(k['model']) 95 # print(k['model'], k['id']) 96 # if "万" in k['model']: 97 # print("".join(k['model'].split(" ")[:-1]), k['id']) 98 d["modelId"]=k['id'] 99 d["modelName"]="".join(k['model'].split(" ")[:-1]) 100 # print(k['sellPrice']) 101 ls.append(d) 102 return ls 103 104 # print(get_model_name(6649)) 105 106 def deal_data(): 107 brandlist = get_brand() 108 # 705 626 109 # brandlist = [{'brandname': '全球鹰', 'brandid': '626'},{'brandname': '全球鹰', 'brandid': '705'}] 110 ls=[] 111 ls2=[] 112 ls3=[] 113 ls4=[] 114 ls5=[] 115 ls6=[] 116 ls7=[] 117 #[{"001":{"001":[1,2,3],"002":[]}},] 118 for asd in brandlist: 119 # print(asd) 120 d = {} 121 d2 = {} 122 seriesIds=get_series(asd["brandid"]) 123 for i in seriesIds: 124 modelIds = get_model_name(i["seriesId"]) 125 for m in modelIds: 126 # print(asd["brandid"],i["seriesId"],m["modelId"]) 127 ls4.append(m["modelId"]) 128 ls7.append(m["modelName"]) 129 ls2.append(asd["brandid"]) 130 if asd["brandname"]=="全球鹰": 131 asd["brandname"]="全球鹰"+str(time()) #sheet 名称重复会报错 132 ls5.append(asd["brandname"]) 133 ls3.append(i["seriesId"]) 134 ls6.append(i["seriesName"]) 135 136 d["brandIds"]=ls2 137 d["brandNames"] = ls5 138 d["seriesIds"] = ls3 139 d["seriesNames"] = ls6 140 d["modelIds"] = ls4 141 d["modelNames"] = ls7 142 # print(d) 143 144 145 d2[asd["brandname"]] = d 146 # # print(d2) 147 ls.append(d2) 148 ls2=[] 149 ls3 = [] 150 ls4 = [] 151 ls5 = [] 152 ls6=[] 153 ls7=[] 154 155 156 157 158 # print(ls) 159 return ls 160 # deal_data() 161 162 def save_excel(data): 163 writer = pandas.ExcelWriter('cbsDatas2.xls') 164 for i in data: 165 for k,v in i.items(): 166 # print(v) 167 try: 168 data = pandas.DataFrame(v) 169 data.to_excel(writer,sheet_name=k) 170 except Exception as e: 171 print("error: ",e) 172 173 writer.save() 174 writer.close() 175 176 if __name__ == '__main__': 177 data = deal_data() 178 save_excel(data)
结果

总结
用pandas 生成多个sheet 的数据结构要遵循
data=[
{"001":{"001":[1,2,3],"002":[1,2,3]}},
{"002":{"002":[1,2,3],"002":[1,2,3]}},
]
def save_excel(data):
writer = pandas.ExcelWriter('cbsDatas.xls')
for i in data:
for k,v in i.items():
data = pandas.DataFrame(v)
data.to_excel(writer,sheet_name=k)
writer.save()
writer.close()

浙公网安备 33010602011771号