pandas 生成多个sheet(案例:爬取查博士信息)

 

案例

  1 # coding=utf-8
  2 import requests
  3 import re
  4 import pandas
  5 from time import time
  6 
  7 
  8 headers = {
  9     'UserAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
 10     'cookie': 'Hm_lvt_0773869eb6bae8eded582084b718d3f8=1657593020; Hm_lpvt_0773869eb6bae8eded582084b718d3f8=1657593020; partnerKey=eb4b548938014d58a8a1a95e55ee7713; JSESSIONID=4A4AFF5A91CC3C628CF01E024C15CFA3'
 11 }
 12 
 13 def get_brand():  # 根据查博士ID 返回查博士品牌ID信息
 14     brandlist = []
 15     brandId = []
 16     url = 'https://www.chaboshi.cn/mfgj'
 17     chaboshi_content = requests.get(url).text
 18     chaboshi = chaboshi_content.replace("\n", "")
 19     s = '"brandName":"(.*?)","id":([0-9]+)'
 20     ss = re.findall(s, chaboshi)
 21     for i in ss:
 22         d={}
 23         # print(i)
 24         # print (type(i))
 25         brandname = i[0].split('"')[0]
 26         d["brandname"]=brandname
 27         brandid = i[1]
 28         d["brandid"] = brandid
 29 
 30         brandlist.append(brandname)
 31         # brandId.append(brandid)
 32         brandId.append(d)
 33         # if (i = 660):
 34         #    brandId.remove(660)
 35     del brandlist[0:10]
 36     del brandlist[-1]
 37     del brandId[-1]
 38     del brandId[0:10]
 39     return brandId
 40 
 41 
 42 def get_series(car_id):
 43     """  [{'seriesName': '爱驰U6', 'seriesId': 10034}, {'seriesName': '爱驰U5', 'seriesId': 6649}] """
 44     series_id = []
 45     data = {'source': 'pc',
 46             'phone': '',
 47             'ticket': '',
 48             'supportbrandid': car_id
 49             }
 50     get_series_url = 'https://app.chaboshi.cn/app/brandModel/getSeries'
 51     series_content = requests.post(url=get_series_url, data=data, headers=headers).json()
 52     series_list = series_content['data']
 53     series_info = []
 54     # print  ((series_content))
 55     # print  (series_list['data'])
 56     for z in series_list['data'].values():
 57         for zx in z:
 58             d = {}
 59             # print(zx)
 60             d["seriesName"]=zx['series']
 61             d["seriesId"]=zx['id']
 62             series_id.append(d)
 63     return series_id
 64 
 65 # print(get_series(378))
 66 
 67 def get_model_name(model_id):  # 获取车型
 68     """  [{'modelId': 275130, 'modelName': '2019款U5ING'}, {'modelId': 275129, 'modelName': '2019款U5PRO+'} """
 69     ls=[]
 70     errorid = []
 71     car_data = {'source': 'pc',
 72                 'phone': '',
 73                 'ticket': '',
 74                 'supportseriesid': model_id,
 75                 'type': '0'}
 76     car_url = 'https://app.chaboshi.cn/app/brandModel/getModelsValuationAble'
 77     model_content = requests.post(url=car_url, data=car_data, headers=headers).json()
 78 
 79     if model_content['code'] == 1:  # 判断是否可以估价,如果不可以估价打印异常
 80         errorid.append(model_id)
 81         pass
 82     else:
 83         model_list = model_content['data']['data']
 84         # print(type(model_list))
 85         for i in model_list.items():
 86             for j in iter(i):
 87                 # print (j)
 88                 # print (type(j))
 89                 if type(j) is str:
 90                     continue
 91                 else:
 92                     for k in j:
 93                         d={}
 94                         # model_list.append(k['model'])
 95                         # print(k['model'], k['id'])
 96                         # if "万" in k['model']:
 97                         # print("".join(k['model'].split(" ")[:-1]), k['id'])
 98                         d["modelId"]=k['id']
 99                         d["modelName"]="".join(k['model'].split(" ")[:-1])
100                         # print(k['sellPrice'])
101                         ls.append(d)
102     return ls
103 
104 # print(get_model_name(6649))
105 
106 def deal_data():
107     brandlist = get_brand()
108     # 705 626
109     # brandlist = [{'brandname': '全球鹰', 'brandid': '626'},{'brandname': '全球鹰', 'brandid': '705'}]
110     ls=[]
111     ls2=[]
112     ls3=[]
113     ls4=[]
114     ls5=[]
115     ls6=[]
116     ls7=[]
117     #[{"001":{"001":[1,2,3],"002":[]}},]
118     for asd in brandlist:
119         # print(asd)
120         d = {}
121         d2 = {}
122         seriesIds=get_series(asd["brandid"])
123         for i in seriesIds:
124             modelIds = get_model_name(i["seriesId"])
125             for m in modelIds:
126                 # print(asd["brandid"],i["seriesId"],m["modelId"])
127                 ls4.append(m["modelId"])
128                 ls7.append(m["modelName"])
129                 ls2.append(asd["brandid"])
130                 if asd["brandname"]=="全球鹰":
131                     asd["brandname"]="全球鹰"+str(time()) #sheet 名称重复会报错
132                 ls5.append(asd["brandname"])
133                 ls3.append(i["seriesId"])
134                 ls6.append(i["seriesName"])
135 
136         d["brandIds"]=ls2
137         d["brandNames"] = ls5
138         d["seriesIds"] = ls3
139         d["seriesNames"] = ls6
140         d["modelIds"] = ls4
141         d["modelNames"] = ls7
142         # print(d)
143 
144 
145         d2[asd["brandname"]] = d
146         # # print(d2)
147         ls.append(d2)
148         ls2=[]
149         ls3 = []
150         ls4 = []
151         ls5 = []
152         ls6=[]
153         ls7=[]
154 
155 
156 
157 
158     # print(ls)
159     return ls
160 # deal_data()
161 
162 def save_excel(data):
163     writer = pandas.ExcelWriter('cbsDatas2.xls')
164     for i in data:
165         for k,v in i.items():
166             # print(v)
167             try:
168                 data = pandas.DataFrame(v)
169                 data.to_excel(writer,sheet_name=k)
170             except Exception as e:
171                 print("error: ",e)
172 
173     writer.save()
174     writer.close()
175 
176 if __name__ == '__main__':
177     data = deal_data()
178     save_excel(data)

 

 

结果

 

总结

用pandas 生成多个sheet 的数据结构要遵循

data=[

{"001":{"001":[1,2,3],"002":[1,2,3]}},
{"002":{"002":[1,2,3],"002":[1,2,3]}},

]

def save_excel(data):
     writer = pandas.ExcelWriter('cbsDatas.xls')
     for i in data:
       for k,v in i.items():
      data = pandas.DataFrame(v)
          data.to_excel(writer,sheet_name=k)
writer.save()
writer.close()
 
posted @ 2022-07-18 21:34  东方不败--Never  阅读(318)  评论(0)    收藏  举报