将从浏览器上Copy来的cookie字符串转化为Scrapy能使用的Dict
将从浏览器上Copy来的cookie字符串转化为Scrapy能使用的Dict
headers cookies 格式化
# -*- coding: utf-8 -*-
class transCookie:
    """Convert raw request headers copied from a browser into dictionaries.

    The input is multi-line text with one ``Name: value`` header per line.
    ``stringToDict`` splits it into a cookie dict and a dict of the remaining
    headers (the shape Scrapy's ``cookies=`` / ``headers=`` arguments accept).
    """

    def __init__(self, request_headers):
        """Store the raw header text.

        :param request_headers: multi-line string, one ``Name: value`` per line.
        """
        self.request_headers = request_headers

    def stringToDict(self):
        """Parse the stored header text.

        Blank lines and lines without a ``name: value`` separator are skipped.
        The Cookie header (matched case-insensitively) is removed from the
        header dict and expanded into its own dict. The stored raw text is
        not modified.

        :return: tuple ``(cookie_dict, header_dict)``; ``cookie_dict`` is
            empty when the input has no Cookie header.
        """
        headers = self._parse_headers(self.request_headers or '')

        # Browsers show "Cookie" (HTTP/1.1) or "cookie" (HTTP/2), so match
        # the header name without regard to case.
        cookie_keys = [name for name in headers if name.lower() == 'cookie']

        cookies = {}
        for name in cookie_keys:
            cookies.update(self._parse_cookies(headers.pop(name)))
        return cookies, headers

    @staticmethod
    def _parse_headers(raw_text):
        """Split ``Name: value`` lines into a dict, keeping values intact."""
        headers = {}
        for line in raw_text.splitlines():
            line = line.strip()
            # Search from index 1 so an HTTP/2 pseudo-header such as
            # ":authority: host" keeps its leading colon as part of the name.
            sep = line.find(':', 1)
            if sep == -1:
                continue
            # Only the first separator splits the line: values may contain
            # colons themselves (URLs, "host:port") and inner spaces.
            headers[line[:sep].strip()] = line[sep + 1:].strip()
        return headers

    @staticmethod
    def _parse_cookies(cookie_header):
        """Split a ``a=1; b=2`` Cookie header value into a dict."""
        cookies = {}
        for pair in cookie_header.split(';'):
            # partition() splits on the first "=" only, so values that
            # contain "=" (e.g. base64 padding) are preserved.
            name, _, value = pair.partition('=')
            name = name.strip()
            if name:  # ignore empty segments such as a trailing ";"
                cookies[name] = value.strip()
        return cookies
if __name__ == "__main__":
    # Demo: request headers pasted verbatim from the browser's dev tools.
    raw_headers = """
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6
Cache-Control: max-age=0
Connection: keep-alive
Cookie: select_city=310000; lianjia_ssid=ddcda941-409f-4608-9391-5f99ae1cca04; lianjia_uuid=97743c50-0dac-4ade-a9e0-be3a8af4ec7c; UM_distinctid=176801a2b6251a-0fb930e32aac1d-5a301e44-144000-176801a2b63379; CNZZDATA1253492439=157838622-1608462541-%7C1608462541; CNZZDATA1254525948=2019210225-1608464342-%7C1608464342; CNZZDATA1255633284=2051793068-1608462319-%7C1608462319; CNZZDATA1255604082=202708448-1608465066-%7C1608465066; _smt_uid=5fdf4217.1481dbf9; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22176801a2dba466-0af9f90e0b64b7-5a301e44-1327104-176801a2dbb75d%22%2C%22%24device_id%22%3A%22176801a2dba466-0af9f90e0b64b7-5a301e44-1327104-176801a2dbb75d%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiYzQ0N2I5ZjBmNTgwMzAzNWM1ZGQzYmRmMTEyNDQ3NTIwM2I5OTJhNWFiY2Y2MTBkNTYxZDZmNjEyNjYwMjE3ODYxZTMyOTA1Mjk3YjBhZjhkZmYwY2VjNWRlMTY1NGMwZmE1Mzc3N2YzZTMwNzEzYzU1NjYzODc3ODIyZjg3MDAyYWM3OTliNzgzNGVmNTBlYjFmNjMzODhmODgyMmY0YjI5NjQxM2VkYWQ5OGI1NTVjZWNhNTNhZDNjMDVhMDBiOTgzNDc1YTJkNDYxZGM1MmY1YzRmNmYwODQ4NWFhMDc5MDY0MTZhYjQzNDc4MjViNTZkOTMxNGU2NWEzMTNiY2UwMGJmMjlmODQzMThjOTE5NWFlMTk5YTliOTYzZTQzM2NhOTYxYTVmMjBhMzEzNzY0MmNjNGRiZDI3MzUzYzU0ZjkzNjYwM2U0ZjFhZTczYTM4MzkzNmFiYTk4MzYxM1wiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCJjNTcyNWI2Y1wifSIsInIiOiJodHRwczovL3NoLmxpYW5qaWEuY29tL2Vyc2hvdWZhbmcvIiwib3MiOiJ3ZWIiLCJ2IjoiMC4xIn0=
Host: sh.lianjia.com
Sec-Fetch-Dest: document
Sec-Fetch-Mode: navigate
Sec-Fetch-Site: none
Sec-Fetch-User: ?1
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66
"""
    # Split the pasted text into the cookie dict and the remaining headers.
    cookie_dict, header_dict = transCookie(raw_headers).stringToDict()

    # Print each result under its own banner line.
    for banner, parsed in (
        ('---------------------cookie----------------------', cookie_dict),
        ('---------------------headers----------------------', header_dict),
    ):
        print(banner)
        print(parsed)

浙公网安备 33010602011771号