PurpleAir空气质量数据采集

PurpleAir空气质量数据采集

 

# -*- coding: utf-8 -*-

import time, datetime, calendar
import urllib, requests
import queue, threading
import json
import os


def month_window(year, month):
    """Return the first and last second of a calendar month as strings.

    Args:
        year: Four-digit year.
        month: Month number, 1-12.

    Returns:
        A ``(start, end)`` tuple formatted as ``YYYY-MM-DD HH:MM:SS``; ``end``
        falls on the last day of the month as given by ``calendar.monthrange``.
    """
    last_day = calendar.monthrange(year, month)[1]
    start = '%04d-%02d-%02d 00:00:00' % (year, month, 1)
    end = '%04d-%02d-%02d 23:59:59' % (year, month, last_day)
    return start, end


def build_feed_url(channel_id, read_key, time_start, time_end):
    """Build the ThingSpeak ``feed.json`` URL for one channel and time window.

    The query string is fixed to ``offset=0&average=60&round=2``.  The
    timestamps are inserted verbatim (they contain a space); escaping is left
    to the HTTP client.
    """
    return (
        "https://thingspeak.com/channels/" + str(channel_id)
        + "/feed.json?api_key=" + str(read_key)
        + "&offset=0&average=60&round=2&start=" + time_start
        + "&end=" + time_end
    )


def format_feed_row(sensor_id, lat, lon, label, entry):
    """Format one feed entry as a CSV line matching the file header.

    Args:
        sensor_id, lat, lon, label: Per-sensor values repeated on every row.
        entry: One element of the feed's ``"feeds"`` list; must provide
            ``created_at`` (``%Y-%m-%dT%H:%M:%SZ``), ``field6``, ``field7``
            and ``field8``.

    Returns:
        The newline-terminated row.  Values are written unquoted, exactly as
        the original script did.

    Raises:
        KeyError: If a required key is missing from ``entry``.
        ValueError: If ``created_at`` does not match the expected format.
    """
    stamp = time.strptime(entry['created_at'], "%Y-%m-%dT%H:%M:%SZ")
    return '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (
        sensor_id, lat, lon,
        stamp.tm_year, stamp.tm_mon, stamp.tm_mday, stamp.tm_hour,
        entry['field6'], entry['field7'], entry['field8'], label)


def fetch_sensor_feeds(sensor_id, label, pace, headers, time_start, time_end):
    """Download the feed entries of one sensor for the given time window.

    First asks purpleair.com for the sensor's ThingSpeak channel id and read
    key, then requests the channel feed from thingspeak.com.

    Returns:
        The list stored under ``"feeds"`` in the ThingSpeak response.

    Raises:
        Exception: Any network, HTTP-status, JSON-decoding or missing-key
            error; the caller is expected to retry.
    """
    lookup_url = "https://www.purpleair.com/json?show=" + str(sensor_id)
    lookup = requests.get(lookup_url, headers=headers, timeout=5)
    lookup.raise_for_status()  # surface HTTP errors instead of a confusing JSON/KeyError
    sensor = lookup.json()["results"][0]

    feed_url = build_feed_url(
        sensor["THINGSPEAK_PRIMARY_ID"],
        sensor["THINGSPEAK_PRIMARY_ID_READ_KEY"],
        time_start, time_end)
    print([datetime.datetime.now().strftime('%H:%M:%S'), pace, sensor_id, label, feed_url])

    feed = requests.get(feed_url, headers=headers, timeout=5)
    feed.raise_for_status()
    return feed.json()["feeds"]


def fetch_with_retry(sensor_id, label, pace, headers, time_start, time_end, maxtimes=10):
    """Call ``fetch_sensor_feeds`` up to ``maxtimes`` times with linear back-off.

    After the k-th failure the function sleeps ``30 * k`` seconds, except after
    the final failure, where sleeping would only delay the next sensor.

    Returns:
        The feed entries, or ``None`` when every attempt failed.
    """
    for attempt in range(1, maxtimes + 1):
        try:
            return fetch_sensor_feeds(sensor_id, label, pace, headers, time_start, time_end)
        except Exception as e:  # retry boundary: report and try again
            print([repr(e)])
            if attempt < maxtimes:
                time.sleep(30 * attempt)
    return None


def main():
    """Download one month of data for every sensor in ``id2.json`` into a CSV."""
    # 0. Configuration (the original author noted the range 2017/10/31 - 2018/12/1).
    os.system("cls" if os.name == "nt" else "clear")

    year = 2017
    month = 10

    filename = 'data/%s/%4d%02d_V1_%s.csv' % (year, year, month, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))

    # 1. Start-up banner and request headers.
    print("%s\t%4d%02d\t%s\n" % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), year, month, "Ready go..."))

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
               }

    # 2. Sensor list; each item is indexed positionally (0, 5, 6, 7) below.
    with open("id2.json", 'rb') as id_file:
        ides = json.load(id_file)

    time_start, time_end = month_window(year, month)

    # The output directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Explicit UTF-8 so a non-ASCII station label cannot fail to encode under
    # a narrow platform default; `with` guarantees the file is flushed/closed.
    with open(filename, 'a+', encoding='utf-8') as F:
        F.write('"ID","Lat","Lon","Year","Month","Day","Hour","Temperature","Humidity","PM2.5","Station"\n')

        for pace, item in enumerate(ides, start=1):
            sensor_id, label, lat, lon = item[0], item[5], item[6], item[7]

            entries = fetch_with_retry(sensor_id, label, pace, headers, time_start, time_end)
            if entries is None:
                print([sensor_id, label, "giving up after repeated failures"])
                continue

            # Gaps in the series are not filled in; rows are written as received.
            for entry in entries:
                try:
                    F.write(format_feed_row(sensor_id, lat, lon, label, entry))
                except Exception as e:
                    # Kept from the original: the first bad entry ends this
                    # sensor's output rather than being skipped.
                    print([repr(e)])
                    break


if __name__ == '__main__':
    main()
        

 

posted @ 2019-12-03 21:29  方倍工作室  阅读(487)  评论(0编辑  收藏  举报