Ch2数据导入
环境:Windows 10 -- Sublime Text 3 -- Python 3.5
1.从csv导入数据
#!/usr/bin/env python
# Import data from a CSV file: the first row is treated as the header and
# every following row as data; both are printed to stdout.
import csv
import sys


def read_csv(filename, dialect=csv.excel):
    """Read a delimited text file and split it into header and data rows.

    Args:
        filename: Path of the file to read.
        dialect: ``csv`` dialect used for parsing. Defaults to ``csv.excel``,
            the dialect ``csv.reader`` uses when none is given.

    Returns:
        A ``(header, rows)`` tuple. ``header`` is the first row, or ``None``
        when the file contains no rows; ``rows`` is the list of all
        remaining rows (each a list of strings).

    Raises:
        csv.Error: If the content cannot be parsed. The message includes the
            line number at which parsing failed.
        OSError: If the file cannot be opened.
    """
    # newline='' leaves newline handling to the csv module, which is what
    # its documentation requires for quoted fields with embedded newlines.
    with open(filename, newline='') as f:
        reader = csv.reader(f, dialect=dialect)
        try:
            # next() with a default keeps an empty file from raising.
            header = next(reader, None)
            rows = list(reader)
        except csv.Error as e:
            raise csv.Error("Error reading CSV file at line %s: %s"
                            % (reader.line_num, e)) from e
    return header, rows


def main():
    """Load the sample CSV file and print its header and rows."""
    filename = 'ch02-data.csv'

    try:
        header, data = read_csv(filename)
    except csv.Error as e:
        print(e)
        sys.exit(-1)

    if header:
        print(header)
        print('==================')

    for datarow in data:
        print(datarow)


if __name__ == '__main__':
    main()
运行结果:

2.从Microsoft Excel文件中导入数据
# Import data from a Microsoft Excel workbook: collect every cell value of
# 'Sheet1' row by row, and additionally print each date cell as a datetime.
# NOTE(review): xlrd 2.0 and later read only the legacy .xls format, so this
# script presumably needs xlrd < 2.0 to open an .xlsx file -- confirm against
# the installed version.
from datetime import datetime
from pprint import pprint

import xlrd
from xlrd.xldate import XLDateAmbiguous

file = 'ch02-xlsxdata.xlsx'
wb = xlrd.open_workbook(filename=file)
ws = wb.sheet_by_name('Sheet1')

dataset = []

for row_idx in range(ws.nrows):
    row_values = []
    for col_idx in range(ws.ncols):
        cell = ws.cell(row_idx, col_idx)
        row_values.append(cell.value)

        # Only date cells get the extra conversion/printing below.
        if ws.cell_type(row_idx, col_idx) != xlrd.XL_CELL_DATE:
            continue

        try:
            print(ws.cell_type(row_idx, col_idx))
            # Excel stores dates as serial numbers; the workbook's datemode
            # selects the epoch used to convert them.
            date_parts = xlrd.xldate_as_tuple(cell.value, wb.datemode)
            print(datetime(*date_parts))
        except XLDateAmbiguous as e:
            print(e)
    dataset.append(row_values)

pprint(dataset)
运行结果:

3.从定宽数据文件导入
样本格式如下:

# Import data from a fixed-width file: every line is one record whose fields
# occupy fixed byte ranges described by a struct format string.
import struct


def parse_fixed_width(lines, mask):
    """Yield the stripped fields of each fixed-width record in *lines*.

    Args:
        lines: Iterable of ``bytes`` lines, e.g. a file opened in binary
            mode. Trailing line terminators are allowed.
        mask: ``struct`` format string describing the field widths,
            e.g. ``'9s14s5s'``.

    Yields:
        A list of ``bytes`` fields per record, with surrounding whitespace
        stripped from each field.

    Raises:
        struct.error: If a non-blank line is shorter than the record size.
    """
    record = struct.Struct(mask)
    for line in lines:
        # A line holding only its terminator (typically the last line of
        # the file) carries no record; skip it instead of failing.
        if not line.rstrip(b'\r\n'):
            continue
        # unpack_from reads just the first record.size bytes, so the
        # trailing newline is ignored.
        yield [field.strip() for field in record.unpack_from(line)]


def main():
    """Parse the sample fixed-width file and print the fields of each line."""
    mask = '9s14s5s'
    print('formatstring {!r}, record size: {}'.format(
        mask, struct.calcsize(mask)))

    datafile = 'ch02-fixed-width-1M.data'
    # Binary mode: field widths are byte counts, so the raw bytes are parsed
    # directly instead of decoding with the locale encoding and re-encoding
    # as UTF-8, which could shift field boundaries.
    with open(datafile, 'rb') as f:
        for fields in parse_fixed_width(f, mask):
            print('fields: ', fields)


if __name__ == '__main__':
    main()
运行结果:

4.从制表符分隔的文件中导入
# Import data from a tab-delimited file: the first row is treated as the
# header and every following row as data; both are printed to stdout.
import csv
import sys


def read_tab_delimited(filename):
    """Read a tab-delimited file and split it into header and data rows.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(header, rows)`` tuple. ``header`` is the first row, or ``None``
        when the file contains no rows; ``rows`` is the list of all
        remaining rows (each a list of strings).

    Raises:
        csv.Error: If the content cannot be parsed. The message includes the
            line number at which parsing failed.
        OSError: If the file cannot be opened.
    """
    # newline='' leaves newline handling to the csv module, which is what
    # its documentation requires for quoted fields with embedded newlines.
    with open(filename, newline='') as f:
        reader = csv.reader(f, dialect=csv.excel_tab)
        try:
            # next() with a default keeps an empty file from raising.
            header = next(reader, None)
            rows = list(reader)
        except csv.Error as e:
            raise csv.Error("Error reading CSV file at line %s: %s"
                            % (reader.line_num, e)) from e
    return header, rows


def main():
    """Load the sample tab-delimited file and print its header and rows."""
    filename = 'ch02-data.tab'

    try:
        header, data = read_tab_delimited(filename)
    except csv.Error as e:
        print(e)
        sys.exit(-1)

    if header:
        print(header)
        print('===================')

    for datarow in data:
        print(datarow)


if __name__ == '__main__':
    main()
运行结果:

5.从JSON数据源导入
# Import data from a JSON source: download a JSON array of timeline entries
# and print the set of unique repository URLs found in it.
import sys
from pprint import pprint


def extract_repo_urls(entries):
    """Collect the unique ``entry['repository']['url']`` values.

    Args:
        entries: Iterable of decoded JSON objects (dicts).

    Returns:
        A set with one URL per distinct repository. Entries that lack the
        expected keys, or that are not mappings at all, are reported on
        stdout and skipped.
    """
    repos = set()  # we want just unique urls
    for entry in entries:
        try:
            repos.add(entry['repository']['url'])
        except KeyError as e:
            print("No key %s. Skipping..." % (e))
        except TypeError:
            # The entry (or its 'repository' value) is not a mapping, e.g.
            # a bare string or null in the payload.
            print("Unexpected entry %r. Skipping..." % (entry,))
    return repos


def fetch_entries(url, timeout=10):
    """Download *url* and return its JSON payload as a list.

    Args:
        url: Address of the JSON resource.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON array.

    Raises:
        RuntimeError: If the request fails, the server answers with an HTTP
            error status, the body is not valid JSON, or the payload is not
            a JSON array.
    """
    # Imported here so extract_repo_urls stays usable without the HTTP
    # dependency installed.
    import requests

    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into an error instead of parsing an error page.
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        raise RuntimeError("Could not load JSON from %s: %s" % (url, e)) from e

    if not isinstance(payload, list):
        raise RuntimeError("Expected a JSON array from %s, got %s"
                           % (url, type(payload).__name__))
    return payload


def main():
    """Fetch the timeline and print the unique repository URLs."""
    # NOTE(review): this endpoint no longer serves data, so the script is
    # expected to stop with the error message below until the URL (and the
    # keys read in extract_repo_urls) are updated for a live data source.
    url = 'https://github.com/timeline.json'

    try:
        json_obj = fetch_entries(url)
    except RuntimeError as e:
        print(e)
        sys.exit(1)

    pprint(extract_repo_urls(json_obj))


if __name__ == '__main__':
    main()
运行结果: 出错,未获取到数据,原因是该网址失效。
浙公网安备 33010602011771号