Ch2数据导入

环境:Windows 10 + Sublime Text 3 + Python 3.5

 

1.从csv导入数据

#!/usr/bin/env python

# Read a comma-separated file: the first row is kept as the header and every
# following row is collected into `data`; both are then printed.

import csv
import sys  # needed for sys.exit() in the error handler below

filename = 'ch02-data.csv'

# Pre-bind `header` so the check further down cannot raise NameError when the
# file turns out to be empty.
header = None
data = []
try:
    # newline='' is what the csv module asks for, so that newlines embedded in
    # quoted fields are interpreted by the reader rather than by open().
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        # First row (if any) is the header; the rest are data rows.
        header = next(reader, None)
        data.extend(reader)
except csv.Error as e:
    print("Error reading CSV file at line %s: %s" % (reader.line_num, e))
    sys.exit(-1)

if header:
    print(header)
    print('==================')

for datarow in data:
    print(datarow)

运行结果:

 

2.从Microsoft Excel文件中导入数据

from datetime import datetime
from pprint import pprint

import xlrd
from xlrd.xldate import XLDateAmbiguous

# Load worksheet 'Sheet1' of the workbook and collect every cell value,
# row by row, into `dataset`.
file = 'ch02-xlsxdata.xlsx'

wb = xlrd.open_workbook(filename=file)
ws = wb.sheet_by_name('Sheet1')

dataset = []

for row_idx in range(ws.nrows):
    row_values = []
    for col_idx in range(ws.ncols):
        cell = ws.cell(row_idx, col_idx)
        # The raw cell value is always stored, including for date cells.
        row_values.append(cell.value)

        cell_kind = ws.cell_type(row_idx, col_idx)
        if cell_kind != xlrd.XL_CELL_DATE:
            continue

        # Date cells are additionally echoed: first the cell type code, then
        # the value converted using the workbook's date mode.
        print(cell_kind)
        try:
            date_parts = xlrd.xldate_as_tuple(cell.value, wb.datemode)
        except XLDateAmbiguous as err:
            print(err)
        else:
            print(datetime(*date_parts))
    dataset.append(row_values)

pprint(dataset)

运行结果:

 

3.从定宽数据文件导入

样本格式如下:

 

import struct
import string

# Parse a fixed-width data file: each record is split into three byte fields
# of 9, 14 and 5 bytes according to `mask`.
mask = '9s14s5s'
record = struct.Struct(mask)
parse = record.unpack_from
print('formatstring {!r}, record size: {}'.format(mask, record.size))

datafile = 'ch02-fixed-width-1M.data'

# Read in binary mode: the struct format works on bytes, so decoding each line
# with the platform default encoding and re-encoding it as UTF-8 is wasted work
# and can shift the byte offsets when the two encodings differ.
with open(datafile, 'rb') as f:
    for line_num, line in enumerate(f, start=1):
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no record.
            continue
        if len(line) < record.size:
            # unpack_from would raise struct.error on a short buffer; report
            # the bad line and keep going instead of aborting the whole run.
            print('skipping short record at line {}: {!r}'.format(line_num, line))
            continue
        fields = parse(line)
        print('fields: ', [field.strip() for field in fields])

运行结果:

 

4.从制表符分隔的文件中导入

# Read a tab-delimited file: the first row is kept as the header and every
# following row is collected into `data`; both are then printed.

import csv
import sys  # needed for sys.exit() in the error handler below

filename = 'ch02-data.tab'

# Pre-bind `header` so the check further down cannot raise NameError when the
# file turns out to be empty.
header = None
data = []
try:
    # newline='' is what the csv module asks for, so that newlines embedded in
    # quoted fields are interpreted by the reader rather than by open().
    with open(filename, newline='') as f:
        # The excel_tab dialect makes the reader split on tab characters.
        reader = csv.reader(f, dialect=csv.excel_tab)
        # First row (if any) is the header; the rest are data rows.
        header = next(reader, None)
        data.extend(reader)
except csv.Error as e:
    print("Error reading CSV file at line %s: %s" % (reader.line_num, e))
    sys.exit(-1)

if header:
    print(header)
    print('===================')

for datarow in data:
    print(datarow)

运行结果:

 

5.从JSON数据源导入

from pprint import pprint

import requests

# Fetch a JSON feed and collect the unique repository URLs it mentions.
# NOTE: this endpoint is reported as no longer serving data, so the request
# is expected to fail until the URL is replaced with a live source.
url = 'https://github.com/timeline.json'

# A timeout keeps the script from hanging forever on an unresponsive host, and
# raise_for_status() turns an HTTP error response into an explicit exception
# instead of trying to parse an error page as data.
r = requests.get(url, timeout=10)
r.raise_for_status()
json_obj = r.json()

# The loop below indexes each entry by key, so anything other than a list of
# entries would fail with an opaque TypeError; stop with a clear message.
if not isinstance(json_obj, list):
    raise SystemExit(
        "Unexpected JSON payload from %s: expected a list of entries" % url)

repos = set()  # we want just unique urls
for entry in json_obj:
    try:
        repos.add(entry['repository']['url'])
    except KeyError as e:
        print("No key %s. Skipping..." % (e))

pprint(repos)

运行结果:请求出错,未能获取到数据,原因是该网址(https://github.com/timeline.json)已失效。

 

 

 

 

 



posted @ 2017-11-06 09:03  CarbinHee  阅读(156)  评论(0)    收藏  举报