推导数据list_comprehension

利用内置函数(BIF)sorted() 和 set() 处理数据文件

'''#打开txt文件并将其转换为列表且去除空格
with open ('james.txt') as jaf:
    data = jaf.readline()
james = data.strip().split(',')
    
with open ('julie.txt') as juf:
    data = juf.readline()
julie = data.strip().split(',')
    
with open ('mikey.txt') as mif:
    data = mif.readline()
mikey = data.strip().split(',')
    
with open ('sarah.txt') as saf:
    data = saf.readline()
sarah = data.strip().split(',')'''

# Load each athlete's raw times with get_coach_data.

from open_file import get_coach_data

data_files = ('james.txt', 'julie.txt', 'mikey.txt', 'sarah.txt')
james, julie, mikey, sarah = [get_coach_data(name) for name in data_files]

# Normalise every time with sanitize inside a comprehension.

from sanitize import sanitize

# A set comprehension removes duplicates (a set is unordered and cannot be
# sliced); sorted() then returns a new, ordered list.
james = sorted({sanitize(t) for t in james})
julie = sorted({sanitize(t) for t in julie})
mikey = sorted({sanitize(t) for t in mikey})
sarah = sorted({sanitize(t) for t in sarah})


'''#去除重复数据
james = []
julie = []
mikey = []
sarah = []
for x in clean_james:
    if x not in james:
        james.append(x)
for x in clean_julie:
    if x not in julie:
        julie.append(x)
for x in clean_mikey:
    if x not in mikey:
        mikey.append(x)
for x in clean_sarah:
    if x not in sarah:
        sarah.append(x)'''
# Print the first three entries of each deduplicated, sorted list via slicing.
for times in (james, julie, mikey, sarah):
    print(times[:3])

1.调用的文件处理函数get_coach_data

 #定义函数get_coach_data将文档转化为数列
def get_coach_data(filename):
    """Read the first line of *filename* and split it on commas.

    Only the first line of the file is used. Leading and trailing
    whitespace is stripped from that line before it is split.

    Returns the list of comma-separated fields, or None when the file
    cannot be opened or read (the error is printed in that case).
    """
    try:
        with open(filename) as source:
            first_line = source.readline()
    except IOError as ioerr:
        # Report the problem and signal failure to the caller with None.
        print('File error:' + str(ioerr))
        return None
    fields = first_line.strip().split(',')
    return fields

2.调用的数据格式化函数sanitize

#定义sanitize函数格式化数据
def sanitize(time_string):
    """Normalise a time string to the 'mins.secs' form.

    A string containing '-' is split on '-'; otherwise a string containing
    ':' is split on ':'. The two resulting parts are joined with '.'.
    A string containing neither separator is returned unchanged.

    Returns the normalised string, or None (after printing
    'values error') when the value cannot be processed, e.g. it does not
    split into exactly two parts or is not a string.
    """
    try:
        if '-' in time_string:
            splitter = '-'
        elif ':' in time_string:
            splitter = ':'
        else:
            # Already in the target format (or has no known separator).
            return time_string
        (mins, secs) = time_string.split(splitter)
        return mins + '.' + secs
    except (TypeError, ValueError, AttributeError):
        # Only data problems are handled here; a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        print ('values error')
        return None

 

posted @ 2018-08-26 13:31  nester_liz  阅读(150)  评论(0)    收藏  举报