python代码小实践之split_csv

'''This Python script splits the .arff file produced by Marsyas into .csv files, which are easier to read into MATLAB for further processing.

Created on 2012-7-13

@author: mainred
'''
import xlrd, xlwt
import time
import re
import os
 
# Split one sheet into several output workbooks, one per "% filename" marker.
#
# Flow: ask for a base file name, open "<name>.arff" from the current working
# directory with xlrd, then walk the first column of the first sheet.  Every
# row starting with "% filename" closes the current output workbook and opens
# a new one named after the file mentioned in that row; every other non-empty
# row is copied into the current workbook with its trailing label removed.
#
# NOTE(review): the input is opened with xlrd, i.e. as a spreadsheet -- confirm
# that the ".arff" file really is in a format xlrd can read.
# NOTE(review): the outputs are written by xlwt but named "*.csv"; confirm
# that downstream readers expect workbook data rather than plain-text CSV.

# Fixed row offsets into the sheet.
# NOTE(review): presumably these match the header length of the particular
# input this script was written for -- confirm before using other inputs.
FIRST_NAME_ROW = 68   # row whose text supplies the first output name
FIRST_DATA_ROW = 70   # first row examined by the main loop

file_name = raw_input('please input name of the arff file:\n')
path = os.getcwd()

# os.path.join picks the platform's separator; a hard-coded '\\' makes the
# existence check fail everywhere except Windows.
file_abs_path = os.path.join(path, file_name + '.arff')

if not os.path.exists(file_abs_path):
    # Name the extension that was actually looked for.
    print("There is no %s.arff in such directory" % file_name)
    # Short pause before exiting, presumably so the message stays readable
    # when the console window closes with the process.
    time.sleep(2)
    # Non-zero status signals the failure; SystemExit also works when the
    # site-provided exit() helper is unavailable.
    raise SystemExit(1)

# Open exactly the path that was just checked.
data = xlrd.open_workbook(file_abs_path)
table = data.sheets()[0]
nrows = table.nrows

# Text after the last backslash, i.e. the final component of a Windows path.
name_pattern = re.compile(r'[^\\]*$')
# Text after the last comma of a row (the trailing field).
label_pattern = re.compile(r'[^,]+?$')

workbook = xlwt.Workbook()

# Output name = final path component up to its first dot.
name = name_pattern.search(
    table.row_values(FIRST_NAME_ROW)[0]).group(0).split('.')[0]

tablew = workbook.add_sheet(name)

redundancy = ''   # trailing field of the first data row of the current file
j = 0             # next row to write in the current output sheet
pas = False       # True -> skip the row right after a '% filename' row
for i in range(FIRST_DATA_ROW, nrows):
    if pas:
        # The row following a '% filename' marker is never copied.
        pas = False
        continue

    cell = table.row_values(i)[0]

    if cell.startswith('% filename'):
        # Finish the current output and start a fresh workbook for the file
        # named in this marker row.
        workbook.save(name + '.csv')

        workbook = xlwt.Workbook()

        name = name_pattern.search(cell).group(0).split('.')[0]
        print(name)
        tablew = workbook.add_sheet(name)
        pas = True
        j = 0
        continue

    if cell == '':
        continue

    if j == 0:
        # The first data row of each file defines the trailing text to strip.
        # If the row has no trailing field (e.g. it ends with a comma),
        # strip nothing instead of crashing on a failed match.
        match = label_pattern.search(cell)
        redundancy = match.group(0) if match else ''
        print(redundancy)

    # Remove the label only where it sits at the end of the row.  A blanket
    # str.replace would also delete the same text from inside feature values.
    if redundancy and cell.endswith(redundancy):
        cell = cell[:-len(redundancy)]
    tablew.write(j, 0, cell)
    j = j + 1

# Flush the last output workbook.
workbook.save(name + '.csv')

posted on 2012-08-09 11:29  mainred  阅读(501)  评论(0编辑  收藏  举报