csv数据读取
import pandas as pd
from datetime import datetime
import time
def read_csv(from_path):
table = pd.read_csv(from_path) ##选取表
return table
def write_csv(to_path,data):
data.to_csv(to_path,index=False)
if __name__ == "__main__":
start_time = time.time() # 开始时间
path = 'C:/Users/lenovo/Desktop'
file_name ='query_hive_162768.csv'
from_path = path+"/"+file_name
print(from_path)
df = read_csv(from_path)
df.head()
end_time = time.time() #结束时间
print("程序耗时%f秒." % (end_time - start_time))
print(df.head())
Excel保存
import re
import pandas as pd
from datetime import datetime
import time
def read_xlsx(path,sheet_name):
xlsx_file = pd.ExcelFile(path) ##路径
table = xlsx_file.parse(sheet_name) ##选取表
return table
if __name__ == "__main__":
start_time = time.time() # 开始时间
path = 'C:/Users/lenovo/Desktop'
file_name ='bdl_goods_income.xlsx'
sheet_name_list = {
'hive':'Sheet',
'email':'data',
'mysql':'Sheet4'
}
path = path+"\\"+file_name
sheet_name = sheet_name_list['email']
#sheet_name = sheet_name_list['email']
df = read_xlsx(path,sheet_name)
print(df.head())
end_time = time.time() #结束时间
print("程序耗时%f秒." % (end_time - start_time))
Excel 读取
import re
import pandas as pd
from datetime import datetime
import time
def read_xlsx(path,sheet_name):
xlsx_file = pd.ExcelFile(path) ##路径
table = xlsx_file.parse(sheet_name) ##选取表
return table
if __name__ == "__main__":
start_time = time.time() # 开始时间
path = 'C:/Users/lenovo/Desktop'
file_name ='华东交付盘质检日报12月.xlsx'
sheet_name_list = {
'hive':'Sheet',
'email':'data',
'mysql':'Sheet4'
}
path = path+"\\"+file_name
sheet_name = sheet_name_list['hive']
#sheet_name = sheet_name_list['email']
df = read_xlsx(path,sheet_name)
print(df.head())
end_time = time.time() #结束时间
print("程序耗时%f秒." % (end_time - start_time))
存取到mysql
import pymysql
from sqlalchemy import create_engine
import mysql.connector as sql
def py_mysql(table,table_name):
try:
engine = create_engine("mysql+pymysql://root:password@127.0.0.1:3306/temp?charset=utf8mb4")
table.to_sql(name=table_name,con=engine,if_exists='replace',index=False,chunksize=10000)
print ('数据库写入成功')
except :
print ('数据库写入失败')
if __name__ == "__main__":
start_time = time.time() # 开始时间
try:
py_mysql(df,'点位1206')
except:
print('sql查询失败')
end_time = time.time() #结束时间
print("程序耗时%f秒." % (end_time - start_time))
循环读取目录Csv文件
import os,re
import pandas as pd
dir_path = 'C:/Users/lenovo/Desktop/inv_2'
#col = [] ##选取需要的字段,看了下不是所有的字段都需要的
#df = pd.DataFrame([],columns=col)
df = pd.DataFrame([])
for root,dirs,files in os.walk(dir_path):##文件夹的路径
if files: ##判断是否有文件
for file_name in files: ##循环文件的名称
if '.csv' in file_name: #判定是不是文件是否有o2o_order结尾的文件,是的就继续,不是的就退出了;
path = os.path.join(root,file_name)
print('正在处理的文件是%s'%(path))
try:
data = pd.read_table(path,sep=',') ##路径,注意指定文件的分隔符号
df_tmp = pd.DataFrame(data)
print(df_tmp.head())
df = pd.concat([df,df_tmp], ignore_index=True) ## 数据合并
print(df.shape)
except:
print('读取文件,处理数据失败')
else:
print('warning:非.csv文件不读取')
#print(df.head())
#保存到本地
out_path= 'C:/Users/lenovo/Desktop/inv_2/output-1.xlsx'
writer = pd.ExcelWriter(out_path, engine='xlsxwriter')
df.to_excel(writer,'Sheet1')
writer.save()
循环读取目录Xlsx文件
import os,re
import pandas as pd
dir_path = 'C:/Users/lenovo/Desktop/email'
#col = [] ##选取需要的字段,看了下不是所有的字段都需要的
#df = pd.DataFrame([],columns=col)
df = pd.DataFrame([])
for root,dirs,files in os.walk(dir_path):##文件夹的路径
if files: ##判断是否有文件
for file_name in files: ##循环文件的名称
if '.xls' in file_name: #判定是不是文件是否有o2o_order结尾的文件,是的就继续,不是的就退出了;
path = os.path.join(root,file_name)
print('正在处理的文件是%s'%(path))
try:
xlsx_file = pd.ExcelFile(path) ##路径
data = xlsx_file.parse('data') ##选取表
df_tmp = pd.DataFrame(data)
print(df_tmp.head())
df = pd.concat([df,df_tmp], ignore_index=True, sort=True) ## 数据合并
print(df.shape)
except:
print('读取文件,处理数据失败')
else:
print('warning:非.xlsx文件不读取')
#print(df.head())
#保存到本地
out_path= 'C:/Users/lenovo/Desktop/email/output-1.xlsx'
writer = pd.ExcelWriter(out_path, engine='xlsxwriter')
df.to_excel(writer,'Sheet1')
writer.save()
读取剪贴板数据
# 读取数据
import pandas as pd
df=pd.read_clipboard()