pandas针对某一列循环批量下载
根据values实现批量下载
from py_function_tools import odps_read_sql,write_excle,write_to_database,hive_read_sql
from py_function_tools import odps_write_dataframe,odps_read_table,write_database_from_odps
import time
#df_v1 = odps_read_sql(sql) # 读取odps数据集
#df_v2 = hive_read_sql(sql) # 读取hive数据集,需要手动指定表所在的数据库
#write_excle(df,file_name) #默认保存在downloads目录下
#write_to_database(df,table_name) #存到mysql中ypp数据库
#from pandas_dataframe_agg import dataframe_agg
#table =dataframe_agg(df,dimensions,func=func) # groupby 处理
def write_csv(to_path, data):
    """Write ``data`` to a CSV file at ``to_path``.

    The file is written without the index column and with the
    ``utf_8_sig`` codec (UTF-8 preceded by a byte-order mark).

    Args:
        to_path: Destination path, passed straight to ``data.to_csv``.
        data: Object exposing a pandas-style ``to_csv`` method
            (presumably a DataFrame -- confirm against callers).

    Returns:
        The literal string ``'successful write'`` once ``to_csv`` has
        returned; any exception raised by ``to_csv`` propagates.
    """
    data.to_csv(to_path, index=False, encoding='utf_8_sig')
    return 'successful write'
# Month values to export; each one is substituted into the query's
# ``where month = '...'`` filter and produces one CSV file.
i_list = [
    "202001", "202002", "202003", "202004", "202005", "202006",
    "202007", "202008", "202009", "202010", "202011", "202012",
]

# Query template; ``{m}`` is replaced by the month being exported.
# The text is kept flush-left so the printed/executed SQL is unchanged.
_SQL_TEMPLATE = """
select *
from temp_0205_kelly
where month = '{m}'
"""

if __name__ == "__main__":
    # Directory that receives the generated CSV files.
    path = '/Users/xxx/Downloads'
    for i in i_list:
        start_time = time.time()  # start of this month's export
        print(i)
        sql = _SQL_TEMPLATE.format(m=i)
        df_v1 = odps_read_sql(sql)  # read the ODPS dataset
        print('*' * 10)
        print(sql)
        table_name = 'kelly_month' + i
        print(table_name)
        file_name = '{table_name}.csv'.format(table_name=table_name)
        print(file_name)
        to_path = path + "/" + file_name
        print(to_path)
        write_csv(to_path, df_v1)
        end_time = time.time()  # end of this month's export
        # Elapsed wall-clock seconds for this iteration.
        print("程序耗时%f秒." % (end_time - start_time))
补充:根据sql的where条件循环下载

浙公网安备 33010602011771号