from multiprocessing.pool import Pool
import os

import boto3

# List every bucket in the account once, up front; the worker processes only need the names.
client = boto3.client('s3')
response = client.list_buckets()
buckets = response['Buckets']
bucket_list = [item['Name'] for item in buckets]
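
# Illustrative shape of the boto3 response (bucket names below are made up):
#   response['Buckets'] -> [{'Name': 'my-logs', 'CreationDate': datetime(...)}, ...]
#   bucket_list         -> ['my-logs', ...]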

# Build the list of "second-level" prefixes in a bucket by shelling out to the AWS CLI.
# Note: top-level objects are listed alongside the "PRE" prefix rows, so they end up in
# the result as well; filter them out if you do not want them.
def get_catalog_list(bucket='bucket'):
    s3_ls_cmd = "aws s3 ls s3://{bucket}".format(bucket=bucket)
    p = os.popen(s3_ls_cmd)
    ret = p.read()
    p.close()
    catalog_list = [item.lstrip() for item in ret.split('\n')]
    # Slicing off the first 4 characters drops the leading "PRE " of each prefix row.
    s3_key_list = ["s3://{bucket}/{key}".format(bucket=bucket, key=catalog[4:])
                   for catalog in catalog_list if catalog != '']
    print(s3_key_list)
    return s3_key_list
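
# Illustrative `aws s3 ls s3://my-bucket` rows (names made up) and what get_catalog_list() turns them into:
#   "PRE logs/"                            -> "s3://my-bucket/logs/"
#   "2023-06-01 12:34:56     1024 top.csv" -> "s3://my-bucket/-06-01 12:34:56     1024 top.csv"
#     (an object row gets mangled by the [4:] slice; drop these rows if they are not wanted)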

# Sum the storage used under a prefix (in MB) by running `aws s3 ls --recursive`
# and adding up the size column of every returned object row.
def get_catalog_size(catalog='s3://bucket/key/'):
    s3_cmd = "aws s3 ls {catalog} --recursive ".format(catalog=catalog)
    print(s3_cmd)
    p = os.popen(s3_cmd)
    ret = p.read()
    p.close()
    r1 = ret.split('\n')
    r2 = [int(rs.split()[2]) for rs in r1 if rs != '']
    return int(sum(r2) / 1024 / 1024)
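
# Illustrative `aws s3 ls --recursive` row that get_catalog_size() parses (path and size made up):
#   "2023-06-01 12:34:56    1048576 logs/2023/part-0000.gz"  -> split()[2] == "1048576" (bytes)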

# Write one "<catalog>,<size in MB>" line per prefix into <bucket>.txt.
def save(bucket='bucket'):
    f = bucket + '.txt'
    with open(f, "a") as file:
        for catalog in get_catalog_list(bucket):
            size = get_catalog_size(catalog)
            str1 = catalog + "," + str(size) + "\n"
            print(str1)
            file.write(str1)

if __name__ == '__main__':
    # Size each bucket in its own worker process.
    p = Pool(8)
    for bucket in bucket_list:
        p.apply_async(save, args=(bucket,))
    # close() must come before join(); calling join() on a running pool raises ValueError.
    p.close()
    p.join()
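
# Example run (script name and bucket/prefix names here are made up):
#   $ python s3_catalog_size.py
#   One <bucket>.txt is written per bucket, each line "catalog,size_in_MB", e.g.
#   s3://my-logs/2023/,137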