Python 遍历目录并统计文件大小,列出最大的 25 个文件
原文来自newsmth python版
#coding=utf-8
# Find the MAXC (default: 25) largest files under the current directory (or the directory given on the command line) - multithreaded
import os,os.path
from ConfigParser import RawConfigParser as rcp
from threading import Thread,Lock
import sys
import time
# Maximum number of entries kept in a "largest files" ranking; read by
# keepTenMax and by the report code in the __main__ section.
MAXC=25
def insert(tenmax, filename, filesize):
    """Insert ``[filename, filesize]`` into *tenmax*, ordered by size.

    The new entry is placed in front of the first existing entry whose size
    is not greater than *filesize*; if every entry is larger it goes at the
    end.  A list that is already sorted by descending size therefore stays
    sorted, and a new entry lands ahead of existing entries of equal size.

    The list is modified in place and nothing is returned.
    """
    position = next(
        (index for index, entry in enumerate(tenmax)
         if not entry[1] > filesize),
        len(tenmax),
    )
    tenmax.insert(position, [filename, filesize])
def keepTenMax(tenmax, filename, filesize):
    """Offer one file to a ranking that holds at most MAXC entries.

    *tenmax* is a list of ``[filename, filesize]`` pairs kept in descending
    size order.  The file is added through ``insert`` when the ranking is not
    full yet or when it is larger than the current smallest entry; afterwards
    the ranking is cut back to MAXC entries.  A file that is smaller than, or
    the same size as, the smallest entry of a full ranking is ignored.

    The list is modified in place and nothing is returned.
    """
    if MAXC <= 0:
        # Nothing can be kept; also avoids indexing an empty ranking below.
        return
    if len(tenmax) >= MAXC and filesize <= tenmax[-1][1]:
        # Ranking is full and this file does not beat its smallest entry.
        return
    insert(tenmax, filename, filesize)
    # Truncate in place: rebinding the name (``tenmax = tenmax[:MAXC]``)
    # would only change the local variable and leave the caller's list
    # longer than MAXC.
    del tenmax[MAXC:]
class MyThread(Thread):
    """Worker thread that ranks the files of one directory by size.

    root   -- directory the file names in *files* belong to
    files  -- names of the files to examine
    tname  -- value assigned to the thread's ``name``

    After the thread has run, ``self.tenmax`` holds the ranking produced by
    feeding every examined file to ``keepTenMax``.
    """

    def __init__(self, root, files, tname):
        Thread.__init__(self)
        self.root = root
        self.files = files
        self.name = tname
        self.tenmax = []  # per-thread ranking, filled by run()

    def run(self):
        """Stat every file, bump the global file counter and rank the file."""
        global count
        for f in self.files:
            filename = os.path.join(self.root, f)
            try:
                filesize = os.stat(filename).st_size
            except OSError:
                # The file cannot be examined (for example a dangling
                # symlink or a file removed meanwhile).  Skip it instead of
                # letting the exception end the thread and lose the rest of
                # this directory.
                continue
            # ``count`` is shared by all workers; the ``with`` block releases
            # the lock even if the update raises.
            with mylock:
                count += 1
            keepTenMax(self.tenmax, filename, filesize)
def allDone(threadlist):
    """Return True when no thread in *threadlist* is still alive.

    An empty list yields True.  Uses ``Thread.is_alive()``, the spelling
    that is available on both old and current Python versions (the
    ``isAlive()`` alias no longer exists on recent interpreters).
    """
    return not any(thread.is_alive() for thread in threadlist)
#main
if __name__ == "__main__":
    # Script entry point: walk a directory tree, start one MyThread per
    # directory that contains files, merge the per-thread rankings, then
    # print the MAXC largest files and save them to result.ini.

    # Both names are module globals because MyThread.run() uses them.
    mylock = Lock()  # protects ``count``
    # Start at 0: each worker adds one per file it examines, so starting at
    # 1 would report one file more than was actually looked at.
    count = 0
    tenmax = []
    threadlist = []

    # Directory to scan: the current directory unless one is given.
    if len(sys.argv) == 1:
        wdir = '.'
    elif len(sys.argv) == 2:
        wdir = sys.argv[1]
    else:
        print('usage:find--.py [wdir]')
        sys.exit()

    tname = 1
    begin = time.time()
    for root, _dirs, files in os.walk(wdir):
        if not files:
            continue
        sthread = MyThread(root, files, tname)
        threadlist.append(sthread)
        sthread.start()
        print('thread-' + str(tname) + '-start search dir:' + root)
        tname += 1

    # join() returns only once the thread has finished, so after this loop
    # every per-thread ranking is complete and no further check is needed.
    for worker in threadlist:
        worker.join()

    # Merge the per-thread rankings.  Sorting the combined list keeps the
    # result ordered by descending size even when fewer than MAXC entries
    # were collected in total.
    for worker in threadlist:
        tenmax.extend(worker.tenmax)
    tenmax.sort(key=lambda entry: entry[1], reverse=True)
    tenmax = tenmax[:max(MAXC, 0)]

    # Report on stdout and in result.ini in the current directory.
    print('')
    print('[=========================threads count %s ====================]'
          % len(threadlist))
    print('[=========================try %s files=======================]'
          % count)
    print('[=========================the  %s  thMax files list===========]'
          % MAXC)
    print('')

    myrcp = rcp()
    # The parser lowercases option names by default; the options here are
    # file names, so keep them exactly as found on disk.
    myrcp.optionxform = lambda option: option
    myrcp.add_section('Result')
    for c, (fname, fsize) in enumerate(tenmax, 1):
        size = '%.3fMB' % (fsize / 1024.0 / 1024.0)
        print('[%d]%s-%s' % (c, fname, size))
        myrcp.set('Result', '[%d]%s' % (c, fname), size)
    # ``with`` closes the file, so the report is flushed to disk.
    with open('result.ini', 'w') as result_file:
        myrcp.write(result_file)

    end = time.time()
    usetime = end - begin
    print('')
    print('[=============================================================]')
    print('all time:%.3fs' % usetime)
这里用了多线程, 值得学习一下, 执行的速度是真快啊
浙公网安备 33010602011771号