'''
Created on Feb 22, 2013
@author: changxue
@summary: extract all archive files in src_dir into target_dir
'''
import os, shutil
# File-name suffixes that mark a file as an extractable archive (see is_archive_valid).
_list_postfix = ['.rar', '.zip', '.jar']
# Directory used to locate the 7-Zip `7z` command-line tool.
tool_path = os.path.join(r'C:\Program Files\7-Zip')
from common_util import _create_dirs, _execute_cmd, _printlog, _get_dirname_from_filename
def is_archive_valid(file_name):
    ''' Tell whether file_name looks like an archive this script can extract.

    The decision is based only on the file-name suffix, compared against the
    entries of _list_postfix. The comparison ignores case, so names such as
    "LIB.ZIP" or "Foo.Jar" are recognised as well as "lib.zip".

    file_name: file name or path to test.
    return: True if the name ends with one of the known suffixes, else False.
    '''
    # str.endswith accepts a tuple, so a single call covers every suffix.
    suffixes = tuple(postfix.lower() for postfix in _list_postfix)
    return file_name.lower().endswith(suffixes)
def get_type_src(full_filename):
    ''' Classify an archive by listing its contents with 7-Zip.

    full_filename: path of the archive to inspect.
    return: 'bin' if the listing contains '.class' anywhere; otherwise 'src'
            if it contains '.java'; otherwise '' (neither was found).
    raise: OSError if the 7z executable under tool_path cannot be started.
    '''
    import subprocess  # local import keeps this function self-contained

    # Invoke 7z by its full path with an argument list instead of a shell
    # string: archive paths containing spaces or shell metacharacters are
    # passed through intact, and the process working directory is untouched.
    exe_name = '7z.exe' if os.name == 'nt' else '7z'
    process = subprocess.Popen(
        [os.path.join(tool_path, exe_name), 'l', full_filename],
        stdout=subprocess.PIPE,
        universal_newlines=True)  # text output on both Python 2 and 3
    # Read the listing straight from the pipe; no temp file has to be written
    # into (and later removed from) the 7-Zip installation directory.
    listing = process.communicate()[0]
    _printlog(listing)

    # '.class' wins over '.java' when both kinds of file are present.
    if '.class' in listing:
        return 'bin'
    if '.java' in listing:
        return 'src'
    return ''
def extract_archive(archive_file_path, target_dir):
    ''' Unpack archive_file_path into target_dir with the `7z x` command.

    The process working directory is switched to tool_path before the command
    is issued and is not restored afterwards.
    '''
    # NOTE(review): both paths are interpolated unquoted; whether paths that
    # contain spaces survive depends on how _execute_cmd runs the command --
    # confirm against common_util.
    os.chdir(tool_path)
    _execute_cmd('7z x %s -o%s' % (archive_file_path, target_dir))
def extract_single(file_name, src_path, target_dir):
    ''' Extract one archive from src_path into target_dir/<src|bin>/<dirname>.

    The sub-directory ('src' or 'bin') is whatever get_type_src reports for
    the archive; <dirname> comes from _get_dirname_from_filename(file_name).

    raise: Exception if the output directory already exists under 'src' or
           'bin', or if get_type_src returns an empty result (the archive is
           then moved to src_path/old/invalid before raising).
    '''
    dirname = _get_dirname_from_filename(file_name)

    # Never extract on top of an existing directory; 'src' is checked first.
    for kind in ('src', 'bin'):
        existing = os.path.join(target_dir, kind, dirname)
        if os.path.exists(existing):
            raise Exception('WARNING: directory already exist: %s\n' % existing)

    archive_file_path = os.path.join(src_path, file_name)
    file_type = get_type_src(archive_file_path)
    if file_type:
        dest = os.path.join(target_dir, file_type, dirname)
        os.makedirs(dest)
        extract_archive(archive_file_path, dest)
        return

    # Empty type: set the archive aside so it is not picked up again.
    dst_dir = os.path.join(src_path, 'old', 'invalid')
    _create_dirs(dst_dir)
    shutil.move(archive_file_path, dst_dir)
    raise Exception('WARNING: no java or class file in %s, MOVE TO old/invalid.\n' % archive_file_path)
def extract_all(src_path, extract_dir):
    ''' Main Entry:
    process every entry found directly inside src_path:
        files with an archive suffix (see is_archive_valid): extracted via
            extract_single into extract_dir (src or bin sub-directory)
        other files: counted in total_count but otherwise ignored
        non-files: reported in the log and skipped
    A failure on one entry is logged and does not stop the run. A summary
    (total count, success count, list of extracted names) is logged at the end.

    src_path: directory holding the archives; may be relative.
    extract_dir: directory receiving the extracted trees; may be relative.
    Side effect: the process working directory is changed.
    '''
    # Resolve both directories against the caller's working directory before
    # any chdir happens. The working directory changes below (and again in the
    # 7-Zip helpers), which would otherwise make a relative src_path or
    # extract_dir point somewhere else for os.listdir and every later join.
    src_path = os.path.abspath(src_path)
    extract_dir = os.path.abspath(extract_dir)

    _printlog('############## Extract starting ##############\n')
    total_count = 0
    extract_list = []
    os.chdir(src_path)
    for f in os.listdir(src_path):
        archive_file_path = os.path.join(src_path, f)
        try:
            if not os.path.isfile(archive_file_path):
                raise Exception('%s is not a file.\n' % f)
            total_count += 1
            if is_archive_valid(archive_file_path):
                _printlog('Extracting %s\n' % archive_file_path)
                extract_single(f, src_path, extract_dir)
                extract_list.append(f)
        except Exception as e:  # 'as' form is valid on Python 2.6+ and 3
            # Best effort: log the problem and move on to the next entry.
            _printlog(e)
    _printlog('total_count=%s\n' % total_count)
    _printlog('success_count=%s\n' % len(extract_list))
    _printlog('success_list=\n%s' % '\n'.join(extract_list))
    _printlog('############## Extract end ##############\n')
if __name__ == '__main__':
    # Command-line entry point:
    #   no arguments  -> run with the built-in default directories
    #   two arguments -> archive_path and target_dir
    #   anything else -> show the usage line
    usage = '''Usage: extractAll.py archive_path target_dir'''
    import sys
    argv_len = len(sys.argv)
    if argv_len == 1:
        src_path = 'C:\\works\\workload_src'
        target_dir = "C:\\works\\workload_out"
        extract_all(src_path, target_dir)
    elif argv_len == 3:
        extract_all(sys.argv[1], sys.argv[2])
    else:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(usage)