Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import argparse
import os # for file walks
import sys # for error handling
import traceback
import patoolib # to extract archives and compressed data, depending on their file type
import magic # to determine files type not be extension but by their content. mimetype won't work, cannot recognize DICOM files
# Number of extraction passes completed so far; incremented once per pass by
# the main loop further down in this script.
RUNS = 0
# Upper bound on the number of passes the main loop makes over the directory
# tree, so the script terminates even if some archive can never be extracted.
MAX_RUNS = 10
# Command-line interface: one required positional argument, the directory whose
# contents are scanned for archives.
parser = argparse.ArgumentParser()
parser.add_argument('dir', help="directory to extract all files within")
# NOTE: parsing happens at module level, so sys.argv is read (and argparse may
# exit with a usage message) as soon as this file is executed.
args = parser.parse_args()
def compressed_data_exists(dir):
    """Report whether any file below *dir* still looks like an archive.

    Every regular entry found by walking the tree rooted at the absolute
    form of *dir* is classified by content via ``magic.from_file``; the
    search stops at the first file whose description mentions
    ``'compressed data'`` or ``'archive data'``.

    Args:
        dir: Path of the directory to scan (walked recursively).

    Returns:
        True as soon as one matching file is found, False when the walk
        finishes without a match (including when nothing is walked at all).
    """
    markers = ('compressed data', 'archive data')
    top = os.path.abspath(dir)
    for folder, _subdirs, names in os.walk(top):
        for name in names:
            description = magic.from_file(os.path.join(folder, name))
            if any(marker in description for marker in markers):
                return True
    return False
# Bookkeeping shared across passes. All three are keyed by ABSOLUTE path:
# keying by bare file name would make unrelated files that merely share a name
# in different directories shadow one another.
errors = {}         # archive path -> exception raised while extracting it
noarchives = set()  # paths already classified as "not an archive"
_skipped = set()    # archives whose destination already exists; never retried


def _is_archive(path):
    """Return True if ``magic.from_file`` describes *path* as compressed/archive data."""
    ftype = magic.from_file(path)
    return 'compressed data' in ftype or 'archive data' in ftype


def _extract_pass(top):
    """Walk *top* once and extract every not-yet-handled archive in place.

    Each archive is unpacked into the directory that contains it and removed
    after a successful extraction. Failures are reported on stderr and
    recorded (in ``errors`` or ``_skipped``) so later passes leave them alone.

    Args:
        top: Directory to walk recursively.

    Returns:
        The number of archives successfully extracted during this pass.
    """
    extracted = 0
    for root, _subdirs, files in os.walk(os.path.abspath(top)):
        for fname in files:
            abs_fname = os.path.join(root, fname)
            if abs_fname in errors or abs_fname in _skipped or abs_fname in noarchives:
                continue
            if not _is_archive(abs_fname):
                noarchives.add(abs_fname)
                continue
            try:
                patoolib.extract_archive(abs_fname, outdir=root)
            except FileExistsError:
                print(f"Could not extract {abs_fname}, destination already exists", file=sys.stderr)
                # Remember it; retrying on every pass would only repeat the message.
                _skipped.add(abs_fname)
            except Exception as e:
                # Boundary handler: one broken archive must not abort the whole
                # run. The failure is logged here and reported again at the end.
                print(f"Failure while extracting: {abs_fname}", file=sys.stderr)
                print(f"Error: {type(e)}", file=sys.stderr)
                print(e, file=sys.stderr)
                errors[abs_fname] = e
            else:
                os.remove(abs_fname)  # remove the compressed data, as it is extracted now
                extracted += 1
    return extracted


# Extracting an archive may reveal further archives, so keep making passes
# until none are left, the pass budget is used up, or a pass makes no progress.
while compressed_data_exists(args.dir) and RUNS < MAX_RUNS:
    extracted_in_pass = _extract_pass(args.dir)
    RUNS += 1
    if not extracted_in_pass:
        # Every remaining archive has already failed or been skipped; another
        # pass would see exactly the same tree and do nothing.
        break

print(f'{RUNS} runs completed')
if errors:
    print('There were errors with the following files:')
    for path, exc in errors.items():
        print(f'file: {path}')
        print(f'cmd: {exc}')
    sys.exit(1)