Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
dicomclean/xtract.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
57 lines (49 sloc)
2.12 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os  # for file walks
import sys  # for error handling
import traceback
import patoolib  # to extract archives and compressed data, depending on their file type
import magic  # to determine a file's type not by extension but by its content. mimetype won't work, cannot recognize DICOM files
# Command-line interface: a single positional argument naming the directory
# tree whose archives are to be unpacked.
parser = argparse.ArgumentParser()
parser.add_argument("dir", help="directory to extract all files within")
args = parser.parse_args()

# Extraction proceeds in passes over the tree (an archive may contain further
# archives). RUNS counts the passes completed so far; MAX_RUNS caps them.
MAX_RUNS = 10
RUNS = 0
def compressed_data_exists(dir):
    """Return True if any file below *dir* still looks like an archive.

    Walks the tree rooted at ``dir`` and asks ``magic.from_file`` for a
    content-based description of every file. A file counts as extractable
    when that description contains ``'compressed data'`` or
    ``'archive data'``.

    Args:
        dir: Directory to scan recursively. (The name shadows the builtin
            ``dir`` but is kept so existing callers keep working.)

    Returns:
        True as soon as the first matching file is found, False if the walk
        finishes without a match (including when ``dir`` has no files).
    """
    for root, _subdirs, files in os.walk(os.path.abspath(dir)):
        for fname in files:
            try:
                ftype = magic.from_file(os.path.join(root, fname))
            except OSError:
                # A file that cannot be opened (e.g. a dangling symlink that
                # was unpacked from an archive) cannot be extracted either,
                # so it must not abort the whole scan.
                continue
            if 'compressed data' in ftype or 'archive data' in ftype:
                return True
    return False
# Archives whose extraction failed, mapped to the exception raised. Keyed by
# absolute path so that equally named files in different directories are
# tracked independently.
errors = {}
# Absolute paths of files already known not to be archives (or not readable
# at all); a set keeps the per-file membership test cheap on large trees.
noarchives = set()

# Extracting an archive can reveal further archives, so the tree is processed
# in passes. The cheap counter check comes first to avoid a needless full scan
# once the pass limit is reached.
while RUNS < MAX_RUNS and compressed_data_exists(args.dir):
    extracted_any = False
    for root, _subdirs, files in os.walk(os.path.abspath(args.dir)):
        for fname in files:
            abs_fname = os.path.join(root, fname)
            if abs_fname in errors or abs_fname in noarchives:
                continue

            try:
                ftype = magic.from_file(abs_fname)
            except OSError as e:
                # E.g. a dangling symlink: it cannot be inspected, let alone
                # extracted, so report it once and never look at it again.
                print(f"Could not inspect {abs_fname}: {e}", file=sys.stderr)
                noarchives.add(abs_fname)
                continue

            if 'compressed data' not in ftype and 'archive data' not in ftype:
                noarchives.add(abs_fname)
                continue

            try:
                # Unpack next to the archive itself.
                patoolib.extract_archive(abs_fname, outdir=root)
            except FileExistsError:
                # Deliberately not recorded: the destination may disappear
                # when another archive is extracted and removed, so this file
                # is tried again on the next pass.
                print(f"Could not extract {abs_fname}, destination already exists", file=sys.stderr)
            except Exception as e:
                print(f"Failure while extracting: {abs_fname}", file=sys.stderr)
                print(f"Error: {type(e)}", file=sys.stderr)
                print(e, file=sys.stderr)
                errors[abs_fname] = e
            else:
                # Remove the compressed data, as it is extracted now.
                os.remove(abs_fname)
                extracted_any = True

    RUNS += 1
    print(f'{RUNS} runs completed')

    if not extracted_any:
        # Nothing changed on disk during this pass, so another pass would see
        # exactly the same files and fail in exactly the same way.
        break

if errors:
    print('There were errors with the following files:')
    for path, exc in errors.items():
        print(f'file: {path}')
        print(f'cmd: {exc}')
    sys.exit(1)