Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ipd_extended/runExperiment.py
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
216 lines (171 sloc)
6.18 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 07 14:27:18 2017

@author: PatAd
"""
import os | |
import queue | |
import re | |
import subprocess | |
import threading | |
import multiprocessing | |
import sys | |
import signal | |
import time | |
import psutil | |
import main | |
import pandas as pd | |
import discretization_quality_measure as dqm | |
import data_generator as dg | |
# --- Command-line flags and shared run state ------------------------------
# Reserved for the disabled --start/--continue selection kept below.
newRun = None
# Default worker count: half the logical CPUs, but never fewer than one.
# Bug fix: int(cpu_count() / 2) is 0 on a single-core machine, which later
# makes multiprocessing.Pool(0) raise ValueError.
nbThreads = max(1, multiprocessing.cpu_count() // 2)
# nbThreads = 1
onlyListTasks = False
showOutput = False
# NOTE(review): start/continue selection is currently disabled:
# if "--continue" in sys.argv or "-c" in sys.argv:
#     newRun = False
# elif "--start" in sys.argv or "-s" in sys.argv:
#     newRun = True
# else:
#     raise RuntimeError("Missing information if starting or continuing experiments")
if "--list" in sys.argv or "-l" in sys.argv:
    onlyListTasks = True  # only print pending tasks, do not run them
if "--verbose" in sys.argv or "-v" in sys.argv:
    showOutput = True
# Explicit thread count overrides the CPU-based default.
if "--threads" in sys.argv:
    nbThreads = int(sys.argv[sys.argv.index("--threads") + 1])
if "-t" in sys.argv:
    nbThreads = int(sys.argv[sys.argv.index("-t") + 1])
# Work queue: one entry per data-generator task, consumed by pool workers.
items = multiprocessing.Queue()
class UnregisteredItem(Exception):
    """Raised when a dataset or ideal discretization is loaded before being registered."""
# Everything below runs inside a Manager context so the Loader caches
# (manager.dict()) can be shared between worker processes.
with multiprocessing.Manager() as manager:

    class Loader():
        """Lazily-populated, process-shared caches for datasets and ideal discretizations.

        Contract: register_dataset() / register_ideal_disc() must be called
        (presumably in the parent, before workers start — TODO confirm) to
        create the per-item lock; load_*() then fills the shared cache on
        first access.  Loading an unregistered item raises UnregisteredItem.
        """

        def __init__(self):
            # Cross-process caches, backed by the Manager server process.
            self.dataset = manager.dict()
            self.ideal_discs = manager.dict()
            # Guards mutation of the two *_locks dicts during registration.
            self.global_lock = multiprocessing.RLock()
            # NOTE(review): plain dicts, not manager.dict() — they are only
            # visible to children if fully populated before fork; confirm.
            self.dataset_locks = {}#manager.dict()
            self.ideal_disc_locks = {}#manager.dict()

        def load_ideal_disc(self, name):
            # Return the cached ideal discretization, parsing it on first use.
            if not name in self.ideal_disc_locks:
                raise UnregisteredItem('Unregistered ideal discretization shall be loaded ', name)
            # Per-item lock so only one process parses a given name.
            with self.ideal_disc_locks[name]:
                if not name in self.ideal_discs:
                    self.ideal_discs[name] = dqm.parse_cuts(name)
                return self.ideal_discs[name]

        def load_dataset(self, path, delim):
            # Return the cached DataFrame, reading the CSV on first use.
            # '?' is treated as a missing value; files have no header row.
            if not path in self.dataset_locks:
                raise UnregisteredItem('Unregistered dataset shall be loaded ', path)
            # Per-item lock so only one process reads a given file.
            with self.dataset_locks[path]:
                if not path in self.dataset:
                    self.dataset[path] = pd.read_csv(path, delimiter=delim, header=None, na_values='?')
                return self.dataset[path]

        def register_dataset(self, path):
            # Create the per-dataset lock (idempotent).
            with self.global_lock:
                if path not in self.dataset_locks:
                    self.dataset_locks[path] = multiprocessing.RLock()

        def register_ideal_disc(self, name):
            # Create the per-discretization lock (idempotent).
            with self.global_lock:
                if name not in self.ideal_disc_locks:
                    self.ideal_disc_locks[name] = multiprocessing.RLock()

    # Single shared loader instance for the whole run.
    loader = Loader()
# --- Build the task queue and the bookkeeping state -----------------------
# One queue entry per data generator produced for this experiment run.
params = dg.produce_all_data_generators()
for data_generator in params:
    items.put(data_generator)
# params = main.collect_experiment_params("logs_test")
if len(params) == 0:
    print("no parameters collected!")
    exit(0)
# for param in params:
#     loader.register_dataset(param.data_file)
#     loader.register_ideal_disc(param.experiment_name)
#     items.put(param)
if onlyListTasks:
    # --list / -l: just print every pending task and stop.
    while not items.empty():
        para = items.get()
        print(para)
    # Bug fix: stop after listing.  The original fell through with a drained
    # queue while nbTasksTotal stayed > 0, so the writer thread below could
    # never reach its exit condition and the script hung at t.join().
    sys.exit(0)
nbTasksTotal = len(params)
# Single-element list so worker code can mutate the count in place.
nbTasksDone = [0]
counterLock = multiprocessing.RLock()
paramQueueLock = multiprocessing.RLock()
# Cooperative shutdown flag, cleared by the SIGINT handler.
runningMain = True
# Generated datasets flow from the workers to the writer thread through here.
datasets = multiprocessing.Queue()
def worker(worker_id):
    """Pool-worker loop: drain `items`, build each data generator, queue the result.

    Runs inside a multiprocessing.Pool child, so the module globals named
    below are fork-time copies; `runningMain` changes made by the parent's
    signal handler are presumably NOT visible here — TODO confirm the
    intended shutdown path on SIGINT.

    :param worker_id: numeric id used only for log output.
    """
    global items, datasets, counterLock, nbTasksTotal, nbTasksDone, runningMain
    print('Worker ID ', worker_id, ' is born')
    while True:
        # Back off while system memory pressure is high (>90% used).
        # while psutil.virtual_memory().available < 10 * 1024**3:
        while psutil.virtual_memory().percent > 90:
            if not runningMain:
                return
            print('sleep for 10 seconds')
            time.sleep(10)
        para = None
        print('Worker ID ', worker_id, ' awaits parameters')
        try:
            # Non-blocking get: an empty queue means no work is left, so exit.
            with paramQueueLock:
                para = items.get(block=False)
        except queue.Empty:
            return
        print('Worker ID ', worker_id, 'is executing', para)
        # todo generate data sets
        # Build the dataset and hand it to the writer thread for storage.
        datasets.put(para.build())
        # datasets.put(main.execute(para, loader))
        print('Worker ID ', worker_id, ' execution finished')
        # NOTE(review): this increments a plain list in the child process;
        # the parent's copy presumably never changes — verify progress
        # accounting actually works across the Pool boundary.
        with counterLock:
            if runningMain:
                nbTasksDone[0] += 1
                print("Jobs done ", nbTasksDone[0], "/", nbTasksTotal)
    # items.task_done()
def datasetWriter():
    """Writer-thread loop: drain `datasets` and persist each generated result.

    Runs as a daemon thread in the parent process.  Intended to exit once
    every task is counted done and the queue is empty, or when `runningMain`
    is cleared by the SIGINT handler.

    NOTE(review): `nbTasksDone` is a plain list mutated by pool *processes*
    (fork copies), so the parent thread presumably never observes the
    increments and the outer exit condition may never trigger — verify that
    this loop actually terminates after the pool finishes.
    """
    global datasets, nbTasksDone, nbTasksTotal, runningMain
    # Nothing to write if there were never any tasks.
    if nbTasksTotal < 1:
        return
    while True:
        # Inner loop: store results until a 10s timeout or the queue drains.
        while True:
            try:
                result = datasets.get(block=True, timeout=10)
                # todo store
                dg.store(result)
                # main.store(result, loader)
            except queue.Empty:
                break
            # Leave the inner loop early once the queue looks empty or a
            # shutdown was requested (re-checked under the lock below).
            if datasets.empty() or not runningMain:
                break
        # Outer exit check.  Precedence note: reads as
        # (done == total and empty) or (not runningMain).
        with counterLock:
            if nbTasksDone[0] == nbTasksTotal and datasets.empty() or not runningMain:
                break
def receive_sig_int(signum, frame):
    """SIGINT handler: flag shutdown and discard all pending work.

    :param signum: received signal number (unused beyond being delivered).
    :param frame: current stack frame (unused).

    Bug fix: the original cleared the queues via `items.mutex` /
    `items.queue`, attributes that exist on queue.Queue but NOT on
    multiprocessing.Queue — so the handler raised AttributeError on Ctrl-C.
    Draining with get_nowait() works for both queue types.
    """
    global items, datasets, runningMain
    print("Received SigInt")
    runningMain = False
    # Discard everything still queued so workers/writer can wind down.
    for pending in (items, datasets):
        try:
            while True:
                pending.get_nowait()
        except queue.Empty:
            pass
    print("Processed SigInt")
# --- Launch: signal handler, writer thread, worker pool -------------------
# Install the Ctrl-C handler before any worker starts.
signal.signal(signal.SIGINT, receive_sig_int)
print('nbThreads', nbThreads)
print('Tasks to do: ', nbTasksTotal)
# Old threading-based worker pool, kept for reference:
# threads = []
# for i in range(nbThreads):
#     t = threading.Thread(target=worker)
#     threads.append(t)
#     t.daemon = True
#     t.start()
# The writer runs as a daemon thread in the parent process.
t = threading.Thread(target=datasetWriter)
# # threads.append(t)
t.daemon = True
t.start()
# One worker loop per process; never more processes than tasks.
nbProcesses = min(nbThreads, len(params))
# NOTE(review): workers reach `items`/locks as fork-inherited globals, so
# this presumably only works with the fork start method (not Windows spawn)
# — confirm target platform.
with multiprocessing.Pool(nbProcesses) as pool:
    # Each worker(i) drains the shared queue; map blocks until all return.
    pool.map(worker, [i for i in range(nbProcesses)])
    # pool.map(datasetWriter, [0])
    pool.close()
    pool.join()
print("Writing")
# Wait for the writer thread to finish flushing `datasets`.
t.join()
#for t in threads:
#    t.join()