diff --git a/CoMIK b/CoMIK new file mode 100755 index 0000000..35312d0 Binary files /dev/null and b/CoMIK differ diff --git a/README.md b/README.md index 44b709a..e3864cf 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,21 @@ sh install.sh ``` ## Usage: +If you have MATLAB: See the example config file `config-comik.txt`, for an example function call from inside Matlab: For simulated dataset 1 provided in the folder `sample_data/simulated_dataset1` ```Matlab comik_wrapper('config-comik.txt'); ``` +For those who do not have MATLAB, we provide an executable which can be run as follows: +``` +./run_CoMIK.sh +./run_CoMIK.sh /usr/lib/matlab-9.0 config-comik.txt +``` +where the first argument, `/usr/lib/matlab-9.0`, is the typical location of the MATLAB runtime on a Linux machine, and the second argument is the config file. + + _CoMIK_ requires two FASTA files as input -- the first FASTA file containing sequences in the positive class; the second FASTA file containing the sequences in the negative class. Other params are explained below. Values for the following parameters are required to be set: diff --git a/config-comik.txt b/config-comik.txt index 16c268e..76ea41d 100644 --- a/config-comik.txt +++ b/config-comik.txt @@ -4,9 +4,9 @@ ## Required Input parameters POSITIVE_FASTA_FILE=./sample_data/simulated_dataset1/pos.fasta NEGATIVE_FASTA_FILE=./sample_data/simulated_dataset1/neg.fasta -NUMBER_OF_POSITIVES=600 -NUMBER_OF_NEGATIVES=600 -TEST_INDICES=[501:600 1101:1200] +NUMBER_OF_POSITIVES=500 +NUMBER_OF_NEGATIVES=500 +TEST_INDICES=[401:500 901:1000] OUTPUT_FOLDER=comik_run_simulated_dataset1 ## ODH requirements @@ -26,6 +26,6 @@ NUMBER_OF_INNER_FOLDS=10 NUMBER_OF_OUTER_FOLDS=5 WHETHER_TO_PLOT_HEATMAP=No WHETHER_TO_VISUALIZE_WEIGHT_VECTOR=Yes -DEBUG_LEVEL=0 +DEBUG_LEVEL=2 DEBUG_MSG_LOCATION=runLog.txt COMPUTATION_VERSION=Looping diff --git a/getExpansionPoints.m b/getExpansionPoints.m index 4f14554..48cc6c2 100644 --- a/getExpansionPoints.m +++ b/getExpansionPoints.m @@ -35,9 +35,10 @@ rng('default'); logMessages(debugMsgLocation, 
sprintf('with K-means: \n'), debugLevel); if debugLevel == 2 - [idx, expansionPoints, sumD, matrixOfDistancesFromCentres] = kmeans(X, nClusters, 'Replicates', rep, 'Display','Iter');%, 'MaxIter',1000);%Display: final + [idx, expansionPoints, sumD, matrixOfDistancesFromCentres] = kmeans(X, nClusters, 'Replicates', rep, 'Display','off'); + % Display could be set to 'final' elseif debugLevel == 0 - [idx, expansionPoints, sumD, matrixOfDistancesFromCentres] = kmeans(X, nClusters, 'Replicates', rep, 'Display','off');%, 'MaxIter',1000); + [idx, expansionPoints, sumD, matrixOfDistancesFromCentres] = kmeans(X, nClusters, 'Replicates', rep, 'Display','off'); end logMessages(debugMsgLocation, sprintf('--- Took %.3f seconds\n',toc), debugLevel); diff --git a/install.sh b/install.sh index 6622bbb..d0bfe1b 100644 --- a/install.sh +++ b/install.sh @@ -2,4 +2,3 @@ chmod +x mkl.py export PATH=".:$PATH" - diff --git a/readFastaSequences.m b/readFastaSequences.m index cea2d72..7014b64 100644 --- a/readFastaSequences.m +++ b/readFastaSequences.m @@ -51,7 +51,7 @@ % Read Sequences and their FASTA legends fid=fopen(txt,'r'); ofid=fopen(strcat(outputFolder, '/ommittedFastaIds.txt'), 'a'); -fprintf(ofid, '----%s----\n', txt); +fprintf(ofid, '----%s---%s----\n\n', char(datetime('now')), txt); lengths = []; while i < givenNSeqs % feof(fid)==0 lineRead=fgetl(fid);