function [allSeqsAsBags, allSeqsConformedSetKernel, subkernelWeights, thetaVals, instanceWeightsInEachBag, resultString, bestParamComb, test_teAUROC, test_teAUPRC, predictions] = comik_main_with_weight_vector(givenPosFastaFilename, givenNegFastaFilename, givenNPos, givenNNeg, oligoLen, maxDist, segmentSizeInBps, nClusterVals, sigmaVals, Cs, mklNorm, nFolds, testIndices, debugLevel, debugMsgLocation, outputFolder, runSummaryFilename, whetherToPlotHeatmap, computationVersion, whetherToVisualizeWVector)
% COMIK_MAIN_WITH_WEIGHT_VECTOR
% Main function for CoMIK
%
% INPUT PARAMS
% Param 'givenPosFastaFilename'
% Name of FASTA file with positive examples
%
% Param 'givenNegFastaFilename'
% Name of FASTA file with negative examples
%
% Param 'givenNPos'
% Number of positive examples to use (can be less than those provided in file)
%
% Param 'givenNNeg'
% Number of negative examples to use (can be less than those provided in file)
%
% Param 'oligoLen' and 'maxDist'
% Specify the oligomer length and the maximum distance for the ODH
% representation. Caution: A combination of large values can be memory intensive!
%
% Param 'segmentSizeInBps'
% Specify the segment-size in basepairs for CoMIK
%
% Param 'nClusterVals' (vector)
% Specify the number of clusters for CoMIK
%
% Param 'sigmaVals' (vector)
% Specify the sigma values for the Gaussian transformation
%
% Param 'Cs' (vector)
% Cost values for SVM
%
% Param 'mklNorm'
% Specify p-norm value for MKL
%
% Param 'nFolds'
% Specify number of inner cross-validation folds
%
% Param 'testIndices' (vector)
% Indices of the sequences (positives followed by negatives) which are to be considered as unseen
% test examples. For example, with a total of 100 positive sequences followed by
% 100 negative sequences, the test indices
% are given as
% testIndices = [81:100 181:200]
% for the corresponding 20 positives and 20 negatives to be treated as test
% examples.
%
% Param 'debugLevel' (0/1/2)
%
% Param 'debugMsgLocation' (1/fileID)
%
% Param 'outputFolder'
% Specify name of folder to write output results to
%
% OUTPUT PARAMS
% Param 'allSeqsAsBags'
% All sequences represented as bags
%
% Param 'allSeqsConformedSetKernel' (matrix)
% The conformally transformed MI kernel
%
% Param 'subkernelWeights' (vector)
% Weights assigned to each sub-kernel upon solving MKL
%
% Param 'thetaVals' (vector)
% Theta values used for obtaining the instance weights
%
% Param 'instanceWeightsInEachBag'
% Weights assigned to instances in each bag using the final model
%
% Param 'resultString'
% A combined string of per iteration results (as printed in the resultSummary file)
%
% Param 'bestParamComb' (Struct)
% Best performing values for various params
%
% Param 'test_teAUROC'
% AUROC value for the test sequences
%
% Param 'test_teAUPRC'
% AUPRC value for the test sequences
%
% Param 'predictions'
% Prediction vector
%
% ADDITIONAL NOTES
% -- Segmentation
% . All segments, shifted and non-shifted, are in one bag; order of segments: non-shifted segments followed by shifted segments.
% This results in #kernels = #clusterCentres
% --
%
% Author: snikumbh
%
% debugLevel 2 prints all messages
% debugLevel 1 is a placeholder level; it may be used in the future
% debugLevel 0 is silent on most messages, printing only a few messages for the user
% Setting of seed for the Matlab session for reproducibility: already done in the wrapper function.
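%
% USAGE EXAMPLE (illustrative sketch only; the filenames and parameter values
% below are hypothetical and should be adapted to your data):
%
%   [bags, K, kernelWts, thetas, instWts, resStr, best, auROC, auPRC, preds] = ...
%       comik_main_with_weight_vector('pos.fa', 'neg.fa', 100, 100, ...
%           2, 50, 100, [2 5], [1 5 10], [0.1 1 10], 2, 5, [81:100 181:200], ...
%           0, 1, 'comik_output', 'comik_output/runSummary.txt', ...
%           'No', 'Looping', 'Yes');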
% Handle arguments and values/defaults
totalArguments = 20;
predictUsingWeightVector = 1;
if nargin < totalArguments
whetherToVisualizeWVector = 'Yes';
end
if nargin < totalArguments - 1
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
end
if nargin < totalArguments - 2
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
whetherToPlotHeatmap = 'No';
end
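% Illustrative note on the defaults above: calling the function with only the first
% 17 arguments leaves whetherToPlotHeatmap, computationVersion and
% whetherToVisualizeWVector at their defaults ('No', 'Looping' and 'Yes', respectively).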
% Read positives
logMessages(debugMsgLocation,sprintf('Positives...'), debugLevel);
[nPos, omittedPosIndices, allSeqsRawPosFasta] = readFastaSequences(givenPosFastaFilename, segmentSizeInBps, givenNPos, outputFolder, debugLevel, debugMsgLocation);
% nPos and nNeg can be smaller than or equal to givenNPos and givenNNeg, respectively,
% because any sequence whose length is less than segmentSizeInBps is omitted
%
% Read Negatives
logMessages(debugMsgLocation,sprintf('Negatives...'), debugLevel);
[nNeg, omittedNegIndices, allSeqsRawNegFasta] = readFastaSequences(givenNegFastaFilename, segmentSizeInBps, givenNNeg, outputFolder, debugLevel, debugMsgLocation);
%
% Combine positives + negatives
nBags = nPos + nNeg;
% Check whether we need to update testIndices because some sequences were omitted;
% if omittedPosIndices and omittedNegIndices are both empty, no updates are required
if ~isempty(omittedPosIndices)
%nOmmitted = length(find(ismember(testIndices, omittedPosIndices)));
testIndices(find(ismember(testIndices, omittedPosIndices))) = [];
%logMessages(debugMsgLocation,sprintf('%d positive sequences omitted from test set.\n', nOmmitted), debugLevel);
end
if ~isempty(omittedNegIndices)
%nOmmitted = length(find(ismember(testIndices, omittedNegIndices)));
testIndices(find(ismember(testIndices, nPos+omittedNegIndices))) = [];%offset by nPos
%logMessages(debugMsgLocation,sprintf('%d negative sequences omitted from test set.\n', nOmmitted), debugLevel);
end
% Additionally, account for the reduction in the number of sequences: shift each remaining test index down by the number of omitted sequences that precede it
for i=1:length(testIndices)
testIndices(i) = testIndices(i) - nnz(omittedPosIndices < testIndices(i)) - nnz( (nPos+omittedNegIndices) < testIndices(i));
end
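% Illustrative example (hypothetical numbers): with testIndices = [10 25] and a
% single omitted positive sequence at index 7 (omittedPosIndices = [7], not itself
% a test index), the loop above shifts both entries down by one, giving
% testIndices = [9 24], so they still point to the same sequences after the omission.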
% Put them together
allSeqsRawFasta.sequence = cell(1, nBags);
for i=1:nPos
allSeqsRawFasta.sequence{i} = allSeqsRawPosFasta.sequence{i};
end
clear allSeqsRawPosFasta;
for i=1:nNeg
allSeqsRawFasta.sequence{i+nPos} = allSeqsRawNegFasta.sequence{i};%offset by nPos
end
clear allSeqsRawNegFasta;
% Write the revised set of test indices to disk to support reproducibility
if ~isempty(omittedPosIndices) || ~isempty(omittedNegIndices)
sortedTestIndices = sort(testIndices);
dlmwrite(strcat(outputFolder, '/testIndices_New.txt'), sortedTestIndices');
end
% generate labels
Y = [ones(1, nPos) -ones(1, nNeg)];
logMessages(debugMsgLocation,sprintf('#Bags: %d\n', nBags), debugLevel);
% default alphabet
alphabet = 'acgt';
% Set segmentSizeInPercentage to 0, its default and currently the only supported
% value; the segment size is specified in basepairs via segmentSizeInBps.
segmentSizeInPercentage = 0;
logMessages(debugMsgLocation,sprintf('Collecting all bags...'), debugLevel);
tic;
idx1 = 0;
thisInstances = [];
for k=1:nBags
if oligoLen >= 3
% for oligoLen 3 or larger, this loop may take some time, so log progress every 100 bags
if rem(k,100) == 0
logMessages(debugMsgLocation,sprintf('%d--', k), debugLevel);
end
end
if any(k == testIndices)
% do nothing now; test sequences are collected later,
% after the whole training stage has completed.
else
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags{idx1} = [tempNS tempS];
thisInstances = [thisInstances (NS + S)];
end
end
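% At this point, each training bag allSeqsAsBags{idx} is a matrix whose columns are
% the ODH feature vectors of its segments (instances), with the non-shifted segments
% first followed by the shifted ones (see ADDITIONAL NOTES above); thisInstances
% stores the corresponding per-bag instance counts.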
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
nBagsForTrain = idx1;
nBagsForTest = nBags-idx1;
logMessages(debugMsgLocation,sprintf('#Test: %d\n#Train: %d\n', nBagsForTest, nBagsForTrain), debugLevel);
logMessages(debugMsgLocation,sprintf('Segmentation statistics:\n'), debugLevel);
logMessages(debugMsgLocation,sprintf(' Mean number of instances in bags: %.2f\t Median: %d\t Max.: %d\t Min.: %d\n', mean(thisInstances), median(thisInstances), max(thisInstances), min(thisInstances)), debugLevel);
% Variable 'nBags' holds nBagsForTrain + nBagsForTest
%
% 2. Compute the instanceWide kernel that is used further
%
instanceEnds = cumsum(thisInstances);
instanceStarts = cumsum(thisInstances) - thisInstances + 1;
nInstances = instanceEnds(end);
logMessages(debugMsgLocation,sprintf('Total instances from all bags: %d\n', nInstances), debugLevel);
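% Illustrative example (hypothetical counts): for thisInstances = [3 2 4],
% instanceEnds = [3 5 9], instanceStarts = [1 4 6] and nInstances = 9, i.e. the
% instances of bag i occupy positions instanceStarts(i):instanceEnds(i) in the
% instance-wide kernel.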
% Use the sparseComputation flag for computing the instance-wide kernel.
% IMP: setting sparseComputation to 0 uses the broken-down approach to compute the instance-wide kernel;
% that approach has not been tested for correctness or efficiency. We recommend, and use, sparseComputation.
sparseComputation = 1;
[instanceStarts, instanceEnds, instanceWideKernel] = computeInstanceWideKernel(allSeqsAsBags, thisInstances, sparseComputation, debugLevel, debugMsgLocation);
%
% The functions below are recomputed in each inner cross-validation iteration;
% the cross-validation loop is handled here as well
%
% 3. Compute the conformed multi-instance kernels; their combination is learned via
% multiple kernel learning
% -- conformalXformationParam is the Gaussian bandwidth (sigma) we wish to use
%
resultString = [];
bestParamComb.best_C = 0.0;
bestParamComb.best_teAUROC = 0.0;
bestParamComb.best_nClusters = 0;
bestParamComb.best_sigma = 0.0;
% open summary file for writing
fid=fopen(runSummaryFilename, 'a');
Youtertest = Y(testIndices);
trainIndicesInY = setdiff([1:nBags], testIndices);
trainIndicesInBags = [1:nBagsForTrain];
YwholeTrain = Y(trainIndicesInY);
nPosTr = size(find(YwholeTrain > 0),2);
nNegTr = size(find(YwholeTrain < 0),2);
% handle class imbalance; multiply this with C for the negative class
imbalance = nPosTr/nNegTr;
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
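% Illustrative example (hypothetical counts): with 100 positive and 400 negative
% training bags, imbalance = 100/400 = 0.25, so the effective SVM cost for the
% negative class becomes 0.25 * C.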
for nc=1:size(nClusterVals,2)
for sigmaItr=1:size(sigmaVals,2)
nClusters = nClusterVals(1,nc);
conformalXformationParam = sigmaVals(1, sigmaItr);
% send only YwholeTrain and training set of bags
%fprintf('Seed-setting: 2\n');
rng(11, 'twister');
foldIdForIndices = cvpartition(YwholeTrain, 'Kfold', nFolds);
% ^ training(f)/test(f) on this cvpartition object return logical index vectors
%
% 4. All kernels are ready. Next, pose this as a multiple kernel learning (MKL) problem.
% - We use Shogun's Python modular interface to perform MKL.
% - The kernel weights obtained from MKL are the squared theta values (see the paper for details)
%
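% Illustrative sketch (hypothetical values): if MKL returns
% subkernelWeights = [0.49; 0.36; 0.15], the corresponding theta values used later
% by getInstanceWeights are their square roots, roughly [0.70; 0.60; 0.39].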
for c=1:size(Cs,2)
C = Cs(c);
test_auROCs = []; test_auPRCs = [];
logMessages(debugMsgLocation,sprintf('C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', C, nClusters, conformalXformationParam), debugLevel);
for f=1:nFolds
logMessages(debugMsgLocation,sprintf('Fold: %d\n', f), debugLevel);
logMessages(debugMsgLocation,sprintf('#training indices: %d\n', length(find(foldIdForIndices.training(f) == 1))), debugLevel);
logMessages(debugMsgLocation,sprintf('#test indices: %d\n', length(find(foldIdForIndices.test(f) == 1))), debugLevel);
% For fold f, corresponding examples will be used as test
thisTrainIndices = find(foldIdForIndices.training(f) == 1);
thisTestIndices = find(foldIdForIndices.test(f) == 1);
%
% Computation of conformal MI kernel also inside folds loop, because it shouldn't
% use instances from the test bags.
% This restriction is for clustering step, rest, the kernel has to be computed for
% all (wholeTrain) indices.
% For example, trainIndicesInBags = 1:1000 % row vector
trainIndicesForConformalComputation = thisTrainIndices;
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...'), debugLevel);
tic;
[allSeqsConformedSetKernel, rawConformedSetKernel , allSeqsTransformationKernel] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, ...
instanceWideKernel, trainIndicesForConformalComputation, allSeqsAsBags, ...
nClusters, conformalXformationParam, YwholeTrain, ...
computationVersion, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation,sprintf('\ndone in %.3f seconds\n', toc), debugLevel);
%
logMessages(debugMsgLocation,sprintf('MKL, Fold %d, C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', f, C, nClusters, conformalXformationParam), debugLevel);
Ytrain = YwholeTrain(thisTrainIndices);
Ytest = YwholeTrain(thisTestIndices);
for k=1:nClusters
% Conformed kernels
trainConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTrainIndices);
testConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTestIndices);
%
end
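% For this fold, trainConformedKernel{k} is nTrainFold-by-nTrainFold and
% testConformedKernel{k} is nTrainFold-by-nTestFold (train-by-test), one such pair
% per cluster, i.e. per subkernel.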
% Perform MKL; subkernel weights and predictions are returned after MKL
whetherPerformTest = 1; % set 1 to use test kernels to make predictions
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
[subkernelWeights, predictions] = performMKLWithShogunPython(C, Ytrain, Ytest, mklNorm, imbalance, ...
trainConformedKernel, testConformedKernel, outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% Both subkernelWeights and predictions are zero vectors if solving MKL failed; handle that case here.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables to 0.0. a) this_teAUROC, b) this_teAUPRC
this_teAUROC = 0.0;
this_teAUPRC = 0.0;
else %solving MKL was fine
%
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
[this_teAUROC, this_teAUPRC] = libsvm_plotroc(Ytest', predictions', 'personal');
% print this to the summary file
end
this_resultString = [datestr(now), '--#Clusters:', num2str(nClusters), '--SigmaGaussian:', ...
num2str(conformalXformationParam), '--C-SVM:', num2str(C), '--mklNorm:', ...
num2str(mklNorm), '--teAUROC:', num2str(this_teAUROC), '--teAUPRC:', ...
num2str(this_teAUPRC), '\n'];
resultString = [resultString this_resultString];
fprintf(fid, this_resultString);
test_auROCs = [test_auROCs this_teAUROC];
test_auPRCs = [test_auPRCs this_teAUPRC];
end % nFolds for loop ends
% Note the best values based on the average performance on the n-folds
if mean(test_auROCs) >= bestParamComb.best_teAUROC
logMessages(debugMsgLocation,sprintf('mean_teAUROC: %.4f\n', mean(test_auROCs)), debugLevel);
bestParamComb.best_teAUROC = mean(test_auROCs);
bestParamComb.best_C = C;
bestParamComb.best_nClusters = nClusters;
bestParamComb.best_sigma = conformalXformationParam;
end
end % for loop for different values of C ends
% -- Noted: best performing pair of values for nClusters and Gaussian RBF sigma
% -- Model selected after cross-validation will be used further to get the instance weights
end % for loop for different values of sigma ends
end % for loop for different values of nClusters ends
fclose(fid);
% Clear variables created/used during model selection phase, and not required any more
clear trainConformedKernel;
clear testConformedKernel;
%
% 5. Use the best param combination to train using the whole training set and predict the test set here
%
logMessages(debugMsgLocation,sprintf('---Re-training and Test---\n'), debugLevel);
% Re-train the model with all train instances together
% trainIndicesInBags is 1:nBagsForTrain
nClusters = bestParamComb.best_nClusters;
conformalXformationParam = bestParamComb.best_sigma;
logMessages(debugMsgLocation,sprintf('Best param-values:\nSVM-Cost: %.3f, nClusters: %d, sigma: %.3f\n', bestParamComb.best_C, nClusters, conformalXformationParam), debugLevel);
logMessages(debugMsgLocation,sprintf('Re-training using complete training set examples...\n'), debugLevel);
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...\n'), debugLevel);
[allSeqsConformedSetKernel, rawConformedSetKernel, allSeqsTransformationKernel, clusterCentres] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, instanceWideKernel, ...
trainIndicesInBags, allSeqsAsBags, nClusters, conformalXformationParam, ...
YwholeTrain, computationVersion, debugLevel, debugMsgLocation);
%
%
for k=1:nClusters
% Conformed kernels
VtrainConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
VtestConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
%dummy placeholder; hence train-by-train instead of test-by-train
end
%
whetherPerformTest = 0;
% We set this to 0 now, since we will make predictions using the computed weight vector and not the test kernels
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
%
[subkernelWeights, predictions] = performMKLWithShogunPython(bestParamComb.best_C, YwholeTrain, Youtertest, mklNorm, imbalance, ...
VtrainConformedKernel, VtestConformedKernel, ...
outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% Ideally, this is not needed here, since such param values won't (or shouldn't) be selected as best values
% If that does happen, something is probably wrong.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables: train_AUROC, train_AUPRC
train_AUROC = 0.0;
train_AUPRC = 0.0;
else %solving MKL was fine
% Predictions returned at this juncture are for the training examples; they are not test predictions.
[train_AUROC, train_AUPRC] = libsvm_plotroc(YwholeTrain', predictions', 'personal');
end
logMessages(debugMsgLocation,sprintf('train_AUROC with whole training set: %.4f\n', train_AUROC), debugLevel);
% Below, we make our own predictions for the test examples using the weight vector
% 6.1 We will now collect the test examples
tic;
logMessages(debugMsgLocation, sprintf('Now collecting the unseen test examples...'), debugLevel);
idx1 = 0;
for k=1:nBags
if any(k == testIndices)
% Collection of test sequences takes place here,
% now that the whole training stage is completed.
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags_Test{idx1} = [tempNS tempS];
end
end
clear allSeqsRawFasta;
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
% Clearing allSeqsRawFasta frees up memory, which is useful when loading the many test examples that are typical of problems in computational biology
if(predictUsingWeightVector == 1)
logMessages(debugMsgLocation, sprintf('---Prediction using weight vector---\n'), debugLevel);
% 6.2 Use the subkernel weights to obtain the instance weights, first for the
% instances in the training bags; a filename, when passed as an argument to
% getInstanceWeights, is used to write them to file.
[thetaVals, instanceWeightsInEachBag] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel);
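% Conceptually (see the CoMIK paper for the exact expression), getInstanceWeights
% combines the theta values (square roots of the subkernel weights) with each
% instance's transformation values w.r.t. the cluster centres to produce a weight
% for every instance within every bag.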
% Get the transformations for the test examples, and the diagonal elements which are used for normalization later.
% Note: one could proceed block-wise at this juncture if the number of test examples is extremely large, as is typically the case.
%
tic;
logMessages(debugMsgLocation, sprintf('Applying transformation to the test samples, this might take some time...\n'), debugLevel);
[allSeqsTransformationKernel_Test] = applyTransformation(allSeqsAsBags_Test, clusterCentres, conformalXformationParam, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%
% Getting the instance weightings for the test examples
[thetaVals, instanceWeightsInEachBag_Test] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel_Test);
% At this point, all (train + test examples) instance weights have been obtained.
%
% 6.3 Use function obtainWeightVector and procure the weight vectors and the bias value
% The weight vector need not be ordered by rank; rather, we need it in its original order.
tic;
logMessages(debugMsgLocation, sprintf('Computing the weight vector...'), debugLevel);
[overallWeightVector, weightVectors, biasValue] = obtainWeightVector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%% Don't need allSeqsAsBags, allSeqsTransformationKernel?
% clear allSeqsAsBags allSeqsTransformationKernel;
%
% 6.4 Use these to make predictions using y = w*x + b
yhatTemp = 0.0;
%
% bagPhi for Test is nFeatures-by-nBagsForTest-by-nClusters
bagPhi_Test = zeros( size(allSeqsAsBags_Test{1},1), nBagsForTest, nClusters);
%
% For the test samples, we need to obtain their normalization factors.
%
for i=1:length(allSeqsAsBags_Test)% this is nBagsForTest
for j=1:nClusters
% -- allSeqsTransformationKernel_Test{i} is already nInstances-by-nClusters.
% -- Normalization of each term is also needed
term = sum(allSeqsAsBags_Test{i} .* repmat(allSeqsTransformationKernel_Test{i}(:,j)', size(allSeqsAsBags_Test{i},1), 1), 2);
bagPhi_Test(:, i, j) = term ./ norm(term, 2);
end
end
logMessages(debugMsgLocation,sprintf('Bag Phi for test samples done.\n'), debugLevel);
dlmwrite(strcat(outputFolder,'/WeightVectors.txt'), weightVectors');
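% The loop below accumulates the decision values per cluster/subkernel:
%   yhat(j) = sum_k weightVectors(:,k)' * bagPhi_Test(:,j,k) + biasValue
% for every test bag j, i.e. the y = w*x + b prediction of step 6.4 applied to the
% normalized, transformed test bags.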
for i=1:nClusters
yhatTemp = yhatTemp + (weightVectors(:, i)' * bagPhi_Test(:, :, i));
end
%
logMessages(debugMsgLocation,sprintf('Bias value: %.4f\n', biasValue), debugLevel);
yhat = yhatTemp + biasValue;
if(length(Youtertest) == length(yhat))
logMessages(debugMsgLocation, sprintf('%d and %d: Dimensions of yhat and ytest match!\n', length(Youtertest), length(yhat)), debugLevel);
end
dlmwrite(strcat(outputFolder,'/predictedLabelsWeightVector.txt'), yhat');
dlmwrite(strcat(outputFolder,'/givenLabels.txt'), Youtertest');
logMessages(debugMsgLocation,sprintf('Predicted labels written to disk. Computing the auROC/auPRC, this may take some time...\n'), debugLevel);
tic;
[test_teAUROC, test_teAUPRC] = libsvm_plotroc(Youtertest', yhat', 'personal');
logMessages(debugMsgLocation,sprintf('done in %.3f\n', toc), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUROC with test-kernels: %.4f\n', test_teAUROC), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUPRC with test-kernels: %.4f\n', test_teAUPRC), debugLevel);
end
%
%
%% Write the instanceWeightsInEachBag to file
% dlmwrite('instanceWeights_test.csv', instanceWeightsInEachBag, '-append');
% for i=1:size(instanceWeightsInEachBag,2)
% dlmwrite('instanceWeights_test.csv', cell2mat(instanceWeightsInEachBag{i})', '-append');
% end
if strcmp(whetherToPlotHeatmap, 'Yes')
logMessages(debugMsgLocation,sprintf('Plotting heatmaps...'), debugLevel);
if(predictUsingWeightVector == 1)
normInstanceWeightsInEachBag = standardizeMatrix(vertcat(cell2mat(instanceWeightsInEachBag)', cell2mat(instanceWeightsInEachBag_Test)'));
else
normInstanceWeightsInEachBag = standardizeMatrix(cell2mat(instanceWeightsInEachBag)');
end
%
heatmapFname = strcat(outputFolder,'/Heatmap-InstanceWeights', '_oligoLen', num2str(oligoLen), '_segmentSize', num2str(segmentSizeInBps) , '_maxDist', num2str(maxDist), '_nClusters', num2str(bestParamComb.best_nClusters), '_svmC', num2str(bestParamComb.best_C) ,'_sigma', num2str(bestParamComb.best_sigma), '.pdf');
%
p = plotHeatmap(normInstanceWeightsInEachBag, heatmapFname, 0, 1);
logMessages(debugMsgLocation,sprintf('done!\n'), debugLevel);
%
end
%
% 7. Visualize weight vectors and motifs
%
if strcmp(whetherToVisualizeWVector, 'Yes')
logMessages(debugMsgLocation,sprintf('Visualizing weight vectors...\n'), debugLevel);
visualize_wvector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
end
logMessages(debugMsgLocation,sprintf('Done!\n'), debugLevel);
%
end % function ends