function [allSeqsAsBags, allSeqsConformedSetKernel, subkernelWeights, thetaVals, instanceWeightsInEachBag, resultString, bestParamComb, test_teAUROC, test_teAUPRC, predictions] = comik_main_with_weight_vector(givenPosFastaFilename, givenNegFastaFilename, givenNPos, givenNNeg, oligoLen, maxDist, segmentSizeInBps, nClusterVals, sigmaVals, Cs, mklNorm, nFolds, testIndices, debugLevel, debugMsgLocation, outputFolder, runSummaryFilename, whetherToPlotHeatmap, computationVersion, whetherToVisualizeWVector)
% COMIK_MAIN_WITH_WEIGHT_VECTOR
% Main function for CoMIK
%
% INPUT PARAMS
% Param 'givenPosFastaFilename'
% Name of FASTA file with positive examples
%
% Param 'givenNegFastaFilename'
% Name of FASTA file with negative examples
%
% Param 'givenNPos'
% Number of positive examples to use (can be less than those provided in file)
%
% Param 'givenNNeg'
% Number of negative examples to use (can be less than those provided in file)
%
% Param 'oligoLen' and 'maxDist'
% Specify the oligomer length and the maximum distance for the ODH
% representation. Caution: A combination of large values can be memory intensive!
%
% Param 'segmentSizeInBps'
% Specify the segment-size in basepairs for CoMIK
%
% Param 'nClusterVals' (vector)
% Specify the number of clusters for CoMIK
%
% Param 'sigmaVals' (vector)
% Specify the sigma values for the Gaussian transformation
%
% Param 'Cs' (vector)
% Cost values for SVM
%
% Param 'mklNorm'
% Specify p-norm value for MKL
%
% Param 'nFolds'
% Specify number of inner cross-validation folds
%
% Param 'testIndices' (vector)
% Indices of the sequences (positives followed by negatives) which are to be considered as unseen
% test examples. For example, with a total of 100 positive sequences followed by
% 100 negative sequences, the test indices
% are given as
% testIndices = [81:100 181:200]
% for the corresponding 20 positives and 20 negatives to be treated as test
% examples.
%
% Param 'debugLevel' (0/1/2)
%
% Param 'debugMsgLocation' (1/fileID)
%
% Param 'outputFolder'
% Specify name of folder to write output results to
%
% OUTPUT PARAMS
% Param 'allSeqsAsBags'
% All sequences represented as bags
%
% Param 'allSeqsConformedSetKernel' (matrix)
% The conformally transformed MI kernel
%
% Param 'subkernelWeights' (vector)
% Weights assigned to each sub-kernel upon solving MKL
%
% Param 'thetaVals' (vector)
% Theta values used for obtaining the instance weights
%
% Param 'instanceWeightsInEachBag'
% Weights assigned to instances in each bag using the final model
%
% Param 'resultString'
% A combined string of per iteration results (as printed in the resultSummary file)
%
% Param 'bestParamComb' (Struct)
% Best performing values for various params
%
% Param 'test_teAUROC'
% AUROC value for the test sequences
%
% Param 'test_teAUPRC'
% AUPRC value for the test sequences
%
% Param 'predictions'
% Prediction vector
%
% ADDITIONAL NOTES
% -- Segmentation
% . All segments, shifted and non-shifted, are in one bag; order of segments: non-shifted segments followed by shifted segments.
% This results in #kernels = #clusterCentres
% --
%
% Author: snikumbh
%
% debugLevel 2 prints all messages
% debugLevel 1 is a placeholder level; it may be used in the future
% debugLevel 0 is silent on most messages, printing only a few messages for the user
% Setting of seed for the Matlab session for reproducibility: already done in the wrapper function.
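%
% USAGE EXAMPLE (illustrative sketch only; the filenames and parameter values
% below are hypothetical and should be adapted to your data):
%
%   [bags, K, kernelWts, thetas, instWts, resStr, best, auROC, auPRC, preds] = ...
%       comik_main_with_weight_vector('pos.fa', 'neg.fa', 100, 100, ...
%           2, 50, 100, [2 5], [1 5 10], [0.1 1 10], 2, 5, [81:100 181:200], ...
%           0, 1, 'comik_output', 'comik_output/runSummary.txt', ...
%           'No', 'Looping', 'Yes');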
% Handle arguments and values/defaults
totalArguments = 20;
predictUsingWeightVector = 1;
if nargin < totalArguments
whetherToVisualizeWVector = 'Yes';
end
if nargin < totalArguments - 1
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
end
if nargin < totalArguments - 2
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
whetherToPlotHeatmap = 'No';
end
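% Illustrative note on the defaults above: calling the function with only the first
% 17 arguments leaves whetherToPlotHeatmap, computationVersion and
% whetherToVisualizeWVector at their defaults ('No', 'Looping' and 'Yes', respectively).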
% Read positives
logMessages(debugMsgLocation,sprintf('Positives...'), debugLevel);
[nPos, omittedPosIndices, allSeqsRawPosFasta] = readFastaSequences(givenPosFastaFilename, segmentSizeInBps, givenNPos, outputFolder, debugLevel, debugMsgLocation);
% nPos and nNeg can be smaller than or equal to givenNPos and givenNNeg, respectively,
% because any sequence whose length is less than segmentSizeInBps is omitted
%
% Read Negatives
logMessages(debugMsgLocation,sprintf('Negatives...'), debugLevel);
[nNeg, omittedNegIndices, allSeqsRawNegFasta] = readFastaSequences(givenNegFastaFilename, segmentSizeInBps, givenNNeg, outputFolder, debugLevel, debugMsgLocation);
%
% Combine positives + negatives
nBags = nPos + nNeg;
% Check whether we need to update testIndices because some sequences were omitted;
% if omittedPosIndices and omittedNegIndices are both empty, no updates are required
if ~isempty(omittedPosIndices)
%nOmmitted = length(find(ismember(testIndices, omittedPosIndices)));
testIndices(find(ismember(testIndices, omittedPosIndices))) = [];
%logMessages(debugMsgLocation,sprintf('%d positive sequences omitted from test set.\n', nOmmitted), debugLevel);
end
if ~isempty(omittedNegIndices)
%nOmmitted = length(find(ismember(testIndices, omittedNegIndices)));
testIndices(find(ismember(testIndices, nPos+omittedNegIndices))) = [];%offset by nPos
%logMessages(debugMsgLocation,sprintf('%d negative sequences omitted from test set.\n', nOmmitted), debugLevel);
end
% Additionally, account for the reduction in the number of sequences: shift each remaining test index down by the number of omitted sequences that precede it
for i=1:length(testIndices)
testIndices(i) = testIndices(i) - nnz(omittedPosIndices < testIndices(i)) - nnz( (nPos+omittedNegIndices) < testIndices(i));
end
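% Illustrative example (hypothetical numbers): with testIndices = [10 25] and a
% single omitted positive sequence at index 7 (omittedPosIndices = [7], not itself
% a test index), the loop above shifts both entries down by one, giving
% testIndices = [9 24], so they still point to the same sequences after the omission.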
% Put them together
allSeqsRawFasta.sequence = cell(1, nBags);
for i=1:nPos
allSeqsRawFasta.sequence{i} = allSeqsRawPosFasta.sequence{i};
end
clear allSeqsRawPosFasta;
for i=1:nNeg
allSeqsRawFasta.sequence{i+nPos} = allSeqsRawNegFasta.sequence{i};%offset by nPos
end
clear allSeqsRawNegFasta;
% Write the revised set of test indices to disk to support reproducibility
if ~isempty(omittedPosIndices) || ~isempty(omittedNegIndices)
sortedTestIndices = sort(testIndices);
dlmwrite(strcat(outputFolder, '/testIndices_New.txt'), sortedTestIndices');
end
% generate labels
Y = [ones(1, nPos) -ones(1, nNeg)];
logMessages(debugMsgLocation,sprintf('#Bags: %d\n', nBags), debugLevel);
% default alphabet
alphabet = 'acgt';
% Set segmentSizeInPercentage to 0, its default and currently the only supported
% value; the segment size is specified in basepairs via segmentSizeInBps.
segmentSizeInPercentage = 0;
logMessages(debugMsgLocation,sprintf('Collecting all bags...'), debugLevel);
tic;
idx1 = 0;
thisInstances = [];
for k=1:nBags
if oligoLen >= 3
% for oligoLen 3 or larger, this loop may take some time, so log progress every 100 bags
if rem(k,100) == 0
logMessages(debugMsgLocation,sprintf('%d--', k), debugLevel);
end
end
if any(k == testIndices)
% do nothing now; test sequences are collected later,
% after the whole training stage has completed.
else
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags{idx1} = [tempNS tempS];
thisInstances = [thisInstances (NS + S)];
end
end
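% At this point, each training bag allSeqsAsBags{idx} is a matrix whose columns are
% the ODH feature vectors of its segments (instances), with the non-shifted segments
% first followed by the shifted ones (see ADDITIONAL NOTES above); thisInstances
% stores the corresponding per-bag instance counts.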
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
nBagsForTrain = idx1;
nBagsForTest = nBags-idx1;
logMessages(debugMsgLocation,sprintf('#Test: %d\n#Train: %d\n', nBagsForTest, nBagsForTrain), debugLevel);
logMessages(debugMsgLocation,sprintf('Segmentation statistics:\n'), debugLevel);
logMessages(debugMsgLocation,sprintf(' Mean number of instances in bags: %.2f\t Median: %d\t Max.: %d\t Min.: %d\n', mean(thisInstances), median(thisInstances), max(thisInstances), min(thisInstances)), debugLevel);
% Variable 'nBags' holds nBagsForTrain + nBagsForTest
%
% 2. Compute the instanceWide kernel that is used further
%
instanceEnds = cumsum(thisInstances);
instanceStarts = cumsum(thisInstances) - thisInstances + 1;
nInstances = instanceEnds(end);
logMessages(debugMsgLocation,sprintf('Total instances from all bags: %d\n', nInstances), debugLevel);
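% Illustrative example (hypothetical counts): for thisInstances = [3 2 4],
% instanceEnds = [3 5 9], instanceStarts = [1 4 6] and nInstances = 9, i.e. the
% instances of bag i occupy positions instanceStarts(i):instanceEnds(i) in the
% instance-wide kernel.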
% Use the sparseComputation flag for computing the instance-wide kernel.
% IMP: setting sparseComputation to 0 uses the broken-down approach to compute the instance-wide kernel;
% that approach has not been tested for correctness or efficiency. We recommend, and use, sparseComputation.
sparseComputation = 1;
[instanceStarts, instanceEnds, instanceWideKernel] = computeInstanceWideKernel(allSeqsAsBags, thisInstances, sparseComputation, debugLevel, debugMsgLocation);
%
% The functions below are recomputed in each inner cross-validation iteration;
% the cross-validation loop is handled here as well
%
% 3. Compute the conformed multi-instance kernels; their combination is learned via
% multiple kernel learning
% -- conformalXformationParam is the Gaussian bandwidth (sigma) we wish to use
%
resultString = [];
bestParamComb.best_C = 0.0;
bestParamComb.best_teAUROC = 0.0;
bestParamComb.best_nClusters = 0;
bestParamComb.best_sigma = 0.0;
% open summary file for writing
fid=fopen(runSummaryFilename, 'a');
Youtertest = Y(testIndices);
trainIndicesInY = setdiff([1:nBags], testIndices);
trainIndicesInBags = [1:nBagsForTrain];
YwholeTrain = Y(trainIndicesInY);
nPosTr = size(find(YwholeTrain > 0),2);
nNegTr = size(find(YwholeTrain < 0),2);
% handle class imbalance; multiply this with C for the negative class
imbalance = nPosTr/nNegTr;
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
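% Illustrative example (hypothetical counts): with 100 positive and 400 negative
% training bags, imbalance = 100/400 = 0.25, so the effective SVM cost for the
% negative class becomes 0.25 * C.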
for nc=1:size(nClusterVals,2)
for sigmaItr=1:size(sigmaVals,2)
nClusters = nClusterVals(1,nc);
conformalXformationParam = sigmaVals(1, sigmaItr);
% send only YwholeTrain and training set of bags
%fprintf('Seed-setting: 2\n');
rng(11, 'twister');
foldIdForIndices = cvpartition(YwholeTrain, 'Kfold', nFolds);
% ^ training(f)/test(f) on this cvpartition object return logical index vectors
%
% 4. All kernels are ready. Next, pose this as a multiple kernel learning (MKL) problem.
% - We use Shogun's Python modular interface to perform MKL.
% - The kernel weights obtained from MKL are the squared theta values (see the paper for details)
%
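% Illustrative sketch (hypothetical values): if MKL returns
% subkernelWeights = [0.49; 0.36; 0.15], the corresponding theta values used later
% by getInstanceWeights are their square roots, roughly [0.70; 0.60; 0.39].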
for c=1:size(Cs,2)
C = Cs(c);
test_auROCs = []; test_auPRCs = [];
logMessages(debugMsgLocation,sprintf('C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', C, nClusters, conformalXformationParam), debugLevel);
for f=1:nFolds
logMessages(debugMsgLocation,sprintf('Fold: %d\n', f), debugLevel);
logMessages(debugMsgLocation,sprintf('#training indices: %d\n', length(find(foldIdForIndices.training(f) == 1))), debugLevel);
logMessages(debugMsgLocation,sprintf('#test indices: %d\n', length(find(foldIdForIndices.test(f) == 1))), debugLevel);
% For fold f, corresponding examples will be used as test
thisTrainIndices = find(foldIdForIndices.training(f) == 1);
thisTestIndices = find(foldIdForIndices.test(f) == 1);
%
% Computation of conformal MI kernel also inside folds loop, because it shouldn't
% use instances from the test bags.
% This restriction is for clustering step, rest, the kernel has to be computed for
% all (wholeTrain) indices.
% For example, trainIndicesInBags = 1:1000 % row vector
trainIndicesForConformalComputation = thisTrainIndices;
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...'), debugLevel);
tic;
[allSeqsConformedSetKernel, rawConformedSetKernel , allSeqsTransformationKernel] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, ...
instanceWideKernel, trainIndicesForConformalComputation, allSeqsAsBags, ...
nClusters, conformalXformationParam, YwholeTrain, ...
computationVersion, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation,sprintf('\ndone in %.3f seconds\n', toc), debugLevel);
%
logMessages(debugMsgLocation,sprintf('MKL, Fold %d, C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', f, C, nClusters, conformalXformationParam), debugLevel);
Ytrain = YwholeTrain(thisTrainIndices);
Ytest = YwholeTrain(thisTestIndices);
for k=1:nClusters
% Conformed kernels
trainConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTrainIndices);
testConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTestIndices);
%
end
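% For this fold, trainConformedKernel{k} is nTrainFold-by-nTrainFold and
% testConformedKernel{k} is nTrainFold-by-nTestFold (train-by-test), one such pair
% per cluster, i.e. per subkernel.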
% Perform MKL; subkernel weights and predictions are returned after MKL
whetherPerformTest = 1; % set 1 to use test kernels to make predictions
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
[subkernelWeights, predictions] = performMKLWithShogunPython(C, Ytrain, Ytest, mklNorm, imbalance, ...
trainConformedKernel, testConformedKernel, outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% Both subkernelWeights and predictions are zero vectors if solving MKL failed; handle that case here.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables to 0.0. a) this_teAUROC, b) this_teAUPRC
this_teAUROC = 0.0;
this_teAUPRC = 0.0;
else %solving MKL was fine
%
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
[this_teAUROC, this_teAUPRC] = libsvm_plotroc(Ytest', predictions', 'personal');
% print this to the summary file
end
this_resultString = [datestr(now), '--#Clusters:', num2str(nClusters), '--SigmaGaussian:', ...
num2str(conformalXformationParam), '--C-SVM:', num2str(C), '--mklNorm:', ...
num2str(mklNorm), '--teAUROC:', num2str(this_teAUROC), '--teAUPRC:', ...
num2str(this_teAUPRC), '\n'];
resultString = [resultString this_resultString];
fprintf(fid, this_resultString);
test_auROCs = [test_auROCs this_teAUROC];
test_auPRCs = [test_auPRCs this_teAUPRC];
end % nFolds for loop ends
% Note the best values based on the average performance on the n-folds
if mean(test_auROCs) >= bestParamComb.best_teAUROC
logMessages(debugMsgLocation,sprintf('mean_teAUROC: %.4f\n', mean(test_auROCs)), debugLevel);
bestParamComb.best_teAUROC = mean(test_auROCs);
bestParamComb.best_C = C;
bestParamComb.best_nClusters = nClusters;
bestParamComb.best_sigma = conformalXformationParam;
end
end % for loop for different values of C ends
% -- Noted: best performing pair of values for nClusters and Gaussian RBF sigma
% -- Model selected after cross-validation will be used further to get the instance weights
end % for loop for different values of sigma ends
end % for loop for different values of nClusters ends
fclose(fid);
% Clear variables created/used during model selection phase, and not required any more
clear trainConformedKernel;
clear testConformedKernel;
%
% 5. Use the best param combination to train using the whole training set and predict the test set here
%
logMessages(debugMsgLocation,sprintf('---Re-training and Test---\n'), debugLevel);
% Re-train the model with all train instances together
% trainIndicesInBags is 1:nBagsForTrain
nClusters = bestParamComb.best_nClusters;
conformalXformationParam = bestParamComb.best_sigma;
logMessages(debugMsgLocation,sprintf('Best param-values:\nSVM-Cost: %.3f, nClusters: %d, sigma: %.3f\n', bestParamComb.best_C, nClusters, conformalXformationParam), debugLevel);
logMessages(debugMsgLocation,sprintf('Re-training using complete training set examples...\n'), debugLevel);
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...\n'), debugLevel);
[allSeqsConformedSetKernel, rawConformedSetKernel, allSeqsTransformationKernel, clusterCentres] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, instanceWideKernel, ...
trainIndicesInBags, allSeqsAsBags, nClusters, conformalXformationParam, ...
YwholeTrain, computationVersion, debugLevel, debugMsgLocation);
%
%
for k=1:nClusters
% Conformed kernels
VtrainConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
VtestConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
%dummy placeholder; hence train-by-train instead of test-by-train
end
%
whetherPerformTest = 0;
% We set this to 0 now, since we will make predictions using the computed weight vector and not the test kernels
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
%
[subkernelWeights, predictions] = performMKLWithShogunPython(bestParamComb.best_C, YwholeTrain, Youtertest, mklNorm, imbalance, ...
VtrainConformedKernel, VtestConformedKernel, ...
outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% Ideally, this is not needed here, since such param values won't (or shouldn't) be selected as best values
% If that does happen, something is probably wrong.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables: train_AUROC, train_AUPRC
train_AUROC = 0.0;
train_AUPRC = 0.0;
else %solving MKL was fine
% Predictions returned at this juncture are for the training examples; they are not test predictions.
[train_AUROC, train_AUPRC] = libsvm_plotroc(YwholeTrain', predictions', 'personal');
end
logMessages(debugMsgLocation,sprintf('train_AUROC with whole training set: %.4f\n', train_AUROC), debugLevel);
% Below, we make our own predictions for the test examples using the weight vector
% 6.1 We will now collect the test examples
tic;
logMessages(debugMsgLocation, sprintf('Now collecting the unseen test examples...'), debugLevel);
idx1 = 0;
for k=1:nBags
if any(k == testIndices)
% Collection of test sequences takes place here,
% now that the whole training stage is completed.
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags_Test{idx1} = [tempNS tempS];
end
end
clear allSeqsRawFasta;
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
% Clearing allSeqsRawFasta frees up memory, which is useful when loading the many test examples that are typical of problems in computational biology
if(predictUsingWeightVector == 1)
logMessages(debugMsgLocation, sprintf('---Prediction using weight vector---\n'), debugLevel);
% 6.2 Use the subkernel weights to obtain the instance weights, first for the
% instances in the training bags; a filename, when passed as an argument to
% getInstanceWeights, is used to write them to file.
[thetaVals, instanceWeightsInEachBag] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel);
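% Conceptually (see the CoMIK paper for the exact expression), getInstanceWeights
% combines the theta values (square roots of the subkernel weights) with each
% instance's transformation values w.r.t. the cluster centres to produce a weight
% for every instance within every bag.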
% Get the transformations for the test examples, and the diagonal elements which are used for normalization later.
% Note: one could proceed block-wise at this juncture if the number of test examples is extremely large, as is typically the case.
%
tic;
logMessages(debugMsgLocation, sprintf('Applying transformation to the test samples, this might take some time...\n'), debugLevel);
[allSeqsTransformationKernel_Test] = applyTransformation(allSeqsAsBags_Test, clusterCentres, conformalXformationParam, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%
% Getting the instance weightings for the test examples
[thetaVals, instanceWeightsInEachBag_Test] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel_Test);
% At this point, all (train + test examples) instance weights have been obtained.
%
% 6.3 Use function obtainWeightVector and procure the weight vectors and the bias value
% The weight vector need not be ordered by rank; rather, we need it in its original order.
tic;
logMessages(debugMsgLocation, sprintf('Computing the weight vector...'), debugLevel);
[overallWeightVector, weightVectors, biasValue] = obtainWeightVector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%% Don't need allSeqsAsBags, allSeqsTransformationKernel?
% clear allSeqsAsBags allSeqsTransformationKernel;
%
% 6.4 Use these to make predictions using y = w*x + b
yhatTemp = 0.0;
%
% bagPhi for Test is nFeatures-by-nBagsForTest-by-nClusters
bagPhi_Test = zeros( size(allSeqsAsBags_Test{1},1), nBagsForTest, nClusters);
%
% For the test samples, we need to obtain their normalization factors.
%
for i=1:length(allSeqsAsBags_Test)% this is nBagsForTest
for j=1:nClusters
% -- allSeqsTransformationKernel_Test{i} is already nInstances-by-nClusters.
% -- Normalization of each term is also needed
term = sum(allSeqsAsBags_Test{i} .* repmat(allSeqsTransformationKernel_Test{i}(:,j)', size(allSeqsAsBags_Test{i},1), 1), 2);
bagPhi_Test(:, i, j) = term ./ norm(term, 2);
end
end
logMessages(debugMsgLocation,sprintf('Bag Phi for test samples done.\n'), debugLevel);
dlmwrite(strcat(outputFolder,'/WeightVectors.txt'), weightVectors');
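% The loop below accumulates the decision values per cluster/subkernel:
%   yhat(j) = sum_k weightVectors(:,k)' * bagPhi_Test(:,j,k) + biasValue
% for every test bag j, i.e. the y = w*x + b prediction of step 6.4 applied to the
% normalized, transformed test bags.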
for i=1:nClusters
yhatTemp = yhatTemp + (weightVectors(:, i)' * bagPhi_Test(:, :, i));
end
%
logMessages(debugMsgLocation,sprintf('Bias value: %.4f\n', biasValue), debugLevel);
yhat = yhatTemp + biasValue;
if(length(Youtertest) == length(yhat))
logMessages(debugMsgLocation, sprintf('%d and %d: Dimensions of yhat and ytest match!\n', length(Youtertest), length(yhat)), debugLevel);
end
dlmwrite(strcat(outputFolder,'/predictedLabelsWeightVector.txt'), yhat');
dlmwrite(strcat(outputFolder,'/givenLabels.txt'), Youtertest');
logMessages(debugMsgLocation,sprintf('Predicted labels written to disk. Computing the auROC/auPRC, this may take some time...\n'), debugLevel);
tic;
[test_teAUROC, test_teAUPRC] = libsvm_plotroc(Youtertest', yhat', 'personal');
logMessages(debugMsgLocation,sprintf('done in %.3f\n', toc), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUROC with test-kernels: %.4f\n', test_teAUROC), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUPRC with test-kernels: %.4f\n', test_teAUPRC), debugLevel);
end
%
%
%% Write the instanceWeightsInEachBag to file
% dlmwrite('instanceWeights_test.csv', instanceWeightsInEachBag, '-append');
% for i=1:size(instanceWeightsInEachBag,2)
% dlmwrite('instanceWeights_test.csv', cell2mat(instanceWeightsInEachBag{i})', '-append');
% end
if strcmp(whetherToPlotHeatmap, 'Yes')
logMessages(debugMsgLocation,sprintf('Plotting heatmaps...'), debugLevel);
if(predictUsingWeightVector == 1)
normInstanceWeightsInEachBag = standardizeMatrix(vertcat(cell2mat(instanceWeightsInEachBag)', cell2mat(instanceWeightsInEachBag_Test)'));
else
normInstanceWeightsInEachBag = standardizeMatrix(cell2mat(instanceWeightsInEachBag)');
end
%
heatmapFname = strcat(outputFolder,'/Heatmap-InstanceWeights', '_oligoLen', num2str(oligoLen), '_segmentSize', num2str(segmentSizeInBps) , '_maxDist', num2str(maxDist), '_nClusters', num2str(bestParamComb.best_nClusters), '_svmC', num2str(bestParamComb.best_C) ,'_sigma', num2str(bestParamComb.best_sigma), '.pdf');
%
p = plotHeatmap(normInstanceWeightsInEachBag, heatmapFname, 0, 1);
logMessages(debugMsgLocation,sprintf('done!\n'), debugLevel);
%
end
%
% 7. Visualize weight vectors and motifs
%
if strcmp(whetherToVisualizeWVector, 'Yes')
logMessages(debugMsgLocation,sprintf('Visualizing weight vectors...\n'), debugLevel);
visualize_wvector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
end
logMessages(debugMsgLocation,sprintf('Done!\n'), debugLevel);
%
end % function ends