Skip to content
Switch branches/tags
Go to file
Cannot retrieve contributors at this time
function [allSeqsAsBags, allSeqsConformedSetKernel, subkernelWeights, thetaVals, instanceWeightsInEachBag, resultString, bestParamComb, test_teAUROC, test_teAUPRC, predictions] = comik_main_with_weight_vector(givenPosFastaFilename, givenNegFastaFilename, givenNPos, givenNNeg, oligoLen, maxDist, segmentSizeInBps, nClusterVals, sigmaVals, Cs, mklNorm, nFolds, testIndices, debugLevel, debugMsgLocation, outputFolder, runSummaryFilename, whetherToPlotHeatmap, computationVersion, whetherToVisualizeWVector)
% Main function for CoMIK
% Param 'givenPosFastaFilename'
% Name of FASTA file with positive examples
% Param 'givenNegFastaFilename'
% Name of FASTA file with negative examples
% Param 'givenNPos'
% Number of positive examples to use (can be less than those provided in file)
% Param 'givenNNeg'
% Number of negative examples to use (can be less than those provided in file)
% Param 'oligoLen' and 'maxDist'
% Specify the oligomer length and the maximum distance for the ODH
% representation. Caution: A combination of large values can be memory intensive!
% Param 'segmentSizeInBps'
% Specify the segment-size in basepairs for CoMIK
% Param 'nClusterVals' (vector)
% Specify the number of clusters for CoMIK
% Param 'sigmaVals' (vector)
% Specify the sigma values for the Gaussian transformation
% Param 'Cs' (vector)
% Cost values for SVM
% Param 'mklNorm'
% Specify p-norm value for MKL
% Param 'nFolds'
% Specify number of inner cross-validation folds
% Param 'testIndices' (vector)
% Indices of the sequences in the FASTA file which are to be considered as unseen
% test examples. For example, with a FASTA file containing a total of 100
% positive sequences followed by 100 negative sequences, the test indices
% are given as
% testIndices = [81:100 181:200]
% for the corresponding 20 positives and 20 negatives to be treated as test
% examples.
% Param 'debugLevel' (0/1/2)
% Param 'debugMsgLocation' (1/fileID)
% Param 'outputFolder'
% Specify name of folder to write output results to
% Param 'allSeqsAsBags'
% All sequences represented as bags
% Param 'allSeqsConformedSetKernel' (matrix)
% The conformally transformed MI kernel
% Param 'subkernelWeights' (vector)
% Weights assigned to each sub-kernel upon solving MKL
% Param 'thetaVals' (vector)
% Theta values used for obtaining the instance weights
% Param 'instanceWeightsInEachBag'
% Weights assigned to instances in each bag using the final model
% Param 'resultString'
% A combined string of per iteration results (as printed in the resultSummary file)
% Param 'bestParamComb' (Struct)
% Best performing values for various params
% Param 'test_teAUROC'
% AUROC value for the test sequences
% Param 'test_teAUPRC'
% AUPRC value for the test sequences
% Param 'predictions'
% Prediction vector
% -- Segmentation
% . All segments, shifted and non-shifted in one bag; Order of segments: non-shifted segments followed by shifted segments.
% This results in #kernels = #clusterCentres
% --
% Author: snikumbh
% debugLevel 2 prints all messages
% debugLevel 1 place holder level, may be used in the future
% debugLevel 0 silent on most of the messages, prints very few messages for the user
% Setting of seed for the Matlab session for reproducibility: already done in the wrapper function.
% Handle arguments and values/defaults
totalArguments = 20;
predictUsingWeightVector = 1;
if nargin < totalArguments
whetherToVisualizeWVector = 'Yes';
if nargin < totalArguments - 1
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
if nargin < totalArguments - 2
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
whetherToPlotHeatmap = 'No';
% Read positives
logMessages(debugMsgLocation,sprintf('Positives...'), debugLevel);
[nPos, omittedPosIndices, allSeqsRawPosFasta] = readFastaSequences(givenPosFastaFilename, segmentSizeInBps, givenNPos, outputFolder, debugLevel, debugMsgLocation);
% nPos and nNeg could be smaller than or equal to givenNPos and givenNNeg resp.
% due to omitting any sequences whose length is less than the segmentSizeInBps
% Read Negatives
logMessages(debugMsgLocation,sprintf('Negatives...'), debugLevel);
[nNeg, omittedNegIndices, allSeqsRawNegFasta] = readFastaSequences(givenNegFastaFilename, segmentSizeInBps, givenNNeg, outputFolder, debugLevel, debugMsgLocation);
% Combine positives + negatives
nBags = nPos + nNeg;
% Check if we need to update the testIndices if some sequences have been omitted
% if posIndices and negIndices are empty, then no updates required
if ~isempty(omittedPosIndices)
%nOmmitted = length(find(ismember(testIndices, omittedPosIndices)));
testIndices(find(ismember(testIndices, omittedPosIndices))) = [];
%logMessages(debugMsgLocation,sprintf('%d positive sequences omitted from test set.\n', nOmmitted), debugLevel);
if ~isempty(omittedNegIndices)
%nOmmitted = length(find(ismember(testIndices, omittedNegIndices)));
testIndices(find(ismember(testIndices, nPos+omittedNegIndices))) = [];%offset by nPos
%logMessages(debugMsgLocation,sprintf('%d negative sequences omitted from test set.\n', nOmmitted), debugLevel);
% Additionally, one needs to account for the reduction in the number of sequences affecting the indices. Remove indices > nBags
for i=1:length(testIndices)
testIndices(i) = testIndices(i) - nnz(omittedPosIndices < testIndices(i)) - nnz( (nPos+omittedNegIndices) < testIndices(i));
% Put them together
allSeqsRawFasta.sequence = cell(1, nBags);
for i=1:nPos
allSeqsRawFasta.sequence{i} = allSeqsRawPosFasta.sequence{i};
clear allSeqsRawPosFasta;
for i=1:nNeg
allSeqsRawFasta.sequence{i+nPos} = allSeqsRawNegFasta.sequence{i};%offset by nPos
clear allSeqsRawNegFasta;
% Write the revised set of test indices to disk to support reproducibility
if ~isempty(omittedPosIndices) | ~isempty(omittedNegIndices)
sortedTestIndices = sort(testIndices);
dlmwrite(strcat(outputFolder, '/testIndices_New.txt'), sortedTestIndices');
% generate labels
Y = [ones(1, nPos) -ones(1, nNeg)];
logMessages(debugMsgLocation,sprintf('#Bags: %d\n', nBags), debugLevel);
% default for 'acgt'
alphabet = 'acgt';
% setting default value for segmentSizePercentage.
% Currently, this is the only valid value.
segmentSizeInPercentage = 0;
logMessages(debugMsgLocation,sprintf('Collecting all bags...'), debugLevel);
idx1 = 0;
thisInstances = [];
for k=1:nBags
if oligoLen >= 3
% because, for oligoLen 3 or larger, this may take some time.
if rem(k,100) == 0
logMessages(debugMsgLocation,sprintf('%d--', k), debugLevel);
if any(k == testIndices)
% do-nothing now, we collect them later
% Collection of test sequences can take place later,
% after the whole training stage is completed.
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags{idx1} = [tempNS tempS];
thisInstances = [thisInstances (NS + S)];
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
nBagsForTrain = idx1;
nBagsForTest = nBags-idx1;
logMessages(debugMsgLocation,sprintf('#Test: %d\n#Train: %d\n', nBagsForTest, nBagsForTrain), debugLevel);
logMessages(debugMsgLocation,sprintf('Segmentation statistics:\n'), debugLevel);
logMessages(debugMsgLocation,sprintf(' Mean number of instances in bags: %.2f\t Median: %d\t Max.: %d\t Min.: %d\n', mean(thisInstances), median(thisInstances), max(thisInstances), min(thisInstances)), debugLevel);
% Variable 'nBags' holds nBagsForTrain + nBagsForTest
% 2. Compute the instanceWide kernel that is used further
instanceEnds = cumsum(thisInstances);
instanceStarts = cumsum(thisInstances) - thisInstances + 1;
nInstances = instanceEnds(end);
logMessages(debugMsgLocation,sprintf('Total instances from all bags: %d\n', nInstances), debugLevel);
% Using the sparseComputation flag for computing the instance-wide kernel.
% IMP: setting sparseComputation to '0' uses the brokendown approach to compute the instance-wide kernel.
% This hasn't been tested for either correctness or efficiency. We recommend using sparseComputation.
% We are using sparseComputation.
sparseComputation = 1;
[instanceStarts, instanceEnds, instanceWideKernel] = computeInstanceWideKernel(allSeqsAsBags, thisInstances, sparseComputation, debugLevel, debugMsgLocation);
% Further are functions that will be recomputed per inner cross-validation iteration
% Cross-validation loop is also handled simultaneously
% 3. Computing the conformed multi-instance kernel is done via multiple kernel
% learning
% -- conformalXformationParam is the Gaussian bandwidth (sigma) we wish to use
resultString = [];
bestParamComb.best_C = 0.0;
bestParamComb.best_teAUROC = 0.0;
bestParamComb.best_nClusters = 0;
bestParamComb.best_sigma = 0.0;
% open summary file for writing
fid=fopen(runSummaryFilename, 'a');
Youtertest = Y(testIndices);
trainIndicesInY = setdiff([1:nBags], testIndices);
trainIndicesInBags = [1:nBagsForTrain];
YwholeTrain = Y(trainIndicesInY);
nPosTr = size(find(YwholeTrain > 0),2);
nNegTr = size(find(YwholeTrain < 0),2);
% handle imbalance, multiply this to C for negatives
imbalance = nPosTr/nNegTr;
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
for nc=1:size(nClusterVals,2)
for sigmaItr=1:size(sigmaVals,2)
nClusters = nClusterVals(1,nc);
conformalXformationParam = sigmaVals(1, sigmaItr);
% send only YwholeTrain and training set of bags
%fprintf('Seed-setting: 2\n');
rng(11, 'twister');
foldIdForIndices = cvpartition(YwholeTrain, 'Kfold', nFolds);
% ^returns logicals
% 4. All kernels ready. Pose this as a multiple kernel learning problem next.
% - We have used shogun's python modular interface to perform MKL.
% - Upon performing this MKL, the kernel weights obtained are squared theta values (see paper for details)
for c=1:size(Cs,2)
C = Cs(c);
test_auROCs = []; test_auPRCs = [];
logMessages(debugMsgLocation,sprintf('C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', C, nClusters, conformalXformationParam), debugLevel);
for f=1:nFolds
logMessages(debugMsgLocation,sprintf('Fold: %d\n', f), debugLevel);
logMessages(debugMsgLocation,sprintf('#training indices: %d\n', length(find( == 1))), debugLevel);
logMessages(debugMsgLocation,sprintf('#test indices: %d\n', length(find(foldIdForIndices.test(f) == 1))), debugLevel);
% For fold f, corresponding examples will be used as test
thisTrainIndices = find( == 1);
thisTestIndices = find(foldIdForIndices.test(f) == 1);
% Computation of conformal MI kernel also inside folds loop, because it shouldn't
% use instances from the test bags.
% This restriction is for clustering step, rest, the kernel has to be computed for
% all (wholeTrain) indices.
% For example, trainIndicesInBags = 1:1000 % row vector
trainIndicesForConformalComputation = thisTrainIndices;
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...'), debugLevel);
[allSeqsConformedSetKernel, rawConformedSetKernel , allSeqsTransformationKernel] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, ...
instanceWideKernel, trainIndicesForConformalComputation, allSeqsAsBags, ...
nClusters, conformalXformationParam, YwholeTrain, ...
computationVersion, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation,sprintf('\ndone in %.3f seconds\n', toc), debugLevel);
logMessages(debugMsgLocation,sprintf('MKL, Fold %d, C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', f, C, nClusters, conformalXformationParam), debugLevel);
Ytrain = YwholeTrain(thisTrainIndices);
Ytest = YwholeTrain(thisTestIndices);
for k=1:nClusters
% Conformed kernels
trainConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTrainIndices);
testConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTestIndices);
% perform MKL: weights, predictions should be returned after mkl
whetherPerformTest = 1; % set 1 to use test kernels to make predictions
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
[subkernelWeights, predictions] = performMKLWithShogunPython(C, Ytrain, Ytest, mklNorm, imbalance, ...
trainConformedKernel, testConformedKernel, outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% both subkernelWeights and predictions are zero vectors, if solving MKL failed. Treat this here.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables to 0.0. a) this_teAUROC, b) this_teAUPRC
this_teAUROC = 0.0;
this_teAUPRC = 0.0;
else %solving MKL was fine
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
[this_teAUROC, this_teAUPRC] = libsvm_plotroc(Ytest', predictions', 'personal');
% print this to the summary file
this_resultString = [datestr(now), '--#Clusters:', num2str(nClusters), '--SigmaGaussian:', ...
num2str(conformalXformationParam), '--C-SVM:', num2str(C), '--mklNorm:', ...
num2str(mklNorm), '--teAUROC:', num2str(this_teAUROC), '--teAUPRC:', ...
num2str(this_teAUPRC), '\n'];
resultString = [resultString this_resultString];
fprintf(fid, this_resultString);
test_auROCs = [test_auROCs this_teAUROC];
test_auPRCs = [test_auPRCs this_teAUPRC];
end % nFolds for loop ends
% Note the best values based on the average performance on the n-folds
if mean(test_auROCs) >= bestParamComb.best_teAUROC
logMessages(debugMsgLocation,sprintf('mean_teAUROC: %.4f\n', mean(test_auROCs)), debugLevel);
bestParamComb.best_teAUROC = mean(test_auROCs);
bestParamComb.best_C = C;
bestParamComb.best_nClusters = nClusters;
bestParamComb.best_sigma = conformalXformationParam;
end % for loop for different values of C ends
% -- Noted: best performing pair of values for nClusters and Gaussian RBF sigma
% -- Model selected after cross-validation will be used further to get the instance weights
end % for loop for different values of sigma ends
end % for loop for different values of nClusters ends
% Clear variables created/used during model selection phase, and not required any more
clear trainConformedKernel;
clear testConformedKernel;
% 5. Use the best param combinations to train using the whole trainiing set and predict the test set here
logMessages(debugMsgLocation,sprintf('---Re-training and Test---\n'), debugLevel);
% Re-train the model with all train instances together
% trainIndicesInBags is 1:nBagsForTrain
nClusters = bestParamComb.best_nClusters;
conformalXformationParam = bestParamComb.best_sigma;
logMessages(debugMsgLocation,sprintf('Best param-values:\nSVM-Cost:%.3f, nClusters: %d, sigma: %.3f\n', nClusters, conformalXformationParam), debugLevel);
logMessages(debugMsgLocation,sprintf('Re-training using complete trainning set examples...\n'), debugLevel);
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...\n'), debugLevel);
[allSeqsConformedSetKernel, rawConformedSetKernel, allSeqsTransformationKernel, clusterCentres] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, instanceWideKernel, ...
trainIndicesInBags, allSeqsAsBags, nClusters, conformalXformationParam, ...
YwholeTrain, computationVersion, debugLevel, debugMsgLocation);
for k=1:nClusters
% Conformed kernels
VtrainConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
VtestConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
%dummy place holder; hence train-by-train instead of test-by-train
whetherPerformTest = 0;
% We now set this to 0, since we will make predictions using computed weight vector and not the kernel
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
[subkernelWeights, predictions] = performMKLWithShogunPython(bestParamComb.best_C, YwholeTrain, Youtertest, mklNorm, imbalance, ...
VtrainConformedKernel, VtestConformedKernel, ...
outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% Ideally, this is not needed here, since such param values won't (or shouldn't) be selected as best values
% If that does happen, something is probably wrong.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables: train_AUROC, train_AUPRC
train_AUROC = 0.0;
train_AUPRC = 0.0;
else %solving MKL was fine
% Predictions returned at this juncture are predictions for the training examples, they are not test predictions.
[train_AUROC, train_AUPRC] = libsvm_plotroc(YwholeTrain', predictions', 'personal');
logMessages(debugMsgLocation,sprintf('train_AUROC with whole training set: %.4f\n', train_AUROC), debugLevel);
% Below, we make our own predictions for the test examples using the weight vector
% 6.1 We will now collect the test examples
logMessages(debugMsgLocation, sprintf('Now collecting the unseen test examples...'), debugLevel);
idx1 = 0;
for k=1:nBags
if any(k == testIndices)
% Collection of test sequences takes place here,
% now that the whole training stage is completed.
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags_Test{idx1} = [tempNS tempS];
clear allSeqsRawFasta;
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
% clears up some space useful to load the many test examples as is typical in problems in computational biology
if(predictUsingWeightVector == 1)
logMessages(debugMsgLocation, sprintf('---Prediction using weight vector---\n'), debugLevel);
% 6.2 Use the subkernel weights and get transformed instances among the test examples
% instanceWeights for instances in training bags
% filename passed as argument to getInstanceWeights is used to write them to file.
[thetaVals, instanceWeightsInEachBag] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel);
% Get the transformations for test examples, and the diagonal elements which will be used for normalization further
% Should one proceed block-wise at this juncture if the number of test examples is extremely large as is typically the case?
logMessages(debugMsgLocation, sprintf('Applying transformation to the test samples, this might take some time...\n'), debugLevel);
[allSeqsTransformationKernel_Test] = applyTransformation(allSeqsAsBags_Test, clusterCentres, conformalXformationParam, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
% Getting the instance weightings for the test examples
[thetaVals, instanceWeightsInEachBag_Test] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel_Test);
% At this point, all (train + test examples) instance weights have been obtained.
% 6.3 Use function obtainWeightVector and procure the weight vectors and the bias value
% The weight vector need not be ordered by rank. Rather, we need it by its original order.
logMessages(debugMsgLocation, sprintf('Computing the weight vector...'), debugLevel);
[overallWeightVector, weightVectors, biasValue] = obtainWeightVector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%% Don't need allSeqsAsBags, allSeqsTransformationKernel?
% clear allSeqsAsBags allSeqsTransformationKernel;
% 6.4 Use these to make predictions using y = w*x + b
yhatTemp = 0.0;
% bagPhi for Test is nFeatures-by-nBagForTest-by-nClusters
bagPhi_Test = zeros( size(allSeqsAsBags_Test{1},1), nBagsForTest, nClusters);
% For normalization of test samples, we need to obtain the normalization factor for them.
for i=1:length(allSeqsAsBags_Test)% this is nBagsForTest
for j=1:nClusters
% -- allSeqs TransformationKernel already has nInstances x nClusters.
% -- Also need normalization
term = sum(allSeqsAsBags_Test{i} .* repmat(allSeqsTransformationKernel_Test{i}(:,j)', size(allSeqsAsBags_Test{i},1), 1), 2);
bagPhi_Test(:, i, j) = term ./ norm(term, 2);
logMessages(debugMsgLocation,sprintf('Bag Phi for test samples done.\n'), debugLevel);
dlmwrite(strcat(outputFolder,'/WeightVectors.txt'), weightVectors');
for i=1:nClusters
yhatTemp = yhatTemp + (weightVectors(:, i)' * bagPhi_Test(:, :, i));
logMessages(debugMsgLocation,sprintf('Bias value: %.4f\n', biasValue), debugLevel);
yhat = yhatTemp + biasValue;
if(length(Youtertest) == length(yhat))
logMessages(debugMsgLocation, sprintf('%d and %d: Dimensions of yhat and ytest match!\n', length(Youtertest), length(yhat)), debugLevel);
dlmwrite(strcat(outputFolder,'/predictedLabelsWeightVector.txt'), yhat');
dlmwrite(strcat(outputFolder,'/givenLabels.txt'), Youtertest');
logMessages(debugMsgLocation,sprintf('Predicted labels written to disk. Computing the auROC/auPRC, this may take some time...\n'), debugLevel);
[test_teAUROC, test_teAUPRC] = libsvm_plotroc(Youtertest', yhat', 'personal');
logMessages(debugMsgLocation,sprintf('done in %.3f\n', toc), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUROC with test-kernels: %.4f\n', test_teAUROC), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUPRC with test-kernels: %.4f\n', test_teAUPRC), debugLevel);
%% Write the instanceWeightInEachBag to file
% dlmwrite('instanceWeights_test.csv', instanceWeightsInEachBag, '-append');
% for i=1:size(instanceWeightsInEachBag,2)
% dlmwrite('instanceWeights_test.csv', cell2mat(instanceWeightsInEachBag{i})', '-append');
% end
if strcmp(whetherToPlotHeatmap, 'Yes')
logMessages(debugMsgLocation,sprintf('Plotting heatmaps...'), debugLevel);
if(predictUsingWeightVector == 1)
normInstanceWeightsInEachBag = standardizeMatrix(vertcat(cell2mat(instanceWeightsInEachBag)', cell2mat(instanceWeightsInEachBag_Test)'));
normInstanceWeightsInEachBag = standardizeMatrix(cell2mat(instanceWeightsInEachBag)');
heatmapFname = strcat(outputFolder,'/Heatmap-InstanceWeights', '_oligoLen', num2str(oligoLen), '_segmentSize', num2str(segmentSizeInBps) , '_maxDist', num2str(maxDist), '_nClusters', num2str(bestParamComb.best_nClusters), '_svmC', num2str(bestParamComb.best_C) ,'_sigma', num2str(bestParamComb.best_sigma), '.pdf');
p = plotHeatmap(normInstanceWeightsInEachBag, heatmapFname, 0, 1);
logMessages(debugMsgLocation,sprintf('done!\n'), debugLevel);
% 7. Visualize weight vectors and motifs
if strcmp(whetherToVisualizeWVector, 'Yes')
logMessages(debugMsgLocation,sprintf('Visualizing weight vectors...\n'), debugLevel);
visualize_wvector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation,sprintf('Done!\n'), debugLevel);
end % function ends