function [allSeqsAsBags, allSeqsConformedSetKernel, subkernelWeights, thetaVals, instanceWeightsInEachBag, resultString, bestParamComb, test_teAUROC, test_teAUPRC, predictions] = comik_main_with_weight_vector(givenPosFastaFilename, givenNegFastaFilename, givenNPos, givenNNeg, oligoLen, maxDist, segmentSizeInBps, nClusterVals, sigmaVals, Cs, mklNorm, nFolds, testIndices, debugLevel, debugMsgLocation, outputFolder, runSummaryFilename, whetherToPlotHeatmap, computationVersion, whetherToVisualizeWVector)
% COMIK_MAIN_WITH_WEIGHT_VECTOR
% Main function for CoMIK
%
% INPUT PARAMS
% Param 'givenPosFastaFilename'
% Name of FASTA file with positive examples
%
% Param 'givenNegFastaFilename'
% Name of FASTA file with negative examples
%
% Param 'givenNPos'
% Number of positive examples to use (can be less than those provided in file)
%
% Param 'givenNNeg'
% Number of negative examples to use (can be less than those provided in file)
%
% Param 'oligoLen' and 'maxDist'
% Specify the oligomer length and the maximum distance for the ODH
% representation. Caution: A combination of large values can be memory intensive!
%
% Param 'segmentSizeInBps'
% Specify the segment-size in basepairs for CoMIK
%
% Param 'nClusterVals' (vector)
% Specify the number of clusters for CoMIK
%
% Param 'sigmaVals' (vector)
% Specify the sigma values for the Gaussian transformation
%
% Param 'Cs' (vector)
% Cost values for SVM
%
% Param 'mklNorm'
% Specify p-norm value for MKL
%
% Param 'nFolds'
% Specify number of inner cross-validation folds
%
% Param 'testIndices' (vector)
% Indices of the sequences in the FASTA file which are to be considered as unseen
% test examples. For example, with a FASTA file containing a total of 100
% positive sequences followed by 100 negative sequences, the test indices
% are given as
% testIndices = [81:100 181:200]
% for the corresponding 20 positives and 20 negatives to be treated as test
% examples.
%
% Param 'debugLevel' (0/1/2)
%
% Param 'debugMsgLocation' (1/fileID)
%
% Param 'outputFolder'
% Specify name of folder to write output results to
%
% OUTPUT PARAMS
% Param 'allSeqsAsBags'
% All sequences represented as bags
%
% Param 'allSeqsConformedSetKernel' (matrix)
% The conformally transformed MI kernel
%
% Param 'subkernelWeights' (vector)
% Weights assigned to each sub-kernel upon solving MKL
%
% Param 'thetaVals' (vector)
% Theta values used for obtaining the instance weights
%
% Param 'instanceWeightsInEachBag'
% Weights assigned to instances in each bag using the final model
%
% Param 'resultString'
% A combined string of per iteration results (as printed in the resultSummary file)
%
% Param 'bestParamComb' (Struct)
% Best performing values for various params
%
% Param 'test_teAUROC'
% AUROC value for the test sequences
%
% Param 'test_teAUPRC'
% AUPRC value for the test sequences
%
% Param 'predictions'
% Prediction vector
%
% ADDITIONAL NOTES
% -- Segmentation
% . All segments, shifted and non-shifted in one bag; Order of segments: non-shifted segments followed by shifted segments.
% This results in #kernels = #clusterCentres
% --
%
% Author: snikumbh
%
% debugLevel 2 prints all messages
% debugLevel 1 placeholder level, may be used in the future
% debugLevel 0 silent on most of the messages, prints very few messages for the user
% Setting of seed for the Matlab session for reproducibility: already done in the wrapper function.
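%
% USAGE EXAMPLE (illustrative only)
% A minimal sketch of a call, assuming FASTA files 'pos.fa' and 'neg.fa' exist in the
% working directory; all file names and parameter values below are hypothetical and
% are only meant to show the argument order (20 arguments in total):
%
%   [bags, K, betas, thetas, instWts, resStr, best, teAUROC, teAUPRC, preds] = ...
%       comik_main_with_weight_vector('pos.fa', 'neg.fa', 100, 100, 2, 50, 100, ...
%           [2 5], [1 10], [0.1 1 10], 2, 5, [81:100 181:200], 0, 1, ...
%           'comik_output', 'comik_output/runSummary.txt', 'No', 'Looping', 'Yes');
%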
% Handle arguments and values/defaults
totalArguments = 20;
predictUsingWeightVector = 1;
if nargin < totalArguments
whetherToVisualizeWVector = 'Yes';
end
if nargin < totalArguments - 1
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
end
if nargin < totalArguments - 2
whetherToVisualizeWVector = 'Yes';
computationVersion = 'Looping';
whetherToPlotHeatmap = 'No';
end
% Read positives
logMessages(debugMsgLocation,sprintf('Positives...'), debugLevel);
[nPos, omittedPosIndices, allSeqsRawPosFasta] = readFastaSequences(givenPosFastaFilename, segmentSizeInBps, givenNPos, outputFolder, debugLevel, debugMsgLocation);
% nPos and nNeg could be smaller than or equal to givenNPos and givenNNeg resp.
% due to omitting any sequences whose length is less than the segmentSizeInBps
%
% Read Negatives
logMessages(debugMsgLocation,sprintf('Negatives...'), debugLevel);
[nNeg, omittedNegIndices, allSeqsRawNegFasta] = readFastaSequences(givenNegFastaFilename, segmentSizeInBps, givenNNeg, outputFolder, debugLevel, debugMsgLocation);
%
% Combine positives + negatives
nBags = nPos + nNeg;
% Check if we need to update the testIndices if some sequences have been omitted
% if omittedPosIndices and omittedNegIndices are empty, then no updates are required
if ~isempty(omittedPosIndices)
%nOmmitted = length(find(ismember(testIndices, omittedPosIndices)));
testIndices(find(ismember(testIndices, omittedPosIndices))) = [];
%logMessages(debugMsgLocation,sprintf('%d positive sequences omitted from test set.\n', nOmmitted), debugLevel);
end
if ~isempty(omittedNegIndices)
%nOmmitted = length(find(ismember(testIndices, omittedNegIndices)));
testIndices(find(ismember(testIndices, nPos+omittedNegIndices))) = [];%offset by nPos
%logMessages(debugMsgLocation,sprintf('%d negative sequences omitted from test set.\n', nOmmitted), debugLevel);
end
% Additionally, account for the reduced number of sequences: shift each remaining test index down by the number of omitted sequences preceding it, so that no index exceeds nBags (see the worked example after this loop).
for i=1:length(testIndices)
testIndices(i) = testIndices(i) - nnz(omittedPosIndices < testIndices(i)) - nnz( (nPos+omittedNegIndices) < testIndices(i));
end
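% Worked example (illustrative): with givenNPos = 100 and only positive sequence 5
% omitted (omittedPosIndices = [5], omittedNegIndices = []), a positive test index of
% 81 becomes 81 - 1 = 80 and a negative test index of 181 becomes 181 - 1 = 180, so
% each index keeps pointing at the same retained sequence.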
% Put them together
allSeqsRawFasta.sequence = cell(1, nBags);
for i=1:nPos
allSeqsRawFasta.sequence{i} = allSeqsRawPosFasta.sequence{i};
end
clear allSeqsRawPosFasta;
for i=1:nNeg
allSeqsRawFasta.sequence{i+nPos} = allSeqsRawNegFasta.sequence{i};%offset by nPos
end
clear allSeqsRawNegFasta;
% Write the revised set of test indices to disk to support reproducibility
if ~isempty(omittedPosIndices) || ~isempty(omittedNegIndices)
sortedTestIndices = sort(testIndices);
dlmwrite(strcat(outputFolder, '/testIndices_New.txt'), sortedTestIndices');
end
% generate labels
Y = [ones(1, nPos) -ones(1, nNeg)];
logMessages(debugMsgLocation,sprintf('#Bags: %d\n', nBags), debugLevel);
% default for 'acgt'
alphabet = 'acgt';
% Set the default value for segmentSizeInPercentage.
% Currently, 0 is the only valid value.
segmentSizeInPercentage = 0;
logMessages(debugMsgLocation,sprintf('Collecting all bags...'), debugLevel);
tic;
idx1 = 0;
thisInstances = [];
for k=1:nBags
if oligoLen >= 3
% For oligoLen of 3 or larger this step may take some time, so report progress every 100 bags.
if rem(k,100) == 0
logMessages(debugMsgLocation,sprintf('%d--', k), debugLevel);
end
end
if any(k == testIndices)
% Do nothing now; the test sequences are collected later,
% after the whole training stage is completed.
else
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags{idx1} = [tempNS tempS];
thisInstances = [thisInstances (NS + S)];
end
end
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
nBagsForTrain = idx1;
nBagsForTest = nBags-idx1;
logMessages(debugMsgLocation,sprintf('#Test: %d\n#Train: %d\n', nBagsForTest, nBagsForTrain), debugLevel);
logMessages(debugMsgLocation,sprintf('Segmentation statistics:\n'), debugLevel);
logMessages(debugMsgLocation,sprintf(' Mean number of instances in bags: %.2f\t Median: %g\t Max.: %d\t Min.: %d\n', mean(thisInstances), median(thisInstances), max(thisInstances), min(thisInstances)), debugLevel);
% Variable 'nBags' holds nBagsForTrain + nBagsForTest
%
% 2. Compute the instanceWide kernel that is used further
%
instanceEnds = cumsum(thisInstances);
instanceStarts = cumsum(thisInstances) - thisInstances + 1;
nInstances = instanceEnds(end);
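% Worked example (illustrative): if thisInstances = [3 2 4], then instanceEnds = [3 5 9]
% and instanceStarts = [1 4 6], i.e. bag 2 owns instances 4..5 in the instance-wide ordering.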
logMessages(debugMsgLocation,sprintf('Total instances from all bags: %d\n', nInstances), debugLevel);
% Use the sparseComputation flag when computing the instance-wide kernel.
% IMP: setting sparseComputation to 0 uses the broken-down approach instead, which has not
% been tested for either correctness or efficiency; we therefore recommend, and use,
% sparseComputation here.
sparseComputation = 1;
[instanceStarts, instanceEnds, instanceWideKernel] = computeInstanceWideKernel(allSeqsAsBags, thisInstances, sparseComputation, debugLevel, debugMsgLocation);
%
% The functions below are recomputed in each inner cross-validation iteration;
% the cross-validation loop itself is also handled here.
%
% 3. Computing the conformed multi-instance kernel is done via multiple kernel
% learning
% -- conformalXformationParam is the Gaussian bandwidth (sigma) we wish to use
%
resultString = [];
bestParamComb.best_C = 0.0;
bestParamComb.best_teAUROC = 0.0;
bestParamComb.best_nClusters = 0;
bestParamComb.best_sigma = 0.0;
% open summary file for writing
fid=fopen(runSummaryFilename, 'a');
Youtertest = Y(testIndices);
trainIndicesInY = setdiff([1:nBags], testIndices);
trainIndicesInBags = [1:nBagsForTrain];
YwholeTrain = Y(trainIndicesInY);
nPosTr = size(find(YwholeTrain > 0),2);
nNegTr = size(find(YwholeTrain < 0),2);
% Handle class imbalance: the C used for the negatives is multiplied by this factor
imbalance = nPosTr/nNegTr;
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
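% Illustrative: with 800 positive and 200 negative training bags, imbalance = 4.0;
% this factor is passed to performMKLWithShogunPython which, as noted above,
% multiplies it into the C used for the negative class.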
for nc=1:size(nClusterVals,2)
for sigmaItr=1:size(sigmaVals,2)
nClusters = nClusterVals(1,nc);
conformalXformationParam = sigmaVals(1, sigmaItr);
% send only YwholeTrain and training set of bags
%fprintf('Seed-setting: 2\n');
rng(11, 'twister');
foldIdForIndices = cvpartition(YwholeTrain, 'Kfold', nFolds);
% ^returns logicals
%
% 4. All kernels ready. Pose this as a multiple kernel learning problem next.
% - We have used shogun's python modular interface to perform MKL.
% - Upon performing this MKL, the kernel weights obtained are squared theta values (see paper for details)
%
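% (Consequently, the theta values used later for instance weighting are recovered as the
%  square roots of these subkernel weights; see getInstanceWeights.)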
for c=1:size(Cs,2)
C = Cs(c);
test_auROCs = []; test_auPRCs = [];
logMessages(debugMsgLocation,sprintf('C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', C, nClusters, conformalXformationParam), debugLevel);
for f=1:nFolds
logMessages(debugMsgLocation,sprintf('Fold: %d\n', f), debugLevel);
logMessages(debugMsgLocation,sprintf('#training indices: %d\n', length(find(foldIdForIndices.training(f) == 1))), debugLevel);
logMessages(debugMsgLocation,sprintf('#test indices: %d\n', length(find(foldIdForIndices.test(f) == 1))), debugLevel);
% For fold f, corresponding examples will be used as test
thisTrainIndices = find(foldIdForIndices.training(f) == 1);
thisTestIndices = find(foldIdForIndices.test(f) == 1);
%
% The conformed MI kernel is also computed inside the folds loop because the clustering
% step must not use instances from the test bags; apart from that restriction, the
% kernel is computed for all (wholeTrain) indices.
% For example, trainIndicesInBags = 1:1000 % row vector
trainIndicesForConformalComputation = thisTrainIndices;
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...'), debugLevel);
tic;
[allSeqsConformedSetKernel, rawConformedSetKernel , allSeqsTransformationKernel] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, ...
instanceWideKernel, trainIndicesForConformalComputation, allSeqsAsBags, ...
nClusters, conformalXformationParam, YwholeTrain, ...
computationVersion, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation,sprintf('\ndone in %.3f seconds\n', toc), debugLevel);
%
logMessages(debugMsgLocation,sprintf('MKL, Fold %d, C-SVM: %.3f, nClusters: %d, Sigma: %.3f\n', f, C, nClusters, conformalXformationParam), debugLevel);
Ytrain = YwholeTrain(thisTrainIndices);
Ytest = YwholeTrain(thisTestIndices);
for k=1:nClusters
% Conformed kernels
trainConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTrainIndices);
testConformedKernel{k} = allSeqsConformedSetKernel{k}(thisTrainIndices, thisTestIndices);
%
end
% perform MKL: weights, predictions should be returned after mkl
whetherPerformTest = 1; % set 1 to use test kernels to make predictions
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
[subkernelWeights, predictions] = performMKLWithShogunPython(C, Ytrain, Ytest, mklNorm, imbalance, ...
trainConformedKernel, testConformedKernel, outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% If solving MKL failed, both subkernelWeights and predictions are zero vectors; handle that case here.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables to 0.0. a) this_teAUROC, b) this_teAUPRC
this_teAUROC = 0.0;
this_teAUPRC = 0.0;
else %solving MKL was fine
%
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
[this_teAUROC, this_teAUPRC] = libsvm_plotroc(Ytest', predictions', 'personal');
% print this to the summary file
end
this_resultString = [datestr(now), '--#Clusters:', num2str(nClusters), '--SigmaGaussian:', ...
num2str(conformalXformationParam), '--C-SVM:', num2str(C), '--mklNorm:', ...
num2str(mklNorm), '--teAUROC:', num2str(this_teAUROC), '--teAUPRC:', ...
num2str(this_teAUPRC), '\n'];
resultString = [resultString this_resultString];
fprintf(fid, this_resultString);
test_auROCs = [test_auROCs this_teAUROC];
test_auPRCs = [test_auPRCs this_teAUPRC];
end % nFolds for loop ends
% Note the best values based on the average performance on the n-folds
if mean(test_auROCs) >= bestParamComb.best_teAUROC
logMessages(debugMsgLocation,sprintf('mean_teAUROC: %.4f\n', mean(test_auROCs)), debugLevel);
bestParamComb.best_teAUROC = mean(test_auROCs);
bestParamComb.best_C = C;
bestParamComb.best_nClusters = nClusters;
bestParamComb.best_sigma = conformalXformationParam;
end
end % for loop for different values of C ends
% -- Noted: best performing pair of values for nClusters and Gaussian RBF sigma
% -- Model selected after cross-validation will be used further to get the instance weights
end % for loop for different values of sigma ends
end % for loop for different values of nClusters ends
fclose(fid);
% Clear variables created/used during model selection phase, and not required any more
clear trainConformedKernel;
clear testConformedKernel;
%
% 5. Use the best param combination to train on the whole training set and predict the test set here
%
logMessages(debugMsgLocation,sprintf('---Re-training and Test---\n'), debugLevel);
% Re-train the model with all train instances together
% trainIndicesInBags is 1:nBagsForTrain
nClusters = bestParamComb.best_nClusters;
conformalXformationParam = bestParamComb.best_sigma;
logMessages(debugMsgLocation,sprintf('Best param-values:\nSVM-Cost: %.3f, nClusters: %d, sigma: %.3f\n', bestParamComb.best_C, nClusters, conformalXformationParam), debugLevel);
logMessages(debugMsgLocation,sprintf('Re-training using the complete training set examples...\n'), debugLevel);
logMessages(debugMsgLocation,sprintf('Conformed Multi-instance kernel for all bags...\n'), debugLevel);
[allSeqsConformedSetKernel, rawConformedSetKernel, allSeqsTransformationKernel, clusterCentres] = ...
computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, instanceWideKernel, ...
trainIndicesInBags, allSeqsAsBags, nClusters, conformalXformationParam, ...
YwholeTrain, computationVersion, debugLevel, debugMsgLocation);
%
%
for k=1:nClusters
% Conformed kernels
VtrainConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
VtestConformedKernel{k} = allSeqsConformedSetKernel{k}(trainIndicesInBags, trainIndicesInBags);
% dummy placeholder; hence train-by-train instead of test-by-train
end
%
whetherPerformTest = 0;
% We now set this to 0, since we will make predictions using computed weight vector and not the kernel
logMessages(debugMsgLocation,sprintf('Imbalance: %.2f\n', imbalance), debugLevel);
%
[subkernelWeights, predictions] = performMKLWithShogunPython(bestParamComb.best_C, YwholeTrain, Youtertest, mklNorm, imbalance, ...
VtrainConformedKernel, VtestConformedKernel, ...
outputFolder, whetherPerformTest, debugLevel, debugMsgLocation);
% Ideally, this is not needed here, since such param values won't (or shouldn't) be selected as best values
% If that does happen, something is probably wrong.
if sum(any(subkernelWeights)) == 0 && sum(any(predictions)) == 0
% set the following variables: train_AUROC, train_AUPRC
train_AUROC = 0.0;
train_AUPRC = 0.0;
else %solving MKL was fine
% Predictions returned at this juncture are predictions for the training examples, they are not test predictions.
[train_AUROC, train_AUPRC] = libsvm_plotroc(YwholeTrain', predictions', 'personal');
end
logMessages(debugMsgLocation,sprintf('train_AUROC with whole training set: %.4f\n', train_AUROC), debugLevel);
% Below, we make our own predictions for the test examples using the weight vector
% 6.1 We will now collect the test examples
tic;
logMessages(debugMsgLocation, sprintf('Now collecting the unseen test examples...'), debugLevel);
idx1 = 0;
for k=1:nBags
if any(k == testIndices)
% Collection of test sequences takes place here,
% now that the whole training stage is completed.
idx1 = idx1 + 1;
%% Non-shifted
[tempNS, NS] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'no-shift', alphabet);
%% Shifted
[tempS, S] = getODHFeatureVecInstances(allSeqsRawFasta.sequence{k}, ...
oligoLen, maxDist, segmentSizeInPercentage, ...
segmentSizeInBps, 'shift', alphabet);
allSeqsAsBags_Test{idx1} = [tempNS tempS];
end
end
clear allSeqsRawFasta;
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
% Clearing this frees up memory, which helps when loading the many test examples typical of computational biology problems.
if(predictUsingWeightVector == 1)
logMessages(debugMsgLocation, sprintf('---Prediction using weight vector---\n'), debugLevel);
% 6.2 Use the subkernel weights and get transformed instances among the test examples
% instanceWeights for instances in training bags
% filename passed as argument to getInstanceWeights is used to write them to file.
[thetaVals, instanceWeightsInEachBag] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel);
% Get the transformations for the test examples, and the diagonal elements which will be used for normalization later.
% Note: if the number of test examples is extremely large, as is often the case, proceeding block-wise at this point may be advisable.
%
tic;
logMessages(debugMsgLocation, sprintf('Applying transformation to the test samples, this might take some time...\n'), debugLevel);
[allSeqsTransformationKernel_Test] = applyTransformation(allSeqsAsBags_Test, clusterCentres, conformalXformationParam, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%
% Getting the instance weightings for the test examples
[thetaVals, instanceWeightsInEachBag_Test] = getInstanceWeights(subkernelWeights, allSeqsTransformationKernel_Test);
% At this point, all (train + test examples) instance weights have been obtained.
%
% 6.3 Use the function obtainWeightVector to procure the weight vectors and the bias value
% The weight vector is needed in its original order, not ordered by rank.
tic;
logMessages(debugMsgLocation, sprintf('Computing the weight vector...'), debugLevel);
[overallWeightVector, weightVectors, biasValue] = obtainWeightVector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
%% Don't need allSeqsAsBags, allSeqsTransformationKernel?
% clear allSeqsAsBags allSeqsTransformationKernel;
%
% 6.4 Use these to make predictions using y = w*x + b
yhatTemp = 0.0;
%
% bagPhi for Test is nFeatures-by-nBagsForTest-by-nClusters
bagPhi_Test = zeros( size(allSeqsAsBags_Test{1},1), nBagsForTest, nClusters);
%
% For normalization of test samples, we need to obtain the normalization factor for them.
%
for i=1:length(allSeqsAsBags_Test)% this is nBagsForTest
for j=1:nClusters
% -- allSeqsTransformationKernel_Test{i} already has size nInstances x nClusters.
% -- Normalization is also needed
term = sum(allSeqsAsBags_Test{i} .* repmat(allSeqsTransformationKernel_Test{i}(:,j)', size(allSeqsAsBags_Test{i},1), 1), 2);
bagPhi_Test(:, i, j) = term ./ norm(term, 2);
end
end
logMessages(debugMsgLocation,sprintf('Bag Phi for test samples done.\n'), debugLevel);
dlmwrite(strcat(outputFolder,'/WeightVectors.txt'), weightVectors');
for i=1:nClusters
yhatTemp = yhatTemp + (weightVectors(:, i)' * bagPhi_Test(:, :, i));
end
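% At this point, yhatTemp(i) = sum over clusters k of weightVectors(:,k)' * bagPhi_Test(:,i,k),
% i.e. the w*x part per test bag; the bias value is added next to complete y = w*x + b.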
%
logMessages(debugMsgLocation,sprintf('Bias value: %.4f\n', biasValue), debugLevel);
yhat = yhatTemp + biasValue;
if(length(Youtertest) == length(yhat))
logMessages(debugMsgLocation, sprintf('%d and %d: Dimensions of yhat and ytest match!\n', length(Youtertest), length(yhat)), debugLevel);
end
dlmwrite(strcat(outputFolder,'/predictedLabelsWeightVector.txt'), yhat');
dlmwrite(strcat(outputFolder,'/givenLabels.txt'), Youtertest');
logMessages(debugMsgLocation,sprintf('Predicted labels written to disk. Computing the auROC/auPRC, this may take some time...\n'), debugLevel);
tic;
[test_teAUROC, test_teAUPRC] = libsvm_plotroc(Youtertest', yhat', 'personal');
logMessages(debugMsgLocation,sprintf('done in %.3f seconds\n', toc), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUROC with test-kernels: %.4f\n', test_teAUROC), debugLevel);
logMessages(debugMsgLocation,sprintf('Validation_teAUPRC with test-kernels: %.4f\n', test_teAUPRC), debugLevel);
end
%
%
%% Write the instanceWeightInEachBag to file
% dlmwrite('instanceWeights_test.csv', instanceWeightsInEachBag, '-append');
% for i=1:size(instanceWeightsInEachBag,2)
% dlmwrite('instanceWeights_test.csv', cell2mat(instanceWeightsInEachBag{i})', '-append');
% end
if strcmp(whetherToPlotHeatmap, 'Yes')
logMessages(debugMsgLocation,sprintf('Plotting heatmaps...'), debugLevel);
if(predictUsingWeightVector == 1)
normInstanceWeightsInEachBag = standardizeMatrix(vertcat(cell2mat(instanceWeightsInEachBag)', cell2mat(instanceWeightsInEachBag_Test)'));
else
normInstanceWeightsInEachBag = standardizeMatrix(cell2mat(instanceWeightsInEachBag)');
end
%
heatmapFname = strcat(outputFolder,'/Heatmap-InstanceWeights', '_oligoLen', num2str(oligoLen), '_segmentSize', num2str(segmentSizeInBps) , '_maxDist', num2str(maxDist), '_nClusters', num2str(bestParamComb.best_nClusters), '_svmC', num2str(bestParamComb.best_C) ,'_sigma', num2str(bestParamComb.best_sigma), '.pdf');
%
p = plotHeatmap(normInstanceWeightsInEachBag, heatmapFname, 0, 1);
logMessages(debugMsgLocation,sprintf('done!\n'), debugLevel);
%
end
%
% 7. Visualize weight vectors and motifs
%
if strcmp(whetherToVisualizeWVector, 'Yes')
logMessages(debugMsgLocation,sprintf('Visualizing weight vectors...\n'), debugLevel);
visualize_wvector(char(outputFolder), oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation);
end
logMessages(debugMsgLocation,sprintf('Done!\n'), debugLevel);
%
end % function ends