Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
function [overallWeightVector, weightVectors, biasValue] = obtainWeightVector(folderName, oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation)
% OBTAINWEIGHTVECTOR Compute one weight vector per sub-kernel and their sum.
%
% Reads the alpha values, the support-vector bag indices and the bias value
% from text files inside 'folderName', builds a normalized bag-level feature
% vector for every support-vector bag and every sub-kernel, and combines
% them (weighted by the alphas and the sub-kernel weights) into one weight
% vector per sub-kernel.
%
% INPUT PARAMS
% Param 'folderName'
% Location/Path on disk of the folder holding 'alphas.txt', 'sv.txt' and
% 'biasValue.txt'
%
% Param 'oligoLen'
% Value of oligomer length for the ODH representation
% (accepted for interface compatibility; not used in this function)
%
% Param 'maxDist'
% Value of the maximum distance for the ODH representation
% (accepted for interface compatibility; not used in this function)
%
% Param 'subkernelWeights' (column vector)
% Weights assigned to each sub-kernel upon solving MKL. The number of rows
% determines the number of sub-kernels/clusters; only column 1 is read.
%
% Param 'allSeqsAsBags' (cell array)
% All sequences represented as bags. Cell i is indexed as a
% featureLength-by-nInstances matrix; the feature length is taken from the
% first cell.
%
% Param 'rawConformedSetKernel' (cell array of matrices)
% The unnormalized conformally transformed MI kernel, one matrix per
% sub-kernel. Only the diagonal entries (i,i) of the support-vector bags
% are read, for normalization.
%
% Param 'allSeqsTransformationKernel' (cell array of matrices)
% Transformations of all segments for each sequence in the collection.
% Cell i is indexed as an nInstances-by-nClusters matrix.
%
% Param 'debugLevel'
% Passed through to logMessages
%
% Param 'debugMsgLocation'
% Passed through to logMessages
%
% OUTPUT PARAMS
% Param 'overallWeightVector'
% The overall weight vector, i.e. the sum of the individual weight vectors
% (featureLength-by-1)
%
% Param 'weightVectors'
% The individual weight vectors corresponding to the multiple kernels in
% CoMIK, one column per sub-kernel, in the same order as 'subkernelWeights'
%
% Param 'biasValue'
% The bias value obtained solving SVM, as read from 'biasValue.txt'
%
%
% ADDITIONAL NOTES
% -- Alpha values are given from the trained model
% -- Location for alpha values: folderName specified in arguments
% -- The indices in 'sv.txt' are incremented by one before being used as
%    bag indices (presumably they are stored zero-based -- confirm against
%    the script that writes them)
% -- The k-th alpha value is paired with the k-th support-vector index, in
%    file order; the indices do not need to be sorted
% -- An error is raised when the number of alphas and indices differ, or
%    when an index does not refer to an existing bag
%
% Author: snikumbh@mpi-inf.mpg.de

    alphaFilename = 'alphas.txt';
    svFilename = 'sv.txt';
    biasFilename = 'biasValue.txt';

    alphaValues = dlmread(fullfile(folderName, alphaFilename));
    alphaIndices = dlmread(fullfile(folderName, svFilename));
    biasValue = dlmread(fullfile(folderName, biasFilename));

    % Force column orientation so that the pairing below does not depend on
    % how the files happen to be laid out.
    alphaValues = alphaValues(:);
    svBagIndices = alphaIndices(:) + 1; % these are nothing but bagIndices

    nSupportVectors = numel(svBagIndices);
    nBags = length(allSeqsAsBags);

    if numel(alphaValues) ~= nSupportVectors
        error('obtainWeightVector:svAlphaMismatch', ...
            'Found %d alpha values but %d support vector indices in %s', ...
            numel(alphaValues), nSupportVectors, folderName);
    end
    if any(svBagIndices < 1 | svBagIndices > nBags | svBagIndices ~= fix(svBagIndices))
        error('obtainWeightVector:svIndexOutOfRange', ...
            'Support vector indices in %s must refer to one of the %d bags', ...
            folderName, nBags);
    end

    nClusters = size(subkernelWeights, 1);
    nKernels = nClusters;
    featureLen = size(allSeqsAsBags{1}, 1);

    % Bag-level phi, restricted to the support vectors.
    % bagPhi is featureLen-by-nSupportVectors-by-nClusters; column k belongs
    % to the k-th support vector so that it lines up with alphaValues(k).
    bagPhi = zeros(featureLen, nSupportVectors, nClusters);
    for k = 1:nSupportVectors
        bagIdx = svBagIndices(k);
        bag = allSeqsAsBags{bagIdx};
        for j = 1:nClusters
            % Scale by the bag's self-similarity under sub-kernel j.
            normFactor = 1 / sqrt(rawConformedSetKernel{j}(bagIdx, bagIdx));
            % Weighted sum over the instances (columns) of the bag, with the
            % per-instance weights taken from column j of the transformation.
            bagPhi(:, k, j) = normFactor * (bag * allSeqsTransformationKernel{bagIdx}(:, j));
        end
    end
    logMessages(debugMsgLocation, sprintf('BagPhi done for %d clusters or %d kernels\n', nClusters, nKernels), debugLevel);

    % The first dimension of the weight vector comes from the feature vector
    % length, which depends on the oligoLength param.
    weightVectors = zeros(featureLen, nClusters);
    for j = 1:nClusters
        % subkernels for non-shifted bags
        weightVectors(:, j) = subkernelWeights(j, 1) * (bagPhi(:, :, j) * alphaValues);
    end

    overallWeightVector = sum(weightVectors, 2);
end %function ends