Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
function [overallWeightVector, weightVectors, biasValue] = obtainWeightVector(folderName, oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation)
% OBTAINWEIGHTVECTOR Compute per-subkernel weight vectors and their sum
% from the alpha values, support-vector indices and bias stored on disk.
%
% INPUT PARAMS
% Param 'folderName'
% Location/Path on disk; must contain 'alphas.txt', 'sv.txt' and
% 'biasValue.txt'
%
% Param 'oligoLen'
% Value of oligomer length for the ODH representation
% (not used inside this function; kept for interface compatibility)
%
% Param 'maxDist'
% Value of the maximum distance for the ODH representation
% (not used inside this function; kept for interface compatibility)
%
% Param 'subkernelWeights' (vector)
% Weights assigned to each sub-kernel upon solving MKL. A row vector is
% accepted and treated as a column vector; for a matrix only the first
% column is used.
%
% Param 'allSeqsAsBags' (cell array)
% All sequences represented as bags. Bag i is used as a matrix whose rows
% are feature dimensions and whose columns are the instances of the bag.
%
% Param 'rawConformedSetKernel' (cell array of matrices)
% The unnormalized conformally transformed MI kernel, one matrix per
% sub-kernel. Only the diagonal entries {j}(i,i) are read here, for
% normalization.
%
% Param 'allSeqsTransformationKernel' (cell array of matrices)
% Transformations of all segments for each sequence in the collection.
% Entry {i} is used as an nInstances-by-nClusters matrix.
%
% Param 'debugLevel'
% Passed through to logMessages
%
% Param 'debugMsgLocation'
% Passed through to logMessages
%
% OUTPUT PARAMS
% Param 'overallWeightVector'
% The overall weight vector: the sum over sub-kernels of the individual
% (already sub-kernel-weighted) weight vectors
%
% Param 'weightVectors'
% The individual weight vectors corresponding to the multiple kernels in
% CoMIK, one column per sub-kernel, in the same order as subkernelWeights
%
% Param 'biasValue'
% The bias value obtained solving SVM, as read from 'biasValue.txt'
%
%
% ADDITIONAL NOTES
% -- Alpha values are given from the trained model
% -- Location for alpha values: folderName specified in arguments
% -- Each alpha value is paired with the support-vector index at the same
%    position in 'sv.txt'; the two files must have the same number of
%    entries
%
% Author: snikumbh@mpi-inf.mpg.de

    alphaFilename = 'alphas.txt';
    svFilename = 'sv.txt';
    biasFilename = 'biasValue.txt';

    % Force column orientation so that files written as a single line and
    % files written one value per line are handled identically.
    alphaValues = dlmread(fullfile(folderName, alphaFilename));
    alphaValues = alphaValues(:);
    alphaIndices = dlmread(fullfile(folderName, svFilename));
    % Stored indices are shifted by one to obtain 1-based bag indices
    % (presumably they are written zero-based by the training script --
    % TODO confirm).
    alphaIndices = alphaIndices(:) + 1;
    biasValue = dlmread(fullfile(folderName, biasFilename));

    nBags = length(allSeqsAsBags);
    nSupportBags = numel(alphaIndices);
    if numel(alphaValues) ~= nSupportBags
        error('obtainWeightVector:sizeMismatch', ...
            'Found %d alpha values but %d support vector indices in %s.', ...
            numel(alphaValues), nSupportBags, folderName);
    end
    if any(alphaIndices < 1 | alphaIndices > nBags | alphaIndices ~= floor(alphaIndices))
        error('obtainWeightVector:badIndex', ...
            'Support vector indices must be integers between 1 and %d after the +1 shift.', ...
            nBags);
    end

    % Accept a row vector of sub-kernel weights as well as a column vector.
    if size(subkernelWeights, 1) == 1
        subkernelWeights = subkernelWeights';
    end
    nClusters = size(subkernelWeights, 1);
    nKernels = nClusters;

    % Bag-level phi restricted to the support vectors:
    % nFeatures-by-nSupportBags-by-nClusters.
    nFeatures = size(allSeqsAsBags{1}, 1);
    bagPhi = zeros(nFeatures, nSupportBags, nClusters);

    % Iterate in the order of the support-vector file so that column s of
    % bagPhi always belongs to alphaValues(s), whether or not the indices
    % in the file are sorted.
    for s = 1:nSupportBags
        bagIdx = alphaIndices(s);
        for k = 1:nClusters
            % Normalize by the square root of the bag's self-similarity
            % under sub-kernel k.
            normFactor = 1 / sqrt(rawConformedSetKernel{k}(bagIdx, bagIdx));
            % Weighted sum of the instance columns of the bag, the weights
            % being column k of the bag's transformation matrix.
            bagPhi(:, s, k) = normFactor * (allSeqsAsBags{bagIdx} * allSeqsTransformationKernel{bagIdx}(:, k));
        end
    end
    logMessages(debugMsgLocation, sprintf('BagPhi done for %d clusters or %d kernels\n', nClusters, nKernels), debugLevel);

    % The first dimension of the weight vector is the feature-vector
    % length (number of rows of a bag).
    weightVectors = zeros(nFeatures, nClusters);
    for k = 1:nClusters
        % Alpha-weighted combination of the support bags, scaled by the
        % sub-kernel weight.
        weightVectors(:, k) = subkernelWeights(k, 1) * (bagPhi(:, :, k) * alphaValues);
    end

    % Columns of weightVectors stay in sub-kernel order (not ranked by
    % sub-kernel weight).
    overallWeightVector = sum(weightVectors, 2);
end %function ends