Skip to content

Commit

Permalink
Cleaning up compute ConformalMI kernel implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
snikumbh committed Jul 19, 2017
1 parent 462c8c0 commit 02bed78
Showing 1 changed file with 32 additions and 76 deletions.
108 changes: 32 additions & 76 deletions computeConformedMultiInstanceKernel.m
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
function [conformedMultiInstanceKernel, rawConformedMultiInstanceKernel, instancesTransformationKernel, clusterCentres] = computeConformedMultiInstanceKernel(instanceStarts, instanceEnds, instanceWideKernel, trainIndices, allSeqsAsBags, nClusters, sig, Y, computationVersion, debugLevel, debugMsgLocation)
%
% 1. Obtain expansion points
% Obtain expansion points
%
% 1.1 Convert allSeqsAsBags in to a matrix X of dimensions nInstances x FVlength
% -- Convert allSeqsAsBags into a matrix X of dimensions nInstances x FVlength
%
% spread out the instances, get the nInstances
% trainIndices are trainingIndices
% -- spread out the instances, get the nInstances
% -- trainIndices are trainingIndices
% Edit: We earlier expected trainIndices to be a row vector. Now it can be either
% a row or a column vector.

if nargin < 10
% default
computationVersion = 'Looping';
end
nBags = length(allSeqsAsBags);
Expand All @@ -20,24 +22,25 @@
instanceIDVector = zeros(1,instanceEnds(end));
end

%nInstancesPos = 0;

% maintain a count of the instances arising from training indices/bags
% this is used later to define the number of rows for XtoPassFrom
% Maintain a count of the instances arising from training indices/bags
% This is used later to define the number of rows for XtoPassFrom
currentTotal = 0;
for i=1:nBags
% #instances at each index/bag
idxInstances(i) = size(allSeqsAsBags{i}, 2);
if strcmp(computationVersion, 'AccumArray')
instanceIDVector(instanceStarts(i):instanceEnds(i)) = repmat(i, [1 size(allSeqsAsBags{i}, 2)]);
end
% useful to have stratified repressentation, we don't need this now.
%% useful to have stratified representation, we don't need this now.
% if i <= posInd
% nInstancesPos = nInstancesPos + size(allSeqsAsBags{i}, 2);
% end % useful to have stratified repressentation
% end
%% useful to have stratified representation
if any(i==trainIndices) == 1 %check, if a trainingIndex, get instances
currentTotal = currentTotal+idxInstances(i);
end % useful to count instances only in training bags
end
%% useful to count instances only in training bags
end
forIndicesInX = cumsum(idxInstances);
nInstances = sum(idxInstances);
Expand All @@ -49,7 +52,7 @@
%X = zeros( nInstances, size(allSeqsAsBags{1},1) );
logMessages(debugMsgLocation, sprintf('\n--- currentTotal for setting a zeros XtoPassFrom: %d\n', currentTotal), debugLevel);
XtoPassFrom = zeros( currentTotal, size(allSeqsAsBags{1},1) );
% make sure that only instances from train bags are taken
% Make sure that only instances from training bags are taken
% trainIndices store the right set of indices as passed to it
currentTotal = 0;
for i=1:length(trainIndices)%indices of relevant bags/ training bags
Expand All @@ -59,21 +62,22 @@
logMessages(debugMsgLocation, sprintf('--- allSeqsAsBags is now n x p matrix, ready for kmeans\n'), debugLevel);
%
% 1.2. Apply kmeans
% -- we are using Matlab's algorithm for now.
% -- we are using Matlab's algorithm for kmeans.
% -- using the buckshot heuristic means:
% - randomly sample sqrt(nClusters * nInstances)
% data points from nInstances
% - this will give nClusters

% sending complete X for k-means clustering, also perform replicates
% XtoPass = X;
% If sending complete X for k-means clustering, set XtoPass = X;
% Replicates
nofRep = 1;
% randomly sample without replacement
% Randomly sample without replacement
% XtoPassFrom contains only the relevant instances, hence freely select any instances from it
nSampled = round(sqrt(nClusters * currentTotal));
logMessages(debugMsgLocation, sprintf('--- for kmeans, Total: %d, nSampled: %d, nClusters: %d\n', currentTotal, nSampled, nClusters), debugLevel);
XtoPass = XtoPassFrom( transpose(randsample( currentTotal, round(sqrt(nClusters * currentTotal)) )), :);
%XtoPass = XtoPassFrom;
%% Without the buckshot heuristic, use
% XtoPass = XtoPassFrom;
clear('XtoPassFrom');
[clusterCentres, matrixOfDistancesFromCentresForPassedVectors] = getExpansionPoints(XtoPass, nClusters, nofRep, debugLevel, debugMsgLocation);
clear('XtoPass');
Expand All @@ -96,7 +100,7 @@
% instancesTransformationKernel is cell array for number of bags
% instancesTransformationKernel{eachBag} is nInstanceInBag-by-nClusters

%initialize kernels
% Initialize kernels
for i=1:nClusters
conformedMultiInstanceKernel{i} = zeros(nBags);
rawConformedMultiInstanceKernel{i} = zeros(nBags);
Expand All @@ -110,13 +114,12 @@
%%instanceSubs are only the upper-triangular indices including the diagonal
%clear('tempInstanceSubs');
instanceWideKernelRepeated = repmat(instanceWideKernel, 1, 1, nClusters);
%allInstancesTransformations = zeros(nInstances, nClusters);
allInstancesTransformations = cat(1,instancesTransformationKernel{:});
for i=1:nClusters
KernelAsVector = (allInstancesTransformations(:,i) * allInstancesTransformations(:,i)') .* instanceWideKernelRepeated(:,:,i);
tempKernel = accumarray(instanceSubs, KernelAsVector(:));
assert(size(tempKernel,1) == size(zeros(nBags),1));
%tempKernel = triu(tempKernel, 1) + tempKernel'; %baecause instanceSubs stored indices for only the upper-triangular part of the matrix
%tempKernel = triu(tempKernel, 1) + tempKernel'; %because instanceSubs stored indices for only the upper-triangular part of the matrix
conformedMultiInstanceKernel{i} = normalizeKernel(tempKernel);
end
clear('instanceWideKernelRepeated');
Expand All @@ -126,54 +129,16 @@
logMessages(debugMsgLocation, sprintf('done in %.3f seconds', toc), debugLevel);
end
if strcmp(computationVersion, 'Looping')
% one can convert the transformationKernels into 3D matrices
%% one can convert the transformationKernels into 3D matrices
logMessages(debugMsgLocation, sprintf('\n--- Looping-over-bags version...'), debugLevel);
do_reshaped = 1;
if issparse(allSeqsAsBags{1}) && do_reshaped == 0
logMessages(debugMsgLocation, sprintf('Doing the reshaped version for sparse vectors..\n'), debugLevel);
% pre-allocate memory, doesn't help in speed
%for i=1:nClusters
% rawConformedMultiInstanceKernel{i} = zeros(nBags);
% conformedMultiInstanceKernel{i} = zeros(nBags);
%end
tic;
tempKernelCollection = zeros(nBags, nBags, nClusters);
for b1=1:nBags
if rem(b1, 100) == 0
logMessages(debugMsgLocation, sprintf('Bag/Row %d--', b1), debugLevel);
end
b1r = reshape(instancesTransformationKernel{b1}, idxInstances(b1) , [], nClusters);
b1BagAsMat = full(allSeqsAsBags{b1});
for b2=b1:nBags
b2r = reshape(instancesTransformationKernel{b2}, idxInstances(b2) , [], nClusters);
b2rt = permute(b2r,[2,1,3]);
b1b2_transformationProducts = bsxfun(@times, b1r , b2rt);
b2BagAsMat = full(allSeqsAsBags{b2});
tempToBeSummed = bsxfun(@times, b1b2_transformationProducts, (b1BagAsMat' * b2BagAsMat));
%tempToBeSummed = bsxfun(@times, b1b2_transformationProducts, full(allSeqsAsBags{b1}' * allSeqsAsBags{b2}));
%tempToBeSummed = bsxfun(@times, b1b2_transformationProducts, instanceWideKernel(instanceStarts(b1):instanceEnds(b1), instanceStarts(b2):instanceEnds(b2)) );
tempKernelCollection(b1, b2, :) = 1/(idxInstances(b1) * idxInstances(b2)) * sum(sum(tempToBeSummed,1));
end
end
clear b1BagAsMat;
clear b2BagAsMat;
logMessages(debugMsgLocation, sprintf('Bag-wise kernel done in %.3f seconds\n', toc), debugLevel);
tic;logMessages(debugMsgLocation, sprintf('Making from upper-triangular to full...'), debugLevel);
for i=1:nClusters
tempKernel = zeros(nBags);
tempKernel = triu(tempKernelCollection(:,:,i), 1) + tempKernelCollection(:,:,i)';
rawConformedMultiInstanceKernel{i} = tempKernel;
%fprintf('***Max.: %.4f --- Min.:%.4f***\n', max(max(tempKernel)), min(min(tempKernel)));
conformedMultiInstanceKernel{i} = normalizeKernel(tempKernel);
end
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
elseif issparse(allSeqsAsBags{1}) && do_reshaped == 1
%% Performing the reshaped version
if issparse(allSeqsAsBags{1})
logMessages(debugMsgLocation, sprintf('Doing the reshaped version..\n'), debugLevel);
% pre-allocate memory, doesn't help in speed
%for i=1:nClusters
% rawConformedMultiInstanceKernel{i} = zeros(nBags);
% conformedMultiInstanceKernel{i} = zeros(nBags);
%end
%% Pre-allocate memory, doesn't help in speed
% for i=1:nClusters
% rawConformedMultiInstanceKernel{i} = zeros(nBags);
% conformedMultiInstanceKernel{i} = zeros(nBags);
% end
tic;
tempKernelCollection = zeros(nBags, nBags, nClusters);
for b1=1:nBags
Expand All @@ -182,7 +147,6 @@
b2r = reshape(instancesTransformationKernel{b2}, idxInstances(b2) , [], nClusters);
b2rt = permute(b2r,[2,1,3]);
b1b2_transformationProducts = bsxfun(@times, b1r , b2rt);
%tempToBeSummed = bsxfun(@times, b1b2_transformationProducts, full(allSeqsAsBags{b1}' * allSeqsAsBags{b2}));
tempToBeSummed = bsxfun(@times, b1b2_transformationProducts, instanceWideKernel(instanceStarts(b1):instanceEnds(b1), instanceStarts(b2):instanceEnds(b2)) );
term = 1/(idxInstances(b1) * idxInstances(b2));
tempKernelCollection(b1, b2, :) = sum(sum(tempToBeSummed,1));
Expand All @@ -192,21 +156,13 @@
tic;
for i=1:nClusters
tempKernel = zeros(nBags);
%tempKernel = tempKernelCollection(:,:,i)';
tempKernel = triu(tempKernelCollection(:,:,i), 1) + tempKernelCollection(:,:,i)';
%if issymmetric(tempKernel)
% fprintf('Samarth symmetric\n');
%else
% fprintf('Samarth.. not sysmmetric.. triu operation is needed\n');
%end
rawConformedMultiInstanceKernel{i} = tempKernel;
%fprintf('***Max.: %.4f --- Min.:%.4f***\n', max(max(tempKernel)), min(min(tempKernel)));
conformedMultiInstanceKernel{i} = normalizeKernel(tempKernel);
%fprintf('***Max.: %.4f --- Min.:%.4f***\n', max(max(conformedMultiInstanceKernel{i})), min(min(conformedMultiInstanceKernel{i})));
end
logMessages(debugMsgLocation, sprintf('done in %.3f seconds\n', toc), debugLevel);
else % if do-reshaped ends
% % % % %
else % when not sparse
% Not tested for either correctness or efficiency. We recommend using only sparse vectors, and thus the approach above.
tic;
for i=1:size(clusterCentres,1)
tempKernel = zeros(nBags);
Expand Down

0 comments on commit 02bed78

Please sign in to comment.