Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
function [] = visualize_wvector(folderName, oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation)
% VISUALIZE_WVECTOR
% For the given sub-kernel weights, compute one weight vector per sub-kernel
% (plus their sum) and plot distance-centric k-mer visualizations with the
% help of analyze_wvector and the R script plot_heatmap.R.
%
% INPUT PARAMS
% Param 'folderName' (string)
% Location/Path on disk of the output folder. It must contain 'alphas.txt'
% and 'sv.txt'; a sub-folder 'motifs' is created in it if it does not exist.
%
% Param 'oligoLen'
% Oligomer length for the ODH representation (passed on to analyze_wvector
% and plot_heatmap.R)
%
% Param 'maxDist'
% Maximum distance value for the ODH representation (passed on to
% analyze_wvector)
%
% Param 'subkernelWeights' (column vector, nClusters-by-1)
% Weights assigned to the sub-kernels by MKL
%
% Param 'allSeqsAsBags' (cell array)
% Collection of all sequences; allSeqsAsBags{i} is indexed here as a
% featureLength-by-nInstances matrix
%
% Param 'rawConformedSetKernel' (cell array of matrices)
% Unnormalized conformally transformed MI kernels; indexed here as
% rawConformedSetKernel{j}(i,i) for sub-kernel j and bag i
%
% Param 'allSeqsTransformationKernel' (cell array of matrices)
% Transformation for all instances of all sequences; indexed here as
% allSeqsTransformationKernel{i}(:,j) (nInstances-by-nClusters for bag i)
%
% Param 'debugLevel' (optional, default 2)
% Passed through to logMessages and analyze_wvector
%
% Param 'debugMsgLocation' (optional, default 1)
% Passed through to logMessages and analyze_wvector
%
% OUTPUT PARAMS
% None. All results are produced as side effects of analyze_wvector and
% plot_heatmap.R.
%
% ADDITIONAL NOTES
% -- We have subkernel weights given, all corresponding weight vectors are
% computed here
% -- Alpha values are given from the trained model
% -- Location for alpha values: folderName specified in arguments
% -- Entry k of 'alphas.txt' is paired with entry k of 'sv.txt' (0-based bag
% index); the two files must have the same number of entries
% -- Can't work without folderName, thus, there is no default set of values
% of subkernelWeights
%
%
% Author: snikumbh@mpi-inf.mpg.de

    % Defaults for the two trailing optional arguments.
    if nargin < 9
        debugMsgLocation = 1;
    end
    if nargin < 8
        debugLevel = 2;
    end

    % max_or_topN to be set to 1 for AMPD visualization, 0 for topN visualization
    max_or_topN = 1;
    topN = 25; % only used by analyze_wvector when max_or_topN is set to 0
    verbose_for_analyze_wvector_call = 0;

    alphaFilename = 'alphas.txt';
    svFilename = 'sv.txt';
    alphaValues = dlmread(strcat(folderName, '/', alphaFilename));
    alphaIndices = dlmread(strcat(folderName, '/', svFilename));
    % Flatten so that row- and column-oriented files are both accepted.
    alphaValues = alphaValues(:);
    alphaIndices = alphaIndices(:) + 1; % 0-based on disk -> 1-based bag indices

    nSV = numel(alphaIndices);
    if numel(alphaValues) ~= nSV
        error('visualize_wvector:svMismatch', ...
            'Found %d alpha values in %s but %d support vector indices in %s.', ...
            numel(alphaValues), alphaFilename, nSV, svFilename);
    end
    nBags = length(allSeqsAsBags);
    if any(alphaIndices < 1 | alphaIndices > nBags | alphaIndices ~= floor(alphaIndices))
        error('visualize_wvector:svIndexOutOfRange', ...
            'Support vector indices in %s must be integers in [0, %d].', ...
            svFilename, nBags - 1);
    end

    nClusters = size(subkernelWeights, 1);
    nKernels = nClusters;
    % The feature-vector length depends on the oligoLen/maxDist parameters.
    featureLen = size(allSeqsAsBags{1}, 1);

    % Bag-level phi restricted to the support vectors:
    % bagPhi is featureLen-by-nSV-by-nClusters. Column k belongs to the bag
    % alphaIndices(k), i.e. it is in the same order as alphaValues, so each
    % alpha multiplies the feature map of its own bag even when sv.txt is
    % not sorted.
    bagPhi = zeros(featureLen, nSV, nClusters);
    for k = 1:nSV
        bagIdx = alphaIndices(k);
        bagInstances = allSeqsAsBags{bagIdx};
        for j = 1:nClusters
            % Normalize by the square root of the bag's self-similarity
            % under sub-kernel j.
            normFactor = 1 / sqrt(rawConformedSetKernel{j}(bagIdx, bagIdx));
            % Weighted sum over instances, written as a matrix-vector product.
            bagPhi(:, k, j) = normFactor * (bagInstances * allSeqsTransformationKernel{bagIdx}(:, j));
        end
    end
    logMessages(debugMsgLocation, sprintf('BagPhi done for %d clusters or %d kernels\n', nClusters, nKernels), debugLevel);

    % One weight vector per sub-kernel, kept in the same order as the kernels.
    weightVectors = zeros(featureLen, nClusters);
    for i = 1:nClusters
        weightVectors(:, i) = subkernelWeights(i, 1) * (bagPhi(:, :, i) * alphaValues);
    end

    % Ranked order: the sub-kernel with the highest weight is ranked first.
    [~, rankedIndices] = sort(subkernelWeights, 'descend');
    logMessages(debugMsgLocation, sprintf('rank: %d, ', rankedIndices), debugLevel);
    overallWeightVector = sum(weightVectors, 2);

    motifsDir = strcat(folderName, '/motifs');
    if exist(motifsDir, 'dir') ~= 7
        [mkdirOk, mkdirMsg] = mkdir(motifsDir);
        if ~mkdirOk
            logMessages(debugMsgLocation, sprintf('*ERR* could not create folder %s: %s\n', motifsDir, mkdirMsg), debugLevel);
        end
    end

    % Stays empty when there are no sub-kernels, so the log call after the
    % loop never reads an undefined variable.
    collated_pdf_name = '';
    for i = 1:nKernels
        [collated_pdf_name, pdfcrop_line] = analyze_wvector(folderName, i, weightVectors(:, rankedIndices(i))', oligoLen, maxDist, topN, verbose_for_analyze_wvector_call, max_or_topN, debugLevel, debugMsgLocation);
        logMessages(debugMsgLocation, sprintf('%s-crop.pdf ', [pdfcrop_line(1,9:end-1)]), debugLevel);

        % Plotting l2-norm heatmaps of weight vector.
        % NOTE(review): the CSV is presumably written by analyze_wvector -- confirm.
        % NOTE(review): paths are not quoted, so a folderName containing
        % spaces will break this command.
        inputCSVFile = strcat(folderName, '/weightVectorNormed_rank', num2str(i), '.csv');
        [status, output] = system(['Rscript plot_heatmap.R ' inputCSVFile ' ' folderName ' ' num2str(i) ' ' num2str(oligoLen)]);
        logMessages(debugMsgLocation, sprintf('R script to plot heatmaps\n'), debugLevel);
        logMessages(debugMsgLocation, sprintf('%s\n', output), debugLevel);
        if status ~= 0
            logMessages(debugMsgLocation, sprintf('*ERR* running the R script to plot the normed weight vector as a heatmap\n'), debugLevel);
        end
    end
    logMessages(debugMsgLocation, sprintf('cat output %s\n\n', collated_pdf_name), debugLevel);

    % Analyze weight vector for the combined kernel (rank 0). Two outputs are
    % still requested so that nargout inside analyze_wvector is unchanged.
    [~, ~] = analyze_wvector(folderName, 0, overallWeightVector', oligoLen, maxDist, topN, verbose_for_analyze_wvector_call, max_or_topN, debugLevel, debugMsgLocation);
end %function end