Skip to content
Permalink
a056a1c421
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
133 lines (121 sloc) 5.25 KB
function [] = visualize_wvector(folderName, oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation)
% VISUALIZE_WVECTOR
% For the given sub-kernel weights, compute the per-kernel weight vectors and
% plot distance-centric k-mer visualizations with the help of analyze_wvector.
% Additionally invokes the R script plot_heatmap.R once per kernel.
%
% INPUT PARAMS
% Param 'folderName' (string)
% Location/Path on disk of the output folder. The files 'alphas.txt' and
% 'sv.txt' are read from this folder, and a 'motifs' sub-folder is created
% in it if it does not exist yet.
%
% Param 'oligoLen'
% Oligomer length for the ODH representation
%
% Param 'maxDist'
% Maximum distance value for the ODH representation
%
% Param 'subkernelWeights' (column vector, nKernels-by-1)
% Weights assigned to the sub-kernels by MKL
%
% Param 'allSeqsAsBags' (cell array)
% Collection of all sequences; allSeqsAsBags{i} is indexed here as a
% featureLength-by-nInstances matrix
%
% Param 'rawConformedSetKernel' (cell array of matrices)
% Unnormalized conformally transformed MI kernel; rawConformedSetKernel{j}
% is the bag-level kernel matrix of sub-kernel j (only its diagonal is used)
%
% Param 'allSeqsTransformationKernel' (cell array of matrices)
% Transformation for all instances of all sequences;
% allSeqsTransformationKernel{i} is nInstances-by-nClusters for sequence i
%
% Param 'debugLevel'
% Passed through to logMessages/analyze_wvector; defaults to 2
%
% Param 'debugMsgLocation'
% Passed through to logMessages/analyze_wvector; defaults to 1
%
% OUTPUT PARAMS
% None
%
% ADDITIONAL NOTES
% -- We have subkernel weights given, all corresponding weight vectors are to be computed
% -- Alpha values are given from the trained model
% -- Location for alpha values: folderName specified in arguments
% -- The k-th line of 'alphas.txt' is taken to belong to the k-th support
%    vector index in 'sv.txt' (indices in 'sv.txt' are zero-based)
% -- Can't work without folderName, thus, there are no default set of values
%    of subkernelWeights
%
%
% Author: snikumbh@mpi-inf.mpg.de

    % Defaults for the two optional trailing arguments.
    totalArguments = 9;
    if nargin < totalArguments
        debugMsgLocation = 1;
    end
    if nargin < totalArguments - 1
        debugLevel = 2;
    end

    % max_or_topN to be set to 1 for AMPD visualization, 0 for topN visualization
    max_or_topN = 1;
    topN = 25; % only used when max_or_topN is set to 0
    verbose_for_analyze_wvector_call = 0;

    alphaFilename = 'alphas.txt';
    svFilename = 'sv.txt';
    alphaValues = dlmread(strcat(folderName, '/', alphaFilename));
    alphaIndices = dlmread(strcat(folderName, '/', svFilename));
    % Force column orientation so the code works whether the files hold one
    % value per line or all values on a single line.
    alphaValues = alphaValues(:);
    alphaIndices = alphaIndices(:) + 1; % zero-based on disk -> one-based bag indices

    nBags = length(allSeqsAsBags);
    nSupportVectors = numel(alphaIndices);
    if numel(alphaValues) ~= nSupportVectors
        error('visualize_wvector:alphaCountMismatch', ...
            'Found %d alpha values but %d support vector indices in %s', ...
            numel(alphaValues), nSupportVectors, folderName);
    end
    if any(alphaIndices < 1 | alphaIndices > nBags | alphaIndices ~= floor(alphaIndices))
        error('visualize_wvector:badSupportVectorIndex', ...
            'Support vector indices in %s must be integers in [0, %d]', ...
            svFilename, nBags - 1);
    end

    %
    % Compute the weight vectors corresponding to all subkernels, keep them in the
    % same order as the kernels
    %
    nClusters = size(subkernelWeights, 1);
    nKernels = nClusters;
    featureLen = size(allSeqsAsBags{1}, 1);

    % Bag-level phi, restricted to support vectors.
    % bagPhi is featureLen-by-nSupportVectors-by-nClusters.
    % Column k is built from bag alphaIndices(k) so that it lines up with
    % alphaValues(k) regardless of the order in which the support vectors
    % are listed in the file.
    bagPhi = zeros(featureLen, nSupportVectors, nClusters);
    for k = 1:nSupportVectors
        bagIdx = alphaIndices(k);
        for j = 1:nClusters
            % Weighted sum of the instance feature vectors of this bag
            % (weights: column j of the bag's transformation), normalized by
            % the square root of the bag's self-similarity under kernel j.
            bagPhi(:, k, j) = (1/sqrt(rawConformedSetKernel{j}(bagIdx, bagIdx))) * ...
                (allSeqsAsBags{bagIdx} * allSeqsTransformationKernel{bagIdx}(:, j));
        end
    end
    logMessages(debugMsgLocation, sprintf('BagPhi done for %d clusters or %d kernels\n', nClusters, nKernels), debugLevel);

    % The first dimension of the weight vector comes from the feature vector
    % length which depends on the oligoLen param.
    weightVectors = zeros(featureLen, nClusters);
    for j = 1:nClusters
        weightVectors(:, j) = subkernelWeights(j, 1) * (bagPhi(:, :, j) * alphaValues);
    end

    % Ranked order: the weight vector of the subkernel with the highest
    % weight is ranked at the top.
    [~, rankedIndices] = sort(subkernelWeights, 'descend');
    logMessages(debugMsgLocation, sprintf('rank: %d, ', rankedIndices), debugLevel);
    overallWeightVector = sum(weightVectors, 2);

    motifsFolder = strcat(folderName, '/motifs');
    if exist(motifsFolder, 'dir') ~= 7
        [mkdirOk, mkdirMsg] = mkdir(motifsFolder);
        if ~mkdirOk
            logMessages(debugMsgLocation, sprintf('*ERR* could not create folder %s: %s\n', motifsFolder, mkdirMsg), debugLevel);
        end
    end

    % Initialized so that the log line after the loop is well-defined even
    % when there are no kernels.
    collated_pdf_name = '';
    for i = 1:nKernels
        [collated_pdf_name, pdfcrop_line] = analyze_wvector(folderName, i, weightVectors(:, rankedIndices(i))', oligoLen, maxDist, topN, verbose_for_analyze_wvector_call, max_or_topN, debugLevel, debugMsgLocation);
        logMessages(debugMsgLocation, sprintf('%s-crop.pdf ', [pdfcrop_line(1,9:end-1)]), debugLevel);

        % Plotting l2-norm heatmaps of weight vector
        % NOTE(review): the command line is assembled without quoting, so a
        % folderName containing whitespace will be split into several
        % arguments by the shell.
        inputCSVFile = strcat(folderName, '/weightVectorNormed_rank', num2str(i), '.csv');
        [status, output] = system(['Rscript plot_heatmap.R ' inputCSVFile ' ' folderName ' ' num2str(i) ' ' num2str(oligoLen)]);
        logMessages(debugMsgLocation, sprintf('R script to plot heatmaps\n'), debugLevel);
        logMessages(debugMsgLocation, sprintf('%s\n', output), debugLevel);
        if status ~= 0
            logMessages(debugMsgLocation, sprintf('*ERR* running the R script to plot the normed weight vector as a heatmap\n'), debugLevel);
        end
    end
    logMessages(debugMsgLocation, sprintf('cat output %s\n\n', collated_pdf_name), debugLevel);

    % Analyze weight vector for the combined kernel (rank argument 0)
    analyze_wvector(folderName, 0, overallWeightVector', oligoLen, maxDist, topN, verbose_for_analyze_wvector_call, max_or_topN, debugLevel, debugMsgLocation);

    % Combine/append all PDFs
    % fprintf('\npdftk ');
    % for i= 1:size(pdfcrop_lines,1)
    % fprintf('%s-crop.pdf ', [pdfcrop_lines(i,9:end-1)]);
    % end
    % fprintf('cat output %s\n\n', collated_pdf_name);
end %function end