Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
comik/visualize_wvector.m
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
133 lines (121 sloc)
5.25 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [] = visualize_wvector(folderName, oligoLen, maxDist, subkernelWeights, allSeqsAsBags, rawConformedSetKernel, allSeqsTransformationKernel, debugLevel, debugMsgLocation)
% VISUALIZE_WVECTOR
% For the given sub-kernel weights, compute one weight vector per sub-kernel
% (plus their sum) and plot distance-centric k-mer visualizations with the
% help of analyze_wvector and the R script plot_heatmap.R.
%
% INPUT PARAMS
% Param 'folderName' (string)
% Location/Path on disk of the output folder. It must also contain the
% files 'alphas.txt' and 'sv.txt' written by the trained model.
%
% Param 'oligoLen'
% Oligomer length for the ODH representation
%
% Param 'maxDist'
% Maximum distance value for the ODH representation
%
% Param 'subkernelWeights' (column vector, nClusters-by-1)
% Weights assigned to the sub-kernels by MKL
%
% Param 'allSeqsAsBags' (cell array)
% Collection of all sequences; element i is indexed here as a
% nFeatures-by-nInstances matrix for bag i.
%
% Param 'rawConformedSetKernel' (cell array of matrices)
% Unnormalized conformally transformed MI kernel, one matrix per
% sub-kernel. Only the diagonal entries (i,i) are read here.
%
% Param 'allSeqsTransformationKernel' (cell array of matrices)
% Transformation for all instances of all sequences; element i is indexed
% here as nInstances-by-nClusters.
%
% Param 'debugLevel' (optional, default 2)
% Passed through to logMessages and analyze_wvector.
%
% Param 'debugMsgLocation' (optional, default 1)
% Passed through to logMessages and analyze_wvector.
%
% OUTPUT PARAMS
% None. Results are produced by analyze_wvector and plot_heatmap.R.
%
% ADDITIONAL NOTES
% -- We have subkernel weights given, all corresponding weight vectors are to be computed
% -- Alpha values are given from the trained model
% -- Location for alpha values: folderName specified in arguments
% -- Output the list of ranked weight vectors, specifying the individual
% weights doesn't seem to serve any/much purpose
% -- Can't work without folderName, thus, there are no default set of values
% of subkernelWeights
% -- NOTE(review): alphas.txt and sv.txt are assumed to be line-aligned
% (k-th alpha belongs to the k-th support-vector index) -- confirm against
% the script that writes them.
%
%
% Author: snikumbh@mpi-inf.mpg.de

    % Only the two debug arguments are optional; fail early otherwise.
    narginchk(7, 9);
    totalArguments = 9;
    if nargin < totalArguments
        debugMsgLocation = 1;
    end
    if nargin < totalArguments - 1
        debugLevel = 2;
    end

    max_or_topN = 1;
    % max_or_topN to be set to 1 for AMPD visualization, 0 for topN visualization
    topN = 25; % Although this value is set, it is used when max_or_topN is set to 0

    alphaFilename = 'alphas.txt';
    svFilename = 'sv.txt';
    alphaValues = dlmread(strcat(folderName, '/', alphaFilename));
    alphaIndices = dlmread(strcat(folderName, '/', svFilename));
    % Force column orientation so that files written as a single row work too.
    alphaValues = alphaValues(:);
    % The stored indices are shifted by one to obtain bag indices
    % (presumably 0-based in the file -- TODO confirm).
    alphaIndices = alphaIndices(:) + 1;
    nSV = numel(alphaIndices);

    if numel(alphaValues) ~= nSV
        error('visualize_wvector:alphaSvMismatch', ...
            'Found %d alpha values in %s but %d support-vector indices in %s.', ...
            numel(alphaValues), alphaFilename, nSV, svFilename);
    end
    invalidIdx = alphaIndices < 1 | alphaIndices > numel(allSeqsAsBags) | alphaIndices ~= floor(alphaIndices);
    if any(invalidIdx)
        error('visualize_wvector:svIndexOutOfRange', ...
            'Support-vector indices in %s must be integers addressing one of the %d bags.', ...
            svFilename, numel(allSeqsAsBags));
    end

    %
    % Compute the weight vectors corresponding to all subkernels, keep them in the
    % same order as the kernels
    %
    nClusters = size(subkernelWeights, 1);
    nKernels = nClusters;

    % Prepare bag-level phi, restricted to support vectors.
    % bagPhi is nFeatures-by-nSV-by-nClusters. Column k belongs to the bag
    % alphaIndices(k), i.e. it is stored in the same order as alphaValues, so
    % that the product with alphaValues below pairs every alpha with its own
    % bag even when sv.txt is not sorted in ascending order.
    nFeatures = size(allSeqsAsBags{1}, 1);
    bagPhi = zeros(nFeatures, nSV, nClusters);
    for k = 1:nSV
        bagIdx = alphaIndices(k);
        bag = allSeqsAsBags{bagIdx};
        % nInstances x nClusters transformation for this bag
        instanceTransform = allSeqsTransformationKernel{bagIdx};
        for j = 1:nClusters
            % Weighted sum of the bag's instances, normalized by the square
            % root of the bag's self-similarity under sub-kernel j.
            bagPhi(:, k, j) = (1/sqrt(rawConformedSetKernel{j}(bagIdx,bagIdx))) * sum(bag .* repmat(instanceTransform(:,j)', size(bag,1), 1), 2);
        end
    end
    logMessages(debugMsgLocation, sprintf('BagPhi done for %d clusters or %d kernels\n', nClusters, nKernels), debugLevel);

    % The first dimension of the weight vector comes from the featurevector length which depends on oligoLength param
    weightVectors = zeros(nFeatures, nClusters);
    for i = 1:nClusters
        weightVectors(:, i) = (subkernelWeights(i,1) * alphaValues' * bagPhi(:, :, i)')';
    end

    % Get the ranked order (weight vector corresponding to the subkernel with the highest weight is ranked at the top)
    [~, rankedIndices] = sort(subkernelWeights, 'descend');
    logMessages(debugMsgLocation, sprintf('rank: %d, ', rankedIndices), debugLevel);
    overallWeightVector = sum(weightVectors, 2);

    verbose_for_analyze_wvector_call = 0;

    % Make sure the 'motifs' sub-folder exists; report (but do not abort on)
    % a failure to create it.
    motifsFolder = strcat(folderName,'/motifs');
    if exist(motifsFolder, 'dir') ~= 7
        [status, mkdirMsg] = mkdir(motifsFolder);
        if ~status
            logMessages(debugMsgLocation, sprintf('*ERR* could not create folder %s: %s\n', motifsFolder, mkdirMsg), debugLevel);
        end
    end

    % Initialized so that the log call after the loop is valid even when
    % there are no sub-kernels.
    collated_pdf_name = '';
    for i = 1:nKernels
        % i is the rank; the weight vector passed is the one of the
        % sub-kernel holding that rank.
        [collated_pdf_name, pdfcrop_line] = analyze_wvector(folderName, i, weightVectors(:, rankedIndices(i))' , oligoLen, maxDist, topN, verbose_for_analyze_wvector_call, max_or_topN, debugLevel, debugMsgLocation);
        logMessages(debugMsgLocation, sprintf('%s-crop.pdf ', [pdfcrop_line(1,9:end-1)]), debugLevel);

        % Plotting l2-norm heatmaps of weight vector
        inputCSVFile = strcat(folderName, '/weightVectorNormed_rank', num2str(i), '.csv');
        % NOTE(review): the path arguments are passed unquoted, so a
        % folderName containing spaces will split into several arguments.
        [status, output] = system(['Rscript plot_heatmap.R ' inputCSVFile ' ' folderName ' ' num2str(i) ' ' num2str(oligoLen)]);
        logMessages(debugMsgLocation, sprintf('R script to plot heatmaps\n'), debugLevel);
        logMessages(debugMsgLocation, sprintf('%s\n', output), debugLevel);
        if status ~= 0
            logMessages(debugMsgLocation, sprintf('*ERR* running the R script to plot the normed weight vector as a heatmap\n'), debugLevel);
        end
    end
    logMessages(debugMsgLocation, sprintf('cat output %s\n\n', collated_pdf_name), debugLevel);

    % Analyze weight vector for the combined kernel (rank 0). The call is
    % made for its side effects; two outputs are still requested so that
    % nargout inside analyze_wvector is unchanged.
    [~, ~] = analyze_wvector(folderName, 0, overallWeightVector' , oligoLen, maxDist, topN, verbose_for_analyze_wvector_call, max_or_topN, debugLevel, debugMsgLocation);

    % Combine/append all PDFs
    % fprintf('\npdftk ');
    % for i= 1:size(pdfcrop_lines,1)
    % fprintf('%s-crop.pdf ', [pdfcrop_lines(i,9:end-1)]);
    % end
    % fprintf('cat output %s\n\n', collated_pdf_name);
end %function end