Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
HowToKB/src/main/java/kb/howtokb/clustering/DataForClustering.java
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
202 lines (193 sloc)
5.98 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package kb.howtokb.clustering; | |
import java.util.List; | |
import kb.howtokb.clustering.sim.CategorySimilarity; | |
import kb.howtokb.clustering.sim.SimilarityComputation; | |
import kb.howtokb.taskframe.WikiHowTaskFrame; | |
import kb.howtokb.utils.AdjacencyBackedSparseMatrix; | |
import kb.howtokb.utils.SparseSimMatrix; | |
public class DataForClustering { | |
/*Get similarity matrix of a cluster | |
* s(i,j) is similarity between i and j | |
* 0 at diagonal | |
*/ | |
public static double[][] getSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs) throws Exception{ | |
double [][] res = new double[list.size()][list.size()]; | |
for (int i=0; i<list.size()-1; i++){ | |
for (int j=i+1; j<list.size(); j++){ | |
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j)); | |
res[i][j] = sim; | |
res[j][i] = sim; | |
System.out.print(sim + " "); | |
} | |
System.out.println(); | |
} | |
for (int i=0; i<list.size(); i++){ | |
res[i][i] = 0; | |
} | |
return res; | |
} | |
/*Get similarity matrix of a cluster | |
* s(i,j) is similarity between i and j | |
* 1 at diagonal | |
*/ | |
public static double[][] getFullSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs) throws Exception{ | |
double [][] res = new double[list.size()][list.size()]; | |
for (int i=0; i<list.size()-1; i++){ | |
for (int j=i+1; j<list.size(); j++){ | |
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j)); | |
res[i][j] = sim; | |
res[j][i] = sim; | |
System.out.print(sim + " "); | |
} | |
System.out.println(); | |
} | |
for (int i=0; i<list.size(); i++){ | |
res[i][i] = 1; | |
} | |
return res; | |
} | |
/*Get sparse similarity matrix of a cluster | |
* s(i,j) is similarity between i and j | |
* | |
*/ | |
public static SparseSimMatrix getSparseSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs, double threshold) throws Exception{ | |
SparseSimMatrix res = new SparseSimMatrix((float) threshold); | |
for (int i=0; i<list.size()-1; i++){ | |
for (int j=i+1; j<list.size(); j++){ | |
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j)); | |
res.set(i, j, (float)sim); | |
System.out.print(sim + " "); | |
} | |
System.out.println(); | |
} | |
for (int i=0; i<list.size(); i++){ | |
res.set(i, i, (float) 1.0); | |
} | |
return res; | |
} | |
/*Get dynamic sparse similarity matrix of a cluster | |
* s(i,j) is similarity between i and j | |
* | |
*/ | |
public static AdjacencyBackedSparseMatrix getAdjacencyBackedSparseSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs, double threshold) throws Exception{ | |
int n = list.size(); | |
AdjacencyBackedSparseMatrix res = new AdjacencyBackedSparseMatrix((float) threshold, n); | |
for (int i=0; i<list.size()-1; i++){ | |
for (int j=i+1; j<list.size(); j++){ | |
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j)); | |
res.set(i, j, (float)sim); | |
System.out.print(sim + " "); | |
} | |
System.out.println(); | |
} | |
for (int i=0; i<list.size(); i++){ | |
res.set(i, i, (float) 1.0); | |
} | |
return res; | |
} | |
// /* | |
// * Get Unnormalized Laplacian Matrix | |
// */ | |
// public static double[][] getUnnormalizedLaplacianMatrix(double[][] simMatrix){ | |
// double[][] res = new double[simMatrix.length][simMatrix.length]; | |
// for (int i=0; i<simMatrix.length; i++){ | |
// double temp = 0; | |
// for (int j=0; j<simMatrix[i].length; j++){ | |
// if (i != j){ | |
// res[i][j] = 0 - simMatrix[i][j]; | |
// temp += simMatrix[i][j]; | |
// } | |
// } | |
// res[i][i] = temp; | |
// } | |
// return res; | |
// } | |
// | |
// /* | |
// * Get Normalized Laplacian Matrix | |
// */ | |
// public static double[][] getNormalizedLaplacianMatrix(double[][] simMatrix){ | |
// double[][] res = getUnnormalizedLaplacianMatrix(simMatrix); | |
// for (int i=0; i<res.length - 1; i++){ | |
// for (int j=i+1; j < res.length; j++){ | |
// double tmp = res[i][j] / Math.sqrt(res[i][i] * res[j][j]); | |
// res[i][j] = tmp; | |
// res[j][i] = tmp; | |
// } | |
// res[i][i] = 1; | |
// } | |
// res[res.length - 1][res.length - 1] = 1; | |
// return res; | |
// } | |
// | |
// /* | |
// * EigenDecomposition | |
// * Return matrix to do k-means | |
// */ | |
// public static double[][] getMatrixForKMean(double[][] laplacian, int k){ | |
// double [][] res = new double[laplacian.length][k]; | |
// | |
// //Doing eigen decomposition | |
// EigenvalueDecomposition eigen = new EigenvalueDecomposition( | |
// new DenseDoubleMatrix2D(laplacian)); | |
// double[][] U = eigen.getV().toArray(); | |
// System.out.println(eigen.getV().toString()); | |
// //Get only k eigenvectors corresponding to k smallest eigenvalues | |
// // Y = (U_n U_n-1 ....U_n-k+1) where lamda_n <= lamda_n-1 <= ... <= lamda_n-k+1 | |
// for (int i=0; i < U.length; i++){ | |
// for (int j=U.length - 1; j >= (U.length - 1) - k + 1; j--){ | |
// res[i][U.length - 1 - j] = U[i][j]; | |
// } | |
// } | |
// | |
//// //Normalize rows of Y to use for k-means on row of Y | |
//// for (int i=0; i<res.length; i++){ | |
//// double sum = Arrays.stream(res[i]).sum(); | |
//// if (sum != 0){ | |
//// for (int j=0; j<res[i].length; j++){ | |
//// res[i][j] /= sum; | |
//// System.out.print(res[i][j] + " "); | |
//// } | |
//// } | |
//// System.out.println(); | |
//// } | |
// | |
// //Norm 1 | |
// double[] tmp = new double[res.length]; | |
// for (int i=0; i<res.length; i++){ | |
// tmp[i] = 0; | |
// for (int j=0; j<res[i].length; j++){ | |
// tmp[i] += Math.pow(res[i][j], 2); | |
// } | |
// tmp[i] = Math.sqrt(tmp[i]); | |
// } | |
// for (int i=0; i<res.length; i++){ | |
// if (tmp[i] != 0){ | |
// for (int j=0; j<res[i].length; j++){ | |
// res[i][j] /= tmp[i]; | |
// System.out.print(res[i][j] + " "); | |
// } | |
// } | |
// System.out.println(); | |
// } | |
// | |
// return res; | |
// } | |
// | |
// | |
// /* | |
// * EigenDecomposition | |
// * Return matrix to do k-means | |
// */ | |
// public static double[][] getMatrixForKMean(CategorySimilarity cs, List<WikiHowTaskFrame> list, boolean unnormalized, int k) throws Exception{ | |
// //Get similarity matrix | |
// double[][] simMatrix = getSimilarityMatrix(list, cs); | |
// //Get laplacian matrix | |
// double[][] laplacian; | |
// if (unnormalized){ | |
// laplacian = getUnnormalizedLaplacianMatrix(simMatrix); | |
// }else laplacian = getNormalizedLaplacianMatrix(simMatrix); | |
// | |
// return getMatrixForKMean(laplacian, k); | |
// } | |
} |