Skip to content
Permalink
d63876f642
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
202 lines (193 sloc) 5.98 KB
package kb.howtokb.clustering;
import java.util.List;
import kb.howtokb.clustering.sim.CategorySimilarity;
import kb.howtokb.clustering.sim.SimilarityComputation;
import kb.howtokb.taskframe.WikiHowTaskFrame;
import kb.howtokb.utils.AdjacencyBackedSparseMatrix;
import kb.howtokb.utils.SparseSimMatrix;
public class DataForClustering {
/*Get similarity matrix of a cluster
* s(i,j) is similarity between i and j
* 0 at diagonal
*/
public static double[][] getSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs) throws Exception{
double [][] res = new double[list.size()][list.size()];
for (int i=0; i<list.size()-1; i++){
for (int j=i+1; j<list.size(); j++){
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j));
res[i][j] = sim;
res[j][i] = sim;
System.out.print(sim + " ");
}
System.out.println();
}
for (int i=0; i<list.size(); i++){
res[i][i] = 0;
}
return res;
}
/*Get similarity matrix of a cluster
* s(i,j) is similarity between i and j
* 1 at diagonal
*/
public static double[][] getFullSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs) throws Exception{
double [][] res = new double[list.size()][list.size()];
for (int i=0; i<list.size()-1; i++){
for (int j=i+1; j<list.size(); j++){
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j));
res[i][j] = sim;
res[j][i] = sim;
System.out.print(sim + " ");
}
System.out.println();
}
for (int i=0; i<list.size(); i++){
res[i][i] = 1;
}
return res;
}
/*Get sparse similarity matrix of a cluster
* s(i,j) is similarity between i and j
*
*/
public static SparseSimMatrix getSparseSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs, double threshold) throws Exception{
SparseSimMatrix res = new SparseSimMatrix((float) threshold);
for (int i=0; i<list.size()-1; i++){
for (int j=i+1; j<list.size(); j++){
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j));
res.set(i, j, (float)sim);
System.out.print(sim + " ");
}
System.out.println();
}
for (int i=0; i<list.size(); i++){
res.set(i, i, (float) 1.0);
}
return res;
}
/*Get dynamic sparse similarity matrix of a cluster
* s(i,j) is similarity between i and j
*
*/
public static AdjacencyBackedSparseMatrix getAdjacencyBackedSparseSimilarityMatrix(List<WikiHowTaskFrame> list, CategorySimilarity cs, double threshold) throws Exception{
int n = list.size();
AdjacencyBackedSparseMatrix res = new AdjacencyBackedSparseMatrix((float) threshold, n);
for (int i=0; i<list.size()-1; i++){
for (int j=i+1; j<list.size(); j++){
double sim = SimilarityComputation.getSimilarity(cs, list.get(i), list.get(j));
res.set(i, j, (float)sim);
System.out.print(sim + " ");
}
System.out.println();
}
for (int i=0; i<list.size(); i++){
res.set(i, i, (float) 1.0);
}
return res;
}
// /*
// * Get Unnormalized Laplacian Matrix
// */
// public static double[][] getUnnormalizedLaplacianMatrix(double[][] simMatrix){
// double[][] res = new double[simMatrix.length][simMatrix.length];
// for (int i=0; i<simMatrix.length; i++){
// double temp = 0;
// for (int j=0; j<simMatrix[i].length; j++){
// if (i != j){
// res[i][j] = 0 - simMatrix[i][j];
// temp += simMatrix[i][j];
// }
// }
// res[i][i] = temp;
// }
// return res;
// }
//
// /*
// * Get Normalized Laplacian Matrix
// */
// public static double[][] getNormalizedLaplacianMatrix(double[][] simMatrix){
// double[][] res = getUnnormalizedLaplacianMatrix(simMatrix);
// for (int i=0; i<res.length - 1; i++){
// for (int j=i+1; j < res.length; j++){
// double tmp = res[i][j] / Math.sqrt(res[i][i] * res[j][j]);
// res[i][j] = tmp;
// res[j][i] = tmp;
// }
// res[i][i] = 1;
// }
// res[res.length - 1][res.length - 1] = 1;
// return res;
// }
//
// /*
// * EigenDecomposition
// * Return matrix to do k-means
// */
// public static double[][] getMatrixForKMean(double[][] laplacian, int k){
// double [][] res = new double[laplacian.length][k];
//
// //Doing eigen decomposition
// EigenvalueDecomposition eigen = new EigenvalueDecomposition(
// new DenseDoubleMatrix2D(laplacian));
// double[][] U = eigen.getV().toArray();
// System.out.println(eigen.getV().toString());
// //Get only k eigenvectors corresponding to k smallest eigenvalues
// // Y = (U_n U_n-1 ....U_n-k+1) where lamda_n <= lamda_n-1 <= ... <= lamda_n-k+1
// for (int i=0; i < U.length; i++){
// for (int j=U.length - 1; j >= (U.length - 1) - k + 1; j--){
// res[i][U.length - 1 - j] = U[i][j];
// }
// }
//
//// //Normalize rows of Y to use for k-means on row of Y
//// for (int i=0; i<res.length; i++){
//// double sum = Arrays.stream(res[i]).sum();
//// if (sum != 0){
//// for (int j=0; j<res[i].length; j++){
//// res[i][j] /= sum;
//// System.out.print(res[i][j] + " ");
//// }
//// }
//// System.out.println();
//// }
//
// //Norm 1
// double[] tmp = new double[res.length];
// for (int i=0; i<res.length; i++){
// tmp[i] = 0;
// for (int j=0; j<res[i].length; j++){
// tmp[i] += Math.pow(res[i][j], 2);
// }
// tmp[i] = Math.sqrt(tmp[i]);
// }
// for (int i=0; i<res.length; i++){
// if (tmp[i] != 0){
// for (int j=0; j<res[i].length; j++){
// res[i][j] /= tmp[i];
// System.out.print(res[i][j] + " ");
// }
// }
// System.out.println();
// }
//
// return res;
// }
//
//
// /*
// * EigenDecomposition
// * Return matrix to do k-means
// */
// public static double[][] getMatrixForKMean(CategorySimilarity cs, List<WikiHowTaskFrame> list, boolean unnormalized, int k) throws Exception{
// //Get similarity matrix
// double[][] simMatrix = getSimilarityMatrix(list, cs);
// //Get laplacian matrix
// double[][] laplacian;
// if (unnormalized){
// laplacian = getUnnormalizedLaplacianMatrix(simMatrix);
// }else laplacian = getNormalizedLaplacianMatrix(simMatrix);
//
// return getMatrixForKMean(laplacian, k);
// }
}