Skip to content
Permalink
0d82ff1dc4
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
154 lines (130 sloc) 5.88 KB
package kb.howtokb.clustering;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import kb.howtokb.clustering.basicobj.CSKSimpleCluster;
import kb.howtokb.clustering.basicobj.Instance;
import kb.howtokb.clustering.sim.CategorySimilarity;
import kb.howtokb.taskframe.WikiHowTaskFrame;
import kb.howtokb.utils.AdjacencyBackedSparseMatrix;
public class HeuristicTopDownClusteringDynamicSparse
implements ITopDownClustering<CSKSimpleCluster<WikiHowTaskFrame>, Instance<WikiHowTaskFrame>> {
private IStopping<CSKSimpleCluster<WikiHowTaskFrame>> stopper;
private AdjacencyBackedSparseMatrix simMatrix;
private CSKSimpleCluster<WikiHowTaskFrame> inputCluster;
public HeuristicTopDownClusteringDynamicSparse(List<WikiHowTaskFrame> pts, boolean isNormalizedCut, double threshold, int k, double thresforSparse) throws SQLException, Exception {
List<Instance<WikiHowTaskFrame>> ints = new ArrayList<>();
for (int i=0; i<pts.size(); i++)
ints.add(new Instance<WikiHowTaskFrame>(i, pts.get(i)));
inputCluster = new CSKSimpleCluster<>(-1, ints);
simMatrix = DataForClustering.getAdjacencyBackedSparseSimilarityMatrix(pts, new CategorySimilarity(), thresforSparse);
System.out.println("================================================");
this.stopper = new SimpleSimilarityStopping(threshold, simMatrix, isNormalizedCut);
}
public CSKSimpleCluster<WikiHowTaskFrame> getInputCluster() {
return inputCluster;
}
@Override
public boolean canSplitFrom(CSKSimpleCluster<WikiHowTaskFrame> c1, CSKSimpleCluster<WikiHowTaskFrame> c2,
ISimilarity<Instance<WikiHowTaskFrame>> simFunc, double simThreshold) {
// TODO Auto-generated method stub
return false;
}
@Override
public List<CSKSimpleCluster<WikiHowTaskFrame>> splitACluster(CSKSimpleCluster<WikiHowTaskFrame> c, int k)
throws Exception {
List<CSKSimpleCluster<WikiHowTaskFrame>> res = new ArrayList<>();
List<Instance<WikiHowTaskFrame>> members = new ArrayList<>(c.getClusterMembers());
// c.getClusterMembers();
Instance<WikiHowTaskFrame> farthestPt = getFarthestPt(members);
CSKSimpleCluster<WikiHowTaskFrame> initCluster = new CSKSimpleCluster<>(-1, farthestPt);
members.remove(farthestPt);
CSKSimpleCluster<WikiHowTaskFrame> leftCluster = new CSKSimpleCluster<>(-1, members);
assignCluster(initCluster, leftCluster);
//System.out.println(initCluster.getClusterMembers().size() + " " + leftCluster.getClusterMembers().size());
List<CSKSimpleCluster<WikiHowTaskFrame>> children = new ArrayList<>();
children.add(initCluster); children.add(leftCluster);
if (stopper.split(c, children)){
for (int i=0; i<children.size(); i++){
if (children.get(i).getClusterMembers().size() > 1)
res.addAll(splitACluster(children.get(i), k));
else res.add(children.get(i));
}
}else{
// System.out.println(c.getClusterMembers().size());
res.add(c);
}
return res;
}
public Instance<WikiHowTaskFrame> getFarthestPt(List<Instance<WikiHowTaskFrame>> members){
System.out.println("Find the farthest Pt....");
Instance<WikiHowTaskFrame> farthestPt = new Instance<>();
double min = Double.MAX_VALUE;
for (int i=0; i<members.size(); i++){
double sum = 0;
for (int j=0; j<members.size(); j++){
sum += simMatrix.get(members.get(i).getID(),members.get(j).getID());
}
if (min > sum){
min = sum;
farthestPt = members.get(i);
}
}
System.out.println("Finishing finding the farthest pt...");
return farthestPt;
}
public void assignCluster(CSKSimpleCluster<WikiHowTaskFrame> initCluster, CSKSimpleCluster<WikiHowTaskFrame> leftCluster){
System.out.println("Assign cluster.....");
List<Instance<WikiHowTaskFrame>> members = leftCluster.getClusterMembers();
for (int i=0; i<members.size(); i++){
if (simPtToCluster(members.get(i), initCluster) > simPtToCluster(members.get(i), leftCluster)){
initCluster.addClusterMember(members.get(i));
leftCluster.removeMember(members.get(i));
i--;
}
}
System.out.println("Done assign.....");
}
public double simPtToCluster(Instance<WikiHowTaskFrame> pt, CSKSimpleCluster<WikiHowTaskFrame> cluster){
List<Instance<WikiHowTaskFrame>> members = cluster.getClusterMembers();
double sum = 0.0;
for (int i=0; i<members.size(); i++){
sum += simMatrix.get(pt.getID(), members.get(i).getID());
}
return sum/members.size();
}
/**
* Stopping when inter-cluster similarity is greater than a given threshold
* @author cxchu
*
* @param <T>
*/
public static class SimpleSimilarityStopping implements
IStopping<CSKSimpleCluster<WikiHowTaskFrame>> {
private double threshold;
private AdjacencyBackedSparseMatrix simMatrix;
private boolean isNormalizedCut;
public SimpleSimilarityStopping(double thres, AdjacencyBackedSparseMatrix simMatrix, boolean isNormalizedCut) {
this.threshold = thres;
this.simMatrix = simMatrix;
this.isNormalizedCut = isNormalizedCut;
}
@Override
public boolean split(CSKSimpleCluster<WikiHowTaskFrame> parent, List<CSKSimpleCluster<WikiHowTaskFrame>> children) {
if (parent.getClusterMembers().size() <= 1) return false;
if (isNormalizedCut) return split(children.get(0), children.get(1));
return SimpleClusterSimilarity.averageIntraClusterSimilarity(parent, simMatrix) < this.threshold;
}
//Similar to minimizing min cut
public boolean split(CSKSimpleCluster<WikiHowTaskFrame> c1, CSKSimpleCluster<WikiHowTaskFrame> c2) {
double volC1 = SimpleClusterSimilarity.volOfCluster(c1, simMatrix);
double volC2 = SimpleClusterSimilarity.volOfCluster(c2, simMatrix);
double interC = SimpleClusterSimilarity.volOfInterClusters(c1, c2, simMatrix);
if (volC1 == 1 && volC2 == 1) return false; //Don't split into two singletons
if (volC1 == 1) return interC/volC2 < this.threshold;
if (volC2 == 1) return interC/volC1 < this.threshold;
System.out.println(volC1 + " " + volC2 + " " + interC);
return (interC/volC1 + interC/volC2) < this.threshold;
}
}
}