diff --git a/pom.xml b/pom.xml
index f96eb31..aa320ac 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,13 +1,13 @@
- 4.0.0
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ 4.0.0
- kb
- howtokb
- 0.0.1-SNAPSHOT
- jar
-
-
+ kb
+ howtokb
+ 0.0.1-SNAPSHOT
+ jar
+
+
src
@@ -33,7 +33,6 @@
-
package
@@ -44,7 +43,7 @@
- test.java.kb.howtokb.TextToWikiHowTaskFrameTest
+ kb.howtokb.clustering.HeuristicBottomUpClusteringTest
@@ -54,4 +53,42 @@
+
+ 1.7
+ 1.7
+
+
+
+ com.googlecode.json-simple
+ json-simple
+ 1.1.1
+
+
+ org.jsoup
+ jsoup
+ 1.9.1
+
+
+ edu.stanford.nlp
+ stanford-corenlp
+ 3.3.0
+
+
+ edu.washington.cs.knowitall.openie
+ openie_2.10
+ 4.2.1
+
+
+ colt
+ colt
+ 1.2.0
+
+
+ niket.tools
+ javatools
+ 1.0.0
+
+
+
+
diff --git a/src/main/java/kb/howtokb/clustering/HeuristicBottomupClustering.java b/src/main/java/kb/howtokb/clustering/HeuristicBottomupClustering.java
index 4c3fd59..c4d48f9 100644
--- a/src/main/java/kb/howtokb/clustering/HeuristicBottomupClustering.java
+++ b/src/main/java/kb/howtokb/clustering/HeuristicBottomupClustering.java
@@ -1,8 +1,9 @@
package kb.howtokb.clustering;
import java.io.BufferedReader;
-import java.io.FileReader;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -81,7 +82,9 @@ public HeuristicBottomupClustering(Map activityT
}
private void loadElemsFromDb(String activityTb) throws IOException {
- try (BufferedReader br = new BufferedReader(new FileReader(activityTb))) {
+ ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+ InputStream inputs = classLoader.getResourceAsStream(activityTb);
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(inputs, "UTF-8"))) {
String sCurrentLine;
while ((sCurrentLine = br.readLine()) != null) {
String [] line = sCurrentLine.split("\t");
diff --git a/src/main/java/kb/howtokb/tools/InformationExtraction.java b/src/main/java/kb/howtokb/tools/InformationExtraction.java
index a59459d..90806d0 100644
--- a/src/main/java/kb/howtokb/tools/InformationExtraction.java
+++ b/src/main/java/kb/howtokb/tools/InformationExtraction.java
@@ -11,6 +11,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
@@ -18,7 +19,9 @@
import kb.howtokb.reader.TaskFrameReader;
import kb.howtokb.taskframe.WikiHowTaskFrame;
+import kb.howtokb.utils.AutoMap;
import kb.howtokb.utils.SQLiteJDBCConnector;
+import kb.howtokb.wkhobject.Category_Json;
public class InformationExtraction {
@@ -141,6 +144,57 @@ public static List getAllFrame(String inputfile) throws IOExce
return allframe;
}
+ // Select the frames whose activity category is listed under a given category.
+ /**
+ * Returns the frames of {@code allframe} whose activity category ID appears in the list produced by {@code getListofAllChildren} for the ID that {@code getCategoryID(category)} resolves; progress is printed to stdout.
+ * @param allframe frames to filter; each frame's {@code getActivity().getCategoryID()} is parsed with {@code Integer.parseInt}
+ * @param category value handed to {@code getCategoryID} to obtain the numeric category ID (presumably a category name - confirm)
+ * @return a new list holding the matching frames in their original order; empty when nothing matches
+ * @throws Exception declared broadly; covers failures propagated from the two helper calls and a category ID that is not a parsable integer
+ */
+ public List getFrameFromCategory(List allframe, String category) throws Exception {
+ System.out.println("Get a list of activities in a same category " + category);
+ List list = new ArrayList<>();
+ List listCate = InformationExtraction.getListofAllChildren(InformationExtraction.getCategoryID(category));
+ // Keep each frame whose parsed category ID occurs in listCate (linear contains() per frame).
+ for (int i = 0; i < allframe.size(); i++) {
+ if (listCate.contains(Integer.parseInt(allframe.get(i).getActivity().getCategoryID()))) {
+ list.add(allframe.get(i));
+ }
+ }
+ System.out.println("Number of activities in a same category: " + list.size());
+ return list;
+ }
+
+ // Collect every category whose stored root path mentions a given category ID.
+ /**
+ * Reads all rows of table {@code categoryjson} and returns the IDs of the rows whose parsed {@code rootpath} contains {@code id}.
+ * @param id category ID to look for inside each row's root path
+ * @return IDs of the matching rows (possibly empty); rows that fail while being read or parsed are reported on stdout and skipped
+ * @throws SQLException if advancing the result set fails (presumably also if the query itself fails - confirm against {@code SQLiteJDBCConnector.q})
+ * @throws ClassNotFoundException declared here; presumably propagated from {@code SQLiteJDBCConnector.q} - confirm
+ * @throws IOException declared here; presumably propagated from {@code SQLiteJDBCConnector.q} - confirm
+ */
+ public static List getListofAllChildren(int id) throws SQLException, ClassNotFoundException, IOException {
+ Map> parentChains = new AutoMap<>(); // row id -> root path; NOTE(review): type arguments appear lost in this listing - confirm against the real file
+ // Example of the JSON field consumed below: "rootpath":[57,54,52,150,1]
+ ResultSet rs = SQLiteJDBCConnector.q("select id, json from categoryjson"); // NOTE(review): rs is never closed in this method - confirm q() owns its lifetime
+ while (rs.next()) {
+ try {
+ parentChains.put(rs.getInt(1), Category_Json.fromJson(rs.getString(2)).getRootpath());
+ } catch (Exception e) { // catches every failure in the try, not only JSON errors; the row is skipped
+ System.out.print("\n---- JSONException in category: " + rs.getInt(1));
+ }
+ }
+ List res = new ArrayList<>();
+ for (Entry> e : parentChains.entrySet()) {
+ if (e.getValue().contains(id)) // NOTE(review): relies on the root path element type matching a boxed int - confirm
+ if (!res.contains(e.getKey())) // map keys are unique, so this guard looks redundant - confirm AutoMap semantics
+ res.add(e.getKey());
+ }
+ return res;
+ }
+
/**
* get map from a file
* @param inputfile
diff --git a/src/main/java/kb/howtokb/utils/SQLiteJDBCConnector.java b/src/main/java/kb/howtokb/utils/SQLiteJDBCConnector.java
index f2bc130..785a3e7 100644
--- a/src/main/java/kb/howtokb/utils/SQLiteJDBCConnector.java
+++ b/src/main/java/kb/howtokb/utils/SQLiteJDBCConnector.java
@@ -75,7 +75,7 @@ public static void createDB() throws SQLException, ClassNotFoundException, IOExc
rs = st.executeQuery("select json from categoryjson where id=1;");
if (!rs.next()){
- String input = "/var/tmp/cxchu/data-server/For-Database/wikihow-id-category.json";
+ String input = "/var/tmp/cxchu/wikihow-id-category.json";
System.out.println("Updating data into table 'category'.....");
update(st, "categoryjson", input);
}
diff --git a/src/test/java/kb/howtokb/TaskFrameReaderTest.java b/src/test/java/kb/howtokb/TaskFrameReaderTest.java
index 70f1ed4..e8e0715 100644
--- a/src/test/java/kb/howtokb/TaskFrameReaderTest.java
+++ b/src/test/java/kb/howtokb/TaskFrameReaderTest.java
@@ -1,6 +1,10 @@
package kb.howtokb;
+import java.io.BufferedWriter;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
import java.util.ArrayList;
import org.json.simple.parser.ParseException;
@@ -10,15 +14,25 @@
public class TaskFrameReaderTest {
public static void main(String[] args) throws ClassNotFoundException, IOException, ParseException {
- String input = "/var/tmp/cxchu/act-frame-test.json";
+ String input = "/var/tmp/cxchu/data-wordnet/act-frame.json";
ArrayList allframe = TaskFrameReader.extractWikiHowTaskFrameFromJSONFile(input);
+ Writer textout = new BufferedWriter(new OutputStreamWriter(
+ new FileOutputStream("/var/tmp/cxchu/data-wordnet/act-frame-wikihow-task.json"), "utf-8"));
+ // NOTE(review): textout is only closed on the normal path; if the loop below throws, the writer is left open.
for (WikiHowTaskFrame f: allframe){
- System.out.println(f.toString());
+ // Write only frames that have at least one sub-activity, one JSON object per line.
+ if (f.getActivity().getSubActivities().size() > 0){
+ textout.write(f.toJsonObject().toJSONString() + "\n");
+ }
+
+ //System.out.println(f.toString());
}
System.out.println("Total of frames: " + allframe.size());
+ textout.close();
+
}
}
diff --git a/src/test/java/kb/howtokb/TextToWikiHowTaskFrameTest.java b/src/test/java/kb/howtokb/TextToWikiHowTaskFrameTest.java
index a37d707..1bfa35c 100644
--- a/src/test/java/kb/howtokb/TextToWikiHowTaskFrameTest.java
+++ b/src/test/java/kb/howtokb/TextToWikiHowTaskFrameTest.java
@@ -19,19 +19,19 @@
public class TextToWikiHowTaskFrameTest {
public static void main(String[] args) throws ClassNotFoundException, IOException, ParseException {
- System.setOut(new PrintStream(new FileOutputStream("log.txt")));
+ //System.setOut(new PrintStream(new FileOutputStream("log.txt")));
TextToWikiHowTaskFrame extract = new TextToWikiHowTaskFrame();
// Extract all question
System.out.println("Reading json data file.....");
- String input = "/var/tmp/cxchu/data-test/articles_test.json";
+ String input = "/var/tmp/cxchu/data-for-test-code/articles_test.json";
ArrayList allQuestions = WikiHowArticleReader.WikiHowArticleReaderFromJSONFile(input);
int frames = 0;
try {
Writer textout = new BufferedWriter(new OutputStreamWriter(
- new FileOutputStream("/var/tmp/cxchu/data-test/act-frame.json"), "utf-8"));
+ new FileOutputStream("/var/tmp/cxchu/data-for-test-code/act-frame.json"), "utf-8"));
Writer idtextout = new BufferedWriter(new OutputStreamWriter(
- new FileOutputStream("/var/tmp/cxchu/data-test/id-act-frame.json"), "utf-8"));
+ new FileOutputStream("/var/tmp/cxchu/data-for-test-code/id-act-frame.json"), "utf-8"));
int i = 1;
diff --git a/src/test/java/kb/howtokb/clustering/HeuristicBottomUpClusteringTest.java b/src/test/java/kb/howtokb/clustering/HeuristicBottomUpClusteringTest.java
index 3672209..c6b1146 100644
--- a/src/test/java/kb/howtokb/clustering/HeuristicBottomUpClusteringTest.java
+++ b/src/test/java/kb/howtokb/clustering/HeuristicBottomUpClusteringTest.java
@@ -1,9 +1,15 @@
package kb.howtokb.clustering;
+import java.io.BufferedWriter;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
import java.util.List;
import kb.howtokb.clustering.HeuristicBottomupClustering.ActivitySuperCluster;
import kb.howtokb.clustering.sim.Coefficient;
+import kb.howtokb.taskframe.WikiHowTaskFrame;
+import kb.howtokb.tools.InformationExtraction;
public class HeuristicBottomUpClusteringTest {
public static void main(String[] args) throws Exception {
@@ -11,7 +17,7 @@ public static void main(String[] args) throws Exception {
long startTime = System.currentTimeMillis();
- String activityTb = "resources/all-words-category.txt";
+ String activityTb = "all-words-category.txt";
HeuristicBottomupClustering cluster = new HeuristicBottomupClustering(activityTb);
double threshold = Coefficient.VVNN_TRHES;
@@ -20,27 +26,27 @@ public static void main(String[] args) throws Exception {
SimplePruningSimilarity simFunc = new SimplePruningSimilarity(threshold, model, allAct);
List results = cluster.cluster(simFunc, Coefficient.VVNN_TRHES);
System.out.println("Number of clusters: " + results.size());
-// String output = "/var/tmp/cxchu/clustering-result/bottom-up-cluster-";
+ String output = "/var/tmp/cxchu/clustering-result-wikihow-task/bottom-up-cluster-";
-// String input = "/var/tmp/cxchu/act-frame-test.json"; //original data point file
-// List allframe = InformationExtraction.getAllFrame(input);
+ String input = "/var/tmp/cxchu/data-server/For-Database/act-frame-wikihow-task.json"; //original data point file
+ List allframe = InformationExtraction.getAllFrame(input);
int total = 0;
for (int i = 0; i < results.size(); i++) {
System.out.println("Cluster " + i + ": " + results.get(i).getSuperClusterMembers().size());
-// Writer out = new BufferedWriter(new OutputStreamWriter(
-// new FileOutputStream(output+i+".json"), "utf-8"));
-// List actitiviesID =
-// results.get(i).getSuperClusterMembers();
-// for (int j=0; j actitiviesID =
+ results.get(i).getSuperClusterMembers();
+ for (int j=0; j