/*
 * NOTE: the text in this comment is web-page navigation residue that was captured
 * together with the file; it is not part of the Java source.
 *
 * Skip to content
 * Permalink
 * 3b99e145f5
 * Switch branches/tags
 *
 * Name already in use
 *
 * A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
 * Go to file
 *
 * Cannot retrieve contributors at this time
 * 351 lines (309 sloc) 11.4 KB
 */
package tool;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import activity.ActivityFrame;
import activity.Category_Json;
import activity.extraction.JsonToActivityFrame;
import util.AutoMap;
import util.DBConnector;
/**
 * Static lookup helpers around the WikiHow data set: category id/name mappings and
 * id-to-URL mappings read from classpath resources, category root paths and activity
 * strings read through {@link DBConnector}, and activity frames parsed from
 * line-delimited JSON files.
 *
 * <p>The resource- and database-backed lookup tables are loaded lazily on first use and
 * kept in static fields. A table is only published after it has been loaded completely,
 * so a failed load is retried on the next call instead of leaving a half-filled cache.
 * The lazy initialisation is not synchronized.
 */
public class InformationExtraction {

    /** Classpath resource with one {@code id<TAB>category} pair per line. */
    private static final String CATEGORY_RESOURCE = "wikihow-id-category-test.txt";

    /** Classpath resource with one {@code id<TAB>url} pair per line. */
    private static final String URL_RESOURCE = "wikihow-id-url";

    /** URL prefix that {@link #linkToTitle(String)} strips. */
    private static final String WIKIHOW_PREFIX = "http://www.wikihow.com/";

    /** Query returning every category id together with its JSON description. */
    private static final String CATEGORY_JSON_QUERY = "select id, json from wikihow.categoryjson";

    private static Map<Integer, String> idtoCate;
    private static Map<String, Integer> catetoID;
    private static Map<Integer, List<Integer>> parentChains;
    private static Map<Integer, String> idtoWikiURL;

    /**
     * Returns the category name stored for a category id.
     *
     * @param id category id
     * @return the category name, or {@code null} if the id is not in the category resource
     * @throws NumberFormatException if an id column in the resource is not an integer
     * @throws IOException if the resource is missing, unreadable or contains a malformed line
     */
    public static String getCategory(int id) throws NumberFormatException, IOException {
        if (idtoCate == null) {
            loadCategoryMaps();
        }
        return idtoCate.get(id);
    }

    /**
     * Returns the category id stored for a category name.
     *
     * @param cate category name exactly as it appears in the category resource
     * @return the category id
     * @throws IllegalArgumentException if the name is not in the category resource
     * @throws NumberFormatException if an id column in the resource is not an integer
     * @throws IOException if the resource is missing, unreadable or contains a malformed line
     */
    public static int getCategoryID(String cate) throws NumberFormatException, IOException {
        if (catetoID == null) {
            loadCategoryMaps();
        }
        Integer id = catetoID.get(cate);
        if (id == null) {
            // Unboxing null here would surface as an unexplained NullPointerException.
            throw new IllegalArgumentException("Unknown category: " + cate);
        }
        return id;
    }

    /**
     * Returns the root path recorded for a category, e.g. {@code "rootpath":[57,54,52,150,1]}
     * in the category JSON. All root paths are read from the database on the first call
     * and cached afterwards.
     *
     * @param id category id
     * @return whatever the cached {@link AutoMap} yields for {@code id}
     *         (NOTE(review): result for an unknown id depends on {@code AutoMap.get} - confirm)
     * @throws SQLException if the category table cannot be read
     */
    public static List<Integer> getRootPath(int id) throws SQLException {
        if (parentChains == null) {
            parentChains = loadParentChains();
        }
        return parentChains.get(id);
    }

    /**
     * Returns the ids of all categories whose root path contains the given category id.
     * The category table is re-read from the database on every call.
     *
     * @param id category id to look for in the root paths
     * @return ids of the matching categories, in the iteration order of the loaded map
     * @throws SQLException if the category table cannot be read
     */
    public static List<Integer> getListofAllChildren(int id) throws SQLException {
        List<Integer> matches = new ArrayList<>();
        for (Entry<Integer, List<Integer>> entry : loadParentChains().entrySet()) {
            List<Integer> rootPath = entry.getValue();
            // Map keys are unique, so no duplicate check on the result list is needed.
            if (rootPath != null && rootPath.contains(id)) {
                matches.add(entry.getKey());
            }
        }
        return matches;
    }

    /**
     * Looks up the {@code activity} column of
     * {@code wikihowcluster.frameidtostrongactsurface} for each id, one query per id.
     *
     * @param ids frame ids to look up
     * @return the activity of every id that has a row, in the order of {@code ids};
     *         ids without a row are skipped
     * @throws SQLException if a query fails
     */
    public static List<String> getListofActivitySurfaceFromDb(List<Integer> ids) throws SQLException {
        return queryActivities("wikihowcluster.frameidtostrongactsurface", ids);
    }

    /**
     * Looks up the {@code activity} column for one id in the given table. Tables used with
     * this method include {@code wikihowcluster.frameidtoweakact} and
     * {@code wikihowcluster.frameidtostrongact}.
     *
     * <p>NOTE(review): {@code table} is concatenated into the SQL text because
     * {@link DBConnector#q} only accepts a query string; never pass untrusted input.
     *
     * @param id row id
     * @param table fully qualified table name
     * @return the activity, or an empty string if no row has that id
     * @throws Exception if the query fails
     */
    public static String getActivityFromDb(int id, String table) throws Exception {
        try (ResultSet rs = DBConnector.q("select activity from " + table + " where id=" + id)) {
            return rs.next() ? rs.getString("activity") : "";
        }
    }

    /**
     * Looks up the {@code activity} column for each id in the given table, one query per id.
     *
     * <p>NOTE(review): {@code table} is concatenated into the SQL text; never pass
     * untrusted input.
     *
     * @param ids row ids to look up
     * @param table fully qualified table name
     * @return the activity of every id that has a row, in the order of {@code ids};
     *         ids without a row are skipped
     * @throws Exception if a query fails
     */
    public static List<String> getActivityListFromDb(List<Integer> ids, String table) throws Exception {
        return queryActivities(table, ids);
    }

    /**
     * Reads the URL stored for an id in {@code wikihow.url}.
     *
     * @param id row id
     * @return the URL, or an empty string if no row has that id
     * @throws SQLException if the query fails
     */
    public static String getWikiURLStringFromDb(int id) throws NumberFormatException, IOException, SQLException {
        try (ResultSet rs = DBConnector.q("select url from wikihow.url where id=" + id)) {
            return rs.next() ? rs.getString(1) : "";
        }
    }

    /**
     * Returns the title derived from the database URL of the first id in the list;
     * the remaining ids are ignored.
     *
     * @param ids ids whose first element is looked up
     * @return the title (see {@link #linkToTitle(String)}), or an empty string if the list
     *         is null or empty, its first element is null, or no URL is stored for it
     * @throws SQLException if the query fails
     */
    public static String getWikiTitleFromDb(List<Integer> ids) throws NumberFormatException, IOException, SQLException {
        if (ids == null || ids.isEmpty() || ids.get(0) == null) {
            return "";
        }
        String url = getWikiURLStringFromDb(ids.get(0));
        return url == null ? "" : linkToTitle(url);
    }

    /**
     * Builds a human-readable, quoted enumeration of the titles selected for the given ids,
     * e.g. {@code "a"}, {@code "a" or "b"}, {@code "a", "b" or "c"}.
     *
     * @param ids ids to select titles for (see {@link #getWikiTitleListFromFile(List)})
     * @return the enumeration, or an empty string if no title was selected
     * @throws NumberFormatException if an id column in the URL resource is not an integer
     * @throws IOException if the URL resource is missing, unreadable or malformed
     */
    public static String getWikiTitleFromFile(List<Integer> ids) throws NumberFormatException, IOException, SQLException {
        List<String> titles = getWikiTitleListFromFile(ids);
        if (titles.isEmpty()) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        int last = titles.size() - 1;
        for (int i = 0; i <= last; i++) {
            if (i > 0) {
                joined.append(i == last ? " or " : ", ");
            }
            joined.append('"').append(titles.get(i)).append('"');
        }
        return joined.toString();
    }

    /**
     * Returns the URLs from the URL resource for the ids selected by
     * {@code new AutoMap<>(ids, 1).sortByValue(3)}.
     *
     * @param ids ids to select URLs for; null or empty yields an empty list
     * @return one element per selected id, {@code null} where the resource has no URL for it
     * @throws NumberFormatException if an id column in the URL resource is not an integer
     * @throws IOException if the URL resource is missing, unreadable or malformed
     */
    public static List<String> getWikiURLListFromFile(List<Integer> ids) throws NumberFormatException, IOException, SQLException {
        return urlsForIds(ids);
    }

    /**
     * Returns the titles (see {@link #linkToTitle(String)}) of the URLs selected for the
     * given ids. Ids without a URL in the resource are skipped.
     *
     * @param ids ids to select titles for; null or empty yields an empty list
     * @return the titles in selection order
     * @throws NumberFormatException if an id column in the URL resource is not an integer
     * @throws IOException if the URL resource is missing, unreadable or malformed
     */
    public static List<String> getWikiTitleListFromFile(List<Integer> ids) throws NumberFormatException, IOException, SQLException {
        List<String> titles = new ArrayList<>();
        for (String url : urlsForIds(ids)) {
            if (url != null) {
                titles.add(linkToTitle(url));
            }
        }
        return titles;
    }

    /**
     * Returns the URL stored for an id in the URL resource.
     *
     * @param id article id
     * @return the URL, or {@code null} if the id is not in the resource
     * @throws NumberFormatException if an id column in the resource is not an integer
     * @throws IOException if the resource is missing, unreadable or malformed
     */
    public static String getWikiURLStringFromFile(int id) throws NumberFormatException, IOException {
        return wikiUrlsById().get(id);
    }

    /**
     * Reads all activity frames from a file holding one JSON object per line.
     *
     * @param inputfile path of the UTF-8 encoded file
     * @return the frames in file order
     * @throws IOException if the file cannot be read
     * @throws ParseException if a line is not valid JSON
     */
    public static List<ActivityFrame> getAllFrame(String inputfile) throws IOException, ParseException {
        System.out.println("Reading json file.......");
        List<ActivityFrame> allframe = readFrames(inputfile);
        System.out.println("Done! Total number of instances: " + allframe.size());
        return allframe;
    }

    /**
     * Reads all activity frames from a file holding one JSON object per line and indexes
     * them by {@code ActivityFrame.getID()}. When several frames share an id, the one that
     * appears last in the file wins.
     *
     * @param inputfile path of the UTF-8 encoded file
     * @return map from frame id to frame
     * @throws IOException if the file cannot be read
     * @throws ParseException if a line is not valid JSON
     */
    public static Map<Integer, ActivityFrame> getMapFrame(String inputfile) throws IOException, ParseException {
        System.out.println("Reading json file.......");
        Map<Integer, ActivityFrame> res = new HashMap<>();
        for (ActivityFrame frame : readFrames(inputfile)) {
            res.put(frame.getID(), frame);
        }
        System.out.println("Done! Total number of instances: " + res.size());
        return res;
    }

    /**
     * Turns a WikiHow URL into a lower-case title: everything up to and including
     * {@code http://www.wikihow.com/} is dropped and dashes become spaces.
     *
     * @param s URL or arbitrary text
     * @return the derived title; {@code s} unchanged if it does not contain the prefix;
     *         {@code null} if {@code s} is {@code null}
     */
    public static String linkToTitle(String s) {
        if (s == null) {
            return null;
        }
        int prefixAt = s.indexOf(WIKIHOW_PREFIX);
        if (prefixAt < 0) {
            return s;
        }
        // Cut relative to where the prefix was found rather than assuming index 0.
        String slug = s.substring(prefixAt + WIKIHOW_PREFIX.length());
        return slug.replace('-', ' ').toLowerCase(Locale.ROOT);
    }

    /** Loads both category lookup tables from the category resource in a single pass. */
    private static void loadCategoryMaps() throws IOException {
        Map<Integer, String> nameById = new HashMap<>();
        Map<String, Integer> idByName = new HashMap<>();
        for (String[] pair : readTabSeparatedPairs(CATEGORY_RESOURCE)) {
            Integer id = Integer.valueOf(pair[0]);
            nameById.put(id, pair[1]);
            idByName.put(pair[1], id);
        }
        // Publish only after a complete, successful load.
        idtoCate = nameById;
        catetoID = idByName;
    }

    /** Returns the cached id-to-URL table, loading it from the URL resource on first use. */
    private static Map<Integer, String> wikiUrlsById() throws IOException {
        if (idtoWikiURL == null) {
            Map<Integer, String> loaded = new HashMap<>();
            for (String[] pair : readTabSeparatedPairs(URL_RESOURCE)) {
                loaded.put(Integer.valueOf(pair[0]), pair[1]);
            }
            idtoWikiURL = loaded;
        }
        return idtoWikiURL;
    }

    /**
     * Reads a UTF-8 classpath resource of tab-separated lines, skipping blank lines.
     * Every returned array has at least two elements.
     */
    private static List<String[]> readTabSeparatedPairs(String resource) throws IOException {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = InformationExtraction.class.getClassLoader();
        }
        InputStream in = loader == null ? null : loader.getResourceAsStream(resource);
        if (in == null) {
            throw new IOException("Classpath resource not found: " + resource);
        }
        List<String[]> pairs = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            int lineNo = 0;
            while ((line = reader.readLine()) != null) {
                lineNo++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split("\t");
                if (fields.length < 2) {
                    throw new IOException("Malformed line " + lineNo + " in resource " + resource
                            + ": expected two tab-separated fields");
                }
                pairs.add(fields);
            }
        }
        return pairs;
    }

    /**
     * Reads every category's root path from the database into a fresh {@link AutoMap}.
     * Rows whose JSON cannot be converted are reported on standard output and skipped.
     */
    private static Map<Integer, List<Integer>> loadParentChains() throws SQLException {
        Map<Integer, List<Integer>> chains = new AutoMap<>();
        try (ResultSet rs = DBConnector.q(CATEGORY_JSON_QUERY)) {
            while (rs.next()) {
                // Read the columns outside the inner try so database errors are not
                // misreported as JSON problems.
                int categoryId = rs.getInt(1);
                String json = rs.getString(2);
                try {
                    chains.put(categoryId, Category_Json.fromJson(json).getRootpath());
                } catch (Exception e) {
                    System.out.print("\n---- JSONException in category: " + categoryId);
                }
            }
        }
        return chains;
    }

    /** Collects the {@code activity} value of each id that has a row in {@code table}. */
    private static List<String> queryActivities(String table, List<Integer> ids) throws SQLException {
        List<String> activities = new ArrayList<>();
        for (Integer id : ids) {
            try (ResultSet rs = DBConnector.q("select activity from " + table + " where id=" + id)) {
                if (rs.next()) {
                    activities.add(rs.getString("activity"));
                }
            }
        }
        return activities;
    }

    /**
     * Maps the ids selected by {@code new AutoMap<>(ids, 1).sortByValue(3)} to their URLs.
     * NOTE(review): presumably this counts id occurrences and keeps the three most
     * frequent ones - confirm against {@code util.AutoMap}.
     */
    private static List<String> urlsForIds(List<Integer> ids) throws IOException {
        Map<Integer, String> urlById = wikiUrlsById();
        List<String> urls = new ArrayList<>();
        if (ids != null && !ids.isEmpty()) {
            AutoMap<Integer, Integer> idmap = new AutoMap<Integer, Integer>(ids, 1);
            for (Entry<Integer, Integer> e : idmap.sortByValue(3).entrySet()) {
                urls.add(urlById.get(e.getKey()));
            }
        }
        return urls;
    }

    /** Parses one activity frame per non-blank line of a UTF-8 encoded JSON-lines file. */
    private static List<ActivityFrame> readFrames(String inputfile) throws IOException, ParseException {
        List<ActivityFrame> frames = new ArrayList<>();
        JSONParser parser = new JSONParser();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(inputfile), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                JSONObject jsonObject = (JSONObject) parser.parse(line);
                frames.add(JsonToActivityFrame.jsonToActivityFrame(jsonObject));
            }
        }
        return frames;
    }
}