// Source: HowToKB/src/kb/howtokb/taskframe/extractor/TextToWikiHowTaskFrame.java
// (Copied from a GitHub page view: 936 lines, 892 sloc, 38.7 KB. The page warned that the
// file contains bidirectional Unicode text; open it in an editor that reveals hidden
// Unicode characters to review it.)
package kb.howtokb.taskframe.extractor; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.Map; | |
import java.util.regex.Pattern; | |
import edu.knowitall.openie.Extraction; | |
import edu.stanford.nlp.util.Pair; | |
import kb.howtokb.taskframe.WikiHowTask; | |
import kb.howtokb.taskframe.WikiHowTaskFrame; | |
import kb.howtokb.wkhobject.Category; | |
import kb.howtokb.wkhobject.Ingredients; | |
import kb.howtokb.wkhobject.Method; | |
import kb.howtokb.wkhobject.Part; | |
import kb.howtokb.wkhobject.Question; | |
import kb.howtokb.wkhobject.Step; | |
import kb.howtokb.wkhobject.Things; | |
public class TextToWikiHowTaskFrame { | |
static Pattern pattern = Pattern.compile("[a-zA-Z]"); | |
static TextToOpenIEResult txtOpenIE = new TextToOpenIEResult(); | |
static Map<String, Integer> linktoID; | |
static Map<String, Integer> catetoID; | |
static int act_id = 1; | |
private int num_ext_wt_thres = 0; | |
private int num_ext_gt_thres = 0; | |
private int num_article = 0; | |
private int num_sent = 0; | |
public ArrayList<WikiHowTaskFrame> articleToListWikiHowTaskFrame(Question ques) throws NumberFormatException, IOException{ | |
num_article++; | |
ArrayList<WikiHowTaskFrame> listWikiHowTaskFrame = new ArrayList<>(); | |
String linkID = Integer.toString(getLinkID(ques.getLink())); | |
String categoryID = Integer.toString(getCategoryID(ques.getCategoryAll())); | |
double rate = ques.getRate(); | |
int view = ques.getViews(); | |
String video = ques.getVideo(); | |
ArrayList<Things> thing = ques.getThings(); | |
ArrayList<String> allObject = new ArrayList<>(); | |
if (thing.size()>0){ | |
for (Things t: thing){ | |
for (String s: t.getThings()){ | |
if (s.length() < 50) | |
allObject.add(s); | |
} | |
} | |
} | |
ArrayList<Ingredients> ingredient = ques.getIngredients(); | |
if (ingredient.size()>0){ | |
for (Ingredients i: ingredient){ | |
for (String s: i.getIngredients()) | |
if (s.length() < 50) | |
allObject.add(s); | |
} | |
} | |
//Extracting WikiHowTask | |
String title = ques.getTitle(); | |
//System.out.println(title); | |
ArrayList<WikiHowTaskFrame> listframe = sentToListWikiHowTaskFrame(title); | |
if (listframe.size() > 0){ | |
for (WikiHowTaskFrame frame: listframe){ | |
WikiHowTask newact = frame.getActivity(); | |
newact = new WikiHowTask(act_id++, newact.getVerb(), | |
newact.getObject(), newact.getOriVerb(), | |
newact.getOriObject(), categoryID, linkID, rate, view, "", video); | |
ArrayList<Integer> sublist = new ArrayList<>(); | |
//set id, act for frame | |
frame.setId(newact.getID()); | |
allObject.addAll(frame.getParticipatingObject()); | |
frame.setParticipatingObject(allObject); | |
//listWikiHowTaskFrame.add(frame); | |
ArrayList<Method> answer = ques.getAnswer(); | |
if (answer.size() > 0){ | |
if (answer.size() == 1){ | |
//One method | |
ArrayList<Part> listofpart = answer.get(0).getMethod(); | |
if (listofpart.size() > 0){ | |
if (listofpart.size() == 1){ | |
//One part, one method | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
int tempid = -1; | |
ArrayList<WikiHowTaskFrame> templistframe = new ArrayList<>(); | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newact.getID())); | |
newstepact.setPrev(Integer.toString(tempid)); | |
tempid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistframe.add(listStepFrame.get(i)); | |
sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistframe.size()-1; i>=0; i--){ | |
templistframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistframe); | |
} | |
}else{ | |
//Many part, one method | |
int tempprevpartid = -1; | |
ArrayList<WikiHowTaskFrame> templistpartframe = new ArrayList<>(); | |
for (Part part: listofpart){ | |
if (pattern.matcher(part.getTitle()).find()){ | |
ArrayList<WikiHowTaskFrame> listpartFrame = sentToListWikiHowTaskFrame(part.getTitle()); | |
if (listpartFrame.size() > 0){ | |
for (WikiHowTaskFrame partframe: listpartFrame){ | |
WikiHowTask newpartact = partframe.getActivity(); | |
newpartact = new WikiHowTask(act_id++, newpartact.getVerb(), | |
newpartact.getObject(), newpartact.getOriVerb(), | |
newpartact.getOriObject(), categoryID, linkID, rate, view, "", ""); | |
newpartact.setParent(Integer.toString(newact.getID())); | |
newpartact.setPrev(Integer.toString(tempprevpartid)); | |
tempprevpartid = newpartact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> subofpart = new ArrayList<>(); | |
//Extract step in part | |
ArrayList<Step> listofstep = part.getPart(); | |
if (listofstep.size() > 0){ | |
int tempid = -1; | |
ArrayList<WikiHowTaskFrame> templistframe = new ArrayList<>(); | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newpartact.getID())); | |
newstepact.setPrev(Integer.toString(tempid)); | |
tempid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistframe.add(listStepFrame.get(i)); | |
subofpart.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistframe.size()-1; i>=0; i--){ | |
templistframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistframe); | |
} | |
newpartact.setSubActivities(subofpart); | |
//set frame id, act | |
partframe.setId(newpartact.getID()); | |
partframe.setActivity(newpartact); | |
templistpartframe.add(partframe); | |
sublist.add(newpartact.getID()); | |
} | |
}else{ | |
//Part has title, but no extraction | |
//So take all step in this part as parts | |
ArrayList<Step> listofstep = part.getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newact.getID())); | |
newstepact.setPrev(Integer.toString(tempprevpartid)); | |
tempprevpartid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistpartframe.add(listStepFrame.get(i)); | |
sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
}else{ | |
//Part has no title | |
//Take all step in this part as parts | |
ArrayList<Step> listofstep = part.getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newact.getID())); | |
newstepact.setPrev(Integer.toString(tempprevpartid)); | |
tempprevpartid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistpartframe.add(listStepFrame.get(i)); | |
sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistpartframe.size()-1; i>=0; i--){ | |
templistpartframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistpartframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistpartframe); | |
} | |
} | |
}else{ | |
//Many method | |
//actually, one method just includes one part | |
int tempprevmethodid = -1; | |
ArrayList<WikiHowTaskFrame> templistmethodframe = new ArrayList<>(); | |
for (Method method: answer){ | |
if (pattern.matcher(method.getTitle()).find()){ | |
//Method has title | |
ArrayList<WikiHowTaskFrame> listmethodFrame = sentToListWikiHowTaskFrame(method.getTitle()); | |
if (listmethodFrame.size() > 0){ | |
//Method has extraction | |
for (WikiHowTaskFrame methodframe: listmethodFrame){ | |
WikiHowTask newmethodact = methodframe.getActivity(); | |
newmethodact = new WikiHowTask(act_id++, newmethodact.getVerb(), | |
newmethodact.getObject(), newmethodact.getOriVerb(), | |
newmethodact.getOriObject(), categoryID, linkID, rate, view, "", ""); | |
newmethodact.setParent(Integer.toString(newact.getID())); | |
newmethodact.setPrev(Integer.toString(tempprevmethodid)); | |
tempprevmethodid = newmethodact.getID(); | |
//List of subactivity | |
ArrayList<Integer> subofmethod = new ArrayList<>(); | |
//Extract step in part | |
ArrayList<Part> listofpart = method.getMethod(); | |
if (listofpart.size() > 0){ | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
int tempid = -1; | |
ArrayList<WikiHowTaskFrame> templistframe = new ArrayList<>(); | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newmethodact.getID())); | |
newstepact.setPrev(Integer.toString(tempid)); | |
tempid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistframe.add(listStepFrame.get(i)); | |
subofmethod.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistframe.size()-1; i>=0; i--){ | |
templistframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistframe); | |
} | |
} | |
newmethodact.setSubActivities(subofmethod); | |
//set frame id, act | |
methodframe.setId(newmethodact.getID()); | |
methodframe.setActivity(newmethodact); | |
templistmethodframe.add(methodframe); | |
sublist.add(newmethodact.getID()); | |
} | |
}else{ | |
//Method has title, but no extraction | |
//So take all step in this method as method | |
ArrayList<Part> listofpart = method.getMethod(); | |
if (listofpart.size() > 0){ | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newact.getID())); | |
newstepact.setPrev(Integer.toString(tempprevmethodid)); | |
tempprevmethodid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistmethodframe.add(listStepFrame.get(i)); | |
sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
}else{ | |
//Method has no title | |
//Take all step in this method as parts | |
ArrayList<Part> listofpart = method.getMethod(); | |
if (listofpart.size() > 0){ | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newact.getID())); | |
newstepact.setPrev(Integer.toString(tempprevmethodid)); | |
tempprevmethodid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistmethodframe.add(listStepFrame.get(i)); | |
sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistmethodframe.size()-1; i>=0; i--){ | |
templistmethodframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistmethodframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistmethodframe); | |
} | |
} | |
newact.setSubActivities(sublist); | |
frame.setActivity(newact); | |
listWikiHowTaskFrame.add(frame); | |
} | |
}else{ | |
//Title has no extraction | |
ArrayList<Method> answer = ques.getAnswer(); | |
if (answer.size() > 0){ | |
if (answer.size() == 1){ | |
//One method | |
ArrayList<Part> listofpart = answer.get(0).getMethod(); | |
if (listofpart.size() > 0){ | |
if (listofpart.size() == 1){ | |
//One part, one method | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
int tempid = -1; | |
ArrayList<WikiHowTaskFrame> templistframe = new ArrayList<>(); | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setPrev(Integer.toString(tempid)); | |
tempid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistframe.add(listStepFrame.get(i)); | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistframe.size()-1; i>=0; i--){ | |
templistframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistframe); | |
} | |
}else{ | |
//Many part, one method | |
int tempprevpartid = -1; | |
ArrayList<WikiHowTaskFrame> templistpartframe = new ArrayList<>(); | |
for (Part part: listofpart){ | |
if (pattern.matcher(part.getTitle()).find()){ | |
ArrayList<WikiHowTaskFrame> listpartFrame = sentToListWikiHowTaskFrame(part.getTitle()); | |
if (listpartFrame.size() > 0){ | |
for (WikiHowTaskFrame partframe: listpartFrame){ | |
WikiHowTask newpartact = partframe.getActivity(); | |
newpartact = new WikiHowTask(act_id++, newpartact.getVerb(), | |
newpartact.getObject(), newpartact.getOriVerb(), | |
newpartact.getOriObject(), categoryID, linkID, rate, view, "", ""); | |
//newpartact.setParent(newact.getID()); | |
newpartact.setPrev(Integer.toString(tempprevpartid)); | |
tempprevpartid = newpartact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> subofpart = new ArrayList<>(); | |
//Extract step in part | |
ArrayList<Step> listofstep = part.getPart(); | |
if (listofstep.size() > 0){ | |
int tempid = -1; | |
ArrayList<WikiHowTaskFrame> templistframe = new ArrayList<>(); | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newpartact.getID())); | |
newstepact.setPrev(Integer.toString(tempid)); | |
tempid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistframe.add(listStepFrame.get(i)); | |
subofpart.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistframe.size()-1; i>=0; i--){ | |
templistframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistframe); | |
} | |
newpartact.setSubActivities(subofpart); | |
//set frame id, act | |
partframe.setId(newpartact.getID()); | |
partframe.setActivity(newpartact); | |
templistpartframe.add(partframe); | |
//sublist.add(newpartact.getID()); | |
} | |
}else{ | |
//Part has title, but no extraction | |
//So take all step in this part as parts | |
ArrayList<Step> listofstep = part.getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
//newstepact.setParent(newact.getID()); | |
newstepact.setPrev(Integer.toString(tempprevpartid)); | |
tempprevpartid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistpartframe.add(listStepFrame.get(i)); | |
//sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
}else{ | |
//Part has no title | |
//Take all step in this part as parts | |
ArrayList<Step> listofstep = part.getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
//newstepact.setParent(newact.getID()); | |
newstepact.setPrev(Integer.toString(tempprevpartid)); | |
tempprevpartid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistpartframe.add(listStepFrame.get(i)); | |
//sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistpartframe.size()-1; i>=0; i--){ | |
templistpartframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistpartframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistpartframe); | |
} | |
} | |
}else{ | |
//Many method | |
//actually, one method just includes one part | |
int tempprevmethodid = -1; | |
ArrayList<WikiHowTaskFrame> templistmethodframe = new ArrayList<>(); | |
for (Method method: answer){ | |
if (pattern.matcher(method.getTitle()).find()){ | |
//Method has title | |
ArrayList<WikiHowTaskFrame> listmethodFrame = sentToListWikiHowTaskFrame(method.getTitle()); | |
if (listmethodFrame.size() > 0){ | |
//Method has extraction | |
for (WikiHowTaskFrame methodframe: listmethodFrame){ | |
WikiHowTask newmethodact = methodframe.getActivity(); | |
newmethodact = new WikiHowTask(act_id++, newmethodact.getVerb(), | |
newmethodact.getObject(), newmethodact.getOriVerb(), | |
newmethodact.getOriObject(), categoryID, linkID, rate, view, "", ""); | |
//newmethodact.setParent(newact.getID()); | |
newmethodact.setPrev(Integer.toString(tempprevmethodid)); | |
tempprevmethodid = newmethodact.getID(); | |
//List of subactivity | |
ArrayList<Integer> subofmethod = new ArrayList<>(); | |
//Extract step in part | |
ArrayList<Part> listofpart = method.getMethod(); | |
if (listofpart.size() > 0){ | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
int tempid = -1; | |
ArrayList<WikiHowTaskFrame> templistframe = new ArrayList<>(); | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
newstepact.setParent(Integer.toString(newmethodact.getID())); | |
newstepact.setPrev(Integer.toString(tempid)); | |
tempid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistframe.add(listStepFrame.get(i)); | |
subofmethod.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistframe.size()-1; i>=0; i--){ | |
templistframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistframe); | |
} | |
} | |
newmethodact.setSubActivities(subofmethod); | |
//set frame id, act | |
methodframe.setId(newmethodact.getID()); | |
methodframe.setActivity(newmethodact); | |
templistmethodframe.add(methodframe); | |
//sublist.add(newmethodact.getID()); | |
} | |
}else{ | |
//Method has title, but no extraction | |
//So take all step in this method as method | |
ArrayList<Part> listofpart = method.getMethod(); | |
if (listofpart.size() > 0){ | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
//newstepact.setParent(newact.getID()); | |
newstepact.setPrev(Integer.toString(tempprevmethodid)); | |
tempprevmethodid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistmethodframe.add(listStepFrame.get(i)); | |
//sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
}else{ | |
//Method has no title | |
//Take all step in this method as parts | |
ArrayList<Part> listofpart = method.getMethod(); | |
if (listofpart.size() > 0){ | |
ArrayList<Step> listofstep = listofpart.get(0).getPart(); | |
if (listofstep.size() > 0){ | |
//Assign prev | |
for (Step step: listofstep){ | |
if (step.getMain_Act() != ""){ | |
//Only steps, no part, no method | |
ArrayList<WikiHowTaskFrame> listStepFrame = sentToListWikiHowTaskFrame(step.getMain_Act()); | |
if (listStepFrame.size() > 0){ | |
for (int i=0; i<listStepFrame.size(); i++){ | |
WikiHowTask newstepact = listStepFrame.get(i).getActivity(); | |
newstepact = new WikiHowTask(act_id++, newstepact.getVerb(), | |
newstepact.getObject(), newstepact.getOriVerb(), | |
newstepact.getOriObject(), categoryID, linkID, rate, view, step.getImage(), ""); | |
//newstepact.setParent(newact.getID()); | |
newstepact.setPrev(Integer.toString(tempprevmethodid)); | |
tempprevmethodid = newstepact.getID(); | |
//List of subactivity, it's empty | |
ArrayList<Integer> sub = new ArrayList<>(); | |
newstepact.setSubActivities(sub); | |
//set frame id,act | |
listStepFrame.get(i).setId(newstepact.getID()); | |
listStepFrame.get(i).setActivity(newstepact); | |
templistmethodframe.add(listStepFrame.get(i)); | |
//sublist.add(newstepact.getID()); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
//assign next | |
int tempnext = -1; | |
for (int i=templistmethodframe.size()-1; i>=0; i--){ | |
templistmethodframe.get(i).getActivity().setNext(Integer.toString(tempnext)); | |
tempnext = templistmethodframe.get(i).getActivity().getID(); | |
} | |
listWikiHowTaskFrame.addAll(templistmethodframe); | |
} | |
} | |
} | |
//System.out.println(listWikiHowTaskFrame.size()); | |
return listWikiHowTaskFrame; | |
} | |
//Extract list of activity frame from a sentence | |
public ArrayList<WikiHowTaskFrame> sentToListWikiHowTaskFrame(String sent) throws IOException{ | |
ArrayList<WikiHowTaskFrame> listFrame = new ArrayList<>(); | |
Pair<Integer, ArrayList<Extraction>> ext_list = txtOpenIE.extractOriTriple(sent); | |
ArrayList<Extraction> listExtractions = ext_list.second; | |
//Report number of extraction that have confidence greater than the threshold | |
num_ext_gt_thres += listExtractions.size(); | |
num_ext_wt_thres += ext_list.first; | |
num_sent++; | |
//System.out.println(num_ext_wt_thres); | |
if (listExtractions.size() > 0){ | |
for (Extraction ext: listExtractions){ | |
String triple = strongNormalizedTriple(ext.tripleString()); | |
//System.out.println(triple); | |
String weaktriple = weakNormalizedTriple(ext.tripleString()); | |
if (triple != "" && weaktriple != ""){ | |
ArrayList<String> loc = new ArrayList<>(); | |
ArrayList<String> temp = new ArrayList<>(); | |
ArrayList<String> partA = new ArrayList<>(); | |
ArrayList<String> partO = new ArrayList<>(); | |
String [] comp = triple.split(";"); | |
String [] weakcomp = weaktriple.split(";"); | |
if (comp.length > 3){ | |
for (int i=3; i<comp.length; i++){ | |
if (comp[i].contains("l:")){ | |
if (comp[i].contains(" ")) | |
loc.add(comp[i].substring(comp[i].indexOf(" ") + 1)); | |
else loc.add(comp[i].substring("l:".length())); | |
}else if (comp[i].contains("t:")){ | |
temp.add(comp[i].substring("t:".length())); | |
} | |
} | |
} | |
if (checkAgent(comp[2])) partA.add(comp[2]); | |
else partO.add(comp[2]); | |
listFrame.add(new WikiHowTaskFrame( | |
new WikiHowTask(simpleNormalizedText(comp[1]), simpleNormalizedText(comp[2]), | |
simpleNormalizedText(weakcomp[1]), simpleNormalizedText(weakcomp[2])), | |
loc, temp, partA, partO)); | |
} | |
} | |
} | |
return listFrame; | |
} | |
//Check an object is living thing or not | |
public static boolean checkAgent(String object){ | |
return OpenIE4Activities.isLivingBeing(object); | |
} | |
//Normalized result of openIE | |
// true true : check frequent word in wordnet | |
// true false: dont check frequency | |
public static String strongNormalizedTriple(String triple) throws IOException{ | |
return OpenIE4Activities.normalizeOpenIEResult(triple, ";", true, true); | |
} | |
//Normalized result of openIE | |
// false false: dont check frequency and dont pick headword | |
public static String weakNormalizedTriple(String triple) throws IOException{ | |
return OpenIE4Activities.normalizeOpenIEResult(triple, ";", false, false); | |
} | |
public int getNum_Ext_Gt_Thres(){ | |
return num_ext_gt_thres; | |
} | |
public int getNum_Ext_Wt_Thres(){ | |
return num_ext_wt_thres; | |
} | |
public int getNum_Sent(){ | |
return num_sent; | |
} | |
public int getNum_Article(){ | |
return num_article; | |
} | |
//Get category id | |
public int getCategoryID(ArrayList<Category> cate) throws NumberFormatException, IOException{ | |
if (catetoID == null){ | |
catetoID = new HashMap<>(); | |
ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); | |
InputStream inputs = classLoader.getResourceAsStream("wikihow-id-category.txt"); | |
BufferedReader br = new BufferedReader(new InputStreamReader(inputs, "UTF-8")); | |
String sCurrentLine; | |
while ((sCurrentLine = br.readLine()) != null) { | |
String[] line = sCurrentLine.split("\t"); | |
int id = Integer.parseInt(line[0]); | |
String cate1 = line[1]; | |
catetoID.put(cate1, id); | |
} | |
br.close(); | |
} | |
int id = -1; | |
if (cate.size() > 0){ | |
for (Category c: cate){ | |
if (catetoID.get(c.getCategory()) != null) | |
return catetoID.get(c.getCategory()); | |
} | |
} | |
return id; | |
} | |
//Get link id | |
public int getLinkID(String url) throws NumberFormatException, IOException{ | |
if (linktoID == null){ | |
linktoID = new HashMap<>(); | |
ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); | |
InputStream inputs = classLoader.getResourceAsStream("wikihow-id-url"); | |
BufferedReader br = new BufferedReader(new InputStreamReader(inputs, "UTF-8")); | |
String sCurrentLine; | |
while ((sCurrentLine = br.readLine()) != null) { | |
String[] line = sCurrentLine.split("\t"); | |
int id = Integer.parseInt(line[0]); | |
String link = line[1]; | |
linktoID.put(link, id); | |
} | |
br.close(); | |
} | |
int id = -1; | |
if (linktoID.get(url) != null){ | |
id = linktoID.get(url); | |
} | |
return id; | |
} | |
public static String simpleNormalizedText(String s) throws IOException{ | |
if (s.contains("/")) | |
s = s.replaceAll("/", " "); | |
if (s.contains("[")) | |
s = s.replace("[", ""); | |
if (s.contains("]")) | |
s = s.replaceAll("]", ""); | |
if (s.contains(" n't")) | |
s = s.replaceAll(" n't", "n't"); | |
if (s.contains(" nt")) | |
s = s.replaceAll(" nt", "nt"); | |
if (s.contains(" '")) | |
s = s.replaceAll(" '", "'"); | |
s = s.replaceAll("l:", "").replaceAll("t:", ""); | |
s = s.replaceAll("\\s+", " ").trim().toLowerCase(); | |
return s; | |
} | |
} |