Skip to content
Permalink
0d82ff1dc4
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
936 lines (892 sloc) 38.7 KB
package kb.howtokb.taskframe.extractor;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import edu.knowitall.openie.Extraction;
import edu.stanford.nlp.util.Pair;
import kb.howtokb.taskframe.WikiHowTask;
import kb.howtokb.taskframe.WikiHowTaskFrame;
import kb.howtokb.wkhobject.Category;
import kb.howtokb.wkhobject.Ingredients;
import kb.howtokb.wkhobject.Method;
import kb.howtokb.wkhobject.Part;
import kb.howtokb.wkhobject.Question;
import kb.howtokb.wkhobject.Step;
import kb.howtokb.wkhobject.Things;
public class TextToWikiHowTaskFrame {
// Matches any single ASCII letter; used to decide whether a part/method title
// contains text worth sending to the extractor.
static Pattern pattern = Pattern.compile("[a-zA-Z]");
// Shared OpenIE wrapper used by sentToListWikiHowTaskFrame to extract triples.
static TextToOpenIEResult txtOpenIE = new TextToOpenIEResult();
// URL -> numeric id lookup; left null until getLinkID fills it on first use.
static Map<String, Integer> linktoID;
// Category name -> numeric id lookup; left null until getCategoryID fills it on first use.
static Map<String, Integer> catetoID;
// Next id handed to a newly created WikiHowTask; post-incremented on every creation
// and shared by all instances because it is static.
static int act_id = 1;
// Running sum of the first component of the pair returned by extractOriTriple
// (NOTE(review): presumably the extraction count before confidence filtering - confirm).
private int num_ext_wt_thres = 0;
// Running count of the extractions actually returned by extractOriTriple.
private int num_ext_gt_thres = 0;
// Number of articles passed to articleToListWikiHowTaskFrame on this instance.
private int num_article = 0;
// Number of sentences passed to sentToListWikiHowTaskFrame on this instance.
private int num_sent = 0;
/**
 * Converts one WikiHow article into a flat list of task frames whose tasks are
 * linked to each other through parent / previous / next / sub-activity ids.
 *
 * <p>The article title is run through the extractor first. Every frame extracted
 * from the title becomes a root task and the article body is extracted once per
 * root task, with the root as parent. When the title yields no frame the body is
 * extracted once, and its top-level tasks get no parent.
 *
 * <p>The body is handled in three shapes:
 * <ul>
 *   <li>one method with one part: its steps form a single prev/next chain;</li>
 *   <li>one method with several parts: each part is a "section";</li>
 *   <li>several methods: each method is a "section" and only the first part of
 *       each method is read.</li>
 * </ul>
 * A section whose title yields frames becomes a task with its steps as children;
 * otherwise its steps are promoted to the section level.
 *
 * <p>Order of the returned list: frames of nested steps appear as soon as their
 * section is processed, then the section-level frames, then the root frame.
 *
 * @param ques the parsed article
 * @return all frames extracted from the article (possibly empty)
 * @throws NumberFormatException if an id resource contains a non-numeric id
 * @throws IOException if an id resource cannot be read or extraction fails
 */
public ArrayList<WikiHowTaskFrame> articleToListWikiHowTaskFrame(Question ques) throws NumberFormatException, IOException{
    num_article++;
    ArrayList<WikiHowTaskFrame> listWikiHowTaskFrame = new ArrayList<>();
    String linkID = Integer.toString(getLinkID(ques.getLink()));
    String categoryID = Integer.toString(getCategoryID(ques.getCategoryAll()));
    double rate = ques.getRate();
    int view = ques.getViews();
    String video = ques.getVideo();
    ArticleMeta meta = new ArticleMeta(categoryID, linkID, rate, view);

    // Collect the article's "things" and ingredients; long entries are dropped.
    ArrayList<String> allObject = new ArrayList<>();
    for (Things t: ques.getThings()){
        for (String s: t.getThings()){
            if (s.length() < 50)
                allObject.add(s);
        }
    }
    for (Ingredients i: ques.getIngredients()){
        for (String s: i.getIngredients()){
            if (s.length() < 50)
                allObject.add(s);
        }
    }

    ArrayList<WikiHowTaskFrame> listframe = sentToListWikiHowTaskFrame(ques.getTitle());
    if (listframe.isEmpty()){
        // Title has no extraction: body tasks have no parent and no root sub-list.
        extractAnswer(ques, meta, null, null, listWikiHowTaskFrame);
        return listWikiHowTaskFrame;
    }

    for (WikiHowTaskFrame frame: listframe){
        // Only the root task carries the article video.
        WikiHowTask newact = newTask(frame.getActivity(), meta, "", video);
        ArrayList<Integer> sublist = new ArrayList<>();
        frame.setId(newact.getID());
        // NOTE(review): every root frame receives the SAME list instance, so the
        // participating objects of later title frames also show up in earlier
        // ones. Kept as in the original; confirm whether a per-frame copy is wanted.
        allObject.addAll(frame.getParticipatingObject());
        frame.setParticipatingObject(allObject);

        extractAnswer(ques, meta, Integer.toString(newact.getID()), sublist, listWikiHowTaskFrame);

        newact.setSubActivities(sublist);
        frame.setActivity(newact);
        listWikiHowTaskFrame.add(frame);
    }
    return listWikiHowTaskFrame;
}

/** Article-level attributes that are copied onto every task created for the article. */
private static final class ArticleMeta {
    final String categoryID;
    final String linkID;
    final double rate;
    final int view;

    ArticleMeta(String categoryID, String linkID, double rate, int view){
        this.categoryID = categoryID;
        this.linkID = linkID;
        this.rate = rate;
        this.view = view;
    }
}

/** Creates a task with the next global id from the verb/object fields of an extracted task. */
private static WikiHowTask newTask(WikiHowTask extracted, ArticleMeta meta, String image, String video) throws IOException{
    return new WikiHowTask(act_id++, extracted.getVerb(),
            extracted.getObject(), extracted.getOriVerb(),
            extracted.getOriObject(), meta.categoryID, meta.linkID, meta.rate, meta.view, image, video);
}

/** Sets each frame's "next" to the id of the following frame; the last one gets -1. */
private static void linkNextPointers(ArrayList<WikiHowTaskFrame> frames){
    int next = -1;
    for (int i = frames.size() - 1; i >= 0; i--){
        WikiHowTask task = frames.get(i).getActivity();
        task.setNext(Integer.toString(next));
        next = task.getID();
    }
}

/**
 * Extracts every step and appends the resulting frames to {@code frames},
 * continuing the "prev" chain from {@code prevId}.
 *
 * @param parentId parent id to set on each task, or null to leave it unset
 * @param subIds   list receiving the new task ids, or null if nobody collects them
 * @return the id of the last task created, or {@code prevId} if none was created
 */
private int appendStepFrames(ArrayList<Step> steps, ArticleMeta meta, String parentId, int prevId,
        ArrayList<WikiHowTaskFrame> frames, ArrayList<Integer> subIds) throws IOException{
    for (Step step: steps){
        String mainAct = step.getMain_Act();
        // Compare by content: the original "!= \"\"" tested object identity only.
        if (mainAct == null || mainAct.isEmpty())
            continue;
        for (WikiHowTaskFrame stepFrame: sentToListWikiHowTaskFrame(mainAct)){
            WikiHowTask stepTask = newTask(stepFrame.getActivity(), meta, step.getImage(), "");
            if (parentId != null)
                stepTask.setParent(parentId);
            stepTask.setPrev(Integer.toString(prevId));
            prevId = stepTask.getID();
            // Steps are leaves: their sub-activity list is always empty.
            stepTask.setSubActivities(new ArrayList<Integer>());
            stepFrame.setId(stepTask.getID());
            stepFrame.setActivity(stepTask);
            frames.add(stepFrame);
            if (subIds != null)
                subIds.add(stepTask.getID());
        }
    }
    return prevId;
}

/** Extracts the steps as one self-contained prev/next chain and appends it to {@code out}. */
private void extractStepChain(ArrayList<Step> steps, ArticleMeta meta, String parentId,
        ArrayList<Integer> subIds, ArrayList<WikiHowTaskFrame> out) throws IOException{
    ArrayList<WikiHowTaskFrame> chain = new ArrayList<>();
    appendStepFrames(steps, meta, parentId, -1, chain, subIds);
    linkNextPointers(chain);
    out.addAll(chain);
}

/**
 * Extracts one titled section (a part or a method).
 *
 * <p>If the title contains a letter and yields frames, each frame becomes a
 * section task appended to {@code siblings}; the steps are extracted again for
 * every such task, as its children, and go straight to {@code out}. Otherwise
 * the steps themselves are appended to {@code siblings} at the section level.
 *
 * @return the id of the last sibling-level task created, or {@code prevId}
 */
private int extractSection(String title, ArrayList<Step> steps, ArticleMeta meta, String parentId,
        ArrayList<Integer> parentSubIds, int prevId, ArrayList<WikiHowTaskFrame> siblings,
        ArrayList<WikiHowTaskFrame> out) throws IOException{
    if (pattern.matcher(title).find()){
        ArrayList<WikiHowTaskFrame> titleFrames = sentToListWikiHowTaskFrame(title);
        if (!titleFrames.isEmpty()){
            for (WikiHowTaskFrame sectionFrame: titleFrames){
                WikiHowTask sectionTask = newTask(sectionFrame.getActivity(), meta, "", "");
                if (parentId != null)
                    sectionTask.setParent(parentId);
                sectionTask.setPrev(Integer.toString(prevId));
                prevId = sectionTask.getID();

                ArrayList<Integer> childIds = new ArrayList<>();
                extractStepChain(steps, meta, Integer.toString(sectionTask.getID()), childIds, out);
                sectionTask.setSubActivities(childIds);

                sectionFrame.setId(sectionTask.getID());
                sectionFrame.setActivity(sectionTask);
                siblings.add(sectionFrame);
                if (parentSubIds != null)
                    parentSubIds.add(sectionTask.getID());
            }
            return prevId;
        }
    }
    // No title, or a title without extraction: treat the steps as sections.
    return appendStepFrames(steps, meta, parentId, prevId, siblings, parentSubIds);
}

/**
 * Extracts the body (methods / parts / steps) of the article and appends the
 * frames to {@code out}.
 *
 * @param parentId id of the root task, or null when the title had no extraction
 * @param subIds   sub-activity list of the root task, or null when there is none
 */
private void extractAnswer(Question ques, ArticleMeta meta, String parentId,
        ArrayList<Integer> subIds, ArrayList<WikiHowTaskFrame> out) throws IOException{
    ArrayList<Method> answer = ques.getAnswer();
    if (answer.isEmpty())
        return;

    ArrayList<WikiHowTaskFrame> siblings = new ArrayList<>();
    int prevId = -1;
    if (answer.size() == 1){
        ArrayList<Part> parts = answer.get(0).getMethod();
        if (parts.isEmpty())
            return;
        if (parts.size() == 1){
            // One part, one method: only steps.
            extractStepChain(parts.get(0).getPart(), meta, parentId, subIds, out);
            return;
        }
        // Many parts, one method.
        for (Part part: parts){
            prevId = extractSection(part.getTitle(), part.getPart(), meta, parentId, subIds,
                    prevId, siblings, out);
        }
    }else{
        // Many methods; only the first part of each method is read.
        for (Method method: answer){
            String title = method.getTitle();
            ArrayList<Part> parts = method.getMethod();
            ArrayList<Step> steps = parts.isEmpty() ? new ArrayList<Step>() : parts.get(0).getPart();
            prevId = extractSection(title, steps, meta, parentId, subIds, prevId, siblings, out);
        }
    }
    linkNextPointers(siblings);
    out.addAll(siblings);
}
/**
 * Extracts the list of task frames contained in one sentence.
 *
 * <p>The sentence is sent to the OpenIE wrapper; each returned extraction is
 * normalized twice (strong and weak) into a ';'-separated triple. Field 1 is
 * used as the verb and field 2 as the object; fields from index 3 on that
 * contain "l:" or "t:" are collected as locations and times. Extractions whose
 * normalized triple is empty or has fewer than three fields are skipped.
 *
 * <p>Also updates the per-instance extraction and sentence counters.
 *
 * @param sent the sentence to analyse
 * @return the frames extracted from the sentence (possibly empty, never null)
 * @throws IOException if extraction or normalization fails
 */
public ArrayList<WikiHowTaskFrame> sentToListWikiHowTaskFrame(String sent) throws IOException{
    ArrayList<WikiHowTaskFrame> listFrame = new ArrayList<>();
    Pair<Integer, ArrayList<Extraction>> ext_list = txtOpenIE.extractOriTriple(sent);
    ArrayList<Extraction> listExtractions = ext_list.second;
    // Statistics: extractions returned vs. the count reported alongside them.
    num_ext_gt_thres += listExtractions.size();
    num_ext_wt_thres += ext_list.first;
    num_sent++;

    for (Extraction ext: listExtractions){
        String triple = strongNormalizedTriple(ext.tripleString());
        String weaktriple = weakNormalizedTriple(ext.tripleString());
        // Compare by content: "!= \"\"" only tested object identity.
        if (triple == null || triple.isEmpty() || weaktriple == null || weaktriple.isEmpty())
            continue;

        String [] comp = triple.split(";");
        String [] weakcomp = weaktriple.split(";");
        // Index 1 (verb) and 2 (object) are read below; a shorter triple would
        // otherwise abort the whole sentence with an ArrayIndexOutOfBoundsException.
        if (comp.length < 3 || weakcomp.length < 3)
            continue;

        ArrayList<String> loc = new ArrayList<>();
        ArrayList<String> temp = new ArrayList<>();
        ArrayList<String> partA = new ArrayList<>();
        ArrayList<String> partO = new ArrayList<>();
        for (int i=3; i<comp.length; i++){
            if (comp[i].contains("l:")){
                // Keep what follows the first space when there is one,
                // otherwise strip the two-character marker.
                if (comp[i].contains(" "))
                    loc.add(comp[i].substring(comp[i].indexOf(" ") + 1));
                else loc.add(comp[i].substring("l:".length()));
            }else if (comp[i].contains("t:")){
                temp.add(comp[i].substring("t:".length()));
            }
        }
        // The object goes to the agent list when it is a living being.
        if (checkAgent(comp[2])) partA.add(comp[2]);
        else partO.add(comp[2]);
        listFrame.add(new WikiHowTaskFrame(
                new WikiHowTask(simpleNormalizedText(comp[1]), simpleNormalizedText(comp[2]),
                        simpleNormalizedText(weakcomp[1]), simpleNormalizedText(weakcomp[2])),
                loc, temp, partA, partO));
    }
    return listFrame;
}
/**
 * Tells whether the given object denotes a living thing.
 *
 * @param object the object text to classify
 * @return the verdict of {@code OpenIE4Activities.isLivingBeing}
 */
public static boolean checkAgent(String object){
    boolean livingBeing = OpenIE4Activities.isLivingBeing(object);
    return livingBeing;
}
/**
 * Strong normalization of an OpenIE triple string, using ';' as separator and
 * both normalizer flags switched on.
 *
 * <p>Per the original author's notes: (true, true) checks frequent words in
 * WordNet, (true, false) skips the frequency check.
 *
 * @param triple the raw triple string
 * @return whatever {@code OpenIE4Activities.normalizeOpenIEResult} produces
 * @throws IOException if the normalizer fails
 */
public static String strongNormalizedTriple(String triple) throws IOException{
    final String separator = ";";
    String normalized = OpenIE4Activities.normalizeOpenIEResult(triple, separator, true, true);
    return normalized;
}
/**
 * Weak normalization of an OpenIE triple string, using ';' as separator and
 * both normalizer flags switched off.
 *
 * <p>Per the original author's notes: (false, false) neither checks word
 * frequency nor picks the head word.
 *
 * @param triple the raw triple string
 * @return whatever {@code OpenIE4Activities.normalizeOpenIEResult} produces
 * @throws IOException if the normalizer fails
 */
public static String weakNormalizedTriple(String triple) throws IOException{
    final String separator = ";";
    String normalized = OpenIE4Activities.normalizeOpenIEResult(triple, separator, false, false);
    return normalized;
}
/** @return the number of extractions returned by the extractor so far on this instance */
public int getNum_Ext_Gt_Thres(){
    return this.num_ext_gt_thres;
}
/** @return the accumulated first component of the extractor's result pairs on this instance */
public int getNum_Ext_Wt_Thres(){
    return this.num_ext_wt_thres;
}
/** @return the number of sentences sent to the extractor so far on this instance */
public int getNum_Sent(){
    return this.num_sent;
}
/** @return the number of articles processed so far on this instance */
public int getNum_Article(){
    return this.num_article;
}
/**
 * Returns the numeric id of the first category in the list that has a known id.
 *
 * <p>On first use the mapping is loaded from the classpath resource
 * "wikihow-id-category.txt" (UTF-8, one {@code id<TAB>category} pair per line)
 * and cached in a static map. The cache is only published after the whole
 * resource was read, so a failed load is retried on the next call instead of
 * leaving a partial map behind.
 *
 * @param cate the categories of an article, in lookup order
 * @return the id of the first category with a mapping, or -1 if none has one
 * @throws NumberFormatException if a line starts with a non-numeric id
 * @throws IOException if the resource is missing or cannot be read
 */
public int getCategoryID(ArrayList<Category> cate) throws NumberFormatException, IOException{
    if (catetoID == null){
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        InputStream inputs = classLoader.getResourceAsStream("wikihow-id-category.txt");
        if (inputs == null){
            // getResourceAsStream signals "not found" with null, not an exception.
            throw new IOException("Classpath resource not found: wikihow-id-category.txt");
        }
        Map<String, Integer> loaded = new HashMap<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inputs, "UTF-8"))){
            String sCurrentLine;
            while ((sCurrentLine = br.readLine()) != null) {
                String[] line = sCurrentLine.split("\t");
                int id = Integer.parseInt(line[0]);
                String cate1 = line[1];
                loaded.put(cate1, id);
            }
        }
        catetoID = loaded;
    }
    for (Category c: cate){
        Integer id = catetoID.get(c.getCategory());
        if (id != null)
            return id;
    }
    return -1;
}
/**
 * Returns the numeric id registered for an article URL.
 *
 * <p>On first use the mapping is loaded from the classpath resource
 * "wikihow-id-url" (UTF-8, one {@code id<TAB>url} pair per line) and cached in
 * a static map. The cache is only published after the whole resource was read,
 * so a failed load is retried on the next call instead of leaving a partial
 * map behind.
 *
 * @param url the article URL to look up
 * @return the id mapped to the URL, or -1 if the URL is unknown
 * @throws NumberFormatException if a line starts with a non-numeric id
 * @throws IOException if the resource is missing or cannot be read
 */
public int getLinkID(String url) throws NumberFormatException, IOException{
    if (linktoID == null){
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        InputStream inputs = classLoader.getResourceAsStream("wikihow-id-url");
        if (inputs == null){
            // getResourceAsStream signals "not found" with null, not an exception.
            throw new IOException("Classpath resource not found: wikihow-id-url");
        }
        Map<String, Integer> loaded = new HashMap<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inputs, "UTF-8"))){
            String sCurrentLine;
            while ((sCurrentLine = br.readLine()) != null) {
                String[] line = sCurrentLine.split("\t");
                int id = Integer.parseInt(line[0]);
                String link = line[1];
                loaded.put(link, id);
            }
        }
        linktoID = loaded;
    }
    Integer id = linktoID.get(url);
    return id != null ? id : -1;
}
/**
 * Light clean-up of a verb/object string.
 *
 * <p>In order: slashes become spaces, square brackets are dropped, the
 * detached tokens " n't", " nt" and " '" are glued back to the preceding word,
 * every "l:" and "t:" marker is removed, runs of whitespace collapse to one
 * space, and the result is trimmed and lower-cased.
 *
 * @param s the text to clean; must not be null
 * @return the cleaned text
 * @throws IOException never thrown by this implementation; kept for callers
 */
public static String simpleNormalizedText(String s) throws IOException{
    // All patterns below are plain text, so literal replacement is enough;
    // replacing a substring that is absent leaves the string unchanged.
    String cleaned = s.replace("/", " ")
            .replace("[", "")
            .replace("]", "")
            .replace(" n't", "n't")
            .replace(" nt", "nt")
            .replace(" '", "'")
            .replace("l:", "")
            .replace("t:", "");
    String collapsed = cleaned.replaceAll("\\s+", " ");
    return collapsed.trim().toLowerCase();
}
}