Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
HowToKB/src/test/kb/howtokb/TextToWikiHowTaskFrameTest.java
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
79 lines (67 sloc)
2.66 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package test.kb.howtokb; | |
import java.io.BufferedWriter; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.OutputStreamWriter; | |
import java.io.PrintStream; | |
import java.io.Writer; | |
import java.util.ArrayList; | |
import java.util.regex.PatternSyntaxException; | |
import org.json.simple.parser.ParseException; | |
import kb.howtokb.reader.WikiHowArticleReader; | |
import kb.howtokb.taskframe.WikiHowTaskFrame; | |
import kb.howtokb.taskframe.extractor.TextToWikiHowTaskFrame; | |
import kb.howtokb.wkhobject.Question; | |
public class TextToWikiHowTaskFrameTest { | |
public static void main(String[] args) throws ClassNotFoundException, IOException, ParseException { | |
//System.setOut(new PrintStream(new FileOutputStream("log.txt"))); | |
TextToWikiHowTaskFrame extract = new TextToWikiHowTaskFrame(); | |
// Extract all question | |
System.out.println("Reading json data file....."); | |
String input = "howtokb-data/wikihow-data-all.json"; | |
ArrayList<Question> allQuestions = WikiHowArticleReader.WikiHowArticleReaderFromJSONFile(input); | |
int frames = 0; | |
try { | |
Writer textout = new BufferedWriter(new OutputStreamWriter( | |
new FileOutputStream("howtokb-data/task-frame-extracted.json"), "utf-8")); | |
Writer idtextout = new BufferedWriter(new OutputStreamWriter( | |
new FileOutputStream("howtokb-data/id-task-frame-extracted.json"), "utf-8")); | |
int i = 1; | |
for (Question ques : allQuestions) { | |
try { | |
if (i++ > 50){ | |
break; | |
} | |
ArrayList<WikiHowTaskFrame> listframe = extract.articleToListWikiHowTaskFrame(ques.setNormalized()); | |
frames += listframe.size(); | |
for (WikiHowTaskFrame frame : listframe) { | |
// frame = frame.setNormalized(); | |
textout.write(frame.toJsonObject().toJSONString() + "\n"); | |
idtextout.write(frame.getID() + "\t" + frame.toJsonObject().toJSONString() + "\n"); | |
} | |
System.out.println(ques.getLink()); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
continue; | |
} catch (NullPointerException e) { | |
e.printStackTrace(); | |
continue; | |
} catch (IndexOutOfBoundsException e) { | |
e.printStackTrace(); | |
continue; | |
} catch (PatternSyntaxException e) { | |
e.printStackTrace(); | |
continue; | |
} | |
} | |
textout.close(); | |
idtextout.close(); | |
} catch (IOException e) { | |
} | |
System.out.println("Number of articles: " + extract.getNum_Article() + "\n"); | |
System.out.println("Number of sentences: " + extract.getNum_Sent() + "\n"); | |
System.out.println("Number of extractions: " + extract.getNum_Ext_Wt_Thres() + "\n"); | |
System.out.println("Number of extractions with conf > 0.45: " + extract.getNum_Ext_Gt_Thres() + "\n"); | |
System.out.println("Number of activity frames: " + frames + "\n"); | |
} | |
} |