Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
HowToKB/src/kb/howtokb/reader/WikiHowArticleReader.java
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
208 lines (178 sloc)
6.66 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package kb.howtokb.reader; | |
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.zip.GZIPInputStream;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

import kb.howtokb.global.Global;
import kb.howtokb.wkhobject.Category;
import kb.howtokb.wkhobject.Ingredients;
import kb.howtokb.wkhobject.Method;
import kb.howtokb.wkhobject.Part;
import kb.howtokb.wkhobject.Question;
import kb.howtokb.wkhobject.Step;
import kb.howtokb.wkhobject.Things;
public class WikiHowArticleReader { | |
/** | |
* Return list of articles extract from file with json format | |
* @param directory that saves articles in json format | |
* @return a list of articles | |
* @throws IOException | |
* @throws ClassNotFoundException | |
* @throws ParseException | |
*/ | |
public static ArrayList<Question> WikiHowArticleReaderFromJSONFile(String directory) throws IOException, ClassNotFoundException, ParseException{ | |
ArrayList<Question> allQuestions = new ArrayList<>(); | |
JSONParser parser = new JSONParser(); | |
try (BufferedReader br = new BufferedReader(new FileReader(directory))) { | |
String sCurrentLine; | |
while ((sCurrentLine = br.readLine()) != null) { | |
Object obj = parser.parse(sCurrentLine); | |
JSONObject jsonObject = (JSONObject) obj; | |
Question newQuestion = jsonToQuestion(jsonObject); | |
allQuestions.add(newQuestion.setNormalized()); | |
} | |
} | |
return allQuestions; | |
} | |
// ====================================================================== | |
/** | |
* Convert a Json object to Question Object | |
* @param JSON Object | |
* @return Question Object | |
*/ | |
@SuppressWarnings("unchecked") | |
public static Question jsonToQuestion(JSONObject jsonobj){ | |
//Extract link | |
String link = (String) jsonobj.get("Link"); | |
//Extract title | |
String title = (String) jsonobj.get("Title"); | |
//Extract explanation | |
String exp = (String) jsonobj.get("Explanation"); | |
//Extract views | |
int views = (int) (long) jsonobj.get("Views"); | |
//Extract rate | |
double rate = (double) (double) jsonobj.get("Rate"); | |
//Extract tips | |
String tips = (String) jsonobj.get("Tips"); | |
//Extract warnings | |
String warnings = (String) jsonobj.get("Warnings"); | |
//Extract link of video | |
String video = (String) jsonobj.get("Video"); | |
//Extract category | |
ArrayList<Category> category = new ArrayList<Category>(); | |
ArrayList<String> cate_string = new ArrayList<String>(); | |
JSONArray categoryJ = (JSONArray) jsonobj.get("Category"); | |
Iterator<String> iteratorC = categoryJ.iterator(); | |
while (iteratorC.hasNext()) { | |
cate_string.add(iteratorC.next()); | |
} | |
cate_string.add(0, ""); | |
cate_string.add(cate_string.size(), ""); | |
for (int k=1; k<cate_string.size()-1; k++){ | |
Category newCate = new Category(cate_string.get(k), cate_string.get(k-1), cate_string.get(k+1)); | |
category.add(newCate); | |
} | |
//Extract things | |
ArrayList<Things> things = new ArrayList<Things>(); | |
JSONArray thingJ = (JSONArray) jsonobj.get("Things"); | |
Iterator<JSONObject> iteratorT = thingJ.iterator(); | |
while (iteratorT.hasNext()) { | |
JSONObject thing = iteratorT.next(); | |
//Title of method | |
String title_thing = (String) thing.get("Title"); | |
//List of things of this method | |
ArrayList<String> listthing = new ArrayList<String>(); | |
//List things of a method | |
JSONArray thing_method = (JSONArray) thing.get("Things"); | |
Iterator<String> iteratorTM = thing_method.iterator(); | |
while (iteratorTM.hasNext()) { | |
listthing.add(iteratorTM.next()); | |
} | |
Things newThing = new Things(title_thing, listthing); | |
things.add(newThing); | |
} | |
//Extract ingredients | |
ArrayList<Ingredients> ingredients = new ArrayList<Ingredients>(); | |
JSONArray ingredientJ = (JSONArray) jsonobj.get("Ingredients"); | |
Iterator<JSONObject> iteratorI = ingredientJ.iterator(); | |
while (iteratorI.hasNext()) { | |
JSONObject ingre = iteratorI.next(); | |
//Title of method | |
String title_ingre = (String) ingre.get("Title"); | |
//List of things of this method | |
ArrayList<String> listingre = new ArrayList<String>(); | |
//List things of a method | |
JSONArray ingre_method = (JSONArray) ingre.get("Ingredients"); | |
Iterator<String> iteratorIM = ingre_method.iterator(); | |
while (iteratorIM.hasNext()) { | |
listingre.add(iteratorIM.next()); | |
} | |
Ingredients newIngre = new Ingredients(title_ingre, listingre); | |
ingredients.add(newIngre); | |
} | |
//Extract answer | |
// loop array | |
//Extract list of methods | |
ArrayList<Method> answerJ = new ArrayList<Method>(); | |
JSONArray answer = (JSONArray) jsonobj.get("Answer"); | |
Iterator<JSONObject> iterator = answer.iterator(); | |
while (iterator.hasNext()) { | |
// loop method array | |
JSONObject method = iterator.next(); | |
//Extract name of method | |
String title_method = (String) method.get("Title"); | |
//Extract order of method | |
int order_method = (int) (long) method.get("Order"); | |
//extract list of part | |
ArrayList<Part> listPart = new ArrayList<>(); | |
JSONArray listofPart = (JSONArray) method.get("Method"); | |
Iterator<JSONObject> iteratorPart = listofPart.iterator(); | |
while(iteratorPart.hasNext()){ | |
// loop part array | |
JSONObject part = iteratorPart.next(); | |
//Extract name of part | |
String title_part = (String) part.get("Title"); | |
//Extract order of part | |
int order_part = (int) (long) part.get("Order"); | |
//Extract list of steps | |
ArrayList<Step> listStep = new ArrayList<>(); | |
JSONArray listofStep = (JSONArray) part.get("Part"); | |
Iterator<JSONObject> iteratorStep = listofStep.iterator(); | |
while(iteratorStep.hasNext()){ | |
//loop step array | |
JSONObject step = iteratorStep.next(); | |
//Extract main action | |
String main_act = (String) step.get("Main_act"); | |
//Extract order of step | |
int order_step = (int) (long) step.get("Order"); | |
//Extract detail action | |
String detail_act = (String) step.get("Detail_act"); | |
//Extract link of image | |
String image = (String) step.get("Image"); | |
Step newStep = new Step(order_step, main_act, detail_act, image); | |
listStep.add(newStep); | |
} | |
Part newPart = new Part(order_part, title_part, listStep); | |
listPart.add(newPart); | |
} | |
Method newMethod = new Method(order_method, title_method, listPart); | |
answerJ.add(newMethod); | |
} | |
Question newQuestion = new Question(title, exp, answerJ, | |
category, link, tips, warnings, video, things, ingredients, views, rate); | |
return newQuestion; | |
} | |
} |