Skip to content
Permalink
aacc3e7b59
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
256 lines (195 sloc) 9.15 KB
package typing;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import ILP.ILPInputPreparation;
import ILP.ILPRun;
import edu.stanford.nlp.util.Pair;
import preparation.TextToUltraFineModule;
import preparation.UltraFineResToILP;
import ranking.ReferenceUnivRanking;
import segmentation.InputSegmentation;
import utils.Configuration;
import utils.ReadFile;
import utils.SortedMultiMap;
import utils.StringUtils;
import utils.Util;
/**
 * End-to-end entity typing pipeline.
 *
 * <p>{@link #run(String)} chains the following steps over one input text: segmentation,
 * mention detection, ultra-fine typing, general typing, unsupervised type extraction,
 * reference-universe ranking, per-universe KB lookup and top-class typing, and finally an
 * ILP step that produces the returned results. The wall-clock time of each step (in
 * milliseconds) is collected into a human-readable note.
 *
 * <p>The instance keeps the "start of current step" timestamp in a mutable field, so a single
 * instance must not run several {@code run} calls concurrently.
 */
public class ENTYFI {
    private final InputSegmentation segmentation;
    // Stored from the constructor but not read anywhere in this class.
    private final int check;
    private final int topKUniverse;
    private final int topKType;

    /** Start of the step currently being timed; reset on entry to {@link #run(String)}. */
    long startTime = System.currentTimeMillis();

    /**
     * @param segmentation component used to split the input text into sentences
     * @param check        stored as-is; its meaning is defined by the caller / other components
     * @param topKUniverse number of reference universes requested from the similarity ranking
     * @param topKType     value handed to the ILP runner (presumably the number of types kept
     *                     per mention - confirm against {@code ILPRun})
     */
    public ENTYFI(InputSegmentation segmentation, int check, int topKUniverse, int topKType) {
        this.segmentation = segmentation;
        this.check = check;
        this.topKUniverse = topKUniverse;
        this.topKType = topKType;
    }

    /**
     * Returns the milliseconds elapsed since the previous step started and begins timing the
     * next step from now.
     */
    private long elapsedSinceLastStep() {
        long now = System.currentTimeMillis();
        long elapsed = now - startTime;
        startTime = now;
        return elapsed;
    }

    /** Copies every (key, value, score) triple of {@code source} into {@code target}. */
    private static void mergeInto(SortedMultiMap<String, String> target,
                                  SortedMultiMap<String, String> source) {
        for (String key : source.keyset()) {
            Map<String, Double> scored = source.getAsMap(key);
            for (Entry<String, Double> entry : scored.entrySet()) {
                target.update(key, entry.getKey(), entry.getValue());
            }
        }
    }

    /**
     * Runs the whole typing pipeline on {@code data}.
     *
     * @param data raw input text
     * @return a pair whose first element is the per-step timing note and whose second element
     *         is the list of lines returned by the ILP step
     * @throws IOException  propagated from the underlying components
     * @throws SQLException propagated from the underlying components
     */
    @SuppressWarnings("deprecation")
    public Pair<String, List<String>> run(String data) throws IOException, SQLException {
        // Restart the clock here: otherwise the first step would also be charged with the time
        // elapsed since construction (or since the end of a previous run() call).
        startTime = System.currentTimeMillis();

        Configuration configuration = new Configuration("resources/wikia.properties");
        Properties prop = configuration.getSettings();
        String attentionModel = prop.getProperty("ATTENTION_MODEL");
        StringBuilder timeNote = new StringBuilder();

        //segmentation==========================================================================
        System.out.println("Segmentation....");
        List<String> sentences = segmentation.segment(data);
        timeNote.append("Segmentation: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        //mention detection=====================================================================
        System.out.println("Mention detection....");
        //update mention detection tool, doesn't require GPU to run-----------------------------
        Tagger tagger = new Tagger("./gLampleNER/");
        List<String> res = tagger.tagging(sentences);
        timeNote.append("Mention dectection: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        List<String> unsupervised_data = new ArrayList<>();
        List<String> supervised_data = new ArrayList<>();
        for (String s : res) {
            try {
                Pair<String, List<String>> extractedData = StringUtils.tagged2SupAndUnSupData(s);
                if (extractedData != null) {
                    unsupervised_data.add(extractedData.first);
                    supervised_data.addAll(extractedData.second);
                }
            } catch (Exception e) {
                // Best effort: a sentence that cannot be converted is skipped, but the failure
                // is reported instead of being dropped silently.
                System.err.println("Skipping tagged sentence that could not be processed: " + e);
            }
        }

        // typing step
        ///////////////////////
        //ultra-fine============================================================================
        System.out.println("Ultra-fine typing....");
        List<String> ultrafineData = TextToUltraFineModule.data2json(supervised_data);
        ultrafineData.add("end"); // sentinel appended after the last record
        UltraFine ultrafine = new UltraFine("./ultrafine/");
        List<String> ultraRes = ultrafine.ultrafineTyping(ultrafineData);
        System.out.println("Results of ultra fine typing....");
        List<String> ultraRes2ILP = UltraFineResToILP.ultrafine2ILP(supervised_data, ultraRes);
        timeNote.append("Ultra-typing: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        //general predict...attention NER model=================================================
        System.out.println("General Type Typing....");
        // Copy so the "end" sentinel does not leak into supervised_data, which is reused below.
        List<String> supervised_data_python = new ArrayList<>(supervised_data);
        supervised_data_python.add("end");
        GeneralTyping gTyping = new GeneralTyping(attentionModel, "./attentionNER/");
        System.out.println("Results of general type typing....");
        List<String> generalTypingRes = gTyping.generalTyping(supervised_data_python);
        timeNote.append("General typing: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        //unsupervised-based typing=============================================================
        List<String> unsupervised_types = UnsupervisedBasedExtraction.unsupervisedExtract(
                unsupervised_data, supervised_data, "models/englishPCFG.ser.gz");
        timeNote.append("unsupervised: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        //similarity ranking ===================================================================
        System.out.println("Similarity Ranking....");
        List<String> topKUnivs = ReferenceUnivRanking.topKSimilarityRanking(data, topKUniverse);
        timeNote.append("Similar ranking: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        // KB Lookup & Top class typing ========================================================
        SortedMultiMap<String, String> mergeInput = new SortedMultiMap<String, String>(1000, true);
        StringBuilder refUnivs = new StringBuilder();
        for (String uni : topKUnivs) {
            //KB Lookup
            System.out.println(uni);
            System.out.println("KB Lookup ....");
            List<String> kbResults = KBLookup.lookup(supervised_data, uni, configuration);

            //top-class-predict.....................
            System.out.println("Top class typing....");
            List<String> topClassTypingRes = new ArrayList<>();
            // Top-class typing only runs when the path formed by the attention-model setting
            // followed by the universe name exists; otherwise the result list stays empty.
            if (new File(attentionModel + uni).exists()) {
                TopClassTyping topClassTyping = new TopClassTyping(attentionModel, "./attentionNER/", uni);
                topClassTypingRes = topClassTyping.typing(supervised_data_python);
            }

            //merge inside universe
            SortedMultiMap<String, String> inUnivMerge = ILPInputPreparation.inUnivMerge(
                    unsupervised_types, topClassTypingRes, kbResults, generalTypingRes);
            //merge outside universe
            mergeInto(mergeInput, inUnivMerge);

            refUnivs.append(uni).append('|');
        }
        timeNote.append("KB lookup and top-class typing: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        // ILP =================================================================================
        SortedMultiMap<String, String> ultraForMerge = ILPInputPreparation.readResults(ultraRes2ILP);
        mergeInto(mergeInput, ultraForMerge);

        Map<String, String> type2general = ILPInputPreparation.type2general(mergeInput);
        //ILP input
        List<String> disjointMerge = ILPInputPreparation.disjointOverUnivs(type2general);
        disjointMerge.add("end-disjoint");
        List<String> ilpInput = ILPInputPreparation.res2ilpInput(mergeInput, type2general);
        ilpInput.add("end-ilp-input");

        String basedir = prop.getProperty("BASE_DIR");
        // Drop the trailing '|' separator. The guard avoids a StringIndexOutOfBoundsException
        // when the ranking returned no universe at all.
        if (refUnivs.length() > 0) {
            refUnivs.setLength(refUnivs.length() - 1);
        }
        String refUnivsString = refUnivs.toString();

        ILPRun ilp = new ILPRun(basedir, refUnivsString, topKType, "./typeConsolidation/");
        System.out.println("ILP running....");
        System.out.println(disjointMerge.size());
        System.out.println(ilpInput.size());
        List<String> ilpResults = ilp.ilp(disjointMerge, ilpInput, null);
        System.out.println(ilpResults.size());
        timeNote.append("ilp: ").append(elapsedSinceLastStep()).append("\n");
        //===================================================================================END

        String timings = timeNote.toString();
        System.out.println(timings);
        return new Pair<String, List<String>>(timings, ilpResults);
    }

    /**
     * Command-line entry point: reads the input file, runs the pipeline and writes one result
     * line per ILP output line to the output file.
     *
     * @param args optional: {@code args[0]} input path (default {@code test-data/input.txt}),
     *             {@code args[1]} output path (default {@code test-data/output.txt})
     * @throws IOException  if reading/writing fails or the pipeline reports an I/O error
     * @throws SQLException propagated from the pipeline
     */
    public static void main(String[] args) throws IOException, SQLException {
        String inputFile = Util.getArg(args, 0, "test-data/input.txt");
        String outputFile = Util.getArg(args, 1, "test-data/output.txt");
        String input = ReadFile.readFile(inputFile);
        System.out.println(input);

        int check = 0;
        int topKUniverse = 1;
        int topKTypes = 10;
        String model = "models/englishPCFG.ser.gz";

        InputSegmentation segmentation = new InputSegmentation(model, check);
        ENTYFI entyfi = new ENTYFI(segmentation, check, topKUniverse, topKTypes);
        Pair<String, List<String>> res = entyfi.run(input);
        System.out.println(res.first);

        // try-with-resources guarantees the file is flushed and closed even if a write fails.
        try (Writer out = new PrintWriter(new File(outputFile))) {
            for (String s : res.second) {
                out.write(s + "\n");
            }
        }
    }
}