Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ENTYFI/src/typing/ENTYFI.java
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
256 lines (195 sloc)
9.15 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package typing; | |
import java.io.File; | |
import java.io.IOException; | |
import java.io.PrintWriter; | |
import java.io.Writer; | |
import java.sql.SQLException; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Map.Entry; | |
import java.util.Properties; | |
import ILP.ILPInputPreparation; | |
import ILP.ILPRun; | |
import edu.stanford.nlp.util.Pair; | |
import preparation.TextToUltraFineModule; | |
import preparation.UltraFineResToILP; | |
import ranking.ReferenceUnivRanking; | |
import segmentation.InputSegmentation; | |
import utils.Configuration; | |
import utils.ReadFile; | |
import utils.SortedMultiMap; | |
import utils.StringUtils; | |
import utils.Util; | |
public class ENTYFI { | |
private InputSegmentation segmentation; | |
private int check; | |
private int topKUniverse; | |
private int topKType; | |
public ENTYFI(InputSegmentation segmentation, int check, int topKUniverse, int topKType){ | |
this.segmentation = segmentation; | |
this.check = check; | |
this.topKUniverse = topKUniverse; | |
this.topKType = topKType; | |
} | |
long startTime = System.currentTimeMillis(); | |
@SuppressWarnings("deprecation") | |
public Pair<String, List<String>> run(String data) throws IOException, SQLException{ | |
Configuration configuration = new Configuration("resources/wikia.properties"); | |
Properties prop = configuration.getSettings(); | |
String attentionModel = prop.getProperty("ATTENTION_MODEL"); | |
String timeNote = ""; | |
//segmentation========================================================================== | |
System.out.println("Segmentation...."); | |
List<String> sentences = segmentation.segment(data); | |
long endTime = System.currentTimeMillis(); | |
long totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "Segmentation: " + totalTime + "\n"; | |
//===================================================================================END | |
//mention detection===================================================================== | |
System.out.println("Mention detection...."); | |
//update mention detection tool, doesn't require GPU to run----------------------------- | |
Tagger tagger = new Tagger("./gLampleNER/"); | |
List<String> res = tagger.tagging(sentences); | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "Mention dectection: " + totalTime + "\n"; | |
//===================================================================================END | |
List<String> unsupervised_data = new ArrayList<>(); | |
List<String> supervised_data = new ArrayList<>(); | |
for (String s: res){ | |
try{ | |
Pair<String, List<String>> extractedData = StringUtils.tagged2SupAndUnSupData(s); | |
if (extractedData != null){ | |
unsupervised_data.add(extractedData.first); | |
supervised_data.addAll(extractedData.second); | |
} | |
}catch(Exception e){ | |
continue; | |
} | |
} | |
// typing step | |
/////////////////////// | |
//ultra-file============================================================================= | |
System.out.println("Ultra-fine typing...."); | |
List<String> ultrafineData = TextToUltraFineModule.data2json(supervised_data); | |
ultrafineData.add("end"); | |
UltraFine ultrafine = new UltraFine("./ultrafine/"); | |
List<String> ultraRes = ultrafine.ultrafineTyping(ultrafineData); | |
System.out.println("Results of ultra fine typing...."); | |
List<String> ultraRes2ILP = UltraFineResToILP.ultrafine2ILP(supervised_data, ultraRes); | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "Ultra-typing: " + totalTime + "\n"; | |
//===================================================================================END | |
//general predict...attention NER model================================================= | |
System.out.println("General Type Typing...."); | |
List<String> supervised_data_python = new ArrayList<>(supervised_data); | |
supervised_data_python.add("end"); | |
GeneralTyping gTyping = new GeneralTyping(attentionModel, "./attentionNER/"); | |
System.out.println("Results of general type typing...."); | |
List<String> generalTypingRes = gTyping.generalTyping(supervised_data_python); | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "General typing: " + totalTime + "\n"; | |
//===================================================================================END | |
//unsupervised-based typing============================================================= | |
List<String> unsupervised_types = UnsupervisedBasedExtraction.unsupervisedExtract(unsupervised_data, supervised_data, "models/englishPCFG.ser.gz"); | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "unsupervised: " + totalTime + "\n"; | |
//===================================================================================END | |
//similarity ranking =================================================================== | |
System.out.println("Similarity Ranking...."); | |
List<String> topKUnivs = ReferenceUnivRanking.topKSimilarityRanking(data, topKUniverse); | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "Similar ranking: " + totalTime + "\n"; | |
//===================================================================================END | |
// KB Lookup & Top class typing ======================================================== | |
SortedMultiMap<String, String> mergeInput = new SortedMultiMap<String, String>(1000, true); | |
String refUnivsString = ""; | |
for (String uni: topKUnivs){ | |
//KB Lookup | |
System.out.println(uni); | |
System.out.println("KB Lookup ...."); | |
List<String> kbResults = KBLookup.lookup(supervised_data, uni, configuration); | |
//top-class-predict..................... | |
System.out.println("Top class typing...."); | |
List<String> topClassTypingRes = new ArrayList<>(); | |
if (new File(attentionModel + uni).exists()){ | |
TopClassTyping topClassTyping = new TopClassTyping(attentionModel, "./attentionNER/", uni); | |
topClassTypingRes = topClassTyping.typing(supervised_data_python); | |
} | |
//merge inside universe | |
SortedMultiMap<String, String> inUnivMerge = ILPInputPreparation.inUnivMerge(unsupervised_types, topClassTypingRes, kbResults, generalTypingRes); | |
//merge outside universe | |
for (String e: inUnivMerge.keyset()){ | |
Map<String, Double> values = inUnivMerge.getAsMap(e); | |
for (Entry<String, Double> v: values.entrySet()){ | |
mergeInput.update(e, v.getKey(), v.getValue()); | |
} | |
} | |
refUnivsString += uni + "|"; | |
} | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "KB lookup and top-class typing: " + totalTime + "\n"; | |
//===================================================================================END | |
// ILP ================================================================================= | |
SortedMultiMap<String, String> ultraForMerge = ILPInputPreparation.readResults(ultraRes2ILP); | |
for (String e: ultraForMerge.keyset()){ | |
Map<String, Double> values = ultraForMerge.getAsMap(e); | |
for (Entry<String, Double> v: values.entrySet()){ | |
mergeInput.update(e, v.getKey(), v.getValue()); | |
} | |
} | |
Map<String, String> type2general = ILPInputPreparation.type2general(mergeInput); | |
//ILP input | |
List<String> disjointMerge = ILPInputPreparation.disjointOverUnivs(type2general); | |
disjointMerge.add("end-disjoint"); | |
List<String> ilpInput = ILPInputPreparation.res2ilpInput(mergeInput, type2general); | |
ilpInput.add("end-ilp-input"); | |
String basedir = prop.getProperty("BASE_DIR"); | |
refUnivsString = refUnivsString.substring(0, refUnivsString.length() - 1); | |
ILPRun ilp = new ILPRun(basedir, refUnivsString, topKType, "./typeConsolidation/"); | |
System.out.println("ILP running...."); | |
System.out.println(disjointMerge.size()); | |
System.out.println(ilpInput.size()); | |
List<String> ilpResults = ilp.ilp(disjointMerge, ilpInput, null); | |
System.out.println(ilpResults.size()); | |
endTime = System.currentTimeMillis(); | |
totalTime = endTime - startTime; | |
startTime = System.currentTimeMillis(); | |
timeNote += "ilp: " + totalTime + "\n"; | |
//===================================================================================END | |
System.out.println(timeNote); | |
return new Pair<String, List<String>>(timeNote, ilpResults); | |
} | |
public static void main(String[] args) throws IOException, SQLException { | |
String inputFile = Util.getArg(args, 0, "test-data/input.txt"); | |
String outputFile = Util.getArg(args, 1, "test-data/output.txt"); | |
String input = ReadFile.readFile(inputFile); | |
System.out.println(input); | |
int check = 0; | |
int topKUniverse = 1; | |
int topKTypes = 10; | |
String model = "models/englishPCFG.ser.gz"; | |
InputSegmentation segmentation = new InputSegmentation(model, check); | |
ENTYFI entyfi = new ENTYFI(segmentation, check, topKUniverse, topKTypes); | |
Pair<String, List<String>> res = entyfi.run(input); | |
System.out.println(res.first); | |
Writer out = new PrintWriter(new File(outputFile)); | |
for (String s: res.second){ | |
out.write(s + "\n"); | |
} | |
out.close(); | |
} | |
} |