Skip to content

Commit

Permalink
adding yago facts and locations
Browse files Browse the repository at this point in the history
  • Loading branch information
gadelrab committed Feb 16, 2016
1 parent 753d992 commit b4fd61d
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 33 deletions.
14 changes: 13 additions & 1 deletion scripts/download_large_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,22 @@ mkdir -p $DATA_DIR
# Download the YAGO dumps needed by the tools into the data directory.
#wget -P "$DATA_DIR" <url>
wget -P "$DATA_DIR" http://resources.mpi-inf.mpg.de/yago-naga/yago/download/yago/yagoTaxonomy.tsv.7z
wget -P "$DATA_DIR" http://resources.mpi-inf.mpg.de/yago-naga/yago/download/yago/yagoSimpleTypes.tsv.7z
wget -P "$DATA_DIR" http://resources.mpi-inf.mpg.de/yago-naga/yago/download/yago/yagoGeonamesOnlyData.tsv.7z
wget -P "$DATA_DIR" http://resources.mpi-inf.mpg.de/yago-naga/yago/download/yago/yagoFacts.tsv.7z



#uncompress
# 7z accepts a single archive per invocation (further arguments are treated as
# member-name filters), so every archive is extracted with its own call.
for f in "$DATA_DIR"/*.7z; do
    # Skip the literal pattern when no archive matched the glob.
    [ -e "$f" ] || continue
    echo "$f"
    7z x "$f" -o"$DATA_DIR"
done


#Filter data for reduction
# Keep only the <isLocatedIn> facts from both sources in one combined file.
grep '<isLocatedIn>' "$DATA_DIR/yagoGeonamesOnlyData.tsv" > "$DATA_DIR/isLocatedInData.tsv"
grep '<isLocatedIn>' "$DATA_DIR/yagoFacts.tsv" >> "$DATA_DIR/isLocatedInData.tsv"

#remove archives
rm -- "$DATA_DIR"/*.7z
Expand Down
15 changes: 3 additions & 12 deletions src/main/java/de/mpii/predicatelifting/YagoFactsReducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,20 @@
*/
public class YagoFactsReducer {

final static String COUNTRIES_FILE="resources/countries.tsv";

YagoSimpleTypes yst;

ImmutableSet<String> countriesSet;


/**
 * Creates a reducer: fetches the {@code YagoSimpleTypes} instance and then
 * fills the countries set via {@code loadLocations()}.
 */
public YagoFactsReducer() {
    this.yst = YagoSimpleTypes.getInstance();
    // Countries are needed for the location-based reduction.
    this.loadLocations();
}

/**
 * Reads {@code COUNTRIES_FILE} and stores its lines in {@code countriesSet},
 * one entry per line.
 *
 * <p>If the file cannot be read the error is reported and {@code countriesSet}
 * is set to an empty set, so later lookups find no countries instead of
 * failing with a {@code NullPointerException}.
 */
private void loadLocations() {
    try {
        String fileContent = FileUtils.getFileContent(new File(COUNTRIES_FILE));
        // Accept both LF and CRLF line endings so entries never carry a trailing '\r'.
        countriesSet = ImmutableSet.copyOf(fileContent.split("\\r?\\n"));
    } catch (IOException e) {
        e.printStackTrace();
        // Best effort: continue with no known countries rather than a null field.
        countriesSet = ImmutableSet.of();
    }
}



public void reduceToType(String factSourceFilePath, String []relations) {
try {
reduceToType( FactSource.from(factSourceFilePath), relations);
Expand Down
49 changes: 46 additions & 3 deletions src/main/java/de/mpii/yagotools/YagoLocation.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
package de.mpii.yagotools;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import de.mpii.yagotools.utils.YagoDataReader;
import de.mpii.yagotools.utils.YagoRelations;
import mpi.tools.javatools.util.FileUtils;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;


/**
Expand All @@ -11,19 +20,32 @@
public class YagoLocation {

//private static final String SUB_CLASS_OF = "rdfs:subClassOf";
String TAXONOMY_FILE_PATH="data/yagoTaxonomy.tsv";
String LOCATION_FILE_PATH ="data/isLocatedInData.tsv";
final static String COUNTRIES_FILE="src/resources/countries.tsv";

private static YagoLocation instance;

private Multimap<String,String> typesParents;
ImmutableSet<String> countriesSet;


/**
 * Builds the location index: loads the subject-to-object map of
 * {@code YagoRelations.IS_LOCATED_IN} facts from {@code LOCATION_FILE_PATH}
 * and then the set of known countries.
 */
private YagoLocation() {
    // The taxonomy file is deliberately not loaded here: its map would be
    // replaced by this assignment without ever being read.
    typesParents = YagoDataReader.loadSubject2ObjectMap(
            LOCATION_FILE_PATH, new String[]{YagoRelations.IS_LOCATED_IN});
    loadCountries();
}

/**
 * Reads {@code COUNTRIES_FILE} and stores its lines in {@code countriesSet},
 * one entry per line.
 *
 * <p>If the file cannot be read the error is reported and {@code countriesSet}
 * is set to an empty set, so {@code getParentCountry} keeps working (finding
 * no countries) instead of failing with a {@code NullPointerException}.
 */
private void loadCountries() {
    try {
        String fileContent = FileUtils.getFileContent(new File(COUNTRIES_FILE));
        // Accept both LF and CRLF line endings so entries never carry a trailing '\r'.
        countriesSet = ImmutableSet.copyOf(fileContent.split("\\r?\\n"));
    } catch (IOException e) {
        e.printStackTrace();
        // Best effort: continue with no known countries rather than a null field.
        countriesSet = ImmutableSet.of();
    }
}



public static YagoLocation getInstance(){
if (instance==null){
instance=new YagoLocation();
Expand All @@ -32,8 +54,29 @@ public static YagoLocation getInstance(){
}


public static void main (String [] args){
/**
 * Resolves the country associated with an entity.
 *
 * <p>Only the objects stored directly for {@code entity} in
 * {@code typesParents} are examined; no further hops are followed.
 *
 * @param entity the entity identifier to resolve, e.g. {@code "<Berlin>"}
 * @return {@code entity} itself when it is a known country; otherwise the
 *         first of its recorded parents that is a known country; otherwise
 *         {@code entity} unchanged
 */
public String getParentCountry(String entity) {
    // A country is its own parent country.
    if (countriesSet.contains(entity)) {
        return entity;
    }

    // Return the first recorded parent that is a known country. This is the
    // same element the intersection view would yield first, without copying
    // the parents into a set or materializing an array.
    for (String parent : typesParents.get(entity)) {
        if (countriesSet.contains(parent)) {
            return parent;
        }
    }

    // No country among the recorded parents: fall back to the entity itself.
    return entity;
}


/**
 * Manual smoke test: prints the parent country resolved for a few sample entities.
 *
 * @param args unused
 */
public static void main(String[] args) {
    YagoLocation locations = YagoLocation.getInstance();
    String[] samples = {"<Sohag>", "<Berlin>", "<Qatar>"};
    for (String sample : samples) {
        System.out.println(locations.getParentCountry(sample));
    }
}


Expand Down
27 changes: 11 additions & 16 deletions src/main/java/de/mpii/yagotools/utils/YagoDataReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import mpi.tools.basics3.Fact;
import mpi.tools.basics3.FactSource;
import mpi.tools.javatools.filehandlers.UTF8Reader;


import java.io.File;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;

/**
* Created by gadelrab on 2/11/16.
Expand All @@ -27,30 +30,22 @@ public static Multimap<String,String> loadSubject2ObjectMap(String filePath, Str

UTF8Reader fileReader;
try {
fileReader=new UTF8Reader(new File(filePath),"Loading Data");


String line;

while((line=fileReader.readLine())!=null){
String[] lineParts=line.split("\t");
int subIndex=0,predIndex=1,objIndex=2;
if (lineParts.length>3){
subIndex++;predIndex++;objIndex++;
}
if(relationsSet==null||relationsSet.contains(lineParts[predIndex])){
subjectObjectMap.put(lineParts[subIndex],lineParts[objIndex]);
}
//String line;

for(Fact f: FactSource.from(filePath))
if(relationsSet==null||relationsSet.contains(f.getRelation())){
subjectObjectMap.put(f.getSubject(),f.getObject());}

}
System.out.println( "Dictionary size: "+ subjectObjectMap.size());

return subjectObjectMap;

} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e1) {
e1.printStackTrace();

} catch (MalformedURLException e2) {
e2.printStackTrace();
}

return null;
Expand Down
1 change: 1 addition & 0 deletions src/main/java/de/mpii/yagotools/utils/YagoRelations.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ public class YagoRelations {
public static final String SUB_CLASS_OF="rdfs:subClassOf";


public static final String IS_LOCATED_IN="<isLocatedIn>";
}
2 changes: 1 addition & 1 deletion src/main/java/mpi/tools/javatools/datatypes/Pair.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package mpi.tools.javatools.datatypes;

import mpi.aida.data.PreparedInputChunk;


import java.io.Serializable;

Expand Down

0 comments on commit b4fd61d

Please sign in to comment.