Skip to content

Commit

Permalink
read files from resources
Browse files Browse the repository at this point in the history
  • Loading branch information
gadelrab committed Feb 17, 2016
1 parent e335fef commit 1a36f3b
Show file tree
Hide file tree
Showing 12 changed files with 482 additions and 69 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ hs_err_pid*

*.iml

/resources/bigData
## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:
Expand Down
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@


<build>

<finalName>rules-generator</finalName>
<plugins>
<plugin>
Expand Down Expand Up @@ -64,6 +65,10 @@
<mainClass>de.mpii.frequentrulesminning.AssociationRulesMining</mainClass>
<id>association_rules</id>
</program>
<program>
<mainClass>de.mpii.predicatelifting.YagoFactsReducer</mainClass>
<id>fact_reducer</id>
</program>
</programs>
</configuration>
</plugin>
Expand Down
File renamed without changes.
9 changes: 9 additions & 0 deletions resources/yago_location_relations.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<isCitizenOf>
<diedIn>
<isLocatedIn>
<wasBornIn>
<dealsWith>
<hasNeighbor>
<isPoliticianOf>
<livesIn>
<happenedIn>
8 changes: 5 additions & 3 deletions scripts/download_large_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ done
PRGDIR=`dirname "$PRG"`
BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
DATA_DIR=$BASEDIR/data
BIG_DATA_DIR=$BASEDIR/resources/bigData


#make new directory for data
mkdir -p $DATA_DIR
mkdir -p $BIG_DATA_DIR


#Download
Expand All @@ -38,10 +40,10 @@ for f in $DATA_DIR/*.7z; do
7z x $f -o$DATA_DIR
done


#TODO get bigData out of resources
#Filter data for reduction
grep '<isLocatedIn>' $DATA_DIR/yagoGeonamesOnlyData.tsv > $DATA_DIR/isLocatedInData.tsv
grep '<isLocatedIn>' $DATA_DIR/yagoFacts.tsv >> $DATA_DIR/isLocatedInData.tsv
grep '<isLocatedIn>' $DATA_DIR/yagoGeonamesOnlyData.tsv > $BIG_DATA_DIR/isLocatedInData.tsv
grep '<isLocatedIn>' $DATA_DIR/yagoFacts.tsv >> $BIG_DATA_DIR/isLocatedInData.tsv

#remove archives
rm $DATA_DIR/*.7z
Expand Down
95 changes: 77 additions & 18 deletions src/main/java/de/mpii/predicatelifting/YagoFactsReducer.java
Original file line number Diff line number Diff line change
@@ -1,59 +1,77 @@
package de.mpii.predicatelifting;

import com.google.common.collect.ImmutableSet;
import de.mpii.yagotools.YagoLocation;
import de.mpii.yagotools.YagoSimpleTypes;
import mpi.tools.basics3.Fact;
import mpi.tools.basics3.FactSource;
import mpi.tools.javatools.filehandlers.UTF8Reader;
import mpi.tools.javatools.util.FileUtils;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.nio.file.Files;
import java.util.Set;
import java.io.*;
import java.net.URL;

/**
* Created by gadelrab on 2/11/16.
*/
public class YagoFactsReducer {


public enum FactType{LOCATION,DATE,PERSON,ORGANIZATION,ARTIFACT}

public static final String LOCATION_RELATIONS_FILE="resources/yago_location_relations.tsv";
ImmutableSet<String> locationRelations;
YagoSimpleTypes yst;
YagoLocation yLoc;



public YagoFactsReducer(){
yst=YagoSimpleTypes.getInstance();
//yst=YagoSimpleTypes.getInstance();
yLoc=YagoLocation.getInstance();


try {

locationRelations = ImmutableSet.copyOf(FileUtils.getFileContentasList(LOCATION_RELATIONS_FILE));

} catch (IOException e) {
e.printStackTrace();
}

}



public void reduceToType(String factSourceFilePath, String []relations) {
public void reduceFacts(String factSourceFilePath, String outputFile, String []relations, FactType fType) {
try {
reduceToType( FactSource.from(factSourceFilePath), relations);
} catch (MalformedURLException e) {
reduceFacts( FactSource.from(factSourceFilePath),FileUtils.getBufferedUTF8Writer(outputFile) ,relations,fType);

} catch (IOException e) {
e.printStackTrace();
}

}


public void reduceToType(FactSource factSource, String []relations){

ImmutableSet<String> relationsSet=ImmutableSet.copyOf(relations);
public void reduceFacts(FactSource factSource, BufferedWriter outputWriter, String []relations, FactType fType){
ImmutableSet<String> relationsSet=null;
if(relations!=null)
relationsSet=ImmutableSet.copyOf(relations);


for( Fact f:factSource)
for( Fact f:factSource) {
if (relationsSet==null||relationsSet.contains(f.getRelation())) {
Fact reducedfact = reduceFact(f, fType);
try {
outputWriter.write(reducedfact.toTsvLine());
} catch (IOException e) {
e.printStackTrace();
}


if(relationsSet.contains(f.getRelation())){
reduceFact(f);
}

}

}


Expand All @@ -62,13 +80,54 @@ public void reduceToType(FactSource factSource, String []relations){



public Fact reduceFact(Fact orgFact) {

/**
 * Reduces a single fact according to the requested fact type.
 *
 * <p>When {@code factType} is {@code null}, the reduction is inferred from the fact's
 * relation: a fact whose relation is listed in {@code locationRelations} is reduced as a
 * location fact. When {@code factType} is given it selects the reduction directly; only
 * {@code LOCATION} is handled here, every other type leaves the fact unchanged.
 *
 * @param orgFact  the fact to reduce
 * @param factType the reduction to apply, or {@code null} to infer it from the relation
 * @return the reduced fact, or {@code orgFact} itself when no reduction applies
 */
public Fact reduceFact(Fact orgFact, FactType factType) {
    if (factType == null) {
        // locationRelations stores relation names (Strings), so the lookup has to use the
        // fact's relation; looking up the Fact object itself would not match a name.
        // The set is still null if the constructor could not read the relations file.
        if (locationRelations != null && locationRelations.contains(orgFact.getRelation())) {
            return reduceLocationFact(orgFact);
        }
        return orgFact;
    }

    switch (factType) {
        case LOCATION:
            return reduceLocationFact(orgFact);
        default:
            // No reduction implemented for the remaining fact types.
            return orgFact;
    }
}


/**
 * Builds the location-reduced form of a fact: subject and relation are kept, and the
 * object is replaced by the value {@code yLoc.getParentCountry} returns for it.
 *
 * @param orgFact the fact whose object should be replaced
 * @return a new fact carrying the looked-up value as its object
 */
private Fact reduceLocationFact(Fact orgFact) {
    final String parentCountry = yLoc.getParentCountry(orgFact.getObject());
    return new Fact(orgFact.getSubject(), orgFact.getRelation(), parentCountry);
}


/**
 * Command-line entry point: {@code fact_reducer <InputFile> <outputFile> [Type]}.
 *
 * <p>Reads facts from the input file and writes the reduced facts to the output file.
 * The optional third argument must be the name of a {@code FactType} constant; when it
 * is omitted the reduction is inferred per fact. No relation filter is applied.
 *
 * @param args input file, output file and, optionally, the fact type name
 */
public static void main(String[] args) {
    final String usage = "Incorrect params: fact_reducer <InputFile> <outputFile> [Type<LOCATION>]";

    if (args.length < 2) {
        System.out.println(usage);
        System.exit(1);
    }

    // Validate the optional type before constructing the reducer, so that a bad argument
    // is reported with a usage message instead of an uncaught IllegalArgumentException.
    FactType type = null;
    if (args.length > 2) {
        try {
            type = FactType.valueOf(args[2]);
        } catch (IllegalArgumentException e) {
            System.out.println("Unknown fact type '" + args[2] + "'; expected one of "
                    + java.util.Arrays.toString(FactType.values()));
            System.out.println(usage);
            System.exit(1);
        }
    }

    YagoFactsReducer fr = new YagoFactsReducer();
    fr.reduceFacts(args[0], args[1], null, type);
}



}
20 changes: 13 additions & 7 deletions src/main/java/de/mpii/yagotools/YagoLocation.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
import de.mpii.yagotools.utils.YagoRelations;
import mpi.tools.javatools.util.FileUtils;

import java.lang.*;
import java.io.*;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;
import java.net.URL;


/**
Expand All @@ -20,8 +23,8 @@
public class YagoLocation {

//private static final String SUB_CLASS_OF = "rdfs:subClassOf";
String LOCATION_FILE_PATH ="data/isLocatedInData.tsv";
final static String COUNTRIES_FILE="src/resources/countries.tsv";
String LOCATION_FILE_PATH ="resources/bigData/isLocatedInData.tsv";
final static String COUNTRIES_FILE="resources/countries.tsv";

private static YagoLocation instance;

Expand All @@ -30,15 +33,18 @@ public class YagoLocation {


private YagoLocation(){
typesParents= YagoDataReader.loadSubject2ObjectMap(LOCATION_FILE_PATH,new String[]{YagoRelations.IS_LOCATED_IN});
loadCountries();
try {
typesParents = YagoDataReader.loadDataInMap(LOCATION_FILE_PATH, new String[]{YagoRelations.IS_LOCATED_IN}, YagoDataReader.MapType.SUBJ_2_OBJ);

}catch (Exception e) {
e.printStackTrace();}
}

private void loadCountries() {
try {
String fileContect= FileUtils.getFileContent(new File(COUNTRIES_FILE));
countriesSet= ImmutableSet.copyOf(fileContect.split("\n"));
} catch (IOException e) {
countriesSet= ImmutableSet.copyOf(FileUtils.getFileContentasList(COUNTRIES_FILE));
} catch (Exception e) {
e.printStackTrace();
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/de/mpii/yagotools/YagoSimpleTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class YagoSimpleTypes {


private YagoSimpleTypes(){
entityTypes= YagoDataReader.loadSubject2ObjectMap(TAXONOMY_FILE_PATH,new String[]{YagoRelations.TYPE});
entityTypes= YagoDataReader.loadDataInMap(TAXONOMY_FILE_PATH,new String[]{YagoRelations.TYPE}, YagoDataReader.MapType.SUBJ_2_OBJ);
}


Expand Down
2 changes: 1 addition & 1 deletion src/main/java/de/mpii/yagotools/YagoTaxonomy.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class YagoTaxonomy {


private YagoTaxonomy(){
typesParents= YagoDataReader.loadSubject2ObjectMap(TAXONOMY_FILE_PATH,new String[]{YagoRelations.SUB_CLASS_OF});
typesParents= YagoDataReader.loadDataInMap(TAXONOMY_FILE_PATH,new String[]{YagoRelations.SUB_CLASS_OF}, YagoDataReader.MapType.SUBJ_2_OBJ);

}

Expand Down
Loading

0 comments on commit 1a36f3b

Please sign in to comment.