From aacc3e7b59478f9ab8b45469ed9a729ee0a469d4 Mon Sep 17 00:00:00 2001 From: cxchu Date: Wed, 22 Apr 2020 14:01:17 +0200 Subject: [PATCH] Update bash file --- Note.txt | 3 --- kill.sh | 2 -- resources/wikia.properties | 14 ++++++-------- run_parallel.sh | 23 ----------------------- src/typing/ENTYFI.java | 16 +++++++++++++--- test-data/input.txt | 1 + 6 files changed, 20 insertions(+), 39 deletions(-) delete mode 100644 Note.txt delete mode 100755 kill.sh delete mode 100755 run_parallel.sh create mode 100644 test-data/input.txt diff --git a/Note.txt b/Note.txt deleted file mode 100644 index 9b84886..0000000 --- a/Note.txt +++ /dev/null @@ -1,3 +0,0 @@ -Reference Univs Ranking" - - Keep 1000 top tf-idf tokens - diff --git a/kill.sh b/kill.sh deleted file mode 100755 index 08804c4..0000000 --- a/kill.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -ps -ef | grep $1 | grep -v grep | awk '{print $2}' | xargs -r kill -9 diff --git a/resources/wikia.properties b/resources/wikia.properties index 73735f4..260614a 100644 --- a/resources/wikia.properties +++ b/resources/wikia.properties @@ -2,7 +2,13 @@ ############### which includes data file path, parameters, etc. ####### ####################################################################### +##directory stores the background KB of all universes BASE_DIR = /var/tmp/wikia/entity-typing/data-store/ +##directory stores the pre-trained model of fictional typing module +ATTENTION_MODEL = /local/var/tmp/wikia/entity-typing/deep-learning/ +##directory stores the universe-term matrix for reference universe ranking +TERMMATRIX = /var/tmp/wikia/all-data/top-k-universe-termmatrix/ + WIKIA_TYPE_SYSTEM_PATH = /type-hierarchy @@ -28,12 +34,6 @@ WIKIA_BACKGROUND_FACT_DICTIONARY = /fact-dictionary WIKIA_BACKGROUND_TIME_DICTIONARY = /time-dictionary WIKIA_BACKGROUND_STATISTIC = /bkb-statistic -# html dump file after remove unneccessary parts, just keep: infobox, maintext, category, title and id -#/var/tmp/wikia/entity-typing/data-store/religion/ - -#WIKIA_DIR = /var/tmp/wikia/all-data/wikia-top-dumpfile/ -#WIKIA_DIR = /var/tmp/wikia/entity-typing/input-data/ -#WIKIA_DIR = /GW/D5data-9/cxchu/wikia/all-data/dumpfile-available-universes/ # stopwords STOPWORDS = stopwords_en.txt @@ -43,5 +43,3 @@ TOKEN_CONTEXT_COMPRESS_RATE = 0.2 # if context size < STANDARD_TOKEN_CONTEXT_SIZE, take STANDARD_TOKEN_CONTEXT_SIZE STANDARD_TOKEN_CONTEXT_SIZE = 100 -TERMMATRIX = /var/tmp/wikia/all-data/top-k-universe-termmatrix/ -ATTENTION_MODEL = /local/var/tmp/wikia/entity-typing/deep-learning/ \ No newline at end of file diff --git a/run_parallel.sh b/run_parallel.sh deleted file mode 100755 index cd2207e..0000000 --- a/run_parallel.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -# Use to run a task to process a input file with multithreading. Each element of the input file and output file should be in a single line. -# and of the MainClass should be the first 2 arguments. MainClass processes input and output files in GZIP format. -# run_parallel.sh -# Real output file will be also in GZIP format. -np=$(($1-1)) -echo "Split data" -args="$3 $1" -mvn exec:java -Dexec.classpathScope=compile -Dexec.mainClass="util.FileSplitter" -Dexec.args="$args" 1>>$4.out 2>>$4.err - -echo "Run in parallel" -for i in $(seq -f "%02g" 0 ${np}); do -args="$3.part$i.gz $4.part$i.gz ${@:5}" -export MAVEN_OPTS="-Xmx8G" && mvn exec:java -Dexec.classpathScope=compile -Dexec.mainClass="$2" -Dexec.args="$args" 1>>$4.out 2>>$4.err & -done - -wait - -echo "Combine result" -zcat $4.part*.gz | gzip > $4 -echo "Clean" -rm $3.part*.gz $4.part*.gz - diff --git a/src/typing/ENTYFI.java b/src/typing/ENTYFI.java index 9e327b6..073ad93 100644 --- a/src/typing/ENTYFI.java +++ b/src/typing/ENTYFI.java @@ -2,6 +2,8 @@ import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.io.Writer; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; @@ -17,8 +19,10 @@ import ranking.ReferenceUnivRanking; import segmentation.InputSegmentation; import utils.Configuration; +import utils.ReadFile; import utils.SortedMultiMap; import utils.StringUtils; +import utils.Util; public class ENTYFI { @@ -227,7 +231,11 @@ public Pair> run(String data) throws IOException, SQLExcept public static void main(String[] args) throws IOException, SQLException { - String input = "In the Red Keep, Tyrion, escorted by Ser Gregor, meets Jaime, who confirms that he believes the threat of the Dead, but has been unable to convince Cersei. Jamie is killed. ."; + String inputFile = Util.getArg(args, 0, "test-data/input.txt"); + String outputFile = Util.getArg(args, 1, "test-data/output.txt"); + + String input = ReadFile.readFile(inputFile); + System.out.println(input); int check = 0; int topKUniverse = 1; int topKTypes = 10; @@ -237,10 +245,12 @@ public static void main(String[] args) throws IOException, SQLException { ENTYFI entyfi = new ENTYFI(segmentation, check, topKUniverse, topKTypes); Pair> res = entyfi.run(input); System.out.println(res.first); + + Writer out = new PrintWriter(new File(outputFile)); for (String s: res.second){ - System.out.println(s); + out.write(s + "\n"); } - + out.close(); } } diff --git a/test-data/input.txt b/test-data/input.txt new file mode 100644 index 0000000..0e7763f --- /dev/null +++ b/test-data/input.txt @@ -0,0 +1 @@ +In the Red Keep, Tyrion, escorted by Ser Gregor, meets Jaime, who confirms that he believes the threat of the Dead, but has been unable to convince Cersei. Jamie is killed. \ No newline at end of file