// HelperForOpenIE4Activities.java

package kb.howtokb.taskframe.extractor;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.util.Pair;
import kb.howtokb.utils.FileLines;
import kb.howtokb.utils.IDHelper;
import uk.ac.susx.informatics.Morpha;

public class HelperForOpenIE4Activities {

	/**
	 * Reads a text file into the set of its distinct non-empty lines.
	 *
	 * <p>The file is decoded as UTF-8. I/O problems are reported on standard
	 * output and whatever was read up to that point is returned, so no
	 * exception reaches the caller.
	 *
	 * @param filePath    path of the file to read
	 * @param toLowerCase when {@code true}, each line is lower-cased before it
	 *                    is stored
	 * @param trim        when {@code true}, leading and trailing whitespace is
	 *                    removed and lines that become empty are skipped
	 * @return the distinct lines; empty when the file cannot be opened
	 */
	public static Set<String> readFileAsSet(String filePath,
		boolean toLowerCase, boolean trim) {
		Set<String> lines = new HashSet<String>();
		// try-with-resources closes the reader even when readLine() fails.
		try (BufferedReader in = new BufferedReader(new InputStreamReader(
			new FileInputStream(filePath), StandardCharsets.UTF_8))) {
			String line;
			while ((line = in.readLine()) != null) {
				if (trim)
					line = line.trim();
				// Tested after trimming so blank lines never become "" entries.
				if (line.isEmpty())
					continue;
				lines.add(toLowerCase ? line.toLowerCase() : line);
			}
		} catch (FileNotFoundException e) {
			System.out.println("File not found, in reading file as list: "
				+ e.getMessage());
		} catch (IOException e) {
			System.out.println("IOException in reading file as list: "
				+ e.getMessage());
		}
		return lines;
	}

	/*public static String headWord(String s) {
		String[] words = s.split(" ");
		return words[headWord(words)];
	}*/
	/**
	 * Finds the head of a space-separated phrase.
	 *
	 * @param s phrase whose words are separated by single spaces
	 * @return whatever {@link #headWordStr(String[])} yields for the words of
	 *         {@code s}
	 * @throws SQLException propagated from {@link #headWordStr(String[])}
	 * @throws IOException  propagated from {@link #headWordStr(String[])}
	 */
	public static String headWord(String s) throws SQLException, IOException {
		String[] words = s.split(" ");
		return headWordStr(words);
	}

	/**
	 * Returns the position of the head word of a tokenized phrase.
	 *
	 * <p>The head is the token directly in front of the first preposition
	 * that follows some non-empty text, e.g. index 0 ("tower") for
	 * "tower of hanoi". Prepositions met before any non-empty token are not
	 * treated as a boundary.
	 *
	 * @param s tokens of the phrase
	 * @return index of the head word, or the last index
	 *         ({@code s.length - 1}) when no such preposition exists
	 */
	public static int headWord(String[] s) {
		// Becomes true once a non-empty token has been passed.
		boolean seenText = false;
		int pos = 0;
		while (pos < s.length) {
			String token = s[pos];
			if (seenText && prepositions.contains(token))
				return pos - 1;
			if (!seenText)
				seenText = token == null || !token.isEmpty();
			pos++;
		}
		return s.length - 1;
	}

	/**
	 * Cuts a tokenized phrase at its first preposition and resolves the head
	 * of the part in front of it.
	 *
	 * <p>Tokens are joined with single spaces up to, but excluding, the first
	 * preposition that follows non-empty text; the joined text is split on
	 * spaces again and handed to {@code wnHeadWord}.
	 *
	 * @param s tokens of the phrase
	 * @return the result of {@code wnHeadWord} for the part before the
	 *         preposition
	 * @throws SQLException propagated from {@code wnHeadWord}
	 * @throws IOException  propagated from {@code wnHeadWord}
	 */
	public static String headWordStr(String[] s) throws SQLException,
		IOException {
		StringBuilder prefix = new StringBuilder();
		for (String token : s) {
			boolean hasText = prefix.length() > 0;
			// e.g. "tower of hanoi": stop at "of" and keep only "tower"
			if (hasText && prepositions.contains(token))
				break;
			if (hasText)
				prefix.append(' ');
			prefix.append(token);
		}
		String[] head = prefix.toString().split(" ");
		return wnHeadWord(head);
	}

	/**
	 * Picks the longest suffix of the given words that
	 * {@code IDHelper.WNWords.inWN} recognises, e.g.
	 * "san francisco botanical garden" -> "botanical garden".
	 *
	 * <p>Suffixes are tried from the whole phrase down to the last word
	 * alone; the first one with a non-null lookup result wins.
	 *
	 * @param s words of the phrase
	 * @return the first recognised suffix, or the last word when none is
	 * @throws IOException  propagated from the lookup
	 * @throws SQLException propagated from the lookup
	 */
	private static String wnHeadWord(String[] s) throws SQLException,
		IOException {
		for (int start = 0; start < s.length; start++) {
			StringBuilder suffix = new StringBuilder();
			for (int k = start; k < s.length; k++) {
				if (suffix.length() > 0)
					suffix.append(' ');
				suffix.append(s[k]);
			}
			String candidate = suffix.toString();
			if (IDHelper.WNWords.inWN(candidate) != null)
				return candidate;
		}
		// No suffix was recognised: fall back to the final word.
		return s[s.length - 1];
	}

	/************************************************************************
	 * Stems only head noun (e.g. will stem "schools boxes" to "schools box")
	 *
	 * @param MorphaDotPOS
	 *            morpha code : Morpha.noun , Morpha.verb , or Morpha.any
	 ************************************************************************/
	/*public static String judiciousStemming(String phrase, int MorphaDotPOS) {

		// instance e.g. Los Angeles doesn't need to be stemmed
		// check if it's in WN
		// things is a WN entry. how to stem things to thing? --> keep instance as it is
		List<String> inWNList = new ArrayList<>();
		if (phrase.indexOf(" ") > 0 && MorphaDotPOS == Morpha.noun) {
			try {
				inWNList = IDHelper.WNWords.inWN(phrase).second;
			} catch (Exception e) {
			}
		}
		if (MorphaDotPOS == Morpha.noun && inWNList != null
				&& inWNList.size() > 0) {
			return phrase;
		} else {
			// Only for noun
			StringBuilder sb = new StringBuilder();
			String[] splitted = phrase.split(" ");
			if (MorphaDotPOS == Morpha.noun) {
				int headWordIndex = headWord(splitted);
				// The blue darts to blue darts.

				for (int i = 0; i < splitted.length; i++) {
					// if (!Util.isStopWord(splitted[i]))
					sb.append(i > 0 ? ' ' : "");
					if (i == headWordIndex)
						sb.append(stem(splitted[i], MorphaDotPOS));
					else
						sb.append(splitted[i]);
				}
			} else {
				for (int i = 0; i < splitted.length; i++) {
					// if (!Util.isStopWord(splitted[i]))
					sb.append(i > 0 ? ' ' : "").append(
							stem(splitted[i], MorphaDotPOS));
				}
			}

			if (sb.length() == 0)
				sb.append(sb.length() > 0 ? ' ' : "").append(
						stem(splitted[splitted.length - 1], MorphaDotPOS));

			return sb.toString();
		}
	}*/

	/**
	 * Stems a space-separated phrase.
	 *
	 * <p>With {@code Morpha.noun} only the head word (see
	 * {@link #headWord(String[])}) is stemmed, e.g. "schools boxes" becomes
	 * "schools box", and a phrase for which {@code isAnInstance} is true is
	 * returned untouched. With any other code every word is stemmed.
	 *
	 * @param phrase       phrase whose words are separated by single spaces
	 * @param MorphaDotPOS morpha code: Morpha.noun, Morpha.verb or Morpha.any
	 * @return the stemmed phrase; {@code phrase} itself when it contains no
	 *         words at all (for example only spaces)
	 */
	public static String judiciousStemming(String phrase, int MorphaDotPOS) {
		boolean nounMode = MorphaDotPOS == Morpha.noun;
		if (nounMode && isAnInstance(phrase))
			return phrase;

		String[] splitted = phrase.split(" ");
		// A phrase made only of spaces splits into zero tokens; there is
		// nothing to stem and indexing the empty array below would throw.
		if (splitted.length == 0)
			return phrase;

		int headWordIndex = nounMode ? headWord(splitted) : -1;
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < splitted.length; i++) {
			if (i > 0)
				sb.append(' ');
			if (!nounMode || i == headWordIndex)
				sb.append(stem(splitted[i], MorphaDotPOS));
			else
				sb.append(splitted[i]);
		}

		// Nothing was produced (e.g. an empty phrase): stem the last token.
		if (sb.length() == 0)
			sb.append(stem(splitted[splitted.length - 1], MorphaDotPOS));

		return sb.toString();
	}

	/*
	 * Note: instances such as "los angeles" are deliberately not stemmed
	 * (ordinary plurals such as "boxes" are). The check that decides this is
	 * isAnInstance(String), defined further down in this class.
	 */

	/**
	 * Stems a single word.
	 *
	 * <p>For nouns the irregular-plural table is consulted first; when it has
	 * no non-empty entry, and for every other part of speech, the word goes
	 * through {@code stemExceptWN}.
	 *
	 * @param w            word to stem
	 * @param MorphaDotPOS morpha code: Morpha.noun, Morpha.verb or Morpha.any
	 * @return the stemmed word
	 */
	public static String stem(String w, int MorphaDotPOS) {
		if (MorphaDotPOS == Morpha.noun) {
			String singular = IrregularPlurals.PLURAL.getSingular(w);
			if (singular != null && !singular.isEmpty())
				return singular;
		}
		return stemExceptWN(w, MorphaDotPOS);
	}

	/**
	 * Tells whether a phrase is a pure instance, i.e.
	 * {@code IDHelper.WNWords.getWNWordTypes} reports exactly one type for it
	 * and that type is {@code 'i'}.
	 *
	 * @param w phrase to look up
	 * @return {@code true} only for such a phrase; {@code false} when no
	 *         types are reported or the lookup fails with an IOException or
	 *         SQLException
	 */
	private static boolean isAnInstance(String w) {
		Set<Character> wnTypes;
		try {
			wnTypes = IDHelper.WNWords.getWNWordTypes(w);
		} catch (IOException | SQLException ignored) {
			// A failed lookup is treated like "not an instance".
			return false;
		}
		return wnTypes != null && wnTypes.size() == 1
			&& wnTypes.contains('i');
	}

	/**
	 * Stems a word with Morpha but keeps the original form unless the stem is
	 * confirmed by {@code IDHelper.WNWords.inWN}.
	 *
	 * <p>The stem is accepted when the lookup returns a pair whose second
	 * component is non-empty and, for nouns and verbs, contains "n" or "v"
	 * respectively (presumably the parts of speech of the entry).
	 *
	 * @param w            word to stem
	 * @param MorphaDotPOS morpha code: Morpha.noun, Morpha.verb or Morpha.any
	 * @return the confirmed stem, otherwise {@code w} unchanged
	 */
	private static String stemExceptWN(String w, int MorphaDotPOS) {
		// "_s" originally was 's and must stay untouched.
		if (w.equals("_s"))
			return w;

		String candidate = stemMorpha(w, MorphaDotPOS);

		String posTag;
		if (MorphaDotPOS == Morpha.noun)
			posTag = "n";
		else if (MorphaDotPOS == Morpha.verb)
			posTag = "v";
		else
			posTag = "";

		Pair<String, Set<String>> lookup = null;
		try {
			lookup = IDHelper.WNWords.inWN(candidate);
		} catch (Exception e) {
			e.printStackTrace();
		}

		boolean known = lookup != null && lookup.second != null
			&& !lookup.second.isEmpty();
		if (!known)
			return w;
		boolean posMatches = posTag.isEmpty()
			|| lookup.second.contains(posTag);
		return posMatches ? candidate : w;
	}

	/**
	 * Counts how often a character occurs in a string.
	 *
	 * @param s          text to scan
	 * @param ec         character to count
	 * @param shouldTrim when {@code true}, {@code s} is trimmed before counting
	 * @return number of occurrences of {@code ec}
	 */
	private static int countChar(String s, char ec, boolean shouldTrim) {
		String text = shouldTrim ? s.trim() : s;
		int hits = 0;
		for (int i = 0; i < text.length(); i++) {
			if (text.charAt(i) == ec)
				hits++;
		}
		return hits;
	}

	/*************************************************************************
	 * Stems the last word of {@code w} with the shared Morpha lexer and copies
	 * any preceding words unchanged.
	 *
	 * Morpha only does noun plurals, pronoun case, and verb endings, and
	 * not things like comparative adjectives or derived nominals. It is based
	 * on a finite-state transducer implemented by John Carroll et al., written
	 * in flex and publicly available. See:
	 * http://www.informatics.susx.ac.uk/research/nlp/carroll/morph.html .
	 *
	 * @param w
	 *            word or space-separated phrase, e.g. fighter jets
	 * @param morphaPOSNum
	 *            (Use Morpha. static members) e.g. 2 (for noun), 3 (for any)
	 * @return the phrase with its last word stemmed, e.g. fighter jet (note
	 *         that fighters jets returns fighters jet); {@code w} itself when
	 *         it is null or empty, or when stemming throws an Exception or
	 *         an Error
	 ************************************************************************/
	private static String stemMorpha(String w, int morphaPOSNum) {
		try {
			if (w == null || w.length() == 0)
				return w;
			// One more word than there are spaces.
			int numWords = countChar(w, ' ', false) + 1;
			String[] ws = null;
			if (numWords > 1)
				ws = w.split(" ");
			// The lexer is created on first use and then reused.
			if (lexer == null)
				lexer = new Morpha(System.in);
			// Only the last word is fed to the lexer.
			lexer.yyreset(new StringReader(numWords == 1 ? w
				: ws[ws.length - 1]));
			lexer.yybegin(morphaPOSNum);
			if (numWords == 1)
				return lexer.next();
			else {
				// Re-attach the untouched leading words in front of the stem.
				StringBuilder sb = new StringBuilder();
				for (int i = 0; i < ws.length - 1; i++)
					sb.append(ws[i]).append(" ");
				sb.append(lexer.next());
				return sb.toString();
			}
		} catch (Exception e) {
			// Deliberately ignored: fall through and return w unchanged.
		} catch (Error e) {
			// Sometimes Morpha throws Error, e.g.
			// "java.lang.Error: Error: could not match input".
			// Deliberately ignored: fall through and return w unchanged.
		}
		return w;
	}

	/** Morpha lexer reused across calls to stemMorpha, which creates it on first use. */
	static Morpha lexer;

	/** Forms of "be" and "have"; headVerb skips one of these when it leads a candidate. */
	public static Collection<String> copularVerbs = new HashSet<String>(
		Arrays.asList("be", "has", "have", "had", "is", "was", "are", "were"));

	/** Articles plus a few possessive determiners. */
	public static Collection<String> articles = new HashSet<String>(
		Arrays.asList("a", "an", "the", "your", "my", "our", "his", "her"));

	/**
	 * Prepositions used as phrase boundaries by headWord, headWordStr and
	 * headVerb. Two overlapping lists are merged here; the duplicates collapse
	 * in the set. A few entries consist of several words ("in front of").
	 */
	public static Collection<String> prepositions = new HashSet<String>(
		Arrays.asList(
			// first list: mostly spatial prepositions
			"in", "on", "at", "with", "into", "across", "opposite",
			"toward", "towards", "through", "beyond", "aboard", "amid",
			"past", "by", "near", "nearby", "above", "below", "over",
			"under", "up", "down", "around", "through", "inside", "out",
			"outside", "outside of", "between", "beside", "besides",
			"beyond", "in front of", "in back of", "behind", "next to",
			"on top of", "within", "beneath", "underneath", "among",
			"along", "against",
			// second list: general prepositions, alphabetical
			"aboard", "about", "above", "across", "after", "against",
			"along", "amid", "among", "anti", "around", "as", "at",
			"before", "behind", "below", "beneath", "beside", "besides",
			"between", "beyond", "but", "by", "concerning", "considering",
			"despite", "down", "during", "except", "excepting",
			"excluding", "following", "for", "from", "in", "inside",
			"into", "in front of", "like", "minus", "near", "of", "off",
			"on", "onto", "opposite", "outside", "over", "past", "per",
			"plus", "regarding", "round", "save", "since", "than",
			"through", "to", "toward", "towards", "under", "underneath",
			"unlike", "until", "up", "upon", "versus", "via", "with",
			"within", "without"));

	/** Modal verbs; headVerb skips one of these when it leads a candidate. */
	public static Collection<String> MODAL_VERBS = new HashSet<String>(
		Arrays.asList("can", "could", "may", "might", "will", "would",
			"must", "shall", "should", "ought to"));

	/**
	 * Stop words. Two overlapping lists are merged here; the duplicates
	 * collapse in the set. "ours" and "ourselves" are separate entries (they
	 * used to be fused into the single, never-matching entry
	 * "ours ourselves").
	 */
	public static Collection<String> STOPWORDS = new HashSet<String>(Arrays
		.asList(new String[] {"a", "able", "about", "across", "after", "all",
			"almost", "also", "always", "am", "among", "an", "and", "another",
			"any", "are", "as", "at", "be", "because", "been", "before",
			"being", "but", "by", "can", "cannot", "could", "dear", "did",
			"do", "does", "either", "else", "ever", "every", "few", "for",
			"from", "get", "got", "had", "has", "have", "he", "her", "here",
			"hers", "him", "his", "how", "however", "i", "if", "in", "into",
			"is", "it", "its", "just", "least", "let", "like", "likely", "lrb",
			"many", "may", "me", "might", "mine", "more", "most", "much",
			"must", "my", "neither", "no", "none", "nor", "not", "nothing",
			"now", "nt", "of", "off", "often", "on", "only", "or", "other",
			"our", "ours", "own", "per", "rather", "rrb", "said", "say",
			"says", "she", "should", "since", "so", "some", "somehow", "still",
			"such", "than", "that", "the", "their", "theirs", "them", "then",
			"there", "these", "they", "this", "those", "though", "tis", "to",
			"too", "twas", "u", "us", "very", "want", "wants", "was", "we",
			"were", "what", "when", "where", "which", "while", "who", "whom",
			"why", "will", "with", "would", "www", "yet", "you", "your",
			"yours", "yourss", "'m", "'ll",
			// second list
			"a", "about", "above", "after",
			"again", "against", "all", "am", "an", "and", "any", "are", "as",
			"at", "be", "because", "been", "before", "being", "below",
			"between", "both", "but", "by", "cannot", "could", "did", "do",
			"does", "dont", "doesnt", "cant",
			"doing", "down", "during", "each", "few", "for", "from",
			"further", "had", "has", "have", "having", "he", "her", "here",
			"hers", "herself", "him", "himself", "his", "how", "however", "i",
			"if", "in", "into", "is", "it", "its", "itself", "let", "lrb",
			"me", "more", "most", "must", "my", "myself", "no", "nor", "not",
			"of", "off", "on", "once", "only", "or", "other", "ought", "our",
			"ours", "ourselves", "out", "over", "own", "rrb", "same", "sha",
			"she", "should", "so", "some", "such", "than", "that", "the",
			"their", "theirs", "them", "themselves", "then", "there", "these",
			"they", "this", "those", "through", "to", "too", "under", "until",
			"up", "very", "was", "we", "were", "what", "when", "where",
			"which", "while", "who", "who", "whom", "why", "why", "with", "wo",
			"would", "would", "you", "you", "you", "you", "you", "your",
			"yours", "yourself", "yourselves"}));

	/**
	 * Returns the position of the head verb within a tokenized head-verb
	 * candidate.
	 *
	 * <p>Rules, applied in this order:
	 * <ol>
	 * <li>the first preposition that follows non-empty text and is not the
	 * last token puts the head right after it ("begin to peel" gives 2);</li>
	 * <li>otherwise, when there are several tokens and the first one is a
	 * copular verb, modal verb or stop word, that token is skipped and the
	 * rest is examined the same way ("had turned" gives 1);</li>
	 * <li>otherwise the last index when the last token ends with "ing";</li>
	 * <li>otherwise 0 ("take out", "heat up").</li>
	 * </ol>
	 *
	 * @param s tokens of the candidate
	 * @return index of the head verb in {@code s}
	 */
	public static int headVerb(String[] s) {
		// Becomes true once a non-empty token has been passed.
		boolean seenText = false;
		int last = s.length - 1;
		for (int i = 0; i < s.length; i++) {
			String token = s[i];
			// A trailing preposition is kept with the verb, e.g. "heat up".
			if (seenText && i != last && prepositions.contains(token))
				return i + 1;
			if (!seenText)
				seenText = token == null || !token.isEmpty();
		}

		if (s.length > 1) {
			String first = s[0];
			boolean skippable = copularVerbs.contains(first)
				|| MODAL_VERBS.contains(first) || STOPWORDS.contains(first);
			if (skippable)
				return 1 + headVerb(Arrays.copyOfRange(s, 1, s.length));
		}

		return s[last].endsWith("ing") ? last : 0;
	}

	/**
	 * Lookup table from irregular plural nouns to their singular forms,
	 * loaded once from {@code resources/irregular-plurals.txt}. Each line of
	 * that file holds a plural and its singular separated by a tab, e.g.
	 * "women" and "woman".
	 */
	private static enum IrregularPlurals {
		PLURAL();

		private final Map<String, String> pluralToSingularMap;

		private IrregularPlurals() {
			this.pluralToSingularMap = loadPlurals();
		}

		/**
		 * Reads the plural-to-singular table. Loading is best effort: a
		 * failure is reported on standard error and the entries read so far
		 * are kept.
		 */
		private Map<String, String> loadPlurals() {
			Map<String, String> mapping = new HashMap<>();
			try {
				for (String line : new FileLines("resources/irregular-plurals.txt")) {
					String[] splitted = line.split("\t");
					// Skip lines without a tab; indexing them would throw and
					// silently drop every later line of the file.
					if (splitted.length < 2)
						continue;
					mapping.put(splitted[0], splitted[1]);
				}
			} catch (Exception e) {
				System.err.println("Could not load irregular plurals: " + e);
			}
			return mapping;
		}

		/**
		 * @param pluralNoun plural form to look up
		 * @return the singular form, or {@code null} when the table has no
		 *         entry for {@code pluralNoun}
		 */
		public String getSingular(String pluralNoun) {
			return pluralToSingularMap.get(pluralNoun);
		}

	}
}
	package kb.howtokb.taskframe.extractor;

	import java.io.BufferedReader;
	import java.io.FileNotFoundException;
	import java.io.FileReader;
	import java.io.IOException;
	import java.io.StringReader;
	import java.sql.SQLException;
	import java.util.Arrays;
	import java.util.Collection;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Map;
	import java.util.Set;

	import edu.stanford.nlp.util.Pair;
	import kb.howtokb.utils.FileLines;
	import kb.howtokb.utils.IDHelper;
	import uk.ac.susx.informatics.Morpha;

	public class HelperForOpenIE4Activities {

	/**
	 * Reads a text file into the set of its distinct non-empty lines.
	 *
	 * <p>The file is decoded as UTF-8. I/O problems are reported on standard
	 * output and whatever was read up to that point is returned, so no
	 * exception reaches the caller.
	 *
	 * @param filePath    path of the file to read
	 * @param toLowerCase when {@code true}, each line is lower-cased before it
	 *                    is stored
	 * @param trim        when {@code true}, leading and trailing whitespace is
	 *                    removed and lines that become empty are skipped
	 * @return the distinct lines; empty when the file cannot be opened
	 */
	public static Set<String> readFileAsSet(String filePath,
		boolean toLowerCase, boolean trim) {
		Set<String> lines = new HashSet<String>();
		// try-with-resources closes the reader even when readLine() fails.
		try (BufferedReader in = new BufferedReader(new InputStreamReader(
			new FileInputStream(filePath), StandardCharsets.UTF_8))) {
			String line;
			while ((line = in.readLine()) != null) {
				if (trim)
					line = line.trim();
				// Tested after trimming so blank lines never become "" entries.
				if (line.isEmpty())
					continue;
				lines.add(toLowerCase ? line.toLowerCase() : line);
			}
		} catch (FileNotFoundException e) {
			System.out.println("File not found, in reading file as list: "
				+ e.getMessage());
		} catch (IOException e) {
			System.out.println("IOException in reading file as list: "
				+ e.getMessage());
		}
		return lines;
	}

	/*public static String headWord(String s) {
	String[] words = s.split(" ");
	return words[headWord(words)];
	}*/
	/**
	 * Finds the head of a space-separated phrase.
	 *
	 * @param s phrase whose words are separated by single spaces
	 * @return whatever {@link #headWordStr(String[])} yields for the words of
	 *         {@code s}
	 * @throws SQLException propagated from {@link #headWordStr(String[])}
	 * @throws IOException  propagated from {@link #headWordStr(String[])}
	 */
	public static String headWord(String s) throws SQLException, IOException {
		String[] words = s.split(" ");
		return headWordStr(words);
	}

	/**
	 * Returns the position of the head word of a tokenized phrase.
	 *
	 * <p>The head is the token directly in front of the first preposition
	 * that follows some non-empty text, e.g. index 0 ("tower") for
	 * "tower of hanoi". Prepositions met before any non-empty token are not
	 * treated as a boundary.
	 *
	 * @param s tokens of the phrase
	 * @return index of the head word, or the last index
	 *         ({@code s.length - 1}) when no such preposition exists
	 */
	public static int headWord(String[] s) {
		// Becomes true once a non-empty token has been passed.
		boolean seenText = false;
		int pos = 0;
		while (pos < s.length) {
			String token = s[pos];
			if (seenText && prepositions.contains(token))
				return pos - 1;
			if (!seenText)
				seenText = token == null || !token.isEmpty();
			pos++;
		}
		return s.length - 1;
	}

	/**
	 * Cuts a tokenized phrase at its first preposition and resolves the head
	 * of the part in front of it.
	 *
	 * <p>Tokens are joined with single spaces up to, but excluding, the first
	 * preposition that follows non-empty text; the joined text is split on
	 * spaces again and handed to {@code wnHeadWord}.
	 *
	 * @param s tokens of the phrase
	 * @return the result of {@code wnHeadWord} for the part before the
	 *         preposition
	 * @throws SQLException propagated from {@code wnHeadWord}
	 * @throws IOException  propagated from {@code wnHeadWord}
	 */
	public static String headWordStr(String[] s) throws SQLException,
		IOException {
		StringBuilder prefix = new StringBuilder();
		for (String token : s) {
			boolean hasText = prefix.length() > 0;
			// e.g. "tower of hanoi": stop at "of" and keep only "tower"
			if (hasText && prepositions.contains(token))
				break;
			if (hasText)
				prefix.append(' ');
			prefix.append(token);
		}
		String[] head = prefix.toString().split(" ");
		return wnHeadWord(head);
	}

	/**
	 * Picks the longest suffix of the given words that
	 * {@code IDHelper.WNWords.inWN} recognises, e.g.
	 * "san francisco botanical garden" -> "botanical garden".
	 *
	 * <p>Suffixes are tried from the whole phrase down to the last word
	 * alone; the first one with a non-null lookup result wins.
	 *
	 * @param s words of the phrase
	 * @return the first recognised suffix, or the last word when none is
	 * @throws IOException  propagated from the lookup
	 * @throws SQLException propagated from the lookup
	 */
	private static String wnHeadWord(String[] s) throws SQLException,
		IOException {
		for (int start = 0; start < s.length; start++) {
			StringBuilder suffix = new StringBuilder();
			for (int k = start; k < s.length; k++) {
				if (suffix.length() > 0)
					suffix.append(' ');
				suffix.append(s[k]);
			}
			String candidate = suffix.toString();
			if (IDHelper.WNWords.inWN(candidate) != null)
				return candidate;
		}
		// No suffix was recognised: fall back to the final word.
		return s[s.length - 1];
	}

	/************************************************************************
	* Stems only head noun (e.g. will stem "schools boxes" to "schools box")
	*
	* @param MorphaDotPOS
	* morpha code : Morpha.noun , Morpha.verb , or Morpha.any
	************************************************************************/
	/*public static String judiciousStemming(String phrase, int MorphaDotPOS) {

	// instance e.g. Los Angeles doesn't need to be stemmed
	// check if it's in WN
	// things is a WN entry. how to stem things to thing? --> keep instance as it is
	List<String> inWNList = new ArrayList<>();
	if (phrase.indexOf(" ") > 0 && MorphaDotPOS == Morpha.noun) {
	try {
	inWNList = IDHelper.WNWords.inWN(phrase).second;
	} catch (Exception e) {
	}
	}
	if (MorphaDotPOS == Morpha.noun && inWNList != null
	&& inWNList.size() > 0) {
	return phrase;
	} else {
	// Only for noun
	StringBuilder sb = new StringBuilder();
	String[] splitted = phrase.split(" ");
	if (MorphaDotPOS == Morpha.noun) {
	int headWordIndex = headWord(splitted);
	// The blue darts to blue darts.

	for (int i = 0; i < splitted.length; i++) {
	// if (!Util.isStopWord(splitted[i]))
	sb.append(i > 0 ? ' ' : "");
	if (i == headWordIndex)
	sb.append(stem(splitted[i], MorphaDotPOS));
	else
	sb.append(splitted[i]);
	}
	} else {
	for (int i = 0; i < splitted.length; i++) {
	// if (!Util.isStopWord(splitted[i]))
	sb.append(i > 0 ? ' ' : "").append(
	stem(splitted[i], MorphaDotPOS));
	}
	}

	if (sb.length() == 0)
	sb.append(sb.length() > 0 ? ' ' : "").append(
	stem(splitted[splitted.length - 1], MorphaDotPOS));

	return sb.toString();
	}
	}*/

	/**
	 * Stems a space-separated phrase.
	 *
	 * <p>With {@code Morpha.noun} only the head word (see
	 * {@link #headWord(String[])}) is stemmed, e.g. "schools boxes" becomes
	 * "schools box", and a phrase for which {@code isAnInstance} is true is
	 * returned untouched. With any other code every word is stemmed.
	 *
	 * @param phrase       phrase whose words are separated by single spaces
	 * @param MorphaDotPOS morpha code: Morpha.noun, Morpha.verb or Morpha.any
	 * @return the stemmed phrase; {@code phrase} itself when it contains no
	 *         words at all (for example only spaces)
	 */
	public static String judiciousStemming(String phrase, int MorphaDotPOS) {
		boolean nounMode = MorphaDotPOS == Morpha.noun;
		if (nounMode && isAnInstance(phrase))
			return phrase;

		String[] splitted = phrase.split(" ");
		// A phrase made only of spaces splits into zero tokens; there is
		// nothing to stem and indexing the empty array below would throw.
		if (splitted.length == 0)
			return phrase;

		int headWordIndex = nounMode ? headWord(splitted) : -1;
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < splitted.length; i++) {
			if (i > 0)
				sb.append(' ');
			if (!nounMode || i == headWordIndex)
				sb.append(stem(splitted[i], MorphaDotPOS));
			else
				sb.append(splitted[i]);
		}

		// Nothing was produced (e.g. an empty phrase): stem the last token.
		if (sb.length() == 0)
			sb.append(stem(splitted[splitted.length - 1], MorphaDotPOS));

		return sb.toString();
	}

	/*
	 * Note: instances such as "los angeles" are deliberately not stemmed
	 * (ordinary plurals such as "boxes" are). The check that decides this is
	 * isAnInstance(String), defined further down in this class.
	 */

	public static String stem(String w, int MorphaDotPOS) {
	String stemmed = null;
	if (MorphaDotPOS == Morpha.noun) {
	stemmed = IrregularPlurals.PLURAL.getSingular(w);
	}
	if (stemmed == null \|\| stemmed.isEmpty())
	stemmed = stemExceptWN(w, MorphaDotPOS);
	return stemmed;
	}

	private static boolean isAnInstance(String w) {
	try {
	Set<Character> types = IDHelper.WNWords.getWNWordTypes(w);
	if (types == null)
	return false;
	else if (types.size() == 1 && types.contains('i'))
	return true;
	} catch (IOException \| SQLException e) {}
	return false;
	}

	private static String stemExceptWN(String w, int MorphaDotPOS) {
	if (w.equals("_s"))
	return w; // exception for _s which originally was 's
	else {
	String stemmed = stemMorpha(w, MorphaDotPOS);
	String pos = "";
	switch (MorphaDotPOS) {
	case Morpha.noun:
	pos = "n";
	break;
	case Morpha.verb:
	pos = "v";
	break;
	}
	Pair<String, Set<String>> inWN = null;
	try {
	inWN = IDHelper.WNWords.inWN(stemmed);
	} catch (Exception e) {
	e.printStackTrace();
	}
	if (inWN == null)
	return w;
	if (inWN.second == null \|\| inWN.second.isEmpty())
	return w;
	if (pos.isEmpty())
	return stemmed;
	if (inWN.second.contains(pos))
	return stemmed;
	else
	return w;
	}
	}

	/**
	 * Counts how often a character occurs in a string.
	 *
	 * @param s          text to scan
	 * @param ec         character to count
	 * @param shouldTrim when {@code true}, {@code s} is trimmed before counting
	 * @return number of occurrences of {@code ec}
	 */
	private static int countChar(String s, char ec, boolean shouldTrim) {
		String text = shouldTrim ? s.trim() : s;
		int hits = 0;
		for (int i = 0; i < text.length(); i++) {
			if (text.charAt(i) == ec)
				hits++;
		}
		return hits;
	}

	/*************************************************************************
	* That is, it only does noun plurals, pronoun case, and verb endings, and
	* not things like comparative adjectives or derived nominals. It is based
	* on a finite-state transducer implemented by John Carroll et al., written
	* in flex and publicly available. See:
	* http://www.informatics.susx.ac.uk/research/nlp/carroll/morph.html .
	*
	* @param w
	* e.g. fighter jets
	* @param morphaPOSNum
	* (Use Morpha. static members) e.g. 2 (for noun), 3 (for any)
	* @return fighter jet (note that fighters jets returns fighters jet
	* @usage Util.stem("goes", Morpha.verb);
	************************************************************************/
	private static String stemMorpha(String w, int morphaPOSNum) {
	try {
	if (w == null \|\| w.length() == 0)
	return w;
	int numWords = countChar(w, ' ', false) + 1;
	String[] ws = null;
	if (numWords > 1)
	ws = w.split(" ");
	if (lexer == null)
	lexer = new Morpha(System.in);
	lexer.yyreset(new StringReader(numWords == 1 ? w
	: ws[ws.length - 1]));
	lexer.yybegin(morphaPOSNum);
	if (numWords == 1)
	return lexer.next();
	else {
	StringBuilder sb = new StringBuilder();
	for (int i = 0; i < ws.length - 1; i++)
	sb.append(ws[i]).append(" ");
	sb.append(lexer.next());
	return sb.toString();
	}
	} catch (Exception e) {
	/*
	* System.out.println("Exception in stemming (" + w + "): " +
	* e.getMessage());
	*/
	// e.printStackTrace();
	} catch (Error e) {
	// Sometimes Morpha throws Error!
	// Exception in thread "main" java.lang.Error: Error: could not
	// match input
	/*
	* System.out.println("Error in stemming (" + w + "): " +
	* e.getMessage());
	*/
	// e.printStackTrace();
	}
	return w;
	}

	/** Morpha lexer reused across calls to stemMorpha, which creates it on first use. */
	static Morpha lexer;

	/** Forms of "be" and "have"; headVerb skips one of these when it leads a candidate. */
	public static Collection<String> copularVerbs = new HashSet<String>(
		Arrays.asList("be", "has", "have", "had", "is", "was", "are", "were"));

	/** Articles plus a few possessive determiners. */
	public static Collection<String> articles = new HashSet<String>(
		Arrays.asList("a", "an", "the", "your", "my", "our", "his", "her"));

	/**
	 * Prepositions used as phrase boundaries by headWord, headWordStr and
	 * headVerb. Two overlapping lists are merged here; the duplicates collapse
	 * in the set. A few entries consist of several words ("in front of").
	 */
	public static Collection<String> prepositions = new HashSet<String>(
		Arrays.asList(
			// first list: mostly spatial prepositions
			"in", "on", "at", "with", "into", "across", "opposite",
			"toward", "towards", "through", "beyond", "aboard", "amid",
			"past", "by", "near", "nearby", "above", "below", "over",
			"under", "up", "down", "around", "through", "inside", "out",
			"outside", "outside of", "between", "beside", "besides",
			"beyond", "in front of", "in back of", "behind", "next to",
			"on top of", "within", "beneath", "underneath", "among",
			"along", "against",
			// second list: general prepositions, alphabetical
			"aboard", "about", "above", "across", "after", "against",
			"along", "amid", "among", "anti", "around", "as", "at",
			"before", "behind", "below", "beneath", "beside", "besides",
			"between", "beyond", "but", "by", "concerning", "considering",
			"despite", "down", "during", "except", "excepting",
			"excluding", "following", "for", "from", "in", "inside",
			"into", "in front of", "like", "minus", "near", "of", "off",
			"on", "onto", "opposite", "outside", "over", "past", "per",
			"plus", "regarding", "round", "save", "since", "than",
			"through", "to", "toward", "towards", "under", "underneath",
			"unlike", "until", "up", "upon", "versus", "via", "with",
			"within", "without"));

	/** Modal verbs; headVerb skips one of these when it leads a candidate. */
	public static Collection<String> MODAL_VERBS = new HashSet<String>(
		Arrays.asList("can", "could", "may", "might", "will", "would",
			"must", "shall", "should", "ought to"));

	/**
	 * Stop words. Two overlapping lists are merged here; the duplicates
	 * collapse in the set. "ours" and "ourselves" are separate entries (they
	 * used to be fused into the single, never-matching entry
	 * "ours ourselves").
	 */
	public static Collection<String> STOPWORDS = new HashSet<String>(Arrays
		.asList(new String[] {"a", "able", "about", "across", "after", "all",
			"almost", "also", "always", "am", "among", "an", "and", "another",
			"any", "are", "as", "at", "be", "because", "been", "before",
			"being", "but", "by", "can", "cannot", "could", "dear", "did",
			"do", "does", "either", "else", "ever", "every", "few", "for",
			"from", "get", "got", "had", "has", "have", "he", "her", "here",
			"hers", "him", "his", "how", "however", "i", "if", "in", "into",
			"is", "it", "its", "just", "least", "let", "like", "likely", "lrb",
			"many", "may", "me", "might", "mine", "more", "most", "much",
			"must", "my", "neither", "no", "none", "nor", "not", "nothing",
			"now", "nt", "of", "off", "often", "on", "only", "or", "other",
			"our", "ours", "own", "per", "rather", "rrb", "said", "say",
			"says", "she", "should", "since", "so", "some", "somehow", "still",
			"such", "than", "that", "the", "their", "theirs", "them", "then",
			"there", "these", "they", "this", "those", "though", "tis", "to",
			"too", "twas", "u", "us", "very", "want", "wants", "was", "we",
			"were", "what", "when", "where", "which", "while", "who", "whom",
			"why", "will", "with", "would", "www", "yet", "you", "your",
			"yours", "yourss", "'m", "'ll",
			// second list
			"a", "about", "above", "after",
			"again", "against", "all", "am", "an", "and", "any", "are", "as",
			"at", "be", "because", "been", "before", "being", "below",
			"between", "both", "but", "by", "cannot", "could", "did", "do",
			"does", "dont", "doesnt", "cant",
			"doing", "down", "during", "each", "few", "for", "from",
			"further", "had", "has", "have", "having", "he", "her", "here",
			"hers", "herself", "him", "himself", "his", "how", "however", "i",
			"if", "in", "into", "is", "it", "its", "itself", "let", "lrb",
			"me", "more", "most", "must", "my", "myself", "no", "nor", "not",
			"of", "off", "on", "once", "only", "or", "other", "ought", "our",
			"ours", "ourselves", "out", "over", "own", "rrb", "same", "sha",
			"she", "should", "so", "some", "such", "than", "that", "the",
			"their", "theirs", "them", "themselves", "then", "there", "these",
			"they", "this", "those", "through", "to", "too", "under", "until",
			"up", "very", "was", "we", "were", "what", "when", "where",
			"which", "while", "who", "who", "whom", "why", "why", "with", "wo",
			"would", "would", "you", "you", "you", "you", "you", "your",
			"yours", "yourself", "yourselves"}));

	/** returns position of headverb from headverbCandidate s. */
	public static int headVerb(String[] s) {
	StringBuilder sb = new StringBuilder();
	for (int i = 0; i < s.length; i++) {
	String w = s[i];
	// begin to peel (here, peel = index 1, i.e. succeeding "to")
	if (prepositions.contains(w) && sb.length() > 0

	) {
	// prep. not last word e.g. heat up
	if (i != s.length - 1)
	return i + 1;
	/*
	* else if return 0;
	*/
	}
	sb.append(sb.length() == 0 ? "" : " ").append(w);
	}

	// had turned

	if (s.length > 1
	&& (copularVerbs.contains(s[0]) \|\| MODAL_VERBS.contains(s[0]) \|\| STOPWORDS
	.contains(s[0]))) {

	return 1 + headVerb(Arrays.copyOfRange(s, 1, s.length)); }

	// Fallback to last position case when no prepositions are present.
	// continue peeling
	if (s[s.length - 1].endsWith("ing")) // present tense.
	return s.length - 1;
	else
	// take out
	/*
	* return verbsInCorpus != null && verbsInCorpus.contains(s[0]) ? 0
	* : s.length - 1;
	*/
	return 0;
	}

	/**
	 * Lookup table from irregular plural nouns to their singular forms,
	 * loaded once from {@code resources/irregular-plurals.txt}. Each line of
	 * that file holds a plural and its singular separated by a tab, e.g.
	 * "women" and "woman".
	 */
	private static enum IrregularPlurals {
		PLURAL();

		private final Map<String, String> pluralToSingularMap;

		private IrregularPlurals() {
			this.pluralToSingularMap = loadPlurals();
		}

		/**
		 * Reads the plural-to-singular table. Loading is best effort: a
		 * failure is reported on standard error and the entries read so far
		 * are kept.
		 */
		private Map<String, String> loadPlurals() {
			Map<String, String> mapping = new HashMap<>();
			try {
				for (String line : new FileLines("resources/irregular-plurals.txt")) {
					String[] splitted = line.split("\t");
					// Skip lines without a tab; indexing them would throw and
					// silently drop every later line of the file.
					if (splitted.length < 2)
						continue;
					mapping.put(splitted[0], splitted[1]);
				}
			} catch (Exception e) {
				System.err.println("Could not load irregular plurals: " + e);
			}
			return mapping;
		}

		/**
		 * @param pluralNoun plural form to look up
		 * @return the singular form, or {@code null} when the table has no
		 *         entry for {@code pluralNoun}
		 */
		public String getSingular(String pluralNoun) {
			return pluralToSingularMap.get(pluralNoun);
		}

	}
	}