// NLPUtils.java

package utils;

import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import uk.ac.susx.informatics.Morpha;
import util.FileLines;
import util.Util;
import util.WordListEnum;
import util.IDHelper.WNWords;
import util.Pair;

public class NLPUtils {

	/*
	 * public static String headWord(String s) { String[] words = s.split(" ");
	 * return words[headWord(words)]; }
	 */
	/**
	 * Convenience overload: splits the phrase on single spaces and hands the
	 * tokens to {@link #headWordStr(String[])}.
	 *
	 * @param s
	 *            phrase to analyse
	 * @return the value headWordStr produces for the tokens of s
	 * @throws SQLException
	 *             propagated from headWordStr
	 * @throws IOException
	 *             propagated from headWordStr
	 */
	public static String headWord(String s) throws SQLException, IOException {
		final String[] tokens = s.split(" ");
		return headWordStr(tokens);
	}

	/**
	 * Returns the position of the head word in a tokenized phrase.
	 * <b>Always returns a single position, never a phrase.</b>
	 *
	 * <p>
	 * The tokens are scanned left to right for the first one that
	 * WordListEnum.PREPOSITIONS contains and that is preceded by at least one
	 * non-empty token:
	 * <ul>
	 * <li>"tower of hanoi": the head is the token before the preposition
	 * (index 0);</li>
	 * <li>"couple of sketch pads": when the words up to and including the
	 * preposition are in WordListEnum.PLURAL_QUANTIFIER, the head is the token
	 * after the preposition.</li>
	 * </ul>
	 * Without such a preposition the last token is the head.
	 *
	 * @param s
	 *            tokens of the phrase
	 * @return index of the head word; always inside the array for a non-empty
	 *         array, and -1 for an empty one
	 */
	public static int headWord(String[] s) {
		final int last = s.length - 1;
		// True once a non-empty token has been passed; this is all the original
		// StringBuilder was used for.
		boolean seenText = false;

		for (int i = 0; i < s.length; i++) {
			String w = s[i];
			if (WordListEnum.PREPOSITIONS.contains(w) && seenText) {
				if (WordListEnum.PLURAL_QUANTIFIER.contains(Util.rangeWords(0,
					i, s))) {
					// The quantifier may end the phrase ("couple of"); clamp so
					// callers never receive an index past the last token.
					return Math.min(i + 1, last);
				}
				// seenText guarantees i >= 1 here, so i - 1 is a valid index.
				return i - 1;
			}
			// StringBuilder.append(String) renders null as "null", so a null
			// token counted as text in the original as well.
			seenText = seenText || w == null || !w.isEmpty();
		}

		// No usable preposition: fall back to the last position.
		return last;
	}

	/**
	 * Returns the head words of a tokenized phrase as a string.
	 *
	 * <p>
	 * Tokens are collected until the first preposition that follows some
	 * text ("tower of hanoi" keeps "tower"). If the words up to and including
	 * that preposition form a plural quantifier ("couple of sketch pads"), the
	 * candidate becomes the words after the preposition instead. The candidate
	 * is stemmed with Morpha.any (verbs are handled by headVerb) and then
	 * narrowed by wnHeadWord.
	 *
	 * @param s
	 *            tokens of the phrase
	 * @return result of wnHeadWord on the stemmed candidate
	 * @throws SQLException
	 *             propagated from wnHeadWord
	 * @throws IOException
	 *             propagated from wnHeadWord
	 */
	public static String headWordStr(String[] s) throws SQLException,
		IOException {
		StringBuilder candidate = new StringBuilder();

		for (int idx = 0; idx < s.length; idx++) {
			final String token = s[idx];
			final boolean stopHere = WordListEnum.PREPOSITIONS.contains(token)
				&& candidate.length() > 0;

			if (!stopHere) {
				if (candidate.length() != 0) {
					candidate.append(" ");
				}
				candidate.append(token);
				continue;
			}

			// e.g. "couple of": words 0..idx form a plural quantifier, so
			// everything after the preposition becomes the candidate.
			if (WordListEnum.PLURAL_QUANTIFIER.contains(Util.rangeWords(0,
				idx, s))) {
				candidate = new StringBuilder();
				candidate.append(Util.rangeWords(idx + 1, s.length - 1, s));
			}
			// Either way the candidate is complete at the first preposition.
			break;
		}

		// "sketch pads" => "sketch pad"
		final String stemmedCandidate = stem(candidate.toString(), Morpha.any);
		return wnHeadWord(stemmedCandidate.split(" "));
	}

	/**
	 * Finds the longest suffix of the phrase for which WNWords.inWN returns a
	 * non-null result, e.g. "san francisco botanical garden" -> "botanical
	 * garden". Suffixes are tried from the full phrase down to the last token.
	 *
	 * @param s
	 *            tokens of the input phrase (must not be empty)
	 * @return the first matching suffix, tokens joined by single spaces; the
	 *         last token when no suffix matches
	 * @throws IOException
	 *             propagated from WNWords.inWN
	 * @throws SQLException
	 *             propagated from WNWords.inWN
	 */
	private static String wnHeadWord(String[] s) throws SQLException,
		IOException {
		int start = 0;
		while (start < s.length) {
			StringBuilder suffix = new StringBuilder();
			for (int k = start; k < s.length; k++) {
				if (suffix.length() > 0) {
					suffix.append(" ");
				}
				suffix.append(s[k]);
			}
			final String candidate = suffix.toString();
			if (WNWords.inWN(candidate) != null) {
				return candidate;
			}
			start++;
		}
		// Nothing matched: fall back to the last token.
		return s[s.length - 1];
	}

	/************************************************************************
	 * Stems a phrase token by token. For Morpha.noun only the head noun, as
	 * located by headWord(String[]), is stemmed (e.g. "schools boxes" becomes
	 * "schools box"); for any other code every token is stemmed.
	 *
	 * @param phrase
	 *            space-separated phrase
	 * @param MorphaDotPOS
	 *            morpha code : Morpha.noun , Morpha.verb , or Morpha.any
	 * @return the tokens re-joined with single spaces after stemming; the
	 *         phrase itself when it contains no tokens (only spaces)
	 ************************************************************************/
	public static String judiciousStemming(String phrase, int MorphaDotPOS) {
		String[] splitted = phrase.split(" ");
		if (splitted.length == 0) {
			// split() drops trailing empty strings, so an all-space phrase has
			// no tokens; previously this ended in splitted[-1].
			return phrase;
		}

		// An unconditional "return phrase" for Morpha.noun used to sit here and
		// made the head-noun branch below unreachable.
		final boolean headOnly = MorphaDotPOS == Morpha.noun;
		final int headWordIndex = headOnly ? headWord(splitted) : -1;

		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < splitted.length; i++) {
			if (i > 0) {
				sb.append(' ');
			}
			if (!headOnly || i == headWordIndex) {
				sb.append(stem(splitted[i], MorphaDotPOS));
			} else {
				sb.append(splitted[i]);
			}
		}
		return sb.toString();
	}

	/**
	 * Stems a word (or the last word of a phrase). For Morpha.noun the
	 * irregular-plural table is consulted first; when it has no non-empty
	 * entry, or for any other code, the result of stemExceptWN is returned.
	 *
	 * @param w
	 *            word or phrase to stem
	 * @param MorphaDotPOS
	 *            morpha code : Morpha.noun , Morpha.verb , or Morpha.any
	 * @return the stemmed form
	 */
	public static String stem(String w, int MorphaDotPOS) {
		if (MorphaDotPOS == Morpha.noun) {
			final String singular = IrregularPlurals.PLURAL.getSingular(w);
			if (singular != null && !singular.isEmpty()) {
				return singular;
			}
		}
		return stemExceptWN(w, MorphaDotPOS);
	}

	/**
	 * Stems w with Morpha and keeps the result only if WNWords.inWN knows it.
	 * The original word is returned when the lookup fails or throws, when its
	 * tag set is null or empty, or when a noun/verb code was requested and the
	 * tag set lacks "n"/"v" respectively. The token "_s" is never stemmed.
	 *
	 * @param w
	 *            word or phrase to stem
	 * @param MorphaDotPOS
	 *            morpha code : Morpha.noun , Morpha.verb , or Morpha.any
	 * @return the accepted stem, otherwise w
	 */
	private static String stemExceptWN(String w, int MorphaDotPOS) {
		if (w.equals("_s")) {
			return w; // exception for _s which originally was 's
		}

		final String candidate = stemMorpha(w, MorphaDotPOS);

		// Tag the lookup must contain; empty means "any tag is fine".
		final String requiredPos;
		if (MorphaDotPOS == Morpha.noun) {
			requiredPos = "n";
		} else if (MorphaDotPOS == Morpha.verb) {
			requiredPos = "v";
		} else {
			requiredPos = "";
		}

		Pair<String, Set<String>> lookup = null;
		try {
			lookup = WNWords.inWN(candidate);
		} catch (Exception e) {
			e.printStackTrace();
		}

		final boolean known = lookup != null && lookup.second != null
			&& !lookup.second.isEmpty();
		if (!known) {
			return w;
		}
		final boolean posMatches = requiredPos.isEmpty()
			|| lookup.second.contains(requiredPos);
		return posMatches ? candidate : w;
	}

	/**
	 * Counts how often a character occurs in a string.
	 *
	 * @param s
	 *            text to scan
	 * @param ec
	 *            character to count
	 * @param shouldTrim
	 *            when true, s is trimmed before counting
	 * @return number of occurrences of ec
	 */
	private static int countChar(String s, char ec, boolean shouldTrim) {
		final String text = shouldTrim ? s.trim() : s;
		int hits = 0;
		for (int pos = 0; pos < text.length(); pos++) {
			if (text.charAt(pos) == ec) {
				hits++;
			}
		}
		return hits;
	}

	/*************************************************************************
	 * Stems the last space-separated word of w with the shared Morpha lexer
	 * and returns w with that word replaced by the lexer output.
	 *
	 * Morpha only does noun plurals, pronoun case, and verb endings, and not
	 * things like comparative adjectives or derived nominals. It is based on a
	 * finite-state transducer implemented by John Carroll et al., written in
	 * flex and publicly available. See:
	 * http://www.informatics.susx.ac.uk/research/nlp/carroll/morph.html .
	 *
	 * @param w
	 *            e.g. fighter jets
	 * @param morphaPOSNum
	 *            lexer state, one of the Morpha static members
	 * @return e.g. fighter jet (fighters jets gives fighters jet); w itself
	 *         when it is null or empty, or when the lexer throws
	 ************************************************************************/
	private static String stemMorpha(String w, int morphaPOSNum) {
		try {
			if (w == null || w.length() == 0) {
				return w;
			}
			final boolean singleWord = countChar(w, ' ', false) == 0;
			final String[] parts = singleWord ? null : w.split(" ");

			// The lexer is created on first use, before the target word is
			// picked, exactly as in the original statement order.
			if (lexer == null) {
				lexer = new Morpha(System.in);
			}
			final String target = singleWord ? w : parts[parts.length - 1];
			lexer.yyreset(new StringReader(target));
			lexer.yybegin(morphaPOSNum);

			if (singleWord) {
				return lexer.next();
			}
			StringBuilder rebuilt = new StringBuilder();
			for (int i = 0; i < parts.length - 1; i++) {
				rebuilt.append(parts[i]).append(" ");
			}
			rebuilt.append(lexer.next());
			return rebuilt.toString();
		} catch (Exception | Error ignored) {
			// Deliberately best-effort: Morpha sometimes throws
			// java.lang.Error ("could not match input"); fall through and
			// return the unstemmed input.
		}
		return w;
	}

	/**
	 * Shared Morpha lexer. stemMorpha creates it on first use and afterwards
	 * re-targets it with yyreset for every call; no synchronization is applied
	 * in this class.
	 */
	static Morpha lexer;

	/**
	 * Forms of "be" and "have"; headVerb skips a leading token found in this
	 * set.
	 */
	public static Collection<String> copularVerbs = new HashSet<>(
		Arrays.asList("be", "has", "have", "had", "is", "was", "are", "were"));

	/** Articles and possessive determiners. */
	public static Collection<String> articles = new HashSet<String>(
		Arrays.asList("a", "an", "the", "your", "my", "our", "his", "her"));

	/**
	 * Preposition tokens consulted by headVerb. The literal repeats several
	 * words (harmless in a set) and contains multi-word entries such as
	 * "in front of"; headVerb looks up one array element at a time, so those
	 * entries only match when a caller passes the whole expression as a single
	 * element.
	 */
	public static Collection<String> prepositions = new HashSet<String>(Arrays
		.asList(new String[] {"in", "on", "at", "with", "into", "across",
			"opposite", "toward", "towards", "through", "beyond", "aboard",
			"amid", "past", "by", "near", "nearby", "above", "below", "over",
			"under", "up", "down", "around", "through", "inside", "out",
			"outside", "outside of", "between", "beside", "besides", "beyond",
			"in front of", "in back of", "behind", "next to", "on top of",
			"within", "beneath", "underneath", "among", "along", "against",

			"aboard", "about", "above", "across", "after", "against", "along",
			"amid", "among", "anti", "around", "as", "at", "before", "behind",
			"below", "beneath", "beside", "besides", "between", "beyond",
			"but", "by", "concerning", "considering", "despite", "down",
			"during", "except", "excepting", "excluding", "following", "for",
			"from", "in", "inside", "into", "in front of", "like", "minus",
			"near", "of", "off", "on", "onto", "opposite", "outside", "over",
			"past", "per", "plus", "regarding", "round", "save", "since",
			"than", "through", "to", "toward", "towards", "under",
			"underneath", "unlike", "until", "up", "upon", "versus", "via",
			"with", "within", "without"}));

	/**
	 * Modal verbs; headVerb skips a leading token found in this set. Note that
	 * "ought to" is stored as one two-word entry.
	 */
	public static Collection<String> MODAL_VERBS = new HashSet<String>(
		Arrays.asList("can", "could", "may", "might", "will", "would", "must",
			"shall", "should", "ought to"));

	/**
	 * Stop words; headVerb skips a leading token found in this set. The
	 * literal is the concatenation of two source lists, so many words appear
	 * twice, which is harmless in a set.
	 *
	 * Fix: "ours" and "ourselves" used to be fused into the single entry
	 * "ours ourselves" by a missing comma, so "ourselves" was not recognised.
	 */
	public static Collection<String> STOPWORDS = (new HashSet<String>(Arrays
		.asList(new String[] {"a", "able", "about", "across", "after", "all",
			"almost", "also", "always", "am", "among", "an", "and", "another",
			"any", "are", "as", "at", "be", "because", "been", "before",
			"being", "but", "by", "can", "cannot", "could", "dear", "did",
			"do", "does", "either", "else", "ever", "every", "few", "for",
			"from", "get", "got", "had", "has", "have", "he", "her", "here",
			"hers", "him", "his", "how", "however", "i", "if", "in", "into",
			"is", "it", "its", "just", "least", "let", "like", "likely", "lrb",
			"many", "may", "me", "might", "mine", "more", "most", "much",
			"must", "my", "neither", "no", "none", "nor", "not", "nothing",
			"now", "nt", "of", "off", "often", "on", "only", "or", "other",
			"our", "ours", "own", "per", "rather", "rrb", "said", "say",
			"says", "she", "should", "since", "so", "some", "somehow", "still",
			"such", "than", "that", "the", "their", "theirs", "them", "then",
			"there", "these", "they", "this", "those", "though", "tis", "to",
			"too", "twas", "u", "us", "very", "want", "wants", "was", "we",
			"were", "what", "when", "where", "which", "while", "who", "whom",
			"why", "will", "with", "would", "www", "yet", "you", "your",
			"yours", "yourss", "'m", "'ll", "a", "about", "above", "after",
			"again", "against", "all", "am", "an", "and", "any", "are", "as",
			"at", "be", "because", "been", "before", "being", "below",
			"between", "both", "but", "by", "cannot", "could", "did", "do",
			"does", "doing", "down", "during", "each", "few", "for", "from",
			"further", "had", "has", "have", "having", "he", "her", "here",
			"hers", "herself", "him", "himself", "his", "how", "however", "i",
			"if", "in", "into", "is", "it", "its", "itself", "let", "lrb",
			"me", "more", "most", "must", "my", "myself", "no", "nor", "not",
			"of", "off", "on", "once", "only", "or", "other", "ought", "our",
			"ours", "ourselves", "out", "over", "own", "rrb", "same", "sha",
			"she", "should", "so", "some", "such", "than", "that", "the",
			"their", "theirs", "them", "themselves", "then", "there", "these",
			"they", "this", "those", "through", "to", "too", "under", "until",
			"up", "very", "was", "we", "were", "what", "when", "where",
			"which", "while", "who", "who", "whom", "why", "why", "with", "wo",
			"would", "would", "you", "you", "you", "you", "you", "your",
			"yours", "yourself", "yourselves"})));

	/**
	 * Returns the position of the head verb within a tokenized verb phrase.
	 *
	 * <ol>
	 * <li>The first preposition that follows some text and is not the last
	 * token puts the head right after it: {"begin", "to", "peel"} gives 2. A
	 * trailing preposition is ignored here ({"heat", "up"} gives 0).</li>
	 * <li>Otherwise a leading copular verb, modal verb or stop word is skipped
	 * and the remaining tokens are analysed recursively: {"had", "turned"}
	 * gives 1.</li>
	 * <li>Otherwise the last token is the head when it ends in "ing", else the
	 * first token.</li>
	 * </ol>
	 *
	 * @param s
	 *            tokens of the head-verb candidate (must not be empty)
	 * @return index of the head verb in s
	 */
	public static int headVerb(String[] s) {
		final int lastIdx = s.length - 1;
		// True once a non-empty token has been passed (a null token counts,
		// since StringBuilder.append renders it as "null").
		boolean seenText = false;

		for (int i = 0; i < s.length; i++) {
			final String w = s[i];
			if (prepositions.contains(w) && seenText && i != lastIdx) {
				return i + 1;
			}
			seenText = seenText || w == null || !w.isEmpty();
		}

		// e.g. "had turned": drop the leading auxiliary and look again.
		if (s.length > 1) {
			final String first = s[0];
			final boolean skippable = copularVerbs.contains(first)
				|| MODAL_VERBS.contains(first) || STOPWORDS.contains(first);
			if (skippable) {
				final String[] rest = Arrays.copyOfRange(s, 1, s.length);
				return 1 + headVerb(rest);
			}
		}

		// No usable preposition: "continue peeling" => last, "take out" => 0.
		return s[lastIdx].endsWith("ing") ? lastIdx : 0;
	}

	/**
	 * Singleton lookup table of irregular plurals (e.g. women -> woman),
	 * filled once from ./data/irregular-plurals.txt when the enum is
	 * initialised.
	 */
	private static enum IrregularPlurals {
		PLURAL();

		private final Map<String, String> pluralToSingularMap;

		private IrregularPlurals() {
			this.pluralToSingularMap = loadPlurals();
		}

		/**
		 * Reads tab-separated "plural singular" lines. Loading is best
		 * effort: lines without a tab are skipped instead of aborting the rest
		 * of the file, and a read failure is reported on stderr while whatever
		 * was loaded so far is kept.
		 */
		private Map<String, String> loadPlurals() {
			Map<String, String> mapping = new HashMap<>();
			try {
				for (String line : new FileLines("./data/irregular-plurals.txt")) {
					String[] fields = line.split("\t");
					if (fields.length < 2) {
						continue; // blank or malformed line
					}
					mapping.put(fields[0], fields[1]);
				}
			} catch (Exception e) {
				System.err.println("NLPUtils: could not fully load "
					+ "./data/irregular-plurals.txt: " + e);
			}
			return mapping;
		}

		/**
		 * @param pluralNoun
		 *            plural form to look up
		 * @return the singular form, or null when the plural is not listed
		 */
		public String getSingular(String pluralNoun) {
			return pluralToSingularMap.get(pluralNoun);
		}

	}
}
	package utils;

	import java.io.IOException;
	import java.io.StringReader;
	import java.sql.SQLException;
	import java.util.Arrays;
	import java.util.Collection;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Map;
	import java.util.Set;

	import uk.ac.susx.informatics.Morpha;
	import util.FileLines;
	import util.Util;
	import util.WordListEnum;
	import util.IDHelper.WNWords;
	import util.Pair;

	public class NLPUtils {

	/*
	* public static String headWord(String s) { String[] words = s.split(" ");
	* return words[headWord(words)]; }
	*/
	/**
	 * Convenience overload: splits the phrase on single spaces and hands the
	 * tokens to {@link #headWordStr(String[])}.
	 *
	 * @param s
	 *            phrase to analyse
	 * @return the value headWordStr produces for the tokens of s
	 * @throws SQLException
	 *             propagated from headWordStr
	 * @throws IOException
	 *             propagated from headWordStr
	 */
	public static String headWord(String s) throws SQLException, IOException {
		final String[] tokens = s.split(" ");
		return headWordStr(tokens);
	}

	/**
	 * Returns the position of the head word in a tokenized phrase.
	 * <b>Always returns a single position, never a phrase.</b>
	 *
	 * <p>
	 * The tokens are scanned left to right for the first one that
	 * WordListEnum.PREPOSITIONS contains and that is preceded by at least one
	 * non-empty token:
	 * <ul>
	 * <li>"tower of hanoi": the head is the token before the preposition
	 * (index 0);</li>
	 * <li>"couple of sketch pads": when the words up to and including the
	 * preposition are in WordListEnum.PLURAL_QUANTIFIER, the head is the token
	 * after the preposition.</li>
	 * </ul>
	 * Without such a preposition the last token is the head.
	 *
	 * @param s
	 *            tokens of the phrase
	 * @return index of the head word; always inside the array for a non-empty
	 *         array, and -1 for an empty one
	 */
	public static int headWord(String[] s) {
		final int last = s.length - 1;
		// True once a non-empty token has been passed; this is all the original
		// StringBuilder was used for.
		boolean seenText = false;

		for (int i = 0; i < s.length; i++) {
			String w = s[i];
			if (WordListEnum.PREPOSITIONS.contains(w) && seenText) {
				if (WordListEnum.PLURAL_QUANTIFIER.contains(Util.rangeWords(0,
					i, s))) {
					// The quantifier may end the phrase ("couple of"); clamp so
					// callers never receive an index past the last token.
					return Math.min(i + 1, last);
				}
				// seenText guarantees i >= 1 here, so i - 1 is a valid index.
				return i - 1;
			}
			// StringBuilder.append(String) renders null as "null", so a null
			// token counted as text in the original as well.
			seenText = seenText || w == null || !w.isEmpty();
		}

		// No usable preposition: fall back to the last position.
		return last;
	}

	/**
	 * Returns the head words of a tokenized phrase as a string.
	 *
	 * <p>
	 * Tokens are collected until the first preposition that follows some
	 * text ("tower of hanoi" keeps "tower"). If the words up to and including
	 * that preposition form a plural quantifier ("couple of sketch pads"), the
	 * candidate becomes the words after the preposition instead. The candidate
	 * is stemmed with Morpha.any (verbs are handled by headVerb) and then
	 * narrowed by wnHeadWord.
	 *
	 * @param s
	 *            tokens of the phrase
	 * @return result of wnHeadWord on the stemmed candidate
	 * @throws SQLException
	 *             propagated from wnHeadWord
	 * @throws IOException
	 *             propagated from wnHeadWord
	 */
	public static String headWordStr(String[] s) throws SQLException,
		IOException {
		StringBuilder candidate = new StringBuilder();

		for (int idx = 0; idx < s.length; idx++) {
			final String token = s[idx];
			final boolean stopHere = WordListEnum.PREPOSITIONS.contains(token)
				&& candidate.length() > 0;

			if (!stopHere) {
				if (candidate.length() != 0) {
					candidate.append(" ");
				}
				candidate.append(token);
				continue;
			}

			// e.g. "couple of": words 0..idx form a plural quantifier, so
			// everything after the preposition becomes the candidate.
			if (WordListEnum.PLURAL_QUANTIFIER.contains(Util.rangeWords(0,
				idx, s))) {
				candidate = new StringBuilder();
				candidate.append(Util.rangeWords(idx + 1, s.length - 1, s));
			}
			// Either way the candidate is complete at the first preposition.
			break;
		}

		// "sketch pads" => "sketch pad"
		final String stemmedCandidate = stem(candidate.toString(), Morpha.any);
		return wnHeadWord(stemmedCandidate.split(" "));
	}

	/**
	 * Finds the longest suffix of the phrase for which WNWords.inWN returns a
	 * non-null result, e.g. "san francisco botanical garden" -> "botanical
	 * garden". Suffixes are tried from the full phrase down to the last token.
	 *
	 * @param s
	 *            tokens of the input phrase (must not be empty)
	 * @return the first matching suffix, tokens joined by single spaces; the
	 *         last token when no suffix matches
	 * @throws IOException
	 *             propagated from WNWords.inWN
	 * @throws SQLException
	 *             propagated from WNWords.inWN
	 */
	private static String wnHeadWord(String[] s) throws SQLException,
		IOException {
		int start = 0;
		while (start < s.length) {
			StringBuilder suffix = new StringBuilder();
			for (int k = start; k < s.length; k++) {
				if (suffix.length() > 0) {
					suffix.append(" ");
				}
				suffix.append(s[k]);
			}
			final String candidate = suffix.toString();
			if (WNWords.inWN(candidate) != null) {
				return candidate;
			}
			start++;
		}
		// Nothing matched: fall back to the last token.
		return s[s.length - 1];
	}

	/************************************************************************
	 * Stems a phrase token by token. For Morpha.noun only the head noun, as
	 * located by headWord(String[]), is stemmed (e.g. "schools boxes" becomes
	 * "schools box"); for any other code every token is stemmed.
	 *
	 * @param phrase
	 *            space-separated phrase
	 * @param MorphaDotPOS
	 *            morpha code : Morpha.noun , Morpha.verb , or Morpha.any
	 * @return the tokens re-joined with single spaces after stemming; the
	 *         phrase itself when it contains no tokens (only spaces)
	 ************************************************************************/
	public static String judiciousStemming(String phrase, int MorphaDotPOS) {
		String[] splitted = phrase.split(" ");
		if (splitted.length == 0) {
			// split() drops trailing empty strings, so an all-space phrase has
			// no tokens; previously this ended in splitted[-1].
			return phrase;
		}

		// An unconditional "return phrase" for Morpha.noun used to sit here and
		// made the head-noun branch below unreachable.
		final boolean headOnly = MorphaDotPOS == Morpha.noun;
		final int headWordIndex = headOnly ? headWord(splitted) : -1;

		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < splitted.length; i++) {
			if (i > 0) {
				sb.append(' ');
			}
			if (!headOnly || i == headWordIndex) {
				sb.append(stem(splitted[i], MorphaDotPOS));
			} else {
				sb.append(splitted[i]);
			}
		}
		return sb.toString();
	}

	/****
	* Check if input phrase is eligible to be stemmed. (In short, we don't stem
	* instances)
	*
	* @param phrase
	* los_angeles -> false, boxes -> true
	* @return
	* @throws SQLException
	* @throws IOException
	*/

	public static String stem(String w, int MorphaDotPOS) {
	String stemmed = null;
	if (MorphaDotPOS == Morpha.noun) {
	stemmed = IrregularPlurals.PLURAL.getSingular(w);
	}
	if (stemmed == null \|\| stemmed.isEmpty())
	stemmed = stemExceptWN(w, MorphaDotPOS);
	return stemmed;
	}

	private static String stemExceptWN(String w, int MorphaDotPOS) {
	if (w.equals("_s"))
	return w; // exception for _s which originally was 's
	else {
	String stemmed = stemMorpha(w, MorphaDotPOS);
	String pos = "";
	switch (MorphaDotPOS) {
	case Morpha.noun:
	pos = "n";
	break;
	case Morpha.verb:
	pos = "v";
	break;
	}
	Pair<String, Set<String>> inWN = null;
	try {
	inWN = WNWords.inWN(stemmed);
	} catch (Exception e) {
	e.printStackTrace();
	}
	if (inWN == null)
	return w;
	if (inWN.second == null \|\| inWN.second.isEmpty())
	return w;
	if (pos.isEmpty())
	return stemmed;
	if (inWN.second.contains(pos))
	return stemmed;
	else
	return w;
	}
	}

	/**
	 * Counts how often a character occurs in a string.
	 *
	 * @param s
	 *            text to scan
	 * @param ec
	 *            character to count
	 * @param shouldTrim
	 *            when true, s is trimmed before counting
	 * @return number of occurrences of ec
	 */
	private static int countChar(String s, char ec, boolean shouldTrim) {
		final String text = shouldTrim ? s.trim() : s;
		int hits = 0;
		for (int pos = 0; pos < text.length(); pos++) {
			if (text.charAt(pos) == ec) {
				hits++;
			}
		}
		return hits;
	}

	/*************************************************************************
	* That is, it only does noun plurals, pronoun case, and verb endings, and
	* not things like comparative adjectives or derived nominals. It is based
	* on a finite-state transducer implemented by John Carroll et al., written
	* in flex and publicly available. See:
	* http://www.informatics.susx.ac.uk/research/nlp/carroll/morph.html .
	*
	* @param w
	* e.g. fighter jets
	* @param morphaPOSNum
	* (Use Morpha. static members) e.g. 2 (for noun), 3 (for any)
	* @return fighter jet (note that fighters jets returns fighters jet
	* @usage Util.stem("goes", Morpha.verb);
	************************************************************************/
	private static String stemMorpha(String w, int morphaPOSNum) {
	try {
	if (w == null \|\| w.length() == 0)
	return w;
	int numWords = countChar(w, ' ', false) + 1;
	String[] ws = null;
	if (numWords > 1)
	ws = w.split(" ");
	if (lexer == null)
	lexer = new Morpha(System.in);
	lexer.yyreset(new StringReader(numWords == 1 ? w
	: ws[ws.length - 1]));
	lexer.yybegin(morphaPOSNum);
	if (numWords == 1)
	return lexer.next();
	else {
	StringBuilder sb = new StringBuilder();
	for (int i = 0; i < ws.length - 1; i++)
	sb.append(ws[i]).append(" ");
	sb.append(lexer.next());
	return sb.toString();
	}
	} catch (Exception e) {
	/*
	* System.out.println("Exception in stemming (" + w + "): " +
	* e.getMessage());
	*/
	// e.printStackTrace();
	} catch (Error e) {
	// Sometimes Morpha throws Error!
	// Exception in thread "main" java.lang.Error: Error: could not
	// match input
	/*
	* System.out.println("Error in stemming (" + w + "): " +
	* e.getMessage());
	*/
	// e.printStackTrace();
	}
	return w;
	}

	/**
	 * Shared Morpha lexer. stemMorpha creates it on first use and afterwards
	 * re-targets it with yyreset for every call; no synchronization is applied
	 * in this class.
	 */
	static Morpha lexer;

	/**
	 * Forms of "be" and "have"; headVerb skips a leading token found in this
	 * set.
	 */
	public static Collection<String> copularVerbs = new HashSet<>(
		Arrays.asList("be", "has", "have", "had", "is", "was", "are", "were"));

	/** Articles and possessive determiners. */
	public static Collection<String> articles = new HashSet<String>(
		Arrays.asList("a", "an", "the", "your", "my", "our", "his", "her"));

	/**
	 * Preposition tokens consulted by headVerb. The literal repeats several
	 * words (harmless in a set) and contains multi-word entries such as
	 * "in front of"; headVerb looks up one array element at a time, so those
	 * entries only match when a caller passes the whole expression as a single
	 * element.
	 */
	public static Collection<String> prepositions = new HashSet<String>(Arrays
	.asList(new String[] {"in", "on", "at", "with", "into", "across",
	"opposite", "toward", "towards", "through", "beyond", "aboard",
	"amid", "past", "by", "near", "nearby", "above", "below", "over",
	"under", "up", "down", "around", "through", "inside", "out",
	"outside", "outside of", "between", "beside", "besides", "beyond",
	"in front of", "in back of", "behind", "next to", "on top of",
	"within", "beneath", "underneath", "among", "along", "against",

	"aboard", "about", "above", "across", "after", "against", "along",
	"amid", "among", "anti", "around", "as", "at", "before", "behind",
	"below", "beneath", "beside", "besides", "between", "beyond",
	"but", "by", "concerning", "considering", "despite", "down",
	"during", "except", "excepting", "excluding", "following", "for",
	"from", "in", "inside", "into", "in front of", "like", "minus",
	"near", "of", "off", "on", "onto", "opposite", "outside", "over",
	"past", "per", "plus", "regarding", "round", "save", "since",
	"than", "through", "to", "toward", "towards", "under",
	"underneath", "unlike", "until", "up", "upon", "versus", "via",
	"with", "within", "without"}));

	/**
	 * Modal verbs; headVerb skips a leading token found in this set. Note that
	 * "ought to" is stored as one two-word entry.
	 */
	public static Collection<String> MODAL_VERBS = new HashSet<String>(
		Arrays.asList("can", "could", "may", "might", "will", "would", "must",
			"shall", "should", "ought to"));

	/**
	 * Stop words; headVerb skips a leading token found in this set. The
	 * literal is the concatenation of two source lists, so many words appear
	 * twice, which is harmless in a set.
	 *
	 * Fix: "ours" and "ourselves" used to be fused into the single entry
	 * "ours ourselves" by a missing comma, so "ourselves" was not recognised.
	 */
	public static Collection<String> STOPWORDS = (new HashSet<String>(Arrays
		.asList(new String[] {"a", "able", "about", "across", "after", "all",
			"almost", "also", "always", "am", "among", "an", "and", "another",
			"any", "are", "as", "at", "be", "because", "been", "before",
			"being", "but", "by", "can", "cannot", "could", "dear", "did",
			"do", "does", "either", "else", "ever", "every", "few", "for",
			"from", "get", "got", "had", "has", "have", "he", "her", "here",
			"hers", "him", "his", "how", "however", "i", "if", "in", "into",
			"is", "it", "its", "just", "least", "let", "like", "likely", "lrb",
			"many", "may", "me", "might", "mine", "more", "most", "much",
			"must", "my", "neither", "no", "none", "nor", "not", "nothing",
			"now", "nt", "of", "off", "often", "on", "only", "or", "other",
			"our", "ours", "own", "per", "rather", "rrb", "said", "say",
			"says", "she", "should", "since", "so", "some", "somehow", "still",
			"such", "than", "that", "the", "their", "theirs", "them", "then",
			"there", "these", "they", "this", "those", "though", "tis", "to",
			"too", "twas", "u", "us", "very", "want", "wants", "was", "we",
			"were", "what", "when", "where", "which", "while", "who", "whom",
			"why", "will", "with", "would", "www", "yet", "you", "your",
			"yours", "yourss", "'m", "'ll", "a", "about", "above", "after",
			"again", "against", "all", "am", "an", "and", "any", "are", "as",
			"at", "be", "because", "been", "before", "being", "below",
			"between", "both", "but", "by", "cannot", "could", "did", "do",
			"does", "doing", "down", "during", "each", "few", "for", "from",
			"further", "had", "has", "have", "having", "he", "her", "here",
			"hers", "herself", "him", "himself", "his", "how", "however", "i",
			"if", "in", "into", "is", "it", "its", "itself", "let", "lrb",
			"me", "more", "most", "must", "my", "myself", "no", "nor", "not",
			"of", "off", "on", "once", "only", "or", "other", "ought", "our",
			"ours", "ourselves", "out", "over", "own", "rrb", "same", "sha",
			"she", "should", "so", "some", "such", "than", "that", "the",
			"their", "theirs", "them", "themselves", "then", "there", "these",
			"they", "this", "those", "through", "to", "too", "under", "until",
			"up", "very", "was", "we", "were", "what", "when", "where",
			"which", "while", "who", "who", "whom", "why", "why", "with", "wo",
			"would", "would", "you", "you", "you", "you", "you", "your",
			"yours", "yourself", "yourselves"})));

	/** returns position of headverb from headverbCandidate s. */
	public static int headVerb(String[] s) {
	StringBuilder sb = new StringBuilder();
	for (int i = 0; i < s.length; i++) {
	String w = s[i];
	// begin to peel (here, peel = index 1, i.e. succeeding "to")
	if (prepositions.contains(w) && sb.length() > 0

	) {
	// prep. not last word e.g. heat up
	if (i != s.length - 1)
	return i + 1;
	/*
	* else if return 0;
	*/
	}
	sb.append(sb.length() == 0 ? "" : " ").append(w);
	}

	// had turned

	if (s.length > 1
	&& (copularVerbs.contains(s[0]) \|\| MODAL_VERBS.contains(s[0]) \|\| STOPWORDS
	.contains(s[0]))) {

	return 1 + headVerb(Arrays.copyOfRange(s, 1, s.length)); }

	// Fallback to last position case when no prepositions are present.
	// continue peeling
	if (s[s.length - 1].endsWith("ing")) // present tense.
	return s.length - 1;
	else
	// take out
	/*
	* return verbsInCorpus != null && verbsInCorpus.contains(s[0]) ? 0
	* : s.length - 1;
	*/
	return 0;
	}

	/**
	 * Singleton lookup table of irregular plurals (e.g. women -> woman),
	 * filled once from ./data/irregular-plurals.txt when the enum is
	 * initialised.
	 */
	private static enum IrregularPlurals {
		PLURAL();

		private final Map<String, String> pluralToSingularMap;

		private IrregularPlurals() {
			this.pluralToSingularMap = loadPlurals();
		}

		/**
		 * Reads tab-separated "plural singular" lines. Loading is best
		 * effort: lines without a tab are skipped instead of aborting the rest
		 * of the file, and a read failure is reported on stderr while whatever
		 * was loaded so far is kept.
		 */
		private Map<String, String> loadPlurals() {
			Map<String, String> mapping = new HashMap<>();
			try {
				for (String line : new FileLines("./data/irregular-plurals.txt")) {
					String[] fields = line.split("\t");
					if (fields.length < 2) {
						continue; // blank or malformed line
					}
					mapping.put(fields[0], fields[1]);
				}
			} catch (Exception e) {
				System.err.println("NLPUtils: could not fully load "
					+ "./data/irregular-plurals.txt: " + e);
			}
			return mapping;
		}

		/**
		 * @param pluralNoun
		 *            plural form to look up
		 * @return the singular form, or null when the plural is not listed
		 */
		public String getSingular(String pluralNoun) {
			return pluralToSingularMap.get(pluralNoun);
		}

	}
	}