/**
 * Quick-and-dirty collection of the morphological variants of a word;
 * should be replaced by something better!
 *
 * The result always contains the base form. Depending on the word's
 * lexical category it additionally contains:
 * - NOUN: the plural;
 * - ADJECTIVE: the comparative and the superlative;
 * - VERB: the third-person-singular present, the past, the past
 *   participle and the present participle.
 * Each of those forms is obtained from getVariant, using the default
 * suffix shown in the corresponding call below.
 *
 * @param word
 *            the word whose variants are wanted
 * @return the set of variant strings collected for the word
 */
public HashSet<string> getVariants(WordElement word)
{
    var forms = new HashSet<string>();
    forms.Add(word.getBaseForm());

    var wordCategory = word.getCategory();
    if (!(wordCategory is ILexicalCategory))
    {
        // not a lexical category: only the base form is collected
        return forms;
    }

    // categories without a case below contribute nothing beyond the base form
    switch (wordCategory.enumType)
    {
        case (int)LexicalCategoryEnum.NOUN:
            forms.add(getVariant(word, LexicalFeature.PLURAL, "s"));
            break;

        case (int)LexicalCategoryEnum.ADJECTIVE:
            forms.add(getVariant(word, LexicalFeature.COMPARATIVE, "er"));
            forms.add(getVariant(word, LexicalFeature.SUPERLATIVE, "est"));
            break;

        case (int)LexicalCategoryEnum.VERB:
            forms.add(getVariant(word, LexicalFeature.PRESENT3S, "s"));
            forms.add(getVariant(word, LexicalFeature.PAST, "ed"));
            forms.add(getVariant(word, LexicalFeature.PAST_PARTICIPLE, "ed"));
            forms.add(getVariant(word, LexicalFeature.PRESENT_PARTICIPLE, "ing"));
            break;
    }

    return forms;
}
/**
 * Add a word to the internal indices: by base form, by ID (when the word
 * has one) and by each of its morphological variants.
 *
 * A duplicate ID is reported on the console and indexing then continues;
 * the word being indexed replaces the earlier entry for that ID.
 *
 * @param word
 *            the word to index
 */
private void IndexWord(WordElement word)
{
    // first index by base form
    var basef = word.getBaseForm();
    // the null check should not really be needed, as all words have base forms
    if (basef != null)
    {
        updateIndex(word, basef, indexByBase);
    }

    // now index by ID, which should be unique (if present)
    var id = word.getId();
    if (id != null)
    {
        if (indexByID.ContainsKey(id))
        {
            Console.WriteLine($"Lexicon error: ID {id} occurs more than once");
        }

        // Assign through the indexer instead of calling Add(): on a standard
        // .NET dictionary Add() throws ArgumentException when the key already
        // exists, which would abort indexing immediately after the duplicate
        // had been reported. The indexer overwrites, so the latest word wins.
        indexByID[id] = word;
    }

    // now index by variant
    foreach (var variant in getVariants(word))
    {
        updateIndex(word, variant, indexByVariant);
    }
}
/**
 * Copy constructor: builds a new WordElement from an existing one.
 *
 * The base form, category, ID, inflectional variants, default
 * inflectional variant and features are all taken from the source word.
 * The inflectional-variants value is stored exactly as returned by the
 * source's getter; this constructor does not copy it.
 *
 * @param currentWord
 *            the existing WordElement to duplicate
 */
public WordElement(WordElement currentWord)
{
    this.baseForm = currentWord.getBaseForm();
    this.setCategory(currentWord.getCategory());
    this.id = currentWord.getId();
    this.inflVars = currentWord.getInflectionalVariants();
    this.defaultInfl = (Inflection)currentWord.getDefaultInflectionalVariant();
    this.setFeatures(currentWord);
}
/**
 * Quick-and-dirty computation of a single morphological form; should be
 * replaced by something better!
 *
 * When the word carries the requested feature, that feature's string
 * value is returned. Otherwise the form is built by getForm from the
 * word's base form and the given suffix.
 *
 * @param word
 *            the word to inflect
 * @param feature
 *            name of the feature that may hold an explicit form
 * @param suffix
 *            suffix handed to getForm when the feature is absent
 * @return the explicit form if the feature is present, otherwise the
 *         form produced by getForm
 */
private string getVariant(WordElement word, string feature, string suffix)
{
    if (!word.hasFeature(feature))
    {
        // no explicit form recorded: derive one from the base form
        return getForm(word.getBaseForm(), suffix);
    }

    return word.getFeatureAsString(feature);
}
/**
 * Extract info about the spelling variants of a word from an NIH record,
 * and add it to the simplenlg WordElement.
 *
 * <P>
 * Spelling variants are represented as lists of strings, retrievable via
 * {@link LexicalFeature#SPELL_VARS}. That feature is only set when the
 * record supplies a non-empty collection of variants. The
 * {@link LexicalFeature#DEFAULT_SPELL} feature is always set, to the
 * word's base form.
 *
 * @param wordElement
 *            the word that receives the spelling features
 * @param record
 *            the NIH lexicon record to read the spelling variants from
 */
private void addSpellingVariants(WordElement wordElement, LexRecord record)
{
    Vector<string> spellings = record.GetSpellingVars();
    bool hasSpellings = spellings != null && !spellings.isEmpty();

    if (hasSpellings)
    {
        // hand the word its own list rather than the record's collection
        var copiedSpellings = new List<string>();
        copiedSpellings.addAll(spellings);
        wordElement.setFeature(LexicalFeature.SPELL_VARS, copiedSpellings);
    }

    // the default spelling variant is the base form
    var defaultSpelling = wordElement.getBaseForm();
    wordElement.setFeature(LexicalFeature.DEFAULT_SPELL, defaultSpelling);
}