/**
 * Copies adjective position information from an NIH AdjEntry record onto a
 * simplenlg WordElement. Only the position codes are read; the rest of the
 * record is ignored for now.
 *
 * @param wordElement
 *            the word that receives the QUALITATIVE, COLOUR, CLASSIFYING
 *            and PREDICATIVE features (each is always set, true or false)
 * @param adjEntry
 *            the NIH adjective entry whose position codes are inspected
 */
private void addAdjectiveInfo(WordElement wordElement, AdjEntry adjEntry)
{
    bool isQualitative = false;
    bool isColour = false;
    bool isClassifying = false;
    bool isPredicative = false;

    // each position code turns on at most one flag, selected by its prefix;
    // codes with any other prefix are ignored
    foreach (var code in adjEntry.GetPosition())
    {
        if (code.startsWith("attrib(1)"))
        {
            isQualitative = true;
            continue;
        }
        if (code.startsWith("attrib(2)"))
        {
            isColour = true;
            continue;
        }
        if (code.startsWith("attrib(3)"))
        {
            isClassifying = true;
            continue;
        }
        if (code.startsWith("pred"))
        {
            isPredicative = true;
        }
    }

    wordElement.setFeature(LexicalFeature.QUALITATIVE, isQualitative);
    wordElement.setFeature(LexicalFeature.COLOUR, isColour);
    wordElement.setFeature(LexicalFeature.CLASSIFYING, isClassifying);
    wordElement.setFeature(LexicalFeature.PREDICATIVE, isPredicative);
}
/**
 * Produces the WordElement for a word.
 *
 * If the argument is already a WordElement it is returned unchanged. If it
 * is a string and a lexicon is available, the lexicon is asked for the
 * word; when the string is one of PRONOUNS the pronoun features are then
 * applied to the result. In every other case (no lexicon, or an argument
 * that is neither a WordElement nor a string) the result is null.
 *
 * @param word
 *            the base word: either a WordElement or a string
 * @param category
 *            the lexical category passed to the lexicon lookup
 *
 * @return the WordElement for the word, or null as described above
 */
public WordElement createWord(object word, ILexicalCategory category)
{
    var existing = word as WordElement;
    if (existing != null)
    {
        return existing;
    }

    var text = word as string;
    if (text == null || this.lexicon == null)
    {
        return null;
    }

    // AG: the lexicon yields a WordElement, not an InflectedWordElement
    WordElement found = lexicon.lookupWord(text, category);
    if (PRONOUNS.Contains(text))
    {
        setPronounFeatures(found, text);
    }
    return found;
}
/**
 * Adds a word to the internal indices: by base form, by ID and by each
 * morphological variant returned by getVariants.
 *
 * A duplicate ID is reported on the console and the later word replaces
 * the earlier one in the ID index.
 *
 * @param word
 *            the word to index
 */
private void IndexWord(WordElement word)
{
    // first index by base form
    // (the null check should not really be needed, as all words are
    // expected to have base forms)
    var basef = word.getBaseForm();
    if (basef != null)
    {
        updateIndex(word, basef, indexByBase);
    }

    // now index by ID, which should be unique (if present)
    var id = word.getId();
    if (id != null)
    {
        if (indexByID.ContainsKey(id))
        {
            Console.WriteLine($"Lexicon error: ID {id} occurs more than once");
        }
        // assign through the indexer: Add() throws on an existing key,
        // which would turn the duplicate just reported above into a crash
        indexByID[id] = word;
    }

    // now index by variant
    foreach (var variant in getVariants(word))
    {
        // getVariants adds the base form unconditionally, so a word with no
        // base form yields a null entry; a null key cannot be indexed
        if (variant != null)
        {
            updateIndex(word, variant, indexByVariant);
        }
    }
}
/**
 * Copies adverb modifier-type information from an NIH AdvEntry record onto
 * a simplenlg WordElement. Only the modification codes are read; the rest
 * of the record is ignored for now.
 *
 * @param wordElement
 *            the word that receives the VERB_MODIFIER, SENTENCE_MODIFIER
 *            and INTENSIFIER features (each is always set, true or false)
 * @param advEntry
 *            the NIH adverb entry whose modification codes are inspected
 */
private void addAdverbInfo(WordElement wordElement, AdvEntry advEntry)
{
    bool modifiesVerb = false;
    bool modifiesSentence = false;
    bool isIntensifier = false;

    // each modification code turns on at most one flag, selected by its
    // prefix; codes with any other prefix are ignored
    foreach (var code in advEntry.GetModification())
    {
        if (code.startsWith("verb_modifier"))
        {
            modifiesVerb = true;
            continue;
        }
        if (code.startsWith("sentence_modifier"))
        {
            modifiesSentence = true;
            continue;
        }
        if (code.startsWith("intensifier"))
        {
            isIntensifier = true;
        }
    }

    wordElement.setFeature(LexicalFeature.VERB_MODIFIER, modifiesVerb);
    wordElement.setFeature(LexicalFeature.SENTENCE_MODIFIER, modifiesSentence);
    wordElement.setFeature(LexicalFeature.INTENSIFIER, isIntensifier);
}
/**
 * Quick-and-dirty collection of the morphological variants of a word;
 * should be replaced by something better.
 *
 * The set always contains the base form. Nouns add the plural, adjectives
 * the comparative and superlative, verbs the third-person present, past,
 * past participle and present participle. Each form comes from getVariant.
 *
 * @param word
 *            the word whose variants are wanted
 * @return the set of variant strings
 */
public HashSet<string> getVariants(WordElement word)
{
    var forms = new HashSet<string>();
    forms.Add(word.getBaseForm());

    var category = word.getCategory();
    if (!(category is ILexicalCategory))
    {
        return(forms);
    }

    var kind = category.enumType;
    if (kind == (int)LexicalCategoryEnum.NOUN)
    {
        forms.add(getVariant(word, LexicalFeature.PLURAL, "s"));
    }
    else if (kind == (int)LexicalCategoryEnum.ADJECTIVE)
    {
        forms.add(getVariant(word, LexicalFeature.COMPARATIVE, "er"));
        forms.add(getVariant(word, LexicalFeature.SUPERLATIVE, "est"));
    }
    else if (kind == (int)LexicalCategoryEnum.VERB)
    {
        forms.add(getVariant(word, LexicalFeature.PRESENT3S, "s"));
        forms.add(getVariant(word, LexicalFeature.PAST, "ed"));
        forms.add(getVariant(word, LexicalFeature.PAST_PARTICIPLE, "ed"));
        forms.add(getVariant(word, LexicalFeature.PRESENT_PARTICIPLE, "ing"));
    }

    return(forms);
}
/**
 * Returns the result of Copy() with its Base property cleared.
 *
 * @return the copy, with Base set to null
 */
public WordElement CopyWithoutSpec()
{
    WordElement stripped = Copy();

    // drop the Base reference on the copy only; this instance is untouched
    // by this method itself
    stripped.Base = null;

    return stripped;
}
/**
 * Convenience method that appends a word to the list stored under a key in
 * an index, creating the list the first time the key is seen.
 *
 * @param word
 *            the word to record
 * @param basef
 *            the key under which the word is filed
 * @param index
 *            the index to update
 */
private void updateIndex(WordElement word, string basef, Dictionary<string, List<WordElement>> index)
{
    // a single TryGetValue replaces the ContainsKey / Add / indexer sequence
    List<WordElement> bucket;
    if (!index.TryGetValue(basef, out bucket))
    {
        bucket = new List<WordElement>();
        index.Add(basef, bucket);
    }

    bucket.Add(word);
}
/**
 * Builds a duplicate of an existing WordElement by copying its base form,
 * category, id, inflectional variants, default inflectional variant and
 * features.
 *
 * NOTE(review): inflVars receives whatever getInflectionalVariants()
 * returns; whether that is a fresh copy or a shared reference cannot be
 * seen from here.
 *
 * @param currentWord
 *            the existing WordElement to duplicate
 */
public WordElement(WordElement currentWord)
{
    this.baseForm = currentWord.getBaseForm();
    this.setCategory(currentWord.getCategory());
    this.id = currentWord.getId();
    this.inflVars = currentWord.getInflectionalVariants();
    this.defaultInfl = (Inflection)currentWord.getDefaultInflectionalVariant();

    // features are copied last, after the fields above have been assigned
    this.setFeatures(currentWord);
}
// note that addFrontModifier, addPostModifier, addPreModifier are inherited from PhraseElement
// likewise getFrontModifiers, getPostModifiers, getPreModifiers

/**
 * Adds a modifier to a verb phrase, using heuristics to decide where it
 * goes:
 *   - null is ignored;
 *   - a non-empty string without spaces is turned into a word through the
 *     factory;
 *   - a word (or inflected word) whose category is ADVERB becomes a
 *     pre-modifier;
 *   - everything else becomes a post-modifier.
 *
 * @param modifier
 *            an INLGElement or a string
 */
public override void addModifier(object modifier)
{
    if (modifier == null)
    {
        return;
    }

    // get modifier as NLGElement if possible
    INLGElement asElement = modifier as INLGElement;
    if (asElement == null)
    {
        string text = modifier as string;
        if (text != null && text.length() > 0 && !text.contains(" "))
        {
            asElement = getFactory().createWord(modifier, new LexicalCategory_ANY());
        }
    }

    // still no element: treat the modifier as a complex string
    if (asElement == null)
    {
        addPostModifier((string)modifier);
        return;
    }

    // extract the WordElement if the modifier is a single word
    WordElement headWord = asElement as WordElement;
    if (headWord == null)
    {
        var inflected = asElement as InflectedWordElement;
        if (inflected != null)
        {
            headWord = inflected.getBaseWord();
        }
    }

    bool isAdverb = headWord != null
                    && headWord.getCategory().enumType == (int)LexicalCategoryEnum.ADVERB;
    if (isAdverb)
    {
        addPreModifier(headWord);
    }
    else
    {
        // default case
        addPostModifier(asElement);
    }
}
/**
 * Copies every feature of another WordElement onto this one. Nothing
 * happens when the source is null or its getAllFeatures() returns null.
 *
 * @param currentWord
 *            the WordElement to copy features from
 */
public void setFeatures(WordElement currentWord)
{
    if (currentWord == null || currentWord.getAllFeatures() == null)
    {
        return;
    }

    foreach (var name in currentWord.getAllFeatureNames())
    {
        setFeature(name, currentWord.getFeature(name));
    }
}
/**
 * Constructs a new inflected word on top of a WordElement. The word itself
 * is stored as the BASE_WORD feature, its default spelling variant as the
 * BASE_FORM feature, and its category becomes this element's category.
 *
 * @param word
 *            the underlying WordElement
 */
public InflectedWordElement(WordElement word)
{
    setFeature(InternalFeature.BASE_WORD.ToString(), word);

    // AG: the base form is the default spelling variant rather than
    // word.getBaseForm()
    setFeature(LexicalFeature.BASE_FORM, word.getDefaultSpellingVariant());

    setCategory(word.getCategory());
}
/**
 * Quick-and-dirty computation of one morphological form; should be
 * replaced by something better.
 *
 * @param word
 *            the word to inflect
 * @param feature
 *            the feature that may already hold the wanted form
 * @param suffix
 *            the suffix handed to getForm when the feature is absent
 * @return the stored feature value when the word has the feature,
 *         otherwise getForm applied to the base form and the suffix
 */
private string getVariant(WordElement word, string feature, string suffix)
{
    if (!word.hasFeature(feature))
    {
        return(getForm(word.getBaseForm(), suffix));
    }

    return(word.getFeatureAsString(feature));
}
/**
 * Chooses the base form used for morphology.
 *
 * It is unclear what the right behaviour should be. For now, verbs prefer
 * the lexicon word's default spelling variant (so "is" maps to "be") and
 * fall back to the element's own base form; all other words prefer the
 * element's base form (so "children" is not mapped to "child") and fall
 * back to the lexicon word's default spelling variant.
 *
 * AG: the default spelling variant is used rather than the plain base
 * form so that spelling changes are preserved in the VP.
 *
 * @param element
 *            the inflected word
 * @param baseWord
 *            the lexicon word, may be null
 * @return the chosen base form, possibly null
 */
private static string getBaseForm(InflectedWordElement element, WordElement baseWord)
{
    bool isVerb = (int)LexicalCategoryEnum.VERB == element.getCategory().enumType;

    if (isVerb)
    {
        if (baseWord == null || baseWord.getDefaultSpellingVariant() == null)
        {
            return(element.getBaseForm());
        }
        return(baseWord.getDefaultSpellingVariant());
    }

    if (element.getBaseForm() != null)
    {
        return(element.getBaseForm());
    }

    if (baseWord == null)
    {
        return(null);
    }

    return(baseWord.getDefaultSpellingVariant());
}
/**
 * Extracts information about acronyms from an NIH record and adds it to a
 * simplenlg WordElement.
 *
 * Acronyms are represented as lists of word elements: the full forms of
 * which the word is an acronym are stored under
 * LexicalFeature.ACRONYM_OF.
 *
 * Only record entries containing a "|" are used; the text after the first
 * "|" is taken as the ID of the full-form word and looked up with
 * getWordByID. Entries whose ID cannot be resolved are skipped. When the
 * record lists no acronyms the word is left untouched.
 *
 * @param wordElement
 *            the word that receives the ACRONYM_OF feature
 * @param record
 *            the NIH lexical record
 */
private void addAcronymInfo(WordElement wordElement, LexRecord record)
{
    // NB: the "acronyms" are actually the full forms of which the word is
    // an acronym
    List<string> acronyms = record.GetAcronyms();
    if (acronyms.isEmpty())
    {
        return;
    }

    // the list of full forms of which this word is an acronym
    List<INLGElement> acronymOf = wordElement.getFeatureAsElementList(LexicalFeature.ACRONYM_OF);

    foreach (var fullForm in acronyms)
    {
        if (!fullForm.contains("|"))
        {
            continue;
        }

        // the acronym id is everything after the first "|"
        string acronymID = fullForm.substring(fullForm.indexOf("|") + 1, fullForm.length());
        WordElement fullFormWE = this.getWordByID(acronymID);

        // check the looked-up element, not the entry string: the string is
        // never null at this point, so testing it let unresolved IDs put
        // null elements into the list
        if (fullFormWE != null)
        {
            acronymOf.add(fullFormWE);
        }
    }

    // set all the full forms for this acronym
    wordElement.setFeature(LexicalFeature.ACRONYM_OF, acronymOf);
}
/**
 * Extracts the spelling variants of a word from an NIH record and adds
 * them to the simplenlg WordElement.
 *
 * Spelling variants are stored as a list of strings under
 * LexicalFeature.SPELL_VARS, and only when the record has at least one.
 * LexicalFeature.DEFAULT_SPELL is always set, to the word's base form.
 *
 * @param wordElement
 *            the word that receives the features
 * @param record
 *            the NIH lexical record
 */
private void addSpellingVariants(WordElement wordElement, LexRecord record)
{
    Vector<string> recordVars = record.GetSpellingVars();
    bool hasVariants = recordVars != null && !recordVars.isEmpty();

    if (hasVariants)
    {
        // hand the word its own list rather than the record's collection
        var copied = new List<string>();
        copied.addAll(recordVars);
        wordElement.setFeature(LexicalFeature.SPELL_VARS, copied);
    }

    // the default spelling variant is the base form
    wordElement.setFeature(LexicalFeature.DEFAULT_SPELL, wordElement.getBaseForm());
}
/**
 * Finds the WordElement behind an element:
 *   - a WordElement is returned as is;
 *   - an InflectedWordElement yields its BASE_WORD feature;
 *   - a PhraseElement is resolved recursively through its head;
 *   - anything else (including null) yields null.
 *
 * @param element
 *            the element from which the head word is required
 * @return the WordElement, or null
 */
private static WordElement getHeadWordElement(INLGElement element)
{
    if (element is WordElement)
    {
        return((WordElement)element);
    }

    if (element is InflectedWordElement)
    {
        return((WordElement)element.getFeature(InternalFeature.BASE_WORD.ToString()));
    }

    if (element is PhraseElement)
    {
        return(getHeadWordElement(((PhraseElement)element).getHead()));
    }

    return(null);
}
/**
 * Performs the morphology for adverbs.
 *
 * For a comparative or superlative the form is taken, in order of
 * preference, from the element's own feature, from the lexicon word's
 * feature, or from the regular builder. Otherwise the base form is used.
 * The element's DISCOURSE_FUNCTION is copied onto the result.
 *
 * @param element
 *            the InflectedWordElement
 * @param baseWord
 *            the WordElement as created from the lexicon entry; may be
 *            null
 * @return a StringElement representing the word after inflection
 */
public static INLGElement doAdverbMorphology(InflectedWordElement element, WordElement baseWord)
{
    // base form from baseWord if it exists, otherwise from element
    var baseForm = getBaseForm(element, baseWord);

    string surface;
    if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE.ToString()))
    {
        surface = element.getFeatureAsString(LexicalFeature.COMPARATIVE)
                  ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.COMPARATIVE) : null)
                  ?? buildRegularComparative(baseForm);
    }
    else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE.ToString()))
    {
        surface = element.getFeatureAsString(LexicalFeature.SUPERLATIVE)
                  ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.SUPERLATIVE) : null)
                  ?? buildRegularSuperlative(baseForm);
    }
    else
    {
        surface = baseForm;
    }

    var result = new StringElement(surface);
    var discourseKey = InternalFeature.DISCOURSE_FUNCTION.ToString();
    result.setFeature(discourseKey, element.getFeature(InternalFeature.DISCOURSE_FUNCTION.ToString()));
    return(result);
}
/**
 * Helper that looks a word up in the lexicon and records the result on the
 * element as its BASE_WORD feature.
 *
 * When the requested category is NOUN but the lexicon knows the word as a
 * pronoun, the pronoun entry is used instead: the element's category is
 * changed to pronoun and, if the word is not one of PRONOUNS, NON_MORPH is
 * set to true. If that pronoun lookup returns null the element is left
 * unchanged.
 *
 * @param category
 *            the lexical category of the word
 * @param word
 *            the base form of the word
 * @param wordElement
 *            the created element representing the word
 */
private void doLexiconLookUp(ILexicalCategory category, string word, INLGElement wordElement)
{
    bool nounKnownAsPronoun = category.lexType == LexicalCategoryEnum.NOUN
                              && this.lexicon.hasWord(word, new LexicalCategory_PRONOUN());

    if (!nounKnownAsPronoun)
    {
        WordElement plainEntry = this.lexicon.lookupWord(word, category);
        wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), plainEntry);
        return;
    }

    WordElement pronounEntry = this.lexicon.lookupWord(word, new LexicalCategory_PRONOUN());
    if (pronounEntry == null)
    {
        return;
    }

    wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), pronounEntry);
    wordElement.setCategory(new LexicalCategory_PRONOUN());
    if (!PRONOUNS.Contains(word))
    {
        wordElement.setFeature(InternalFeature.NON_MORPH.ToString(), true);
    }
}
/**
 * Makes a WordElement from a lexical record. Currently this specifies the
 * basic parameters, the category-specific information, the inflections,
 * the acronym information and the spelling variants; it should do more in
 * the future.
 *
 * @param record
 *            the NIH lexical record
 * @return the new WordElement
 */
private WordElement makeWord(LexRecord record)
{
    // basic data
    string baseForm = record.GetBase();
    ILexicalCategory category = getSimplenlgCategory(record);
    string id = record.GetEui();

    var wordElement = new WordElement(baseForm, (LexicalCategory)category, id);

    // category-specific information; closed class words are ignored
    switch (category.lexType)
    {
    case LexicalCategoryEnum.ADJECTIVE:
        addAdjectiveInfo(wordElement, record.GetCatEntry().GetAdjEntry());
        break;

    case LexicalCategoryEnum.ADVERB:
        addAdverbInfo(wordElement, record.GetCatEntry().GetAdvEntry());
        break;

    case LexicalCategoryEnum.NOUN:
        addNounInfo(wordElement, record.GetCatEntry().GetNounEntry());
        break;

    case LexicalCategoryEnum.VERB:
        addVerbInfo(wordElement, record.GetCatEntry().GetVerbEntry());
        break;
    }

    var defaultInfl = (Inflection)wordElement.getDefaultInflectionalVariant();

    // inflected forms
    foreach (InflVar inflection in record.GetInflVarsAndAgreements().GetInflValues())
    {
        string simplenlgInflection = getSimplenlgInflection(inflection.GetInflection());
        if (simplenlgInflection == null)
        {
            continue;
        }

        string inflectedForm = inflection.GetVar();
        // NOTE(review): GetType() is presumably InflVar's own accessor for
        // the inflection type code, not object.GetType() - confirm
        Inflection inflType = Inflection.getInflCode(inflection.GetType());

        // store all inflectional variants, except for regular ones unless
        // those are explicitly kept
        if (inflType != null
            && (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections))
        {
            wordElement.addInflectionalVariant(inflType, simplenlgInflection, inflectedForm);
        }

        // if the variant is the default (or there is no default), also set
        // the form as a feature on the word itself
        if (defaultInfl == null
            || (defaultInfl.Equals(inflType)
                && (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections)))
        {
            wordElement.setFeature(simplenlgInflection, inflectedForm);
        }
    }

    // acronym info
    addAcronymInfo(wordElement, record);

    // spelling variants
    addSpellingVariants(wordElement, record);

    return(wordElement);
}
/**
 * Extracts noun information from an NIH NounEntry record and adds it to a
 * simplenlg WordElement: the inflectional variants, the default
 * inflectional variant, and whether the noun is proper.
 *
 * Each variant code is the text before the first "|" (or the whole entry
 * when there is none), lower-cased and trimmed. Codes that
 * Inflection.getInflCode does not recognise are skipped. The default is
 * REGULAR when "reg" is among the recognised variants or none was
 * recognised, otherwise the first recognised variant.
 *
 * @param wordElement
 *            the word that receives the information
 * @param nounEntry
 *            the NIH noun entry
 */
private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
{
    bool proper = nounEntry.IsProper();

    // inflectional variants
    List<string> variants = nounEntry.GetVariants();
    if (!variants.isEmpty())
    {
        var recognised = new List<Inflection>();

        foreach (var raw in variants)
        {
            int bar = raw.indexOf("|");
            string code = (bar > -1 ? raw.substring(0, bar) : raw).toLowerCase().trim();

            Inflection infl = Inflection.getInflCode(code);
            if (infl != null)
            {
                recognised.add(infl);
                wordElement.addInflectionalVariant(infl);
            }
        }

        // "reg" wins if present; otherwise just take the first variant
        Inflection defaultVariant;
        if (recognised.Contains(Inflection.REGULAR) || recognised.isEmpty())
        {
            defaultVariant = Inflection.REGULAR;
        }
        else
        {
            defaultVariant = recognised.get(0);
        }

        wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
        wordElement.setDefaultInflectionalVariant(defaultVariant);
    }

    wordElement.setFeature(LexicalFeature.PROPER, proper);
    // other info in the record is ignored for now
}
/**
 * Extracts verb information from an NIH VerbEntry record and adds it to a
 * simplenlg WordElement: the INTRANSITIVE, TRANSITIVE and DITRANSITIVE
 * flags, the inflectional variants and the default inflectional variant.
 *
 * A null entry (which should only happen for auxiliary verbs, which have
 * an auxEntry instead of a verbEntry in the NIH lexicon) is simply flagged
 * as transitive.
 *
 * @param wordElement
 *            the word that receives the information
 * @param verbEntry
 *            the NIH verb entry, may be null
 */
private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
{
    if (verbEntry == null)
    {
        wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
        wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
        wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
        return;
    }

    bool isIntransitive = notEmpty(verbEntry.GetIntran());
    // complex-transitive frames count as transitive too
    bool isTransitive = notEmpty(verbEntry.GetTran()) || notEmpty(verbEntry.GetCplxtran());
    bool isDitransitive = notEmpty(verbEntry.GetDitran());

    wordElement.setFeature(LexicalFeature.INTRANSITIVE, isIntransitive);
    wordElement.setFeature(LexicalFeature.TRANSITIVE, isTransitive);
    wordElement.setFeature(LexicalFeature.DITRANSITIVE, isDitransitive);

    // inflectional variants
    List<string> variants = verbEntry.GetVariants();
    if (variants.isEmpty())
    {
        // other info in the record is ignored for now
        return;
    }

    var recognised = new List<Inflection>();
    foreach (var raw in variants)
    {
        // the code is the text before the first "|", or the whole entry
        int bar = raw.indexOf("|");
        string code = (bar > -1 ? raw.substring(0, bar) : raw).toLowerCase().trim();

        Inflection infl = Inflection.getInflCode(code);
        if (infl != null)
        {
            wordElement.addInflectionalVariant(infl);
            recognised.add(infl);
        }
    }

    // "reg" wins if present; otherwise just take the first variant
    Inflection defaultVariant;
    if (recognised.contains(Inflection.REGULAR) || recognised.isEmpty())
    {
        defaultVariant = Inflection.REGULAR;
    }
    else
    {
        defaultVariant = recognised.get(0);
    }

    wordElement.setDefaultInflectionalVariant(defaultVariant);
}
/**
 * Creates a simplenlg WordElement from a Word node in a lexicon XML file.
 *
 * Each child element of the node is treated as a feature: the base, the
 * category and the id are set directly; a child with empty text is either
 * an inflection code (collected) or a boolean feature set to true; any
 * other child becomes a feature with its trimmed text as value. If no
 * inflection code was found, REGULAR is assumed. The default inflection is
 * REGULAR when available, otherwise the first code collected.
 *
 * @param wordNode
 *            the XML node to convert
 * @return the new WordElement, or null when the node is not a Word node
 */
private WordElement convertNodeToWord(XmlNode wordNode)
{
    // if this isn't a Word node, ignore it
    if (!wordNode.LocalName.equalsIgnoreCase(XML_WORD))
    {
        return(null);
    }

    var word = new WordElement();
    var inflections = new List<Inflection>();

    // copy features
    foreach (XmlNode featureNode in wordNode.SelectNodes("*"))
    {
        if (featureNode.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        var feature = featureNode.LocalName.trim();
        var value = featureNode.InnerText;
        if (value != null)
        {
            value = value.trim();
        }

        if (feature == null)
        {
            Debug.WriteLine("Error in XML lexicon node for " + word);
            break;
        }

        if (feature.equalsIgnoreCase(XML_BASE))
        {
            word.setBaseForm(value);
        }
        else if (feature.equalsIgnoreCase(XML_CATEGORY))
        {
            word.setCategory(LexicalCategoryExtensions.valueOf(value.toUpperCase()));
        }
        else if (feature.equalsIgnoreCase(XML_ID))
        {
            word.setId(value);
        }
        else if (value == null || value.Equals(""))
        {
            // an empty element is an inflection code if getInflCode
            // recognises it, otherwise a boolean feature
            Tuple<bool, Inflection> parsed = InflectionExtensions.getInflCode(feature);
            if (parsed.Item1)
            {
                inflections.Add(parsed.Item2);
            }
            else
            {
                word.setFeature(feature, true);
            }
        }
        else
        {
            word.setFeature(feature, value);
        }
    }

    // if no inflection was specified, assume regular
    if (inflections.isEmpty())
    {
        inflections.Add(Inflection.REGULAR);
    }

    // default inflection code is "reg" if we have it, else the first of
    // the codes available
    var defaultInfl = inflections.Contains(Inflection.REGULAR) ? Inflection.REGULAR : inflections[0];
    word.setFeature(LexicalFeature.DEFAULT_INFL, defaultInfl);
    word.setDefaultInflectionalVariant(defaultInfl);

    foreach (var variant in inflections)
    {
        word.addInflectionalVariant(variant);
    }

    return(word);
}
// note that addFrontModifier, addPostModifier, addPreModifier are inherited
// from PhraseElement
// likewise getFrontModifiers, getPostModifiers, getPreModifiers

/**
 * Adds a modifier to a clause, using heuristics to decide where it goes:
 *   - null is ignored;
 *   - a non-empty string without spaces is turned into a word through the
 *     factory;
 *   - an AdvPhraseSpec becomes a pre-modifier;
 *   - a word (or inflected word) whose category is ADVERB becomes a front
 *     modifier when it has the SENTENCE_MODIFIER feature, otherwise a
 *     pre-modifier;
 *   - everything else becomes a post-modifier.
 *
 * @param modifier
 *            an INLGElement or a string
 */
public override void addModifier(object modifier)
{
    if (modifier == null)
    {
        return;
    }

    // get modifier as NLGElement if possible
    INLGElement asElement = modifier as INLGElement;
    if (asElement == null)
    {
        string text = modifier as string;
        if (text != null && text.length() > 0 && !text.contains(" "))
        {
            asElement = getFactory().createWord(modifier, new LexicalCategory_ANY());
        }
    }

    // still no element: treat the modifier as a complex string
    if (asElement == null)
    {
        addPostModifier((string)modifier);
        return;
    }

    // AdvP is a premodifier (probably should look at the head to see if it
    // is a sentenceModifier)
    if (asElement is AdvPhraseSpec)
    {
        addPreModifier(asElement);
        return;
    }

    // extract the WordElement if the modifier is a single word
    WordElement headWord = asElement as WordElement;
    if (headWord == null)
    {
        var inflected = asElement as InflectedWordElement;
        if (inflected != null)
        {
            headWord = inflected.getBaseWord();
        }
    }

    bool isAdverb = headWord != null
                    && (headWord.getCategory().enumType == (int)LexicalCategoryEnum.ADVERB);
    if (!isAdverb)
    {
        // default case
        addPostModifier(asElement);
        return;
    }

    // adverb rules
    if (headWord.getFeatureAsBoolean(LexicalFeature.SENTENCE_MODIFIER))
    {
        addFrontModifier(headWord);
    }
    else
    {
        addPreModifier(headWord);
    }
}
/**
 * Performs the morphology for adjectives.
 *
 * For a comparative or superlative the form is taken, in order of
 * preference, from the element's own feature, from the lexicon word's
 * feature, or from a builder: the consonant-doubling builder when the
 * element's DEFAULT_INFL is REGULAR_DOUBLE, the regular builder otherwise.
 * In all other cases the base form is used. The element's
 * DISCOURSE_FUNCTION is copied onto the result.
 *
 * @param element
 *            the InflectedWordElement
 * @param baseWord
 *            the WordElement as created from the lexicon entry; may be
 *            null
 * @return a StringElement representing the word after inflection
 */
public static INLGElement doAdjectiveMorphology(InflectedWordElement element, WordElement baseWord)
{
    var patternValue = element.getFeature(LexicalFeature.DEFAULT_INFL);

    // base form from baseWord if it exists, otherwise from element
    var baseForm = getBaseForm(element, baseWord);

    string surface;
    if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE.ToString()))
    {
        surface = element.getFeatureAsString(LexicalFeature.COMPARATIVE)
                  ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.COMPARATIVE) : null);
        if (surface == null)
        {
            surface = Inflection.REGULAR_DOUBLE.Equals(patternValue)
                      ? buildDoubleCompAdjective(baseForm)
                      : buildRegularComparative(baseForm);
        }
    }
    else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE.ToString()))
    {
        surface = element.getFeatureAsString(LexicalFeature.SUPERLATIVE)
                  ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.SUPERLATIVE) : null);
        if (surface == null)
        {
            surface = Inflection.REGULAR_DOUBLE.Equals(patternValue)
                      ? buildDoubleSuperAdjective(baseForm)
                      : buildRegularSuperlative(baseForm);
        }
    }
    else
    {
        surface = baseForm;
    }

    var result = new StringElement(surface);
    var discourseKey = InternalFeature.DISCOURSE_FUNCTION.ToString();
    result.setFeature(discourseKey, element.getFeature(InternalFeature.DISCOURSE_FUNCTION.ToString()));
    return(result);
}
/**
 * Sets the base word for this element by storing it as the BASE_WORD
 * feature.
 *
 * @param word
 *            the WordElement representing the base word as read from the
 *            lexicon
 */
public void setBaseWord(WordElement word)
{
    string featureKey = InternalFeature.BASE_WORD.ToString();
    this.setFeature(featureKey, word);
}
/**
 * Performs the morphology for verbs.
 *
 * The cases are tried in this order:
 *   1. negated, or bare infinitive form: the base form;
 *   2. present participle form;
 *   3. past participle form ("been" for "be" when nothing is stored);
 *   4. past tense;
 *   5. present tense with number null or singular and person null or
 *      third: the third-person-singular form;
 *   6. otherwise "am" / "are" for "be", or the base form.
 * In cases 2 to 5 a form stored on the element is preferred, then a form
 * stored on the lexicon word, then a built form; the consonant-doubling
 * builders are used when the element's DEFAULT_INFL is REGULAR_DOUBLE.
 * The element's DISCOURSE_FUNCTION is copied onto the result.
 *
 * @param element
 *            the InflectedWordElement
 * @param baseWord
 *            the WordElement as created from the lexicon entry; may be
 *            null
 * @return a StringElement representing the word after inflection
 */
public static INLGElement doVerbMorphology(InflectedWordElement element, WordElement baseWord)
{
    var numberValue = element.getFeature(Feature.NUMBER.ToString());
    var personValue = element.getFeature(Feature.PERSON.ToString());
    var tenseValue = element.getFeatureTense(Feature.TENSE.ToString());
    var formValue = element.getFeature(Feature.FORM.ToString());
    var patternValue = element.getFeature(LexicalFeature.DEFAULT_INFL);

    // base form from baseWord if it exists, otherwise from element
    var baseForm = getBaseForm(element, baseWord);

    string realised;

    if (element.getFeatureAsBoolean(Feature.NEGATED.ToString())
        || Form.BARE_INFINITIVE.Equals(formValue))
    {
        realised = baseForm;
    }
    else if (Form.PRESENT_PARTICIPLE.Equals(formValue))
    {
        realised = element.getFeatureAsString(LexicalFeature.PRESENT_PARTICIPLE)
                   ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.PRESENT_PARTICIPLE) : null);
        if (realised == null)
        {
            realised = Inflection.REGULAR_DOUBLE.Equals(patternValue)
                       ? buildDoublePresPartVerb(baseForm)
                       : buildRegularPresPartVerb(baseForm);
        }
    }
    else if (Form.PAST_PARTICIPLE.Equals(formValue))
    {
        realised = element.getFeatureAsString(LexicalFeature.PAST_PARTICIPLE)
                   ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.PAST_PARTICIPLE) : null);
        if (realised == null)
        {
            if ("be".equalsIgnoreCase(baseForm))
            {
                realised = "been";
            }
            else if (Inflection.REGULAR_DOUBLE.Equals(patternValue))
            {
                realised = buildDoublePastVerb(baseForm);
            }
            else
            {
                realised = buildRegularPastVerb(baseForm, numberValue, personValue);
            }
        }
    }
    else if (Tense.PAST.Equals(tenseValue))
    {
        realised = element.getFeatureAsString(LexicalFeature.PAST)
                   ?? (baseWord != null ? baseWord.getFeatureAsString(LexicalFeature.PAST) : null);
        if (realised == null)
        {
            if (Inflection.REGULAR_DOUBLE.Equals(patternValue))
            {
                realised = buildDoublePastVerb(baseForm);
            }
            else
            {
                realised = buildRegularPastVerb(baseForm, numberValue, personValue);
            }
        }
    }
    else if ((numberValue == null || NumberAgreement.SINGULAR.Equals(numberValue))
             && (personValue == null || Person.THIRD.Equals(personValue))
             && (Tense.PRESENT.Equals(tenseValue)))
    {
        realised = element.getFeatureAsString(LexicalFeature.PRESENT3S);

        // the lexicon word's stored form is not consulted for "be"
        if (realised == null && baseWord != null && !"be".equalsIgnoreCase(baseForm))
        {
            realised = baseWord.getFeatureAsString(LexicalFeature.PRESENT3S);
        }
        if (realised == null)
        {
            realised = buildPresent3SVerb(baseForm);
        }
    }
    else if ("be".equalsIgnoreCase(baseForm))
    {
        bool firstPersonSingular = Person.FIRST.Equals(personValue)
                                   && (NumberAgreement.SINGULAR.Equals(numberValue) || numberValue == null);
        realised = firstPersonSingular ? "am" : "are";
    }
    else
    {
        realised = baseForm;
    }

    var result = new StringElement(realised);
    var discourseKey = InternalFeature.DISCOURSE_FUNCTION.ToString();
    result.setFeature(discourseKey, element.getFeature(InternalFeature.DISCOURSE_FUNCTION.ToString()));
    return(result);
}
/**
 * Performs the morphology for nouns.
 *
 * A plural, non-proper noun is pluralised; anything else keeps its base
 * form. The plural is taken, in order of preference, from the element
 * (the base form itself when its DEFAULT_INFL is UNCOUNT, otherwise its
 * PLURAL feature), from the lexicon word (same rule, with DEFAULT_INFL
 * compared as the string "uncount"), or from a builder: the Greco-Latin
 * builder when the element's DEFAULT_INFL is GRECO_LATIN_REGULAR, the
 * regular builder otherwise. checkPossessive is then applied, and the
 * element's DISCOURSE_FUNCTION is copied onto the result.
 *
 * @param element
 *            the InflectedWordElement
 * @param baseWord
 *            the WordElement as created from the lexicon entry; may be
 *            null
 * @return a StringElement representing the word after inflection
 */
public static StringElement doNounMorphology(InflectedWordElement element, WordElement baseWord)
{
    var realised = new StringBuilder();

    // base form from baseWord if it exists, otherwise from element
    var baseForm = getBaseForm(element, baseWord);

    bool inflectAsPlural = element.isPlural()
                           && !element.getFeatureAsBoolean(LexicalFeature.PROPER);
    if (!inflectAsPlural)
    {
        realised.append(baseForm);
    }
    else
    {
        string pluralForm;

        // AG: uncountability is read from the default inflection rather
        // than from a NON_COUNT feature
        var elementDefaultInfl = element.getFeature(LexicalFeature.DEFAULT_INFL);
        if (elementDefaultInfl != null && Inflection.UNCOUNT.Equals(elementDefaultInfl))
        {
            pluralForm = baseForm;
        }
        else
        {
            pluralForm = element.getFeatureAsString(LexicalFeature.PLURAL);
        }

        if (pluralForm == null && baseWord != null)
        {
            var baseDefaultInfl = baseWord.getFeatureAsString(LexicalFeature.DEFAULT_INFL);
            if (baseDefaultInfl != null && baseDefaultInfl.Equals("uncount"))
            {
                pluralForm = baseForm;
            }
            else
            {
                pluralForm = baseWord.getFeatureAsString(LexicalFeature.PLURAL);
            }
        }

        if (pluralForm == null)
        {
            var pattern = element.getFeature(LexicalFeature.DEFAULT_INFL);
            if (Inflection.GRECO_LATIN_REGULAR.Equals(pattern))
            {
                pluralForm = buildGrecoLatinPluralNoun(baseForm);
            }
            else
            {
                pluralForm = buildRegularPluralNoun(baseForm);
            }
        }

        realised.append(pluralForm);
    }

    checkPossessive(element, realised);

    var result = new StringElement(realised.ToString());
    var discourseKey = InternalFeature.DISCOURSE_FUNCTION.ToString();
    result.setFeature(discourseKey, element.getFeature(InternalFeature.DISCOURSE_FUNCTION.ToString()));
    return(result);
}