public virtual void nounInflectionalVariantsTest()
{
    // Verifies that the realised plural of "sanctum" follows whichever
    // inflectional variant is currently set as the word's default:
    // regular, then Greco-Latin regular, then regular again.
    LexicalCategory nounCategory = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.NOUN);
    WordElement sanctum = lexicon.getWord("sanctum", nounCategory);

    // Builds a fresh plural-marked inflected element for the word and realises it.
    string RealisePlural()
    {
        InflectedWordElement inflected = new InflectedWordElement(sanctum);
        inflected.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
        return realiser.realise(inflected).Realisation;
    }

    Assert.AreEqual(Inflection.REGULAR, sanctum.getDefaultInflectionalVariant());

    // the regular plural should not be stored as a feature on the word
    Assert.AreEqual(null, sanctum.getFeature(LexicalFeature.PLURAL));
    Assert.AreEqual("sanctums", RealisePlural());

    // switch to the Greco-Latin regular variant
    sanctum.setDefaultInflectionalVariant(Inflection.GRECO_LATIN_REGULAR);
    Assert.AreEqual("sancta", RealisePlural());

    // and back to the regular variant
    sanctum.setDefaultInflectionalVariant(Inflection.REGULAR);
    Assert.AreEqual("sanctums", RealisePlural());
}
public virtual void verbInflectionalVariantsTest()
{
    // Verifies that the realised past tense of "lie" follows whichever
    // inflectional variant is currently set as the word's default:
    // regular ("lied"), then irregular ("lay"), then regular again.
    LexicalCategory verbCategory = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.VERB);
    WordElement lie = lexicon.getWord("lie", verbCategory);

    // Builds a fresh past-tense inflected element for the word and realises it.
    string RealisePast()
    {
        InflectedWordElement inflected = new InflectedWordElement(lie);
        inflected.setFeature(Feature.TENSE, Tense.PAST);
        return realiser.realise(inflected).Realisation;
    }

    Assert.AreEqual(Inflection.REGULAR, lie.getDefaultInflectionalVariant());

    // default past is "lied"
    Assert.AreEqual("lied", RealisePast());

    // switch to the irregular variant
    lie.setDefaultInflectionalVariant(Inflection.IRREGULAR);
    Assert.AreEqual("lay", RealisePast());

    // switch back to regular: no past form should be left on the word
    lie.setDefaultInflectionalVariant(Inflection.REGULAR);
    Assert.AreEqual(null, lie.getFeature(LexicalFeature.PAST));
    Assert.AreEqual("lied", RealisePast());
}
public virtual void spellingVariantsInNPTest()
{
    // Verifies that changes to a word's default spelling variant and default
    // inflectional variant show up when a noun phrase built on that word is
    // realised again.
    WordElement disease = lexicon.getWord("Adams-Stokes disease");
    Assert.AreEqual("Adams-Stokes disease", disease.DefaultSpellingVariant);

    NPPhraseSpec phrase = factory.createNounPhrase(disease);
    phrase.setSpecifier(lexicon.getWord("the"));

    // Realises the (single, shared) noun phrase in its current state.
    string RealisePhrase() => realiser.realise(phrase).Realisation;

    Assert.AreEqual("the Adams-Stokes disease", RealisePhrase());

    // change the spelling variant
    disease.DefaultSpellingVariant = "Adams Stokes disease";
    Assert.AreEqual("Adams Stokes disease", disease.DefaultSpellingVariant);
    Assert.AreEqual("the Adams Stokes disease", RealisePhrase());

    // the default inflection for this word is uncount, so plural leaves it unchanged
    phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
    Assert.AreEqual("the Adams Stokes disease", RealisePhrase());

    // change the default inflection for this word
    disease.setDefaultInflectionalVariant(Inflection.REGULAR);
    Assert.AreEqual("the Adams Stokes diseases", RealisePhrase());
}
/**
 * Extract verb information from an NIH VerbEntry record and add it to a
 * simplenlg WordElement. For now this covers the transitive, intransitive
 * and ditransitive flags plus the inflectional variants.
 *
 * When verbEntry is null the word is flagged as transitive only and no
 * inflectional variants are added.
 *
 * @param wordElement
 *            the word element that receives the features
 * @param verbEntry
 *            the NIH verb record, or null
 */
private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
{
    if (verbEntry == null)
    {
        // should only happen for aux verbs, which have
        // auxEntry instead of verbEntry in NIH Lex
        // just flag as transitive and return
        wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
        wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
        wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
        return;
    }

    bool intransitiveVerb = verbEntry.GetIntran().Any();
    // complex-transitive entries are counted as transitive as well
    bool transitiveVerb = verbEntry.GetTran().Any() || verbEntry.GetCplxtran().Any();
    bool ditransitiveVerb = verbEntry.GetDitran().Any();

    wordElement.setFeature(LexicalFeature.INTRANSITIVE, intransitiveVerb);
    wordElement.setFeature(LexicalFeature.TRANSITIVE, transitiveVerb);
    wordElement.setFeature(LexicalFeature.DITRANSITIVE, ditransitiveVerb);

    // add the inflectional variants
    List<string> variants = verbEntry.GetVariants();
    if (variants.Count > 0)
    {
        IList<Inflection> wordVariants = new List<Inflection>();
        foreach (string v in variants)
        {
            // the inflection code is everything before the first '|', or the whole
            // string when there is no '|'
            int index = v.IndexOf("|", StringComparison.Ordinal);
            string rawCode = index > -1 ? v.Substring(0, index) : v;

            // Codes are machine-readable identifiers, so lower-case them with the
            // invariant culture; a culture-sensitive ToLower() would turn "I" into
            // a dotless "ı" under e.g. tr-TR.
            string code = rawCode.ToLowerInvariant().Trim();

            Inflection? infl = Inflection.REGULAR.getInflCode(code);
            if (infl != null)
            {
                wordElement.addInflectionalVariant((Inflection)infl);
                wordVariants.Add((Inflection)infl);
            }
        }

        // if the variants include "reg" (or none was recognised), regular is the
        // default, otherwise just take the first recognised variant
        Inflection defaultVariant = wordVariants.Contains(Inflection.REGULAR) || wordVariants.Count == 0
            ? Inflection.REGULAR
            : wordVariants[0];

        // wordElement.setFeature(LexicalFeature.INFLECTIONS, wordVariants);
        // wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
        wordElement.setDefaultInflectionalVariant(defaultVariant);
    }

    // ignore (for now) other info in record
}
/**
 * Extract noun information from an NIH NounEntry record and add it to a
 * simplenlg WordElement. For now this covers the inflectional variants
 * (including the default one) and whether the noun is proper.
 *
 * @param wordElement
 *            the word element that receives the features
 * @param nounEntry
 *            the NIH noun record
 */
private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
{
    bool proper = nounEntry.IsProper();

    // add the inflectional variants
    List<string> variants = nounEntry.GetVariants();
    if (variants.Count > 0)
    {
        IList<Inflection> wordVariants = new List<Inflection>();
        foreach (string v in variants)
        {
            // the inflection code is everything before the first '|', or the whole
            // string when there is no '|'
            int index = v.IndexOf("|", StringComparison.Ordinal);
            string rawCode = index > -1 ? v.Substring(0, index) : v;

            // Codes are machine-readable identifiers, so lower-case them with the
            // invariant culture; a culture-sensitive ToLower() would turn "I" into
            // a dotless "ı" under e.g. tr-TR.
            string code = rawCode.ToLowerInvariant().Trim();

            Inflection? infl = Inflection.REGULAR.getInflCode(code);
            if (infl != null)
            {
                wordVariants.Add((Inflection)infl);
                wordElement.addInflectionalVariant((Inflection)infl);
            }
        }

        // if the variants include "reg" (or none was recognised), regular is the
        // default, otherwise just take the first recognised variant
        Inflection defaultVariant = wordVariants.Contains(Inflection.REGULAR) || wordVariants.Count == 0
            ? Inflection.REGULAR
            : wordVariants[0];
        wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
        wordElement.setDefaultInflectionalVariant(defaultVariant);
    }

    // Note: an earlier, now disabled, version also set LexicalFeature.NON_COUNT
    // when a word had an "uncount"/"groupuncount" variant and no "reg" variant.
    // Lots of words have both "reg" and "unCount", indicating they can be used
    // either way; such words were regarded as normal. That flag is not set here.

    wordElement.setFeature(LexicalFeature.PROPER, proper);

    // ignore (for now) other info in record
}
/**
 * Unwrap word element: build the NLG element described by an XML word element.
 *
 * A canned element becomes a string element created from its base text.
 * Otherwise a word is created from the base form and the unwrapped lexical
 * category (ANY when no lexical category is given); the optional inflection
 * and a differing spelling are then applied to it.
 *
 * @param wordElement
 *            the word element, may be null
 * @return the nLG element, or null when wordElement is null, when it has no
 *         base form, or when the created word is an inflected word whose
 *         base word has an empty base form
 */
private NLGElement UnwrapWordElement(XmlWordElement wordElement)
{
    if (wordElement == null)
    {
        return null;
    }

    if (true.Equals(wordElement.Canned))
    {
        return factory.createStringElement(wordElement.Base);
    }

    LexicalCategory lexCat = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.ANY);
    ElementCategory cat = UnwrapCategory(wordElement.Cat);
    if (cat is LexicalCategory unwrappedLexCat)
    {
        lexCat = unwrappedLexCat;
    }

    // String baseForm = getBaseWord(wordElement);
    string baseForm = wordElement.Base;
    if (baseForm == null)
    {
        return null;
    }

    NLGElement word = factory.createWord(baseForm, lexCat);

    if (word is InflectedWordElement inflected && inflected.BaseWord.BaseForm.Length == 0)
    {
        return null; // cch TESTING
    }

    if (word is WordElement we)
    {
        // Inflection: only override the default when the XML value names a known
        // Inflection. Ignoring the TryParse result would silently install
        // default(Inflection) for an unrecognised value.
        if (wordElement.Var != null
            && Enum.TryParse(wordElement.Var.ToString(), out Inflection defaultInflection))
        {
            we.setDefaultInflectionalVariant(defaultInflection);
        }

        // Spelling variant may have been given as base form in xml.
        // If so, use that variant.
        // The lexicon base form is compared literally, so it is escaped before
        // being placed in the pattern; unescaped, a base form containing regex
        // metacharacters would mis-match or make the pattern invalid.
        string literalBaseForm = Regex.Escape(we.BaseForm ?? string.Empty);
        if (!Regex.IsMatch(baseForm, "^" + literalBaseForm + "$"))
        {
            we.DefaultSpellingVariant = baseForm;
        }
    }

    return word;
}
public virtual void uncountInflectionalVariantTest()
{
    // Verifies that a plural noun phrase built on "calcification" is realised
    // with a plural noun by default, and with the unchanged noun once the
    // word's default inflectional variant is switched to uncount.
    LexicalCategory nounCategory = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.NOUN);
    WordElement calcification = (WordElement)factory.createWord("calcification", nounCategory);

    // Builds a new plural "the <word>" noun phrase and realises it.
    string RealisePluralPhrase()
    {
        NPPhraseSpec phrase = factory.createNounPhrase("the", calcification);
        phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
        return realiser.realise(phrase).Realisation;
    }

    string countable = RealisePluralPhrase();
    Assert.AreEqual("the calcifications", countable);

    // switch the word to the uncount variant and realise a fresh phrase
    calcification.setDefaultInflectionalVariant(Inflection.UNCOUNT);
    string uncountable = RealisePluralPhrase();
    Assert.AreEqual("the calcification", uncountable);
}
/**
 * Create a simplenlg WordElement from a Word node in a lexicon XML file.
 *
 * Each child element of the node is read as one feature: the base form,
 * category and id are stored in the corresponding word properties; an
 * element with no text is either an inflection code or a boolean feature;
 * any other element is stored as a feature with its text as the value.
 *
 * @param wordNode
 *            the XML node to convert
 * @return the word element, or null if the node is not a Word node
 */
private WordElement convertNodeToWord(XmlNode wordNode)
{
    // if this isn't a Word node, ignore it
    // (ordinal comparison, like every other tag-name comparison in this method)
    if (!wordNode.Name.Equals(XML_WORD, StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Note: a word without a base form is not rejected here; an earlier check
    // that reported "Word with no base" and returned null is disabled.

    // create word
    WordElement word = new WordElement();
    IList<Inflection> inflections = new List<Inflection>();

    // now copy features
    foreach (XmlNode featureNode in wordNode.ChildNodes)
    {
        if (featureNode.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        string feature = featureNode.Name.Trim();
        string value = featureNode.InnerText?.Trim();

        if (feature.Equals(XML_BASE, StringComparison.OrdinalIgnoreCase))
        {
            word.BaseForm = value;
        }
        else if (feature.Equals(XML_CATEGORY, StringComparison.OrdinalIgnoreCase))
        {
            // Category names are identifiers: upper-case them with the invariant
            // culture so that e.g. "i" does not become a dotted "İ" under tr-TR.
            // An unrecognised name still yields the enum's default value.
            Enum.TryParse(value.ToUpperInvariant(), out LexicalCategory.LexicalCategoryEnum lexcat);
            word.Category = new LexicalCategory(lexcat);
        }
        else if (feature.Equals(XML_ID, StringComparison.OrdinalIgnoreCase))
        {
            word.Id = value;
        }
        else if (string.IsNullOrEmpty(value))
        {
            // if this is an infl code, add it to inflections
            Inflection? inflCode = Inflection.REGULAR.getInflCode(feature);
            if (inflCode != null)
            {
                inflections.Add((Inflection)inflCode);
            }
            else
            {
                // otherwise assume it's a boolean feature
                word.setFeature(feature, true);
            }
        }
        else
        {
            word.setFeature(feature, value);
        }
    }

    // if no infl specified, assume regular
    if (inflections.Count == 0)
    {
        inflections.Add(Inflection.REGULAR);
    }

    // default inflection code is "reg" if we have it, else the first of the
    // infl codes available
    Inflection defaultInfl = inflections.Contains(Inflection.REGULAR)
        ? Inflection.REGULAR
        : inflections[0];

    word.setFeature(LexicalFeature.DEFAULT_INFL, defaultInfl);
    word.setDefaultInflectionalVariant(defaultInfl);

    foreach (Inflection variant in inflections)
    {
        word.addInflectionalVariant(variant);
    }

    // done, return word
    return word;
}