/**
 * Builds a WordElement from a single node of an XML lexicon.
 *
 * Each element child of the node is read as one feature of the word:
 * children named XML_BASE, XML_CATEGORY and XML_ID fill in the base form,
 * category and id; a child with empty text is either collected as an
 * inflection code (when InflectionExtensions.getInflCode accepts its name)
 * or stored as a boolean feature set to true; every other child is stored
 * as a feature whose value is its trimmed text.
 *
 * @param wordNode the XML node to convert
 * @return the populated word, or null when the node is not named XML_WORD
 */
private WordElement convertNodeToWord(XmlNode wordNode)
{
    // Nodes that are not word nodes are ignored.
    if (!wordNode.LocalName.equalsIgnoreCase(XML_WORD))
    {
        return null;
    }

    // Disabled check, kept for reference:
    // // if there is no base, flag an error and return null
    // string base = XPathUtil.extractValue(wordNode, Constants.XML_BASE);
    // if (base == null) {
    //     Console.WriteLine("Error in loading XML lexicon: Word with no base");
    //     return null;
    // }

    var word = new WordElement();
    var inflections = new List<Inflection>();

    // Copy every child element onto the word.
    foreach (XmlNode child in wordNode.SelectNodes("*"))
    {
        if (child.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        var name = child.LocalName.trim();
        var text = child.InnerText;
        if (text != null)
        {
            text = text.trim();
        }

        if (name == null)
        {
            // Stop processing the remaining children of this node.
            Debug.WriteLine("Error in XML lexicon node for " + word);
            break;
        }

        if (name.equalsIgnoreCase(XML_BASE))
        {
            word.setBaseForm(text);
        }
        else if (name.equalsIgnoreCase(XML_CATEGORY))
        {
            word.setCategory(LexicalCategoryExtensions.valueOf(text.toUpperCase()));
        }
        else if (name.equalsIgnoreCase(XML_ID))
        {
            word.setId(text);
        }
        else if (string.IsNullOrEmpty(text))
        {
            // A valueless element is either an inflection code or a boolean flag.
            Tuple<bool, Inflection> lookup = InflectionExtensions.getInflCode(name);
            if (lookup.Item1)
            {
                inflections.Add(lookup.Item2);
            }
            else
            {
                word.setFeature(name, true);
            }
        }
        else
        {
            word.setFeature(name, text);
        }
    }

    // With no inflection code given, the word is assumed to be regular.
    if (inflections.isEmpty())
    {
        inflections.Add(Inflection.REGULAR);
    }

    // Prefer the regular code as the default; otherwise take the first one listed.
    Inflection defaultInfl;
    if (inflections.Contains(Inflection.REGULAR))
    {
        defaultInfl = Inflection.REGULAR;
    }
    else
    {
        defaultInfl = inflections[0];
    }

    word.setFeature(LexicalFeature.DEFAULT_INFL, defaultInfl);
    word.setDefaultInflectionalVariant(defaultInfl);

    foreach (var variant in inflections)
    {
        word.addInflectionalVariant(variant);
    }

    return word;
}
/**
 * Copies noun information from an NIH NounEntry record onto a simplenlg
 * WordElement. Currently this covers the inflectional variants listed by
 * the entry and whether the noun is proper.
 *
 * @param wordElement the word to update
 * @param nounEntry   the noun record to read from
 */
private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
{
    bool isProper = nounEntry.IsProper();
    // bool nonCountVariant = false;
    // bool regVariant = false;

    // Register the inflectional variants named by the record.
    List<string> rawVariants = nounEntry.GetVariants();
    if (!rawVariants.isEmpty())
    {
        var parsed = new List<Inflection>();
        foreach (var raw in rawVariants)
        {
            // When the entry contains "|", only the part selected by
            // substring(0, index) is used as the inflection code.
            int bar = raw.indexOf("|");
            string head = bar > -1 ? raw.substring(0, bar) : raw;
            string code = head.toLowerCase().trim();

            Inflection infl = Inflection.getInflCode(code);
            if (infl == null)
            {
                continue;
            }

            parsed.add(infl);
            wordElement.addInflectionalVariant(infl);
        }

        // Regular is the default when it is listed (or nothing was parsed);
        // otherwise the first parsed variant is used.
        Inflection chosen;
        if (parsed.Contains(Inflection.REGULAR) || parsed.isEmpty())
        {
            chosen = Inflection.REGULAR;
        }
        else
        {
            chosen = parsed.get(0);
        }

        wordElement.setFeature(LexicalFeature.DEFAULT_INFL, chosen);
        wordElement.setDefaultInflectionalVariant(chosen);
    }

    // Disabled count/non-count detection, kept for reference:
    // for (string variant : variants) {
    //     if (variant.startsWith("uncount")
    //             || variant.startsWith("groupuncount"))
    //         nonCountVariant = true;
    //
    //     if (variant.startsWith("reg"))
    //         regVariant = true;
    //     // ignore other variant info
    // }
    // lots of words have both "reg" and "unCount", indicating they
    // can be used in either way. Regard such words as normal,
    // only flag as nonCount if unambiguous
    // wordElement.setFeature(LexicalFeature.NON_COUNT, nonCountVariant
    //         && !regVariant);

    wordElement.setFeature(LexicalFeature.PROPER, isProper);

    // Other information in the record is ignored for now.
}
/**
 * Copies verb information from an NIH VerbEntry record onto a simplenlg
 * WordElement: the intransitive / transitive / ditransitive flags and the
 * inflectional variants listed by the entry.
 *
 * @param wordElement the word to update
 * @param verbEntry   the verb record to read from; when null the word is
 *                    simply flagged as transitive
 */
private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
{
    if (verbEntry == null)
    {
        // Should only happen for aux verbs, which have an auxEntry instead
        // of a verbEntry in NIH Lex: flag as transitive and stop.
        wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
        wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
        wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
        return;
    }

    bool hasIntran = notEmpty(verbEntry.GetIntran());

    // Complex-transitive frames count as transitive too; they are only
    // consulted when no plain transitive frame is present.
    bool hasTran = notEmpty(verbEntry.GetTran());
    if (!hasTran)
    {
        hasTran = notEmpty(verbEntry.GetCplxtran());
    }

    bool hasDitran = notEmpty(verbEntry.GetDitran());

    wordElement.setFeature(LexicalFeature.INTRANSITIVE, hasIntran);
    wordElement.setFeature(LexicalFeature.TRANSITIVE, hasTran);
    wordElement.setFeature(LexicalFeature.DITRANSITIVE, hasDitran);

    // Register the inflectional variants named by the record.
    List<string> rawVariants = verbEntry.GetVariants();
    if (!rawVariants.isEmpty())
    {
        var parsed = new List<Inflection>();
        foreach (var raw in rawVariants)
        {
            // When the entry contains "|", only the part selected by
            // substring(0, index) is used as the inflection code.
            int bar = raw.indexOf("|");
            string head = bar > -1 ? raw.substring(0, bar) : raw;
            Inflection infl = Inflection.getInflCode(head.toLowerCase().trim());

            if (infl == null)
            {
                continue;
            }

            wordElement.addInflectionalVariant(infl);
            parsed.add(infl);
        }

        // Regular is the default when it is listed (or nothing was parsed);
        // otherwise the first parsed variant is used.
        Inflection chosen;
        if (parsed.contains(Inflection.REGULAR) || parsed.isEmpty())
        {
            chosen = Inflection.REGULAR;
        }
        else
        {
            chosen = parsed.get(0);
        }

        // wordElement.setFeature(LexicalFeature.INFLECTIONS, wordVariants);
        // wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
        wordElement.setDefaultInflectionalVariant(chosen);
    }

    // Other information in the record is ignored for now.
}
/**
 * Builds a WordElement from a lexical record. The word gets the record's
 * base form, its simplenlg category and its EUI as id; category-specific
 * information, inflected forms, acronym information and spelling variants
 * are then added.
 *
 * @param record the lexical record to convert
 * @return the newly created word
 */
private WordElement makeWord(LexRecord record)
{
    // Basic data.
    string baseForm = record.GetBase();
    ILexicalCategory category = getSimplenlgCategory(record);
    string id = record.GetEui();

    var wordElement = new WordElement(baseForm, (LexicalCategory)category, id);

    // Category-specific information; closed class words get none.
    var lexType = category.lexType;
    if (lexType == LexicalCategoryEnum.ADJECTIVE)
    {
        addAdjectiveInfo(wordElement, record.GetCatEntry().GetAdjEntry());
    }
    else if (lexType == LexicalCategoryEnum.ADVERB)
    {
        addAdverbInfo(wordElement, record.GetCatEntry().GetAdvEntry());
    }
    else if (lexType == LexicalCategoryEnum.NOUN)
    {
        addNounInfo(wordElement, record.GetCatEntry().GetNounEntry());
    }
    else if (lexType == LexicalCategoryEnum.VERB)
    {
        addVerbInfo(wordElement, record.GetCatEntry().GetVerbEntry());
    }

    var defaultInfl = (Inflection)wordElement.getDefaultInflectionalVariant();

    // Inflected forms.
    // if (keepStandardInflections || !standardInflections(record,
    // category)) {
    foreach (InflVar inflection in record.GetInflVarsAndAgreements().GetInflValues())
    {
        string simplenlgInflection = getSimplenlgInflection(inflection.GetInflection());
        if (simplenlgInflection == null)
        {
            continue;
        }

        string inflectedForm = inflection.GetVar();
        Inflection inflType = Inflection.getInflCode(inflection.GetType());

        // Regular forms are left out unless keepStandardInflections is set.
        bool regularSuppressed = Inflection.REGULAR.Equals(inflType)
                                 && !this.keepStandardInflections;

        // Store every other inflectional variant.
        if (inflType != null && !regularSuppressed)
        {
            wordElement.addInflectionalVariant(inflType, simplenlgInflection, inflectedForm);
        }

        // A form belonging to the default variant is also set directly as
        // a feature on the word.
        if (defaultInfl == null || (defaultInfl.Equals(inflType) && !regularSuppressed))
        {
            wordElement.setFeature(simplenlgInflection, inflectedForm);
        }

        // wordElement
        // .setFeature(simplenlgInflection, inflection.GetVar());
    }
    // }

    // Acronym information.
    addAcronymInfo(wordElement, record);

    // Spelling variants.
    addSpellingVariants(wordElement, record);

    return wordElement;
}