Example #1
0
        /**
         * create a simplenlg WordElement from a Word node in a lexicon XML file
         *
         * @param wordNode
         * @return
         * @throws XPathUtilException
         */

        private WordElement convertNodeToWord(XmlNode wordNode)
        {
            // Only Word nodes are converted; anything else yields null.
            if (!wordNode.LocalName.equalsIgnoreCase(XML_WORD))
            {
                return null;
            }

            // The word being built, plus the inflection codes collected
            // while scanning its child elements.
            var word      = new WordElement();
            var inflCodes = new List <Inflection>();

            // Each child element of the Word node describes one feature.
            foreach (XmlNode child in wordNode.SelectNodes("*"))
            {
                if (child.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                string feature = child.LocalName.trim();
                string rawText = child.InnerText;
                string value   = rawText != null ? rawText.trim() : null;

                // A missing feature name aborts the scan of this node.
                if (feature == null)
                {
                    Debug.WriteLine("Error in XML lexicon node for " + word);
                    break;
                }

                // Base form, category and id map onto dedicated setters.
                if (feature.equalsIgnoreCase(XML_BASE))
                {
                    word.setBaseForm(value);
                    continue;
                }

                if (feature.equalsIgnoreCase(XML_CATEGORY))
                {
                    word.setCategory(LexicalCategoryExtensions.valueOf(value.toUpperCase()));
                    continue;
                }

                if (feature.equalsIgnoreCase(XML_ID))
                {
                    word.setId(value);
                    continue;
                }

                // Any other element carrying text is stored as a plain
                // feature/value pair.
                if (!string.IsNullOrEmpty(value))
                {
                    word.setFeature(feature, value);
                    continue;
                }

                // An element without text is either an inflection code or a
                // boolean flag: the lookup's first item says whether the name
                // was recognised as an inflection code.
                Tuple <bool, Inflection> lookup = InflectionExtensions.getInflCode(feature);

                if (lookup.Item1)
                {
                    inflCodes.Add(lookup.Item2);
                }
                else
                {
                    word.setFeature(feature, true);
                }
            }

            // With no inflection code given, the word is treated as regular.
            if (inflCodes.isEmpty())
            {
                inflCodes.Add(Inflection.REGULAR);
            }

            // The default inflection is the regular one when present,
            // otherwise the first code that was collected.
            Inflection defaultInfl;

            if (inflCodes.Contains(Inflection.REGULAR))
            {
                defaultInfl = Inflection.REGULAR;
            }
            else
            {
                defaultInfl = inflCodes[0];
            }

            word.setFeature(LexicalFeature.DEFAULT_INFL, defaultInfl);
            word.setDefaultInflectionalVariant(defaultInfl);

            // Register every collected code as an inflectional variant.
            foreach (var code in inflCodes)
            {
                word.addInflectionalVariant(code);
            }

            return word;
        }
Example #2
0
        /**
         * extract noun information from NIH NounEntry record, and add to a
         * simplenlg WordElement. For now just extract whether count/non-count and
         * whether proper or not
         *
         * @param wordElement
         * @param nounEntry
         */

        private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
        {
            // Read the proper-noun flag first; it is applied at the very end.
            bool isProperNoun = nounEntry.IsProper();

            // Variant strings from the noun entry; the inflection code is the
            // part before an optional "|" separator.
            List <string> variants = nounEntry.GetVariants();

            if (!variants.isEmpty())
            {
                var recognised = new List <Inflection>();

                foreach (var variant in variants)
                {
                    int    separator = variant.indexOf("|");
                    string prefix    = separator > -1 ? variant.substring(0, separator) : variant;
                    string code      = prefix.toLowerCase().trim();

                    Inflection infl = Inflection.getInflCode(code);

                    // Codes that do not map to an inflection are skipped.
                    if (infl == null)
                    {
                        continue;
                    }

                    recognised.add(infl);
                    wordElement.addInflectionalVariant(infl);
                }

                // Regular is the default when it is among the recognised codes
                // or when nothing was recognised; otherwise the first
                // recognised code is used.
                Inflection defaultVariant;

                if (recognised.Contains(Inflection.REGULAR) || recognised.isEmpty())
                {
                    defaultVariant = Inflection.REGULAR;
                }
                else
                {
                    defaultVariant = recognised.get(0);
                }

                wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
                wordElement.setDefaultInflectionalVariant(defaultVariant);
            }

            // Count/non-count (LexicalFeature.NON_COUNT) is deliberately not
            // set here; only the proper-noun flag is recorded. Other
            // information in the record is ignored for now.
            wordElement.setFeature(LexicalFeature.PROPER, isProperNoun);
        }
Example #3
0
        /**
         * extract verb information from NIH VerbEntry record, and add to a
         * simplenlg WordElement. For now just extract transitive, intransitive,
         * and/or ditransitive
         *
         * @param wordElement
         * @param verbEntry
         */

        private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
        {
            // A missing verb entry is expected only for auxiliary verbs
            // (which have an auxEntry instead of a verbEntry in the NIH
            // lexicon); such words are simply flagged as transitive.
            if (verbEntry == null)
            {
                wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
                wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
                wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
                return;
            }

            // Work out all three transitivity flags before touching the word.
            // Complex-transitive entries count as transitive.
            bool isIntransitive = notEmpty(verbEntry.GetIntran());
            bool isTransitive   = notEmpty(verbEntry.GetTran()) ||
                                  notEmpty(verbEntry.GetCplxtran());
            bool isDitransitive = notEmpty(verbEntry.GetDitran());

            wordElement.setFeature(LexicalFeature.INTRANSITIVE, isIntransitive);
            wordElement.setFeature(LexicalFeature.TRANSITIVE, isTransitive);
            wordElement.setFeature(LexicalFeature.DITRANSITIVE, isDitransitive);

            // Variant strings from the verb entry; the inflection code is the
            // part before an optional "|" separator.
            List <string> variants = verbEntry.GetVariants();

            if (variants.isEmpty())
            {
                // Nothing more to record; other info in the record is ignored.
                return;
            }

            var recognised = new List <Inflection>();

            foreach (var variant in variants)
            {
                int    separator = variant.indexOf("|");
                string prefix    = separator > -1 ? variant.substring(0, separator) : variant;

                Inflection infl = Inflection.getInflCode(prefix.toLowerCase().trim());

                // Codes that do not map to an inflection are skipped.
                if (infl == null)
                {
                    continue;
                }

                wordElement.addInflectionalVariant(infl);
                recognised.add(infl);
            }

            // Regular is the default when it is among the recognised codes or
            // when nothing was recognised; otherwise the first recognised
            // code is used.
            Inflection defaultVariant;

            if (recognised.contains(Inflection.REGULAR) || recognised.isEmpty())
            {
                defaultVariant = Inflection.REGULAR;
            }
            else
            {
                defaultVariant = recognised.get(0);
            }

            // Only the default variant is stored; the INFLECTIONS and
            // DEFAULT_INFL features are intentionally left unset here.
            wordElement.setDefaultInflectionalVariant(defaultVariant);
        }
Example #4
0
        /**
         * make a WordElement from a lexical record. Currently just specifies basic
         * params and inflections. Should do more in the future!
         *
         * @param record
         * @return
         */

        private WordElement makeWord(LexRecord record)
        {
            // Basic identity of the word: base form, category and EUI id.
            string           baseForm = record.GetBase();
            ILexicalCategory category = getSimplenlgCategory(record);
            string           id       = record.GetEui();

            var result = new WordElement(baseForm, (LexicalCategory)category, id);

            // Category-specific details come from the matching entry of the
            // record; categories not listed here (closed-class words) get none.
            switch (category.lexType)
            {
            case LexicalCategoryEnum.ADJECTIVE:
                addAdjectiveInfo(result, record.GetCatEntry().GetAdjEntry());
                break;

            case LexicalCategoryEnum.ADVERB:
                addAdverbInfo(result, record.GetCatEntry().GetAdvEntry());
                break;

            case LexicalCategoryEnum.NOUN:
                addNounInfo(result, record.GetCatEntry().GetNounEntry());
                break;

            case LexicalCategoryEnum.VERB:
                addVerbInfo(result, record.GetCatEntry().GetVerbEntry());
                break;
            }

            // The default variant is read after the category-specific step,
            // which may have set it.
            var defaultInfl = (Inflection)result.getDefaultInflectionalVariant();

            // Copy the inflected forms listed in the record onto the word.
            foreach (InflVar inflVar in record.GetInflVarsAndAgreements().GetInflValues())
            {
                string featureName = getSimplenlgInflection(inflVar.GetInflection());

                // Inflections with no simplenlg counterpart are skipped.
                if (featureName == null)
                {
                    continue;
                }

                string     form     = inflVar.GetVar();
                Inflection inflType = Inflection.getInflCode(inflVar.GetType());

                // Store every inflectional variant, except regular ones
                // unless keepStandardInflections is set.
                if (inflType != null &&
                    (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections))
                {
                    result.addInflectionalVariant(inflType, featureName, form);
                }

                // When the word has no default variant, or this form belongs
                // to the default variant (subject to the same regular-form
                // rule), also expose the form as a feature on the word.
                if (defaultInfl == null ||
                    (defaultInfl.Equals(inflType) &&
                     (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections)))
                {
                    result.setFeature(featureName, form);
                }
            }

            // Acronym information first, then spelling variants.
            addAcronymInfo(result, record);
            addSpellingVariants(result, record);

            return result;
        }