コード例 #1
0
        /**
         * Reads the modification codes of an NIH AdvEntry record and records on the
         * simplenlg WordElement whether the adverb is a verb modifier, a sentence
         * modifier and/or an intensifier. All other information in the record is
         * ignored.
         *
         * @param wordElement the word whose adverb features are set
         * @param advEntry the NIH adverb entry supplying the modification codes
         */
        private void addAdverbInfo(WordElement wordElement, AdvEntry advEntry)
        {
            bool modifiesVerb     = false;
            bool modifiesSentence = false;
            bool isIntensifier    = false;

            foreach (string code in advEntry.GetModification())
            {
                // the three prefixes cannot match the same string, so each code
                // raises at most one flag; codes with any other prefix are ignored
                modifiesVerb     |= code.StartsWith("verb_modifier", StringComparison.Ordinal);
                modifiesSentence |= code.StartsWith("sentence_modifier", StringComparison.Ordinal);
                isIntensifier    |= code.StartsWith("intensifier", StringComparison.Ordinal);
            }

            // all three features are always written, true or false
            wordElement.setFeature(LexicalFeature.VERB_MODIFIER, modifiesVerb);
            wordElement.setFeature(LexicalFeature.SENTENCE_MODIFIER, modifiesSentence);
            wordElement.setFeature(LexicalFeature.INTENSIFIER, isIntensifier);
        }
コード例 #2
0
        /**
         * Reads the position codes of an NIH AdjEntry record and records on the
         * simplenlg WordElement whether the adjective is qualitative ("attrib(1)"),
         * colour ("attrib(2)"), classifying ("attrib(3)") and/or predicative
         * ("pred"). All other information in the record is ignored.
         *
         * @param wordElement the word whose adjective features are set
         * @param adjEntry the NIH adjective entry supplying the position codes
         */
        private void addAdjectiveInfo(WordElement wordElement, AdjEntry adjEntry)
        {
            bool isQualitative = false;
            bool isColour      = false;
            bool isClassifying = false;
            bool isPredicative = false;

            foreach (string code in adjEntry.GetPosition())
            {
                // the four prefixes cannot match the same string, so each code
                // raises at most one flag; any other position code is ignored
                isQualitative |= code.StartsWith("attrib(1)", StringComparison.Ordinal);
                isColour      |= code.StartsWith("attrib(2)", StringComparison.Ordinal);
                isClassifying |= code.StartsWith("attrib(3)", StringComparison.Ordinal);
                isPredicative |= code.StartsWith("pred", StringComparison.Ordinal);
            }

            // all four features are always written, true or false
            wordElement.setFeature(LexicalFeature.QUALITATIVE, isQualitative);
            wordElement.setFeature(LexicalFeature.COLOUR, isColour);
            wordElement.setFeature(LexicalFeature.CLASSIFYING, isClassifying);
            wordElement.setFeature(LexicalFeature.PREDICATIVE, isPredicative);
        }
コード例 #3
0
        /**
         * Copies noun information from an NIH NounEntry record onto a simplenlg
         * WordElement: the inflectional variants listed by the record, the default
         * inflectional variant, and whether the noun is proper.
         *
         * Each variant string is reduced to an inflection code: the text before the
         * first '|' (or the whole string when there is no '|'), lower-cased and
         * trimmed. Codes that getInflCode does not recognise are skipped.
         *
         * @param wordElement the word to annotate
         * @param nounEntry the NIH noun entry to read
         */
        private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
        {
            bool proper = nounEntry.IsProper();

            // add the inflectional variants
            List<string> variants = nounEntry.GetVariants();

            if (variants.Count > 0)
            {
                IList<Inflection> wordVariants = new List<Inflection>();

                foreach (string v in variants)
                {
                    int    index   = v.IndexOf("|", StringComparison.Ordinal);
                    string rawCode = index > -1 ? v.Substring(0, index) : v;

                    // inflection codes are identifiers, not display text: lower-case
                    // them culture-invariantly so the lookup does not depend on the
                    // current culture (e.g. Turkish casing of 'I')
                    string code = rawCode.ToLowerInvariant().Trim();

                    Inflection? infl = Inflection.REGULAR.getInflCode(code);

                    if (infl != null)
                    {
                        wordVariants.Add((Inflection)infl);
                        wordElement.addInflectionalVariant((Inflection)infl);
                    }
                }

                // REGULAR is the default when it is listed or when no code was
                // recognised; otherwise the first recognised variant is used
                Inflection defaultVariant = wordVariants.Contains(Inflection.REGULAR) || wordVariants.Count == 0
                    ? Inflection.REGULAR
                    : wordVariants[0];
                wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
                wordElement.setDefaultInflectionalVariant(defaultVariant);
            }

            // LexicalFeature.NON_COUNT is not set here: lots of words carry both
            // "reg" and "uncount" variants, indicating they can be used either way,
            // so such words are regarded as normal nouns.
            wordElement.setFeature(LexicalFeature.PROPER, proper);
            // ignore (for now) other info in record
        }
コード例 #4
0
        /**
         * Copies the spelling variants of an NIH record onto the simplenlg
         * WordElement.
         *
         * When the record supplies a non-empty list of spelling variants, that list
         * is stored under LexicalFeature.SPELL_VARS. The word's base form is always
         * stored as the default spelling under LexicalFeature.DEFAULT_SPELL.
         *
         * @param wordElement the word to annotate
         * @param record the NIH record supplying the spelling variants
         */
        private void addSpellingVariants(WordElement wordElement, LexRecord record)
        {
            List<string> spellings = record.GetSpellingVars();
            bool nothingToStore = spellings == null || spellings.Count == 0;

            if (!nothingToStore)
            {
                wordElement.setFeature(LexicalFeature.SPELL_VARS, spellings);
            }

            // the base form doubles as the default spelling variant
            wordElement.setFeature(LexicalFeature.DEFAULT_SPELL, wordElement.BaseForm);
        }
コード例 #5
0
        /**
         * Copies verb information from an NIH VerbEntry record onto a simplenlg
         * WordElement: whether the verb is intransitive, transitive and/or
         * ditransitive, plus its inflectional variants and default inflectional
         * variant.
         *
         * A null verbEntry is flagged as transitive only, and no inflectional
         * variants are added.
         *
         * Each variant string is reduced to an inflection code: the text before the
         * first '|' (or the whole string when there is no '|'), lower-cased and
         * trimmed. Codes that getInflCode does not recognise are skipped.
         *
         * @param wordElement the word to annotate
         * @param verbEntry the NIH verb entry to read; may be null
         */
        private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
        {
            if (verbEntry == null)
            {
                // should only happen for aux verbs, which have auxEntry instead of
                // verbEntry in NIH Lex: just flag as transitive and return
                wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
                wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
                wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
                return;
            }

            bool intransitiveVerb = verbEntry.GetIntran().Any();
            // complex-transitive frames count as transitive as well
            bool transitiveVerb   = verbEntry.GetTran().Any() || verbEntry.GetCplxtran().Any();
            bool ditransitiveVerb = verbEntry.GetDitran().Any();

            wordElement.setFeature(LexicalFeature.INTRANSITIVE, intransitiveVerb);
            wordElement.setFeature(LexicalFeature.TRANSITIVE, transitiveVerb);
            wordElement.setFeature(LexicalFeature.DITRANSITIVE, ditransitiveVerb);

            // add the inflectional variants
            List<string> variants = verbEntry.GetVariants();

            if (variants.Count > 0)
            {
                IList<Inflection> wordVariants = new List<Inflection>();

                foreach (string v in variants)
                {
                    int    index   = v.IndexOf("|", StringComparison.Ordinal);
                    string rawCode = index > -1 ? v.Substring(0, index) : v;

                    // inflection codes are identifiers, not display text: lower-case
                    // them culture-invariantly so the lookup does not depend on the
                    // current culture (e.g. Turkish casing of 'I')
                    string code = rawCode.ToLowerInvariant().Trim();

                    Inflection? infl = Inflection.REGULAR.getInflCode(code);

                    if (infl != null)
                    {
                        wordElement.addInflectionalVariant((Inflection)infl);
                        wordVariants.Add((Inflection)infl);
                    }
                }

                // REGULAR is the default when it is listed or when no code was
                // recognised; otherwise the first recognised variant is used.
                // Only the default variant is set; LexicalFeature.DEFAULT_INFL is
                // not written explicitly by this method.
                Inflection defaultVariant = wordVariants.Contains(Inflection.REGULAR) || wordVariants.Count == 0
                    ? Inflection.REGULAR
                    : wordVariants[0];
                wordElement.setDefaultInflectionalVariant(defaultVariant);
            }

            // ignore (for now) other info in record
        }
コード例 #6
0
        ///**
        // * convenience method to test that a list is not null and not empty
        // *
        // * @param list
        // * @return
        // */
        //private bool notEmpty<T1>(IList<T1> list)
        //{
        //    return list != null && list.Count > 0;
        //}

        /**
         * Extracts acronym information from an NIH record and adds it to a
         * simplenlg WordElement.
         *
         * The strings returned by record.GetAcronyms() are the full forms of which
         * this word is an acronym. For every entry containing a '|', the text after
         * the first '|' is used as an id, the corresponding word is looked up with
         * getWordByID, and the result is appended to the element list stored under
         * LexicalFeature.ACRONYM_OF. Entries without a '|' and ids for which the
         * lookup returns null are skipped. The reverse link from the full form back
         * to this word is not set here.
         *
         * @param wordElement the word that may be an acronym
         * @param record the NIH record supplying the acronym entries
         */
        private void addAcronymInfo(WordElement wordElement, LexRecord record)
        {
            // NB: the "acronyms" are actually the full forms of which the word is
            // an acronym
            List<string> acronyms = record.GetAcronyms();

            if (acronyms.Count == 0)
            {
                // nothing to add; leave the ACRONYM_OF feature untouched
                return;
            }

            // the list of full forms of which this word is an acronym
            List<NLGElement> acronymOf = wordElement.getFeatureAsElementList(LexicalFeature.ACRONYM_OF);

            foreach (string fullForm in acronyms)
            {
                int separator = fullForm.IndexOf("|", StringComparison.Ordinal);

                if (separator < 0)
                {
                    // no id attached to this full form, so no word can be looked up
                    continue;
                }

                // the id is everything after the first '|'
                string      acronymID  = fullForm.SubstringSpecial(separator + 1, fullForm.Length);
                WordElement fullFormWE = getWordByID(acronymID);

                // guard on the looked-up element (not on the source string, which is
                // already known to be non-null) so a failed lookup never puts a null
                // entry into the list
                if (fullFormWE != null)
                {
                    acronymOf.Add(fullFormWE);
                }
            }

            // set all the full forms for this acronym
            wordElement.setFeature(LexicalFeature.ACRONYM_OF, acronymOf);
        }
コード例 #7
0
        /**
         * Builds a simplenlg WordElement from an NIH lexical record.
         *
         * The word is created from the record's base form, simplenlg category and
         * id. Adjectives, adverbs, nouns and verbs then receive their
         * category-specific features, after which the record's inflected forms,
         * acronym information and spelling variants are added.
         *
         * @param record the lexical record to convert
         * @return the newly created word element
         */
        private WordElement makeWord(LexRecord record)
        {
            // basic data, then the word itself
            string          lemma       = record.GetBase();
            LexicalCategory lexCategory = record.GetSimpleNLGCategory(record);
            string          recordId    = record.GetEui();

            WordElement word = new WordElement(lemma, lexCategory, recordId);

            // category-specific information; closed class words get none
            var kind = lexCategory.GetLexicalCategory();

            if (kind == LexicalCategory.LexicalCategoryEnum.ADJECTIVE)
            {
                addAdjectiveInfo(word, record.GetCatEntry().GetAdjEntry());
            }
            else if (kind == LexicalCategory.LexicalCategoryEnum.ADVERB)
            {
                addAdverbInfo(word, record.GetCatEntry().GetAdvEntry());
            }
            else if (kind == LexicalCategory.LexicalCategoryEnum.NOUN)
            {
                addNounInfo(word, record.GetCatEntry().GetNounEntry());
            }
            else if (kind == LexicalCategory.LexicalCategoryEnum.VERB)
            {
                addVerbInfo(word, record.GetCatEntry().GetVerbEntry());
            }

            // read after the category-specific step, which may have set it
            Inflection? defaultVariant = word.getDefaultInflectionalVariant();

            // now add the inflected forms
            foreach (InflVar entry in record.GetInflVarsAndAgreements().GetInflValues())
            {
                string featureName = getSimplenlgInflection(entry.GetInflection());

                if (featureName == null)
                {
                    // no simplenlg inflection name for this entry: skip it
                    continue;
                }

                string      form    = entry.GetVar();
                Inflection? variant = Inflection.REGULAR.getInflCode(entry.GetType());

                // store every inflectional variant of a known type; regular ones
                // only when standard inflections are being kept
                if (variant != null &&
                    (Inflection.REGULAR != variant || keepStandardInflections))
                {
                    word.addInflectionalVariant((Inflection)variant, featureName, form);
                }

                // the form also becomes a feature of the word itself when the word
                // has no default variant, or when this entry belongs to the default
                // variant (with the same restriction on regular forms)
                bool setOnWord = defaultVariant == null ||
                                 (defaultVariant.Equals(variant) &&
                                  (!Inflection.REGULAR.Equals(variant) || keepStandardInflections));

                if (setOnWord)
                {
                    word.setFeature(featureName, form);
                }
            }

            // acronym info first, spelling variants second
            addAcronymInfo(word, record);
            addSpellingVariants(word, record);

            return word;
        }
コード例 #8
0
        /**
         * Creates a simplenlg WordElement from a Word node of a lexicon XML file.
         *
         * Every child element of the node is treated as one feature:
         *  - the base, category and id elements fill the word's BaseForm, Category
         *    and Id;
         *  - an element with empty text is an inflection code when getInflCode
         *    recognises its name, otherwise a boolean feature set to true;
         *  - any other element becomes a string-valued feature.
         * When no inflection code is present the word is treated as regular. The
         * default inflection is REGULAR when available, otherwise the first code
         * listed in the node.
         *
         * @param wordNode the XML node to convert
         * @return the new word, or null when the node is not a Word node
         */
        private WordElement convertNodeToWord(XmlNode wordNode)
        {
            // if this isn't a Word node, ignore it; element names are identifiers,
            // so compare them ordinally like the feature names below
            if (!wordNode.Name.Equals(XML_WORD, StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            // create word
            WordElement        word        = new WordElement();
            IList<Inflection>  inflections = new List<Inflection>();

            // now copy features
            foreach (XmlNode featureNode in wordNode.ChildNodes)
            {
                if (featureNode.NodeType != XmlNodeType.Element)
                {
                    // text, comments, whitespace etc. carry no feature
                    continue;
                }

                string feature = featureNode.Name.Trim();
                string value   = featureNode.InnerText?.Trim();

                if (feature.Equals(XML_BASE, StringComparison.OrdinalIgnoreCase))
                {
                    word.BaseForm = value;
                }
                else if (feature.Equals(XML_CATEGORY, StringComparison.OrdinalIgnoreCase))
                {
                    // case-insensitive parse avoids culture-sensitive upper-casing of
                    // the category name; an unrecognised category still falls back to
                    // the enum's default value, but is now reported
                    if (!Enum.TryParse(value, true, out LexicalCategory.LexicalCategoryEnum lexcat))
                    {
                        Console.Error.WriteLine("Error in XML lexicon: unrecognised category '" + value + "'");
                    }

                    word.Category = new LexicalCategory(lexcat);
                }
                else if (feature.Equals(XML_ID, StringComparison.OrdinalIgnoreCase))
                {
                    word.Id = value;
                }
                else if (string.IsNullOrEmpty(value))
                {
                    // if this is an infl code, add it to inflections
                    Inflection? infl = Inflection.REGULAR.getInflCode(feature);

                    if (infl != null)
                    {
                        inflections.Add((Inflection)infl);
                    }
                    else
                    {
                        // otherwise assume it's a boolean feature
                        word.setFeature(feature, true);
                    }
                }
                else
                {
                    word.setFeature(feature, value);
                }
            }

            // if no infl specified, assume regular
            if (inflections.Count == 0)
            {
                inflections.Add(Inflection.REGULAR);
            }

            // default inflection code is "reg" if we have it, else the first of the
            // infl codes listed in the node
            Inflection defaultInfl = inflections.Contains(Inflection.REGULAR) ? Inflection.REGULAR : inflections[0];

            word.setFeature(LexicalFeature.DEFAULT_INFL, defaultInfl);
            word.setDefaultInflectionalVariant(defaultInfl);

            foreach (Inflection infl in inflections)
            {
                word.addInflectionalVariant(infl);
            }

            // done, return word
            return word;
        }