Esempio n. 1
0
        /**
         * extract adj information from NIH AdjEntry record, and add to a simplenlg
         * WordElement For now just extract position info
         *
         * @param wordElement
         * @param adjEntry
         */

        private void addAdjectiveInfo(WordElement wordElement, AdjEntry adjEntry)
        {
            // Each flag records whether at least one position code of the
            // entry starts with the corresponding prefix.
            bool isQualitative = false;
            bool isColour      = false;
            bool isClassifying = false;
            bool isPredicative = false;

            foreach (string code in adjEntry.GetPosition())
            {
                if (code.startsWith("attrib(1)"))
                {
                    isQualitative = true;
                    continue;
                }
                if (code.startsWith("attrib(2)"))
                {
                    isColour = true;
                    continue;
                }
                if (code.startsWith("attrib(3)"))
                {
                    isClassifying = true;
                    continue;
                }
                if (code.startsWith("pred"))
                {
                    isPredicative = true;
                }
                // any other position code is ignored
            }

            // All four features are always written, true or false; the rest
            // of the record is not used here.
            wordElement.setFeature(LexicalFeature.QUALITATIVE, isQualitative);
            wordElement.setFeature(LexicalFeature.COLOUR, isColour);
            wordElement.setFeature(LexicalFeature.CLASSIFYING, isClassifying);
            wordElement.setFeature(LexicalFeature.PREDICATIVE, isPredicative);
        }
Esempio n. 2
0
        /**
         * Creates a new element representing a word. If the word passed is already
         * an <code>NLGElement</code> then that is returned unchanged. If a
         * <code>string</code> is passed as the word then the factory will look up
         * the <code>Lexicon</code> if one exists and use the details found to
         * create a new <code>WordElement</code>.
         *
         * @param word
         *            the base word for the new element. This can be a
         *            <code>NLGElement</code>, which is returned unchanged, or a
         *            <code>string</code>, which is used to construct a new
         *            <code>WordElement</code>.
         * @param category
         *            the <code>LexicalCategory</code> for the word.
         *
         * @return an <code>NLGElement</code> representing the word.
         */

        public WordElement createWord(object word, ILexicalCategory category)
        {
            // An existing WordElement is handed back untouched.
            var existing = word as WordElement;
            if (existing != null)
            {
                return(existing);
            }

            // Only a string can be looked up, and only when a lexicon is
            // attached; every other input yields null.
            var text = word as string;
            if (text == null || this.lexicon == null)
            {
                return(null);
            }

            // The lexicon supplies a WordElement (not an InflectedWordElement).
            WordElement found = lexicon.lookupWord(text, category);

            // Words listed in PRONOUNS additionally get their pronoun features.
            if (PRONOUNS.Contains(text))
            {
                setPronounFeatures(found, text);
            }

            return(found);
        }
Esempio n. 3
0
        /**
         * add word to internal indices
         *
         * @param word
         */

        private void IndexWord(WordElement word)
        {
            // first index by base form (guarded, although every word is
            // expected to have one)
            var basef = word.getBaseForm();

            if (basef != null)
            {
                updateIndex(word, basef, indexByBase);
            }

            // now index by ID, which should be unique (if present)
            var id = word.getId();

            if (id != null)
            {
                if (indexByID.ContainsKey(id))
                {
                    Console.WriteLine($"Lexicon error: ID {id} occurs more than once");
                }

                // Use indexer assignment rather than Add: Add throws an
                // ArgumentException for a key that is already present, which
                // would turn the duplicate that was just reported into a
                // crash. With the indexer the most recent word wins.
                indexByID[id] = word;
            }

            // now index by variant; a null variant cannot be used as a
            // dictionary key, so it is skipped
            foreach (var variant in getVariants(word))
            {
                if (variant != null)
                {
                    updateIndex(word, variant, indexByVariant);
                }
            }
        }
Esempio n. 4
0
        /**
         * extract adv information from NIH AdvEntry record, and add to a simplenlg
         * WordElement For now just extract modifier type
         *
         * @param wordElement
         * @param advEntry
         */

        private void addAdverbInfo(WordElement wordElement, AdvEntry advEntry)
        {
            // Each flag records whether at least one modification code of the
            // entry starts with the corresponding prefix.
            bool modifiesVerb     = false;
            bool modifiesSentence = false;
            bool isIntensifier    = false;

            foreach (string code in advEntry.GetModification())
            {
                if (code.startsWith("verb_modifier"))
                {
                    modifiesVerb = true;
                    continue;
                }
                if (code.startsWith("sentence_modifier"))
                {
                    modifiesSentence = true;
                    continue;
                }
                if (code.startsWith("intensifier"))
                {
                    isIntensifier = true;
                }
                // any other modification type is ignored
            }

            // All three features are always written, true or false; the rest
            // of the record is not used here.
            wordElement.setFeature(LexicalFeature.VERB_MODIFIER, modifiesVerb);
            wordElement.setFeature(LexicalFeature.SENTENCE_MODIFIER, modifiesSentence);
            wordElement.setFeature(LexicalFeature.INTENSIFIER, isIntensifier);
        }
Esempio n. 5
0
        /**
         * quick-and-dirty routine for getting morph variants should be replaced by
         * something better!
         *
         * @param word
         * @return
         */

        public HashSet <string> getVariants(WordElement word)
        {
            // The base form is always part of the result.
            var forms = new HashSet <string>();

            forms.Add(word.getBaseForm());

            // Without a lexical category there is nothing more to derive.
            var cat = word.getCategory();

            if (!(cat is ILexicalCategory))
            {
                return(forms);
            }

            // Add the forms that belong to the word class; each one comes
            // from getVariant with the suffix given here.
            var kind = cat.enumType;

            if (kind == (int)LexicalCategoryEnum.NOUN)
            {
                forms.add(getVariant(word, LexicalFeature.PLURAL, "s"));
            }
            else if (kind == (int)LexicalCategoryEnum.ADJECTIVE)
            {
                forms.add(getVariant(word, LexicalFeature.COMPARATIVE, "er"));
                forms.add(getVariant(word, LexicalFeature.SUPERLATIVE, "est"));
            }
            else if (kind == (int)LexicalCategoryEnum.VERB)
            {
                forms.add(getVariant(word, LexicalFeature.PRESENT3S, "s"));
                forms.add(getVariant(word, LexicalFeature.PAST, "ed"));
                forms.add(getVariant(word, LexicalFeature.PAST_PARTICIPLE, "ed"));
                forms.add(getVariant(word, LexicalFeature.PRESENT_PARTICIPLE, "ing"));
            }
            // other categories contribute only the base form

            return(forms);
        }
Esempio n. 6
0
        public WordElement CopyWithoutSpec()
        {
            // Take the object produced by Copy() and clear its Base before
            // handing it back.
            WordElement stripped = Copy();

            stripped.Base = null;

            return stripped;
        }
Esempio n. 7
0
        /**
         * convenience method to update an index
         *
         * @param word
         * @param base
         * @param index
         */

        private void updateIndex(WordElement word, string basef,
                                 Dictionary <string, List <WordElement> > index)
        {
            // Fetch the list stored under the key, creating and registering
            // an empty one the first time the key is seen.
            List <WordElement> bucket;

            if (!index.TryGetValue(basef, out bucket))
            {
                bucket = new List <WordElement>();
                index.Add(basef, bucket);
            }

            // Append the word to that key's list.
            bucket.Add(word);
        }
Esempio n. 8
0
        /**
         * creates a duplicate WordElement from an existing WordElement
         *
         * @param currentWord
         *            - An existing WordElement
         */

        public WordElement(WordElement currentWord)
        {
            // Base form and category come straight from the source word.
            this.baseForm = currentWord.getBaseForm();
            this.setCategory(currentWord.getCategory());

            this.id = currentWord.getId();

            // NOTE(review): inflVars receives whatever object the source's
            // getter returns; no copy is made here, so confirm whether the
            // two words are meant to share it.
            this.inflVars    = currentWord.getInflectionalVariants();
            this.defaultInfl = (Inflection)currentWord.getDefaultInflectionalVariant();

            // Finally copy every feature of the source word.
            this.setFeatures(currentWord);
        }
Esempio n. 9
0
        // note that addFrontModifier, addPostModifier, addPreModifier are inherited from PhraseElement
        // likewise getFrontModifiers, getPostModifiers, getPreModifiers


        /** Add a modifier to a verb phrase
         * Use heuristics to decide where it goes
         * @param modifier
         */

        public override void addModifier(object modifier)
        {
            // Placement heuristic:
            //  - a word whose category is ADVERB becomes a pre-modifier;
            //  - a string that is a single word (non-empty, no space) is
            //    first turned into a word through the factory's createWord,
            //    then treated like any other element;
            //  - everything else becomes a post-modifier.

            if (modifier == null)
            {
                return;
            }

            // get modifier as NLGElement if possible
            INLGElement modifierElement = null;

            if (modifier is INLGElement)
            {
                modifierElement = (INLGElement)modifier;
            }
            else if (modifier is string)
            {
                var modifierString = (string)modifier;
                if (modifierString.length() > 0 && !modifierString.contains(" "))
                {
                    modifierElement = getFactory().createWord(modifier, new LexicalCategory_ANY());
                }
            }

            // No element: either a multi-word/empty string, or createWord
            // produced nothing. The modifier is added as a string.
            // NOTE(review): a modifier that is neither an INLGElement nor a
            // string fails this cast, exactly as before.
            if (modifierElement == null)
            {
                addPostModifier((string)modifier);
                return;
            }

            // extract WordElement if modifier is a single word
            WordElement modifierWord = null;

            if (modifierElement is WordElement)
            {
                modifierWord = (WordElement)modifierElement;
            }
            else if (modifierElement is InflectedWordElement)
            {
                modifierWord = ((InflectedWordElement)modifierElement).getBaseWord();
            }

            if (modifierWord != null)
            {
                // The category is checked for null before reading enumType:
                // a word without a category is simply not an adverb and
                // falls through to the default case instead of throwing.
                var category = modifierWord.getCategory();

                if (category != null && category.enumType == (int)LexicalCategoryEnum.ADVERB)
                {
                    addPreModifier(modifierWord);
                    return;
                }
            }

            // default case
            addPostModifier(modifierElement);
        }
Esempio n. 10
0
        /**
         * Sets Features from another existing WordElement into this WordElement.
         *
         * @param currentWord
         *              the WordElement to copy features from
         */

        public void setFeatures(WordElement currentWord)
        {
            // Nothing to copy from a missing word or a word without features.
            if (null == currentWord || null == currentWord.getAllFeatures())
            {
                return;
            }

            // Copy each named feature's value onto this word.
            foreach (var name in currentWord.getAllFeatureNames())
            {
                setFeature(name, currentWord.getFeature(name));
            }
        }
Esempio n. 11
0
        /**
         * Constructs a new inflected word from a WordElement
         *
         * @param word
         *            underlying wordelement
         */

        public InflectedWordElement(WordElement word)
        {
            // Keep a reference to the underlying word.
            setFeature(InternalFeature.BASE_WORD.ToString(), word);

            // The base form is the word's default spelling variant rather
            // than its plain base form.
            setFeature(LexicalFeature.BASE_FORM, word.getDefaultSpellingVariant());

            // The inflected word shares the category of the underlying word.
            setCategory(word.getCategory());
        }
Esempio n. 12
0
        /**
         * quick-and-dirty routine for computing morph forms Should be replaced by
         * something better!
         *
         * @param word
         * @param feature
         * @param suffix
         * @return
         */

        private string getVariant(WordElement word, string feature, string suffix)
        {
            // A form stored on the word takes precedence; otherwise one is
            // built from the base form and the suffix via getForm.
            return word.hasFeature(feature)
                ? word.getFeatureAsString(feature)
                : getForm(word.getBaseForm(), suffix);
        }
Esempio n. 13
0
        /**
         * return the base form of a word
         *
         * @param element
         * @param baseWord
         * @return
         */

        private static string getBaseForm(InflectedWordElement element, WordElement baseWord)
        {
            // The right behaviour is not settled. For now:
            //  - verbs prefer the base word (so "is" maps to "be");
            //  - other words prefer the element's own base form (so
            //    "children" is not mapped to "child").
            // AG: the base word contributes its default spelling variant,
            // which is needed to preserve spelling changes in the VP.

            bool isVerb = element.getCategory().enumType == (int)LexicalCategoryEnum.VERB;

            if (isVerb)
            {
                // Fall back to the element when the base word cannot supply
                // a default spelling.
                if (baseWord == null || baseWord.getDefaultSpellingVariant() == null)
                {
                    return(element.getBaseForm());
                }

                return(baseWord.getDefaultSpellingVariant());
            }

            string ownForm = element.getBaseForm();

            if (ownForm != null)
            {
                return(ownForm);
            }

            if (baseWord == null)
            {
                return(null);
            }

            return(baseWord.getDefaultSpellingVariant());
        }
Esempio n. 14
0
        /**
         * extract information about acronyms from NIH record, and add to a
         * simplenlg WordElement.
         *
         * <P>
         * Acronyms are represented as lists of word elements. Any acronym will have
         * a list of full form word elements, retrievable via
         * {@link LexicalFeature#ACRONYM_OF}
         *
         * @param wordElement
         * @param record
         */

        private void addAcronymInfo(WordElement wordElement, LexRecord record)
        {
            // NB: the "acronyms" of the record are actually the full forms of
            // which the word is an acronym. Each usable entry carries the ID
            // of the full-form word after a '|' separator.
            List <string> acronyms = record.GetAcronyms();

            // Nothing to do (and ACRONYM_OF is left untouched) when the
            // record lists no acronyms.
            if (acronyms == null || acronyms.Count == 0)
            {
                return;
            }

            // the list of full forms of which this word is an acronym
            List <INLGElement> acronymOf = wordElement
                                           .getFeatureAsElementList(LexicalFeature.ACRONYM_OF);

            foreach (var fullForm in acronyms)
            {
                if (fullForm == null)
                {
                    continue;
                }

                // entries without an ID part are skipped
                int separator = fullForm.IndexOf('|');

                if (separator < 0)
                {
                    continue;
                }

                // everything after the first '|' is the ID of the full form
                string      acronymID  = fullForm.Substring(separator + 1);
                WordElement fullFormWE = this.getWordByID(acronymID);

                // Only add full forms that were actually found. The previous
                // check tested the entry string, which had already been
                // dereferenced, so an unresolved ID put null into the list.
                if (fullFormWE != null)
                {
                    acronymOf.Add(fullFormWE);
                }
            }

            // set all the full forms for this acronym
            wordElement.setFeature(LexicalFeature.ACRONYM_OF, acronymOf);
        }
Esempio n. 15
0
        /**
         * Extract info about the spelling variants of a word from an NIH record,
         * and add to the simplenlg WordElement.
         *
         * <P>
         * Spelling variants are represented as lists of strings, retrievable via
         * {@link LexicalFeature#SPELL_VARS}
         *
         * @param wordElement
         * @param record
         */

        private void addSpellingVariants(WordElement wordElement, LexRecord record)
        {
            Vector <string> spellings = record.GetSpellingVars();

            bool hasSpellings = spellings != null && !spellings.isEmpty();

            if (hasSpellings)
            {
                // The feature stores its own list holding the record's
                // spelling variants.
                var copied = new List <string>();
                copied.addAll(spellings);
                wordElement.setFeature(LexicalFeature.SPELL_VARS, copied);
            }

            // The default spelling is always the base form, whether or not
            // the record lists any variants.
            wordElement.setFeature(LexicalFeature.DEFAULT_SPELL, wordElement.getBaseForm());
        }
Esempio n. 16
0
        /**
         * Retrieves the correct representation of the word from the element. This
         * method will find the <code>WordElement</code>, if it exists, for the
         * given phrase or inflected word.
         *
         * @param element
         *            the <code>NLGElement</code> from which the head is required.
         * @return the <code>WordElement</code>
         */

        private static WordElement getHeadWordElement(INLGElement element)
        {
            // A word is its own head.
            if (element is WordElement)
            {
                return (WordElement)element;
            }

            // An inflected word carries its underlying word as a feature.
            if (element is InflectedWordElement)
            {
                return (WordElement)element.getFeature(InternalFeature.BASE_WORD.ToString());
            }

            // For a phrase, resolve the phrase's head recursively.
            if (element is PhraseElement)
            {
                return getHeadWordElement(((PhraseElement)element).getHead());
            }

            // Anything else (including null) has no head word.
            return null;
        }
Esempio n. 17
0
        /**
         * This method performs the morphology for adverbs.
         *
         * @param element
         *            the <code>InflectedWordElement</code>.
         * @param baseWord
         *            the <code>WordElement</code> as created from the lexicon
         *            entry.
         * @return a <code>StringElement</code> representing the word after
         *         inflection.
         */

        public static INLGElement doAdverbMorphology(InflectedWordElement element, WordElement baseWord)
        {
            // Base form as chosen by getBaseForm for this element/base word.
            var stem = getBaseForm(element, baseWord);

            string surface;

            if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE.ToString()))
            {
                // Preference order: form set on the element, form set on the
                // base word, regularly built form.
                surface = element.getFeatureAsString(LexicalFeature.COMPARATIVE)
                          ?? (baseWord != null
                                  ? baseWord.getFeatureAsString(LexicalFeature.COMPARATIVE)
                                  : null);

                if (surface == null)
                {
                    surface = buildRegularComparative(stem);
                }
            }
            else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE.ToString()))
            {
                // Same preference order for the superlative.
                surface = element.getFeatureAsString(LexicalFeature.SUPERLATIVE)
                          ?? (baseWord != null
                                  ? baseWord.getFeatureAsString(LexicalFeature.SUPERLATIVE)
                                  : null);

                if (surface == null)
                {
                    surface = buildRegularSuperlative(stem);
                }
            }
            else
            {
                // Neither comparative nor superlative: the base form is used.
                surface = stem;
            }

            // Wrap the text and carry over the element's discourse function.
            var result           = new StringElement(surface);
            var discourseFeature = InternalFeature.DISCOURSE_FUNCTION.ToString();

            result.setFeature(discourseFeature, element.getFeature(discourseFeature));

            return result;
        }
Esempio n. 18
0
        /**
         * A helper method to look up the lexicon for the given word.
         *
         * @param category
         *            the <code>LexicalCategory</code> of the word.
         * @param word
         *            the base form of the word.
         * @param wordElement
         *            the created element representing the word.
         */

        private void doLexiconLookUp(ILexicalCategory category, string word, INLGElement wordElement)
        {
            // Special case: a noun was requested but the lexicon holds the
            // word as a pronoun.
            bool nounKnownAsPronoun = category.lexType == LexicalCategoryEnum.NOUN &&
                                      this.lexicon.hasWord(word, new LexicalCategory_PRONOUN());

            if (!nounKnownAsPronoun)
            {
                // Ordinary case: attach whatever the lexicon returns for the
                // requested category.
                WordElement plain = this.lexicon.lookupWord(word, category);

                wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), plain);
                return;
            }

            WordElement pronoun = this.lexicon.lookupWord(word, new LexicalCategory_PRONOUN());

            if (pronoun == null)
            {
                return;
            }

            // Attach the pronoun entry and re-categorise the element.
            wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), pronoun);
            wordElement.setCategory(new LexicalCategory_PRONOUN());

            // Words outside the PRONOUNS list are flagged NON_MORPH.
            if (!PRONOUNS.Contains(word))
            {
                wordElement.setFeature(InternalFeature.NON_MORPH.ToString(), true);
            }
        }
Esempio n. 19
0
        /**
         * make a WordElement from a lexical record. Currently just specifies basic
         * params and inflections Should do more in the future!
         *
         * @param record
         * @return
         */

        private WordElement makeWord(LexRecord record)
        {
            // Basic data: base form, category and ID of the record.
            string           lemma    = record.GetBase();
            ILexicalCategory category = getSimplenlgCategory(record);
            string           eui      = record.GetEui();

            var wordElement = new WordElement(lemma, (LexicalCategory)category, eui);

            // Category-specific information; closed-class words get none.
            var lexType = category.lexType;

            if (lexType == LexicalCategoryEnum.ADJECTIVE)
            {
                addAdjectiveInfo(wordElement, record.GetCatEntry().GetAdjEntry());
            }
            else if (lexType == LexicalCategoryEnum.ADVERB)
            {
                addAdverbInfo(wordElement, record.GetCatEntry().GetAdvEntry());
            }
            else if (lexType == LexicalCategoryEnum.NOUN)
            {
                addNounInfo(wordElement, record.GetCatEntry().GetNounEntry());
            }
            else if (lexType == LexicalCategoryEnum.VERB)
            {
                addVerbInfo(wordElement, record.GetCatEntry().GetVerbEntry());
            }

            // Read after the category-specific step above has run.
            var defaultInfl = (Inflection)wordElement.getDefaultInflectionalVariant();

            // Inflected forms.
            foreach (InflVar inflection in record.GetInflVarsAndAgreements().GetInflValues())
            {
                string featureName = getSimplenlgInflection(inflection.GetInflection());

                // Inflections that have no simplenlg counterpart are skipped.
                if (featureName == null)
                {
                    continue;
                }

                string     form     = inflection.GetVar();
                Inflection inflType = Inflection.getInflCode(inflection.GetType());

                // Store every inflectional variant, except that regular ones
                // are stored only when keepStandardInflections is set.
                if (inflType != null &&
                    (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections))
                {
                    wordElement.addInflectionalVariant(inflType, featureName, form);
                }

                // The form also becomes a feature of the word itself when
                // there is no default inflection, or when this variant is
                // the default (subject to the same rule for regular forms).
                if (defaultInfl == null ||
                    (defaultInfl.Equals(inflType) &&
                     (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections)))
                {
                    wordElement.setFeature(featureName, form);
                }
            }

            // Acronym information, then spelling variants.
            addAcronymInfo(wordElement, record);
            addSpellingVariants(wordElement, record);

            return wordElement;
        }
Esempio n. 20
0
        /**
         * extract noun information from NIH NounEntry record, and add to a
         * simplenlg WordElement For now just extract whether count/non-count and
         * whether proper or not
         *
         * @param wordElement
         * @param nounEntry
         */

        private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
        {
            bool isProperNoun = nounEntry.IsProper();

            // Inflectional variants of the noun.
            List <string> rawVariants = nounEntry.GetVariants();

            if (!rawVariants.isEmpty())
            {
                var recognised = new List <Inflection>();

                foreach (var raw in rawVariants)
                {
                    // The inflection code is the part before '|', or the
                    // whole entry when there is no '|'; it is lower-cased
                    // and trimmed before being resolved.
                    int    bar  = raw.indexOf("|");
                    string code = (bar > -1 ? raw.substring(0, bar) : raw).toLowerCase().trim();

                    Inflection infl = Inflection.getInflCode(code);

                    if (infl == null)
                    {
                        continue;
                    }

                    recognised.add(infl);
                    wordElement.addInflectionalVariant(infl);
                }

                // REGULAR is the default when it is among the variants or
                // when none was recognised; otherwise the first one is used.
                Inflection defaultVariant;

                if (recognised.Contains(Inflection.REGULAR) || recognised.isEmpty())
                {
                    defaultVariant = Inflection.REGULAR;
                }
                else
                {
                    defaultVariant = recognised.get(0);
                }

                wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
                wordElement.setDefaultInflectionalVariant(defaultVariant);
            }

            // Count/non-count information is not extracted: many words carry
            // both "reg" and "uncount", so no NON_COUNT feature is set here.
            wordElement.setFeature(LexicalFeature.PROPER, isProperNoun);
            // other info in the record is ignored for now
        }
Esempio n. 21
0
        /**
         * extract verb information from NIH VerbEntry record, and add to a
         * simplenlg WordElement. For now just extract transitive, intransitive,
         * and/or ditransitive
         *
         * @param wordElement
         * @param verbEntry
         */

        private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
        {
            if (verbEntry == null)
            {
                // Expected only for auxiliary verbs, which have an auxEntry
                // instead of a verbEntry in the NIH lexicon: flag the word
                // as transitive only and stop.
                wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
                wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
                wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
                return;
            }

            // Transitivity flags; complex-transitive counts as transitive.
            bool isIntransitive = notEmpty(verbEntry.GetIntran());
            bool isTransitive   = notEmpty(verbEntry.GetTran()) || notEmpty(verbEntry.GetCplxtran());
            bool isDitransitive = notEmpty(verbEntry.GetDitran());

            wordElement.setFeature(LexicalFeature.INTRANSITIVE, isIntransitive);
            wordElement.setFeature(LexicalFeature.TRANSITIVE, isTransitive);
            wordElement.setFeature(LexicalFeature.DITRANSITIVE, isDitransitive);

            // Inflectional variants of the verb.
            List <string> rawVariants = verbEntry.GetVariants();

            if (rawVariants.isEmpty())
            {
                // other info in the record is ignored for now
                return;
            }

            var recognised = new List <Inflection>();

            foreach (var raw in rawVariants)
            {
                // The inflection code is the part before '|', or the whole
                // entry when there is no '|'; it is lower-cased and trimmed
                // before being resolved.
                int    bar  = raw.indexOf("|");
                string code = bar > -1
                    ? raw.substring(0, bar).toLowerCase().trim()
                    : raw.toLowerCase().trim();

                Inflection infl = Inflection.getInflCode(code);

                if (infl == null)
                {
                    continue;
                }

                wordElement.addInflectionalVariant(infl);
                recognised.add(infl);
            }

            // REGULAR is the default when it is among the variants or when
            // none was recognised; otherwise the first one is used. Only the
            // default inflectional variant is set; no feature is written.
            Inflection defaultVariant;

            if (recognised.contains(Inflection.REGULAR) || recognised.isEmpty())
            {
                defaultVariant = Inflection.REGULAR;
            }
            else
            {
                defaultVariant = recognised.get(0);
            }

            wordElement.setDefaultInflectionalVariant(defaultVariant);
        }
Esempio n. 22
0
        /**
         * create a simplenlg WordElement from a Word node in a lexicon XML file
         *
         * @param wordNode the XML node to convert
         * @return the populated WordElement, or null if the node is not a Word node
         * @throws XPathUtilException
         */

        private WordElement convertNodeToWord(XmlNode wordNode)
        {
            // Builds a WordElement from one Word node of the XML lexicon. Each child
            // element supplies the base form, the category, the id, an inflection
            // code / boolean flag (element without text) or a valued feature.

            // nodes that are not Word nodes are skipped
            var isWordNode = wordNode.LocalName.equalsIgnoreCase(XML_WORD);
            if (!isWordNode)
            {
                return null;
            }

            var word        = new WordElement();
            var inflections = new List<Inflection>();

            // copy every child element onto the word
            foreach (XmlNode featureNode in wordNode.SelectNodes("*"))
            {
                if (featureNode.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                var feature = featureNode.LocalName.trim();
                var rawText = featureNode.InnerText;
                var value   = rawText == null ? null : rawText.trim();

                if (feature == null)
                {
                    // stop processing this node's remaining children
                    Debug.WriteLine("Error in XML lexicon node for " + word);
                    break;
                }

                if (feature.equalsIgnoreCase(XML_BASE))
                {
                    word.setBaseForm(value);
                }
                else if (feature.equalsIgnoreCase(XML_CATEGORY))
                {
                    word.setCategory(LexicalCategoryExtensions.valueOf(value.toUpperCase()));
                }
                else if (feature.equalsIgnoreCase(XML_ID))
                {
                    word.setId(value);
                }
                else if (string.IsNullOrEmpty(value))
                {
                    // an element without text is either an inflection code or a
                    // boolean feature that is switched on
                    Tuple<bool, Inflection> lookup = InflectionExtensions.getInflCode(feature);

                    if (lookup.Item1)
                    {
                        inflections.Add(lookup.Item2);
                    }
                    else
                    {
                        word.setFeature(feature, true);
                    }
                }
                else
                {
                    word.setFeature(feature, value);
                }
            }

            // with no inflection code at all, the word is taken to be regular
            if (inflections.Count == 0)
            {
                inflections.Add(Inflection.REGULAR);
            }

            // "reg" wins as the default when present; otherwise the first code
            // that was listed is used
            Inflection defaultInfl;
            if (inflections.Contains(Inflection.REGULAR))
            {
                defaultInfl = Inflection.REGULAR;
            }
            else
            {
                defaultInfl = inflections[0];
            }

            word.setFeature(LexicalFeature.DEFAULT_INFL, defaultInfl);
            word.setDefaultInflectionalVariant(defaultInfl);

            foreach (var variant in inflections)
            {
                word.addInflectionalVariant(variant);
            }

            return word;
        }
Esempio n. 23
0
        // note that addFrontModifier, addPostModifier, addPreModifier are inherited
        // from PhraseElement
        // likewise getFrontModifiers, getPostModifiers, getPreModifiers

        /**
         * Add a modifier to a clause. Heuristics decide where it goes.
         *
         * @param modifier
         */


        public override void addModifier(object modifier)
        {
            // Placement heuristics:
            //  - an adverb phrase becomes a pre-modifier;
            //  - a single adverb word becomes a front modifier when it carries the
            //    SENTENCE_MODIFIER feature, and a pre-modifier otherwise;
            //  - a non-empty string without spaces is first turned into a word by
            //    the factory, after which the rules above apply;
            //  - everything else becomes a post-modifier.
            if (modifier == null)
            {
                return;
            }

            // view the modifier as an NLG element whenever that is possible
            INLGElement asElement = modifier as INLGElement;

            if (asElement == null)
            {
                var text          = modifier as string;
                var isSingleToken = text != null && text.length() > 0 && !text.contains(" ");

                if (isSingleToken)
                {
                    asElement = getFactory().createWord(modifier,
                                                        new LexicalCategory_ANY());
                }
            }

            // no element available: the modifier is treated as a complex string
            if (asElement == null)
            {
                addPostModifier((string)modifier);
                return;
            }

            // adverb phrases go in front of the verb (the head is not inspected
            // for the sentence-modifier flag)
            if (asElement is AdvPhraseSpec)
            {
                addPreModifier(asElement);
                return;
            }

            // reduce a single-word modifier to its WordElement
            WordElement word = asElement as WordElement;

            if (word == null)
            {
                var inflected = asElement as InflectedWordElement;
                if (inflected != null)
                {
                    word = inflected.getBaseWord();
                }
            }

            var isAdverb = word != null &&
                           word.getCategory().enumType == (int)LexicalCategoryEnum.ADVERB;

            if (!isAdverb)
            {
                // default case
                addPostModifier(asElement);
                return;
            }

            // adverb rules
            if (word.getFeatureAsBoolean(LexicalFeature.SENTENCE_MODIFIER))
            {
                addFrontModifier(word);
            }
            else
            {
                addPreModifier(word);
            }
        }
Esempio n. 24
0
        /**
         * This method performs the morphology for adjectives.
         *
         * @param element
         *            the <code>InflectedWordElement</code>.
         * @param baseWord
         *            the <code>WordElement</code> as created from the lexicon
         *            entry.
         * @return a <code>StringElement</code> representing the word after
         *         inflection.
         */

        public static INLGElement doAdjectiveMorphology(InflectedWordElement element, WordElement baseWord)
        {
            // For comparatives and superlatives the form is looked up on the element
            // first, then on the base word, and is built by rule only when neither
            // supplies one. Any other adjective is realised as its base form.
            var inflectionPattern = element.getFeature(LexicalFeature.DEFAULT_INFL);

            // base form from baseWord if it exists, otherwise from element
            var stem = getBaseForm(element, baseWord);

            string surface;

            if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE.ToString()))
            {
                surface = element.getFeatureAsString(LexicalFeature.COMPARATIVE)
                          ?? (baseWord != null
                              ? baseWord.getFeatureAsString(LexicalFeature.COMPARATIVE)
                              : null);

                if (surface == null)
                {
                    surface = Inflection.REGULAR_DOUBLE.Equals(inflectionPattern)
                        ? buildDoubleCompAdjective(stem)
                        : buildRegularComparative(stem);
                }
            }
            else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE.ToString()))
            {
                surface = element.getFeatureAsString(LexicalFeature.SUPERLATIVE)
                          ?? (baseWord != null
                              ? baseWord.getFeatureAsString(LexicalFeature.SUPERLATIVE)
                              : null);

                if (surface == null)
                {
                    surface = Inflection.REGULAR_DOUBLE.Equals(inflectionPattern)
                        ? buildDoubleSuperAdjective(stem)
                        : buildRegularSuperlative(stem);
                }
            }
            else
            {
                surface = stem;
            }

            // the realised string keeps the discourse function of the source element
            var discourseKey = InternalFeature.DISCOURSE_FUNCTION.ToString();
            var result       = new StringElement(surface);

            result.setFeature(discourseKey, element.getFeature(discourseKey));
            return result;
        }
Esempio n. 25
0
        /**
         * Sets the base word for this element.
         *
         * @param word
         *            the <code>WordElement</code> representing the base word as
         *            read from the lexicon.
         */

        public void setBaseWord(WordElement word)
        {
            // the base word is stored as an ordinary feature under the BASE_WORD key
            var baseWordKey = InternalFeature.BASE_WORD.ToString();

            setFeature(baseWordKey, word);
        }
Esempio n. 26
0
        /**
         * This method performs the morphology for verbs.
         *
         * @param element
         *            the <code>InflectedWordElement</code>.
         * @param baseWord
         *            the <code>WordElement</code> as created from the lexicon
         *            entry.
         * @return a <code>StringElement</code> representing the word after
         *         inflection.
         */

        public static INLGElement doVerbMorphology(InflectedWordElement element, WordElement baseWord)
        {
            // Branch order matters: negated / bare infinitive, present participle,
            // past participle, simple past, third-person-singular present, and
            // finally every remaining present form. Inside each branch an explicit
            // form on the element wins over one on the base word, and a rule-built
            // form is the last resort.
            var number  = element.getFeature(Feature.NUMBER.ToString());
            var person  = element.getFeature(Feature.PERSON.ToString());
            var tense   = element.getFeatureTense(Feature.TENSE.ToString());
            var form    = element.getFeature(Feature.FORM.ToString());
            var pattern = element.getFeature(LexicalFeature.DEFAULT_INFL);

            // base form from baseWord if it exists, otherwise from element
            var stem = getBaseForm(element, baseWord);

            string surface;

            if (element.getFeatureAsBoolean(Feature.NEGATED.ToString()) || Form.BARE_INFINITIVE.Equals(form))
            {
                surface = stem;
            }
            else if (Form.PRESENT_PARTICIPLE.Equals(form))
            {
                surface = element.getFeatureAsString(LexicalFeature.PRESENT_PARTICIPLE)
                          ?? (baseWord != null
                              ? baseWord.getFeatureAsString(LexicalFeature.PRESENT_PARTICIPLE)
                              : null);

                if (surface == null)
                {
                    surface = Inflection.REGULAR_DOUBLE.Equals(pattern)
                        ? buildDoublePresPartVerb(stem)
                        : buildRegularPresPartVerb(stem);
                }
            }
            else if (Form.PAST_PARTICIPLE.Equals(form))
            {
                // a past participle is chosen by form alone, whatever the tense
                surface = element.getFeatureAsString(LexicalFeature.PAST_PARTICIPLE)
                          ?? (baseWord != null
                              ? baseWord.getFeatureAsString(LexicalFeature.PAST_PARTICIPLE)
                              : null);

                if (surface == null)
                {
                    if ("be".equalsIgnoreCase(stem))
                    {
                        surface = "been";
                    }
                    else if (Inflection.REGULAR_DOUBLE.Equals(pattern))
                    {
                        surface = buildDoublePastVerb(stem);
                    }
                    else
                    {
                        surface = buildRegularPastVerb(stem, number, person);
                    }
                }
            }
            else if (Tense.PAST.Equals(tense))
            {
                surface = element.getFeatureAsString(LexicalFeature.PAST)
                          ?? (baseWord != null
                              ? baseWord.getFeatureAsString(LexicalFeature.PAST)
                              : null);

                if (surface == null)
                {
                    surface = Inflection.REGULAR_DOUBLE.Equals(pattern)
                        ? buildDoublePastVerb(stem)
                        : buildRegularPastVerb(stem, number, person);
                }
            }
            else if ((number == null || NumberAgreement.SINGULAR.Equals(number)) &&
                     (person == null || Person.THIRD.Equals(person)) &&
                     Tense.PRESENT.Equals(tense))
            {
                // third person singular present; missing number or person counts
                // as singular / third
                surface = element.getFeatureAsString(LexicalFeature.PRESENT3S);

                // for "be" the base word's entry is skipped and the rule is used
                var mayUseBaseWord = baseWord != null && !"be".equalsIgnoreCase(stem);
                if (surface == null && mayUseBaseWord)
                {
                    surface = baseWord.getFeatureAsString(LexicalFeature.PRESENT3S);
                }

                if (surface == null)
                {
                    surface = buildPresent3SVerb(stem);
                }
            }
            else if (!"be".equalsIgnoreCase(stem))
            {
                surface = stem;
            }
            else
            {
                // remaining forms of "be": "am" for first person singular (or
                // unspecified number), "are" for everything else
                var firstSingular = Person.FIRST.Equals(person) &&
                                    (NumberAgreement.SINGULAR.Equals(number) || number == null);

                surface = firstSingular ? "am" : "are";
            }

            // the realised string keeps the discourse function of the source element
            var discourseKey = InternalFeature.DISCOURSE_FUNCTION.ToString();
            var result       = new StringElement(surface);

            result.setFeature(discourseKey, element.getFeature(discourseKey));
            return result;
        }
Esempio n. 27
0
        /**
         * This method performs the morphology for nouns.
         *
         * @param element
         *            the <code>InflectedWordElement</code>.
         * @param baseWord
         *            the <code>WordElement</code> as created from the lexicon
         *            entry.
         * @return a <code>StringElement</code> representing the word after
         *         inflection.
         */

        public static StringElement doNounMorphology(InflectedWordElement element, WordElement baseWord)
        {
            // Realises a noun. Plural, non-proper nouns get a plural form taken, in
            // order, from: the element (base form if its default inflection is
            // uncount, else its PLURAL feature), the base word (same two checks),
            // and finally a rule (Greco-Latin or regular). Everything else is
            // realised as the base form. The possessive check runs in both cases.
            var realised = new StringBuilder();

            // base form from baseWord if it exists, otherwise from element
            var baseForm = getBaseForm(element, baseWord);

            if (element.isPlural() && !element.getFeatureAsBoolean(LexicalFeature.PROPER))
            {
                // read once; used for both the uncount and the Greco-Latin test
                var elementDefaultInfl = element.getFeature(LexicalFeature.DEFAULT_INFL);

                // uncount nouns keep their base form in the plural
                string pluralForm = elementDefaultInfl != null && Inflection.UNCOUNT.Equals(elementDefaultInfl)
                    ? baseForm
                    : element.getFeatureAsString(LexicalFeature.PLURAL);

                if (pluralForm == null && baseWord != null)
                {
                    var baseDefaultInfl = baseWord.getFeatureAsString(LexicalFeature.DEFAULT_INFL);

                    // Compared case-insensitively so that both the lexicon code
                    // "uncount" and an upper-case rendering such as "UNCOUNT" match.
                    // NOTE(review): presumably getFeatureAsString yields the enum
                    // member name when the feature holds an Inflection value; the
                    // previous case-sensitive test would then never match - confirm.
                    var baseIsUncount = string.Equals(baseDefaultInfl, "uncount",
                                                      System.StringComparison.OrdinalIgnoreCase);

                    pluralForm = baseIsUncount
                        ? baseForm
                        : baseWord.getFeatureAsString(LexicalFeature.PLURAL);
                }

                if (pluralForm == null)
                {
                    // no stored plural anywhere: build one by rule
                    pluralForm = Inflection.GRECO_LATIN_REGULAR.Equals(elementDefaultInfl)
                        ? buildGrecoLatinPluralNoun(baseForm)
                        : buildRegularPluralNoun(baseForm);
                }

                realised.append(pluralForm);
            }
            else
            {
                realised.append(baseForm);
            }

            checkPossessive(element, realised);
            var realisedElement = new StringElement(realised.ToString());

            // the realised string keeps the discourse function of the source element
            realisedElement.setFeature(InternalFeature.DISCOURSE_FUNCTION.ToString(),
                                       element.getFeature(InternalFeature.DISCOURSE_FUNCTION.ToString()));
            return realisedElement;
        }