示例#1
0
        /**
         * Collects the words stored under a key of an index map, optionally
         * filtered by lexical category.
         *
         * @param indexKey
         *            - key to look up in the index
         * @param category
         *            - category to match; ANY accepts every word under the key
         * @param indexMap
         *            - index from key to the list of words stored under it
         * @return a new list holding a new WordElement built from each matching
         *         word (empty if the key is absent or nothing matches)
         */
        private List <WordElement> getWordsFromIndex(string indexKey,
                                                     ILexicalCategory category, Dictionary <string, List <WordElement> > indexMap)
        {
            var result = new List <WordElement>();

            // unknown key: return the empty list. TryGetValue fetches the bucket
            // in the same lookup instead of ContainsKey followed by the indexer.
            List <WordElement> candidates;
            if (!indexMap.TryGetValue(indexKey, out candidates))
            {
                return(result);
            }

            // ANY accepts everything; otherwise the category codes must agree
            bool acceptAll = category.enumType == (int)LexicalCategoryEnum.ANY;

            foreach (var word in candidates)
            {
                if (acceptAll || word.getCategory().enumType == category.enumType)
                {
                    // hand out a new WordElement built from the indexed one
                    result.Add(new WordElement(word));
                }
            }
            return(result);
        }
示例#2
0
        /**
         * Creates an inflected word element from the supplied object.
         *
         * <ul>
         * <li>A <code>WordElement</code> is wrapped in a new
         * <code>InflectedWordElement</code>.</li>
         * <li>A <code>string</code> is first passed to <code>createWord</code>;
         * if that yields a word it is wrapped, otherwise an
         * <code>InflectedWordElement</code> is built from the string and the
         * category.</li>
         * <li>Any other <code>INLGElement</code> is returned unchanged.</li>
         * <li>Anything else yields <code>null</code>.</li>
         * </ul>
         *
         * @param word
         *            the word, as a WordElement, string or INLGElement
         * @param category
         *            the category used when the word is given as a string
         * @return the resulting element, or <code>null</code> if the object is
         *         of none of the supported types
         */
        public INLGElement createInflectedWord(object word, ILexicalCategory category)
        {
            // an existing word element is simply wrapped
            if (word is WordElement)
            {
                return(new InflectedWordElement((WordElement)word));
            }

            // a string is resolved to a base word first, if possible
            var text = word as string;
            if (text != null)
            {
                var baseword = createWord(text, category);

                if (baseword == null)
                {
                    return(new InflectedWordElement(text, category));
                }
                return(new InflectedWordElement(baseword));
            }

            // any other element passes through; unsupported objects give null
            return(word as INLGElement);
        }
示例#3
0
        /**
         * Creates an element representing a word. A <code>WordElement</code> is
         * returned unchanged. A <code>string</code> is looked up in the
         * <code>Lexicon</code>, provided one is set, and pronoun features are
         * applied when the string is one of the known pronouns.
         *
         * @param word
         *            the base word: either a <code>WordElement</code> or a
         *            <code>string</code>.
         * @param category
         *            the <code>LexicalCategory</code> for the word.
         *
         * @return the <code>WordElement</code> for the word, or
         *         <code>null</code> if the object is neither a WordElement nor
         *         a string, or if no lexicon is available for a string.
         */
        public WordElement createWord(object word, ILexicalCategory category)
        {
            if (word is WordElement)
            {
                return((WordElement)word);
            }

            // only strings can be looked up, and only when a lexicon exists
            var text = word as string;
            if (text == null || this.lexicon == null)
            {
                return(null);
            }

            var wordElement = lexicon.lookupWord(text, category);
            if (PRONOUNS.Contains(text))
            {
                setPronounFeatures(wordElement, text);
            }

            return(wordElement);
        }
示例#4
0
        /**
         * Tells whether the lexicon contains at least one WordElement matching
         * the given variant form and category.
         *
         * @param variant
         *            - base form, inflected form, or spelling variant of word
         * @param category
         *            - syntactic category of word (ANY for unknown)
         * @return <code>true</code> if getWordsFromVariant finds any word
         */
        public bool hasWordFromVariant(string variant, ILexicalCategory category)
        {
            // convenience method derived from getWordsFromVariant
            var matches = getWordsFromVariant(variant, category);

            return(!matches.isEmpty());
        }
示例#5
0
        /**
         * Tells whether the lexicon contains at least one WordElement with the
         * given base form and category.
         *
         * @param baseForm
         *            - base form of word, eg "be" or "dog" (not "is" or "dogs")
         * @param category
         *            - syntactic category of word (ANY for unknown)
         * @return <code>true</code> if getWords finds any word
         */
        public bool hasWord(string baseForm, ILexicalCategory category)
        {
            // convenience method derived from getWords
            var matches = getWords(baseForm, category);

            return(!matches.isEmpty());
        }
示例#6
0
        /**
         * Creates an INLGElement from an arbitrary object. The cases are tried
         * in this order:
         *
         * <ol>
         * <li><code>null</code> gives <code>null</code>;</li>
         * <li>an InflectedWordElement gives its base word;</li>
         * <li>a StringElement whose realisation passes stringIsWord gives the
         * result of createWord on that realisation, any other StringElement is
         * returned as is;</li>
         * <li>any other INLGElement is returned unchanged;</li>
         * <li>a string that passes stringIsWord gives the result of
         * createWord, any other string is wrapped in a new StringElement.</li>
         * </ol>
         *
         * @param element
         *            - object to look up
         * @param category
         *            - default lexical category of object
         * @return the resulting element (may be <code>null</code>)
         * @throws ArgumentException
         *             if the object is of none of the supported types
         */
        public INLGElement createNLGElement(object element, ILexicalCategory category)
        {
            if (element == null)
            {
                return(null);
            }

            // InflectedWordElement - hand back the underlying word
            if (element is InflectedWordElement)
            {
                return(((InflectedWordElement)element).getBaseWord());
            }

            // StringElement - promote to a word when its text is one
            if (element is StringElement)
            {
                var stringElement = (StringElement)element;

                if (stringIsWord(stringElement.getRealisation(), category))
                {
                    return(createWord(stringElement.getRealisation(), category));
                }
                return(stringElement);
            }

            // any other element passes through untouched
            if (element is INLGElement)
            {
                return((INLGElement)element);
            }

            // plain string - a word if it qualifies, canned text otherwise
            if (element is string)
            {
                if (stringIsWord((string)element, category))
                {
                    return(createWord(element, category));
                }
                return(new StringElement((string)element));
            }

            throw new ArgumentException(element.ToString() + " is not a valid type");
        }
示例#7
0
        // the default inflectional variant of this word
        Inflection defaultInfl;

        // LexicalCategory category; // type of word

        /**********************************************************/
        // constructors
        /**********************************************************/


        /**
         * Creates a WordElement from a base form, a category and a lexicon ID.
         * All arguments are optional; a missing category is replaced by the
         * ANY category. The word starts with an empty set of inflectional
         * variants.
         *
         * @param baseForm
         *            - base form of WordElement
         * @param category
         *            - category of WordElement (ANY when <code>null</code>)
         * @param id
         *            - ID of word in lexicon
         */
        public WordElement(string baseForm = null, ILexicalCategory category = null, string id = null)
        {
            this.baseForm = baseForm;

            // fall back to ANY when the caller supplies no category
            if (category != null)
            {
                setCategory(category);
            }
            else
            {
                setCategory(new LexicalCategory_ANY());
            }

            this.id       = id;
            this.inflVars = new Dictionary <Inflection, InflectionSet>();
        }
示例#8
0
        /**
         * Returns a WordElement of the given category that has an inflected
         * form and/or spelling variant matching the given variant.
         *
         * @param variant
         *            - base form, inflected form, or spelling variant of word
         * @param category
         *            - syntactic category of word (ANY for unknown)
         * @return the word chosen by selectMatchingWord among the matches, or,
         *         if there are none, a new word created using this variant as
         *         the base form
         */
        public WordElement getWordFromVariant(string variant,
                                              ILexicalCategory category)
        {
            var candidates = getWordsFromVariant(variant, category);

            // no match: default WordElement using the variant as base form
            if (candidates.isEmpty())
            {
                return(createWord(variant, category));
            }

            return(selectMatchingWord(candidates, variant));
        }
示例#9
0
        /**
         * Realises the key word of the interrogative (for example
         * <em>who</em>, <em>what</em>) and appends the result to the current
         * realisation. Nothing happens when the key word is <code>null</code>
         * or when its realisation is <code>null</code>.
         *
         * @param keyWord
         *            the key word of the interrogative.
         * @param cat
         *            the category (usually pronoun, but not in the case of
         *            "how many")
         * @param parent
         *            the parent <code>SyntaxProcessor</code> that realises the
         *            key word.
         * @param realisedElement
         *            the current realisation of the clause.
         * @param phraseFactory
         *            the phrase factory used to create the word.
         */
        private static void realiseInterrogativeKeyWord(string keyWord,
                                                        ILexicalCategory cat,
                                                        SyntaxProcessor parent,
                                                        ListElement realisedElement,
                                                        NLGFactory phraseFactory)
        {
            if (keyWord == null)
            {
                return;
            }

            var question = phraseFactory.createWord(keyWord, cat);
            var realised = parent.realise(question);

            // only a successful realisation is added to the clause
            if (realised != null)
            {
                realisedElement.addComponent(realised);
            }
        }
示例#10
0
        /**
         * Queries the lexicons of lexiconList in order and gathers the words
         * matching the variant and category. Unless alwaysSearchAll is set, the
         * search stops at the first lexicon that yields a non-empty result.
         *
         * @param variant
         *            - variant form to look for
         * @param category
         *            - category to look for
         * @return the collected words (empty if no lexicon has a match)
         */
        public override List <WordElement> getWordsFromVariant(string variant, ILexicalCategory category)
        {
            var collected = new List <WordElement>();

            foreach (var lex in lexiconList)
            {
                var found = lex.getWordsFromVariant(variant, category);

                // this lexicon has nothing to contribute, try the next one
                if (found == null || found.isEmpty())
                {
                    continue;
                }

                collected.AddRange(found);

                // first hit is enough unless every lexicon must be searched
                if (!alwaysSearchAll)
                {
                    break;
                }
            }

            return(collected);
        }
示例#11
0
        /***************************************************************************/
        // default methods for looking up words
        // These try the following (in this order)
        // 1) word with matching base
        // 2) word with matching variant
        // 3) word with matching ID
        // 4) create a new word
        /***************************************************************************/

        /**
         * General word lookup. Tries, in this order, a word with a matching
         * base form, a word with a matching variant, and a word whose ID equals
         * the given string; if none exists a new word is created.
         *
         * @param baseForm
         *            - string to look up (used as base form, variant and ID in
         *            turn)
         * @param category
         *            - category of the word
         * @return the word found, or a newly created one
         */
        public WordElement lookupWord(string baseForm, ILexicalCategory category)
        {
            // 1) matching base form
            if (hasWord(baseForm, category))
            {
                return(getWord(baseForm, category));
            }

            // 2) matching variant
            if (hasWordFromVariant(baseForm, category))
            {
                return(getWordFromVariant(baseForm, category));
            }

            // 3) matching ID
            if (hasWordByID(baseForm))
            {
                return(getWordByID(baseForm));
            }

            // 4) nothing known: create a new word
            return(createWord(baseForm, category));
        }
示例#12
0
        /**
         * Gets a WordElement which has the specified base form and category.
         *
         * @param baseForm
         *            - base form of word, eg "be" or "dog" (not "is" or "dogs")
         * @param category
         *            - syntactic category of word (ANY for unknown)
         * @return the word chosen by selectMatchingWord among the words found
         *         by getWords, or, if there are none, a new WordElement created
         *         from the base form and category
         */
        public WordElement getWord(string baseForm, ILexicalCategory category)
        {
            // convenience method derived from getWords
            var candidates = getWords(baseForm, category);

            // no match: default WordElement of this baseForm and category
            if (candidates.isEmpty())
            {
                return(createWord(baseForm, category));
            }

            return(selectMatchingWord(candidates, baseForm));
        }
示例#13
0
        /**
         * A helper method that looks the word up in the lexicon and records the
         * result on the element as its BASE_WORD feature. A noun that the
         * lexicon also knows as a pronoun is looked up as a pronoun instead:
         * the element's category is switched to pronoun and, unless the word is
         * listed in PRONOUNS, NON_MORPH is set on it.
         *
         * @param category
         *            the <code>LexicalCategory</code> of the word.
         * @param word
         *            the base form of the word.
         * @param wordElement
         *            the created element representing the word.
         */
        private void doLexiconLookUp(ILexicalCategory category, string word, INLGElement wordElement)
        {
            bool nounKnownAsPronoun = category.lexType == LexicalCategoryEnum.NOUN &&
                                      this.lexicon.hasWord(word, new LexicalCategory_PRONOUN());

            // ordinary case: look the word up under its own category
            if (!nounKnownAsPronoun)
            {
                WordElement plainWord = this.lexicon.lookupWord(word, category);
                wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), plainWord);
                return;
            }

            WordElement pronounWord = this.lexicon.lookupWord(word, new LexicalCategory_PRONOUN());
            if (pronounWord == null)
            {
                return;
            }

            wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), pronounWord);
            wordElement.setCategory(new LexicalCategory_PRONOUN());

            // pronouns outside the PRONOUNS list are flagged NON_MORPH
            if (!PRONOUNS.Contains(word))
            {
                wordElement.setFeature(InternalFeature.NON_MORPH.ToString(), true);
            }
        }
示例#14
0
        /**
         * Looks the variant up in the variant index (indexByVariant),
         * filtering by category.
         *
         * @param variant
         *            - variant form used as the index key
         * @param category
         *            - category to match
         * @return the words returned by getWordsFromIndex for this key
         */
        public override List <WordElement> getWordsFromVariant(string variant,
                                                               ILexicalCategory category)
        {
            var matches = getWordsFromIndex(variant, category, indexByVariant);

            return(matches);
        }
示例#15
0
        /******************************************************************************************/
        // main methods to get data from lexicon
        /******************************************************************************************/

        /**
         * Looks the base form up in the base-form index (indexByBase),
         * filtering by category.
         *
         * @param baseForm
         *            - base form used as the index key
         * @param category
         *            - category to match
         * @return the words returned by getWordsFromIndex for this key
         */
        public override List <WordElement> getWords(string baseForm, ILexicalCategory category)
        {
            var matches = getWordsFromIndex(baseForm, category, indexByBase);

            return(matches);
        }
示例#16
0
        /****************************************************************************/
        // get words by variant - try to return a WordElement given an inflectional
        // or spelling
        // variant. For the moment, acronyms are considered as separate words, not
        // variants
        // (this may change in the future)
        // fundamental version is getWordsFromVariant(string baseForm, Category
        // category),
        // this must be defined by subclasses. Other versions are convenience
        // methods. These may be overridden for efficiency, but this is not required.
        /****************************************************************************/

        /**
         * Returns the words which have an inflected form and/or spelling
         * variant matching the specified variant, and which are in the
         * specified category. Must be implemented by subclasses.
         *
         * Note: the returned word list may not be complete; this depends on
         * how it is implemented by the underlying lexicon.
         *
         * @param variant
         *            - base form, inflected form, or spelling variant of word
         * @param category
         *            - syntactic category of word (ANY for unknown)
         * @return list of all matching words (empty list if no matching
         *         WordElement is found)
         */

        public abstract List <WordElement> getWordsFromVariant(string variant,
                                                               ILexicalCategory category);
示例#17
0
 /**
  * Tells whether a string counts as a word: a lexicon must be set, and the
  * string must be a lexicon entry of the given category, one of the
  * PRONOUNS, or match WORD_REGEX.
  *
  * @param str
  *            - string to test
  * @param category
  *            - category used for the lexicon check
  * @return true if the string counts as a word
  */
 private bool stringIsWord(string str, ILexicalCategory category)
 {
     // without a lexicon nothing is considered a word
     if (lexicon == null)
     {
         return(false);
     }

     if (lexicon.hasWord(str, category))
     {
         return(true);
     }

     if (PRONOUNS.Contains(str))
     {
         return(true);
     }

     return(str.matches(WORD_REGEX));
 }
示例#18
0
        /**
         * Creates a default WordElement from a base form and a category. May
         * be overridden by specific types of lexicon.
         *
         * @param baseForm
         *            - base form of word
         * @param category
         *            - category of word
         * @return a new WordElement built from the base form and category
         */
        protected WordElement createWord(string baseForm, ILexicalCategory category)
        {
            // default WordElement of this baseForm and category
            var defaultWord = new WordElement(baseForm, category);

            return(defaultWord);
        }
示例#19
0
        /****************************************************************************/
        // get words by baseform and category
        // fundamental version is getWords(string baseForm, Category category),
        // this must be defined by subclasses. Other versions are convenience
        // methods. These may be overridden for efficiency, but this is not required.
        /****************************************************************************/

        /**
         * Returns all words which have the specified base form and category.
         * Must be implemented by subclasses.
         *
         * @param baseForm
         *            - base form of word, eg "be" or "dog" (not "is" or "dogs")
         * @param category
         *            - syntactic category of word (ANY for unknown)
         * @return list of all matching words (may be empty)
         */

        public abstract List <WordElement> getWords(string baseForm, ILexicalCategory category);
示例#20
0
        /**
         * Builds a WordElement from a lexical record. The word receives, in
         * this order: base form, category and ID; category-specific information
         * for adjectives, adverbs, nouns and verbs; inflected forms; acronym
         * information; spelling variants.
         *
         * @param record
         *            - the lexical record to convert
         * @return the WordElement populated from the record
         */
        private WordElement makeWord(LexRecord record)
        {
            // basic data taken from the record
            string           recordBase     = record.GetBase();
            ILexicalCategory recordCategory = getSimplenlgCategory(record);
            string           recordId       = record.GetEui();

            var wordElement = new WordElement(recordBase, (LexicalCategory)recordCategory, recordId);

            // type information; categories not listed here (closed class
            // words) get none
            switch (recordCategory.lexType)
            {
            case LexicalCategoryEnum.ADJECTIVE:
                addAdjectiveInfo(wordElement, record.GetCatEntry().GetAdjEntry());
                break;

            case LexicalCategoryEnum.ADVERB:
                addAdverbInfo(wordElement, record.GetCatEntry().GetAdvEntry());
                break;

            case LexicalCategoryEnum.NOUN:
                addNounInfo(wordElement, record.GetCatEntry().GetNounEntry());
                break;

            case LexicalCategoryEnum.VERB:
                addVerbInfo(wordElement, record.GetCatEntry().GetVerbEntry());
                break;
            }

            var defaultVariant = (Inflection)wordElement.getDefaultInflectionalVariant();

            // inflected forms
            foreach (InflVar inflVar in record.GetInflVarsAndAgreements().GetInflValues())
            {
                string featureName = getSimplenlgInflection(inflVar.GetInflection());

                // inflections without a simplenlg counterpart are skipped
                if (featureName == null)
                {
                    continue;
                }

                string inflectedForm = inflVar.GetVar();

                // NOTE(review): GetType() is assumed to be InflVar's own
                // accessor rather than object.GetType() - confirm
                Inflection inflType = Inflection.getInflCode(inflVar.GetType());

                // store every inflectional variant, except regular ones when
                // standard inflections are not kept
                if (inflType != null &&
                    (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections))
                {
                    wordElement.addInflectionalVariant(inflType, featureName, inflectedForm);
                }

                // with no default variant, or when this variant is the default
                // one (same exception for regular forms), also set the form as
                // a feature on the word
                if (defaultVariant == null ||
                    (defaultVariant.Equals(inflType) &&
                     (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections)))
                {
                    wordElement.setFeature(featureName, inflectedForm);
                }
            }

            // acronym info
            addAcronymInfo(wordElement, record);

            // spelling variants
            addSpellingVariants(wordElement, record);

            return(wordElement);
        }