/**
 * Collects the words stored under a key of an index map, optionally
 * filtered by lexical category.
 *
 * @param indexKey
 *            key to look up in the index; a null key is treated as unknown
 * @param category
 *            category the words must have; ANY matches every word
 * @param indexMap
 *            index to search
 * @return a new list holding a new <code>WordElement</code> built from each
 *         matching entry; empty if the key is not in the index or no entry
 *         has the requested category
 */
private List<WordElement> getWordsFromIndex(string indexKey, ILexicalCategory category, Dictionary<string, List<WordElement>> indexMap)
{
    var result = new List<WordElement>();

    // case 1: unknown key, return empty list. A null key would make the
    // dictionary lookup throw, so it is reported as unknown too. A single
    // TryGetValue replaces the former ContainsKey + indexer double lookup.
    List<WordElement> candidates;
    if (indexKey == null || !indexMap.TryGetValue(indexKey, out candidates))
    {
        return result;
    }

    // case 2: category is ANY, every candidate matches
    bool matchAny = category.enumType == (int)LexicalCategoryEnum.ANY;

    foreach (var word in candidates)
    {
        // case 3: other category, keep only the words of that category
        if (matchAny || word.getCategory().enumType == category.enumType)
        {
            result.Add(new WordElement(word));
        }
    }

    return result;
}
/**
 * Create an inflected word element. An <code>InflectedWordElement</code>
 * represents a word that already specifies the morphological and other
 * features it should exhibit in a realisation. Phrases are normally built
 * from <code>WordElement</code>s with features set on the phrase, but it is
 * sometimes desirable to set features directly on a word (for example to
 * elide a specific word but not its parent phrase).
 *
 * <P>
 * The result depends on the type of the object passed:
 * <ul>
 * <li>a <code>WordElement</code> is wrapped in a new
 * <code>InflectedWordElement</code>;</li>
 * <li>a <code>string</code> is turned into a word with
 * <code>createWord</code>; if that yields a <code>WordElement</code> it is
 * wrapped, otherwise the <code>InflectedWordElement</code> is built from the
 * string and the category directly;</li>
 * <li>any other <code>INLGElement</code> is returned unchanged;</li>
 * <li>anything else yields <code>null</code>.</li>
 * </ul>
 *
 * @param word
 *            the word
 * @param category
 *            the category
 * @return an <code>InflectedWordElement</code>, the original object if it
 *         is some other <code>INLGElement</code>, or <code>null</code>
 */
public INLGElement createInflectedWord(object word, ILexicalCategory category)
{
    // already a word element: just wrap it
    var knownWord = word as WordElement;
    if (knownWord != null)
    {
        return new InflectedWordElement(knownWord);
    }

    // a string: resolve it to a word first, fall back to the raw string
    var text = word as string;
    if (text != null)
    {
        var baseword = createWord(text, category) as WordElement;
        if (baseword != null)
        {
            return new InflectedWordElement(baseword);
        }
        return new InflectedWordElement(text, category);
    }

    // any other element is handed back as is; non-elements give null
    return word as INLGElement;
}
/**
 * Creates an element representing a word. A <code>WordElement</code> is
 * returned unchanged. A <code>string</code> is looked up in the
 * <code>Lexicon</code>, provided one is set, and the resulting
 * <code>WordElement</code> is returned; strings listed in
 * <code>PRONOUNS</code> additionally get their pronoun features set.
 *
 * @param word
 *            the base word for the new element: either a
 *            <code>WordElement</code>, which is returned unchanged, or a
 *            <code>string</code>, which is looked up in the lexicon.
 * @param category
 *            the <code>LexicalCategory</code> for the word.
 *
 * @return the <code>WordElement</code> for the word, or <code>null</code>
 *         if the object is of another type or no lexicon is available.
 */
public WordElement createWord(object word, ILexicalCategory category)
{
    var existing = word as WordElement;
    if (existing != null)
    {
        return existing;
    }

    var baseForm = word as string;
    if (baseForm == null || this.lexicon == null)
    {
        return null;
    }

    // AG: the lookup yields a WordElement, not an InflectedWordElement
    WordElement wordElement = lexicon.lookupWord(baseForm, category);

    if (PRONOUNS.Contains(baseForm))
    {
        setPronounFeatures(wordElement, baseForm);
    }

    return wordElement;
}
/**
 * Tells whether the lexicon contains a WordElement matching the specified
 * variant form and category.
 *
 * @param variant
 *            - base form, inflected form, or spelling variant of word
 * @param category
 *            - syntactic category of word (ANY for unknown)
 * @return <code>true</code> if <code>getWordsFromVariant</code> finds at
 *         least one such WordElement
 */
public bool hasWordFromVariant(string variant, ILexicalCategory category)
{
    // convenience method built on getWordsFromVariant
    var matches = getWordsFromVariant(variant, category);
    return !matches.isEmpty();
}
/**
 * Tells whether the lexicon contains a WordElement with the specified base
 * form and category.
 *
 * @param baseForm
 *            - base form of word, eg "be" or "dog" (not "is" or "dogs")
 * @param category
 *            - syntactic category of word (ANY for unknown)
 * @return <code>true</code> if <code>getWords</code> finds at least one
 *         such WordElement
 */
public bool hasWord(string baseForm, ILexicalCategory category)
{
    // convenience method built on getWords
    var matches = getWords(baseForm, category);
    return !matches.isEmpty();
}
/**
 * Creates an NLGElement from an arbitrary object. The cases are tried in
 * this order:
 * <ul>
 * <li><code>null</code> gives <code>null</code>;</li>
 * <li>an <code>InflectedWordElement</code> gives its underlying base
 * word;</li>
 * <li>a <code>StringElement</code> whose realisation counts as a word (see
 * <code>stringIsWord</code>) gives the word created from that realisation,
 * otherwise the <code>StringElement</code> itself;</li>
 * <li>any other <code>INLGElement</code> is returned unchanged;</li>
 * <li>a <code>string</code> that counts as a word gives the created word,
 * otherwise a new <code>StringElement</code> holding the string;</li>
 * <li>any other object causes an <code>ArgumentException</code>.</li>
 * </ul>
 *
 * @param element
 *            - object to look up
 * @param category
 *            - default lexical category of object
 * @return NLGElement
 */
public INLGElement createNLGElement(object element, ILexicalCategory category)
{
    if (element == null)
    {
        return null;
    }

    // InflectedWordElement - return underlying word
    var inflected = element as InflectedWordElement;
    if (inflected != null)
    {
        return inflected.getBaseWord();
    }

    // StringElement - look up in lexicon if it is a word,
    // otherwise return the element itself
    var stringElement = element as StringElement;
    if (stringElement != null)
    {
        if (stringIsWord(stringElement.getRealisation(), category))
        {
            return createWord(stringElement.getRealisation(), category);
        }
        return stringElement;
    }

    // other NLGElement - return element
    var nlgElement = element as INLGElement;
    if (nlgElement != null)
    {
        return nlgElement;
    }

    // string - look up in lexicon if a word, otherwise wrap in a StringElement
    var text = element as string;
    if (text != null)
    {
        if (stringIsWord(text, category))
        {
            return createWord(element, category);
        }
        return new StringElement(text);
    }

    throw new ArgumentException(element.ToString() + " is not a valid type");
}
Inflection defaultInfl; // the default inflectional variant

// LexicalCategory category; // type of word

/**********************************************************/
// constructors
/**********************************************************/

/**
 * Creates a WordElement with the specified baseForm, category and ID. All
 * three arguments are optional; when no category is given the element is
 * assigned the ANY category. The inflectional-variant table starts empty.
 *
 * @param baseForm
 *            - base form of WordElement
 * @param category
 *            - category of WordElement (ANY when <code>null</code>)
 * @param id
 *            - ID of word in lexicon
 */
public WordElement(string baseForm = null, ILexicalCategory category = null, string id = null)
{
    this.baseForm = baseForm;

    // fall back to the ANY category when none is supplied
    setCategory(category ?? new LexicalCategory_ANY());

    this.id = id;
    inflVars = new Dictionary<Inflection, InflectionSet>();
}
/**
 * Returns a WordElement of the specified category that has an inflected
 * form and/or spelling variant matching the specified variant.
 *
 * @param variant
 *            - base form, inflected form, or spelling variant of word
 * @param category
 *            - syntactic category of word (ANY for unknown)
 * @return the word chosen by <code>selectMatchingWord</code> among the
 *         matches, or, when there is no match, a new word created with
 *         the variant as its base form
 */
public WordElement getWordFromVariant(string variant, ILexicalCategory category)
{
    var candidates = getWordsFromVariant(variant, category);

    if (!candidates.isEmpty())
    {
        return selectMatchingWord(candidates, variant);
    }

    // nothing found: default WordElement using the variant as base form
    return createWord(variant, category);
}
/**
 * Realises the key word of the interrogative, for example <em>who</em> or
 * <em>what</em>, and appends it to the clause realisation. Nothing happens
 * when the key word is <code>null</code> or its realisation is
 * <code>null</code>.
 *
 * @param keyWord
 *            the key word of the interrogative.
 * @param cat
 *            the category (usually pronoun, but not in the case of
 *            "how many")
 * @param parent
 *            the parent <code>SyntaxProcessor</code> used to realise the
 *            key word.
 * @param realisedElement
 *            the current realisation of the clause; receives the realised
 *            key word as a new component.
 * @param phraseFactory
 *            the phrase factory used to create the word.
 */
private static void realiseInterrogativeKeyWord(string keyWord, ILexicalCategory cat, SyntaxProcessor parent, ListElement realisedElement, NLGFactory phraseFactory)
{
    if (keyWord == null)
    {
        return;
    }

    var realisedKeyWord = parent.realise(phraseFactory.createWord(keyWord, cat));
    if (realisedKeyWord == null)
    {
        return;
    }

    realisedElement.addComponent(realisedKeyWord);
}
/*
 * (non-Javadoc)
 *
 * @see simplenlg.lexicon.Lexicon#getWordsFromVariant(java.lang.string,
 * simplenlg.features.LexicalCategory)
 *
 * Asks each lexicon of lexiconList in turn and gathers the non-empty
 * answers. Unless alwaysSearchAll is set, the search stops at the first
 * lexicon that yields a match.
 */
public override List<WordElement> getWordsFromVariant(string variant, ILexicalCategory category)
{
    var combined = new List<WordElement>();

    foreach (var lex in lexiconList)
    {
        var found = lex.getWordsFromVariant(variant, category);
        if (found == null || found.isEmpty())
        {
            continue; // this lexicon has nothing, try the next one
        }

        combined.AddRange(found);

        if (!alwaysSearchAll)
        {
            break; // first hit is enough
        }
    }

    return combined;
}
/***************************************************************************/
// default methods for looking up words
// These try the following (in this order)
// 1) word with matching base
// 2) word with matching variant
// 3) word with matching ID
// 4) create a new word
/***************************************************************************/

/**
 * General word lookup method. Tries the base form, then the variant, then
 * the ID (in this order) and creates a new word if none of them finds an
 * existing one.
 *
 * @param baseForm
 *            text to look up; used in turn as base form, variant and ID
 * @param category
 *            category of the word
 * @return the word found, or a newly created one
 */
public WordElement lookupWord(string baseForm, ILexicalCategory category)
{
    // 1) word with matching base
    if (hasWord(baseForm, category))
    {
        return getWord(baseForm, category);
    }

    // 2) word with matching variant
    if (hasWordFromVariant(baseForm, category))
    {
        return getWordFromVariant(baseForm, category);
    }

    // 3) word with matching ID
    if (hasWordByID(baseForm))
    {
        return getWordByID(baseForm);
    }

    // 4) nothing known: create a new word
    return createWord(baseForm, category);
}
/**
 * Gets a WordElement which has the specified base form and category.
 *
 * @param baseForm
 *            - base form of word, eg "be" or "dog" (not "is" or "dogs")
 * @param category
 *            - syntactic category of word (ANY for unknown)
 * @return the word chosen by <code>selectMatchingWord</code> among the
 *         entries found by <code>getWords</code>; if there are none, a new
 *         WordElement is created and returned
 */
public WordElement getWord(string baseForm, ILexicalCategory category)
{
    // convenience method built on getWords
    var candidates = getWords(baseForm, category);

    if (!candidates.isEmpty())
    {
        return selectMatchingWord(candidates, baseForm);
    }

    // nothing found: default WordElement of this baseForm, category
    return createWord(baseForm, category);
}
/**
 * A helper method to look up the lexicon for the given word and record the
 * result on the element as its <code>BASE_WORD</code> feature.
 *
 * <P>
 * When the category is NOUN but the lexicon has the word as a pronoun, the
 * pronoun entry is used instead: the element's category is switched to
 * pronoun and, unless the word is listed in <code>PRONOUNS</code>, the
 * <code>NON_MORPH</code> feature is set.
 *
 * @param category
 *            the <code>LexicalCategory</code> of the word.
 * @param word
 *            the base form of the word.
 * @param wordElement
 *            the created element representing the word.
 */
private void doLexiconLookUp(ILexicalCategory category, string word, INLGElement wordElement)
{
    bool nounListedAsPronoun = category.lexType == LexicalCategoryEnum.NOUN
        && this.lexicon.hasWord(word, new LexicalCategory_PRONOUN());

    if (!nounListedAsPronoun)
    {
        // ordinary case: look the word up under the category given
        WordElement plainWord = this.lexicon.lookupWord(word, category);
        wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), plainWord);
        return;
    }

    WordElement pronoun = this.lexicon.lookupWord(word, new LexicalCategory_PRONOUN());
    if (pronoun == null)
    {
        return;
    }

    wordElement.setFeature(InternalFeature.BASE_WORD.ToString(), pronoun);
    wordElement.setCategory(new LexicalCategory_PRONOUN());

    if (!PRONOUNS.Contains(word))
    {
        wordElement.setFeature(InternalFeature.NON_MORPH.ToString(), true);
    }
}
/*
 * (non-Javadoc)
 *
 * @see simplenlg.lexicon.Lexicon#getWordsFromVariant(java.lang.string,
 * simplenlg.features.LexicalCategory)
 *
 * Answered from the variant index (indexByVariant).
 */
public override List<WordElement> getWordsFromVariant(string variant, ILexicalCategory category)
{
    var matches = getWordsFromIndex(variant, category, indexByVariant);
    return matches;
}
/******************************************************************************************/
// main methods to get data from lexicon
/******************************************************************************************/

/*
 * (non-Javadoc)
 *
 * @see simplenlg.lexicon.Lexicon#getWords(java.lang.string,
 * simplenlg.features.LexicalCategory)
 *
 * Answered from the base-form index (indexByBase).
 */
public override List<WordElement> getWords(string baseForm, ILexicalCategory category)
{
    var matches = getWordsFromIndex(baseForm, category, indexByBase);
    return matches;
}
/****************************************************************************/
// get words by variant - try to return a WordElement given an inflectional
// or spelling variant. For the moment, acronyms are considered as separate
// words, not variants (this may change in the future).
// The fundamental version is
// getWordsFromVariant(string variant, ILexicalCategory category), which must
// be defined by subclasses. Other versions are convenience methods; they
// may be overridden for efficiency, but this is not required.
/****************************************************************************/

/**
 * Returns the words which have an inflected form and/or spelling variant
 * matching the specified variant, and which are in the specified category.
 * <P>
 * <I>Note:</I> the returned word list may not be complete; that depends on
 * how the underlying lexicon implements this method.
 *
 * @param variant
 *            - base form, inflected form, or spelling variant of word
 * @param category
 *            - syntactic category of word (ANY for unknown)
 * @return list of all matching words (empty list if no matching
 *         WordElement is found)
 */
public abstract List <WordElement> getWordsFromVariant(string variant, ILexicalCategory category);
/**
 * Tells whether a string counts as a word. Without a lexicon nothing does;
 * otherwise the string is a word if the lexicon has it under the given
 * category, if it is listed in <code>PRONOUNS</code>, or if it matches
 * <code>WORD_REGEX</code>.
 *
 * @param str
 *            the string to test
 * @param category
 *            the category used for the lexicon lookup
 * @return <code>true</code> if the string counts as a word
 */
private bool stringIsWord(string str, ILexicalCategory category)
{
    if (lexicon == null)
    {
        return false;
    }

    if (lexicon.hasWord(str, category))
    {
        return true;
    }

    if (PRONOUNS.Contains(str))
    {
        return true;
    }

    return str.matches(WORD_REGEX);
}
/**
 * Creates a default WordElement from a base form and a category. May be
 * overridden by specific types of lexicon.
 *
 * @param baseForm
 *            - base form of word
 * @param category
 *            - category of word
 * @return new WordElement built from the specified info
 */
protected WordElement createWord(string baseForm, ILexicalCategory category)
{
    // default WordElement of this baseForm, category
    var defaultWord = new WordElement(baseForm, category);
    return defaultWord;
}
/****************************************************************************/
// get words by baseform and category
// The fundamental version is
// getWords(string baseForm, ILexicalCategory category), which must be
// defined by subclasses. Other versions are convenience methods; they may
// be overridden for efficiency, but this is not required.
/****************************************************************************/

/**
 * Returns all words which have the specified base form and category.
 *
 * @param baseForm
 *            - base form of word, eg "be" or "dog" (not "is" or "dogs")
 * @param category
 *            - syntactic category of word (ANY for unknown)
 * @return collection of all matching words (may be empty)
 */
public abstract List <WordElement> getWords(string baseForm, ILexicalCategory category);
/**
 * Makes a WordElement from a lexical record. The element receives the
 * record's base form, category and EUI (as its ID), the category-specific
 * information for adjectives, adverbs, nouns and verbs, the record's
 * inflectional variants, and finally its acronym and spelling-variant
 * information.
 *
 * @param record
 *            lexical record to convert
 * @return the WordElement built from the record
 */
private WordElement makeWord(LexRecord record)
{
    // basic data
    string baseForm = record.GetBase();
    ILexicalCategory category = getSimplenlgCategory(record);
    string id = record.GetEui();

    // the word itself
    var wordElement = new WordElement(baseForm, (LexicalCategory)category, id);

    // type information; closed class words get none
    var lexType = category.lexType;
    if (lexType == LexicalCategoryEnum.ADJECTIVE)
    {
        addAdjectiveInfo(wordElement, record.GetCatEntry().GetAdjEntry());
    }
    else if (lexType == LexicalCategoryEnum.ADVERB)
    {
        addAdverbInfo(wordElement, record.GetCatEntry().GetAdvEntry());
    }
    else if (lexType == LexicalCategoryEnum.NOUN)
    {
        addNounInfo(wordElement, record.GetCatEntry().GetNounEntry());
    }
    else if (lexType == LexicalCategoryEnum.VERB)
    {
        addVerbInfo(wordElement, record.GetCatEntry().GetVerbEntry());
    }

    var defaultInfl = (Inflection)wordElement.getDefaultInflectionalVariant();

    // inflected forms
    foreach (InflVar inflection in record.GetInflVarsAndAgreements().GetInflValues())
    {
        string simplenlgInflection = getSimplenlgInflection(inflection.GetInflection());
        if (simplenlgInflection == null)
        {
            continue; // no simplenlg counterpart for this inflection
        }

        string inflectedForm = inflection.GetVar();
        Inflection inflType = Inflection.getInflCode(inflection.GetType());

        // store all inflectional variants, except for regular ones
        // unless those are explicitly kept
        if (inflType != null
            && (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections))
        {
            wordElement.addInflectionalVariant(inflType, simplenlgInflection, inflectedForm);
        }

        // if the word has no default variant, or this variant is the
        // default one (and not a dropped regular), also set the form as a
        // feature on the word
        if (defaultInfl == null
            || (defaultInfl.Equals(inflType)
                && (!Inflection.REGULAR.Equals(inflType) || this.keepStandardInflections)))
        {
            wordElement.setFeature(simplenlgInflection, inflectedForm);
        }
    }

    // acronym info
    addAcronymInfo(wordElement, record);

    // spelling variants
    addSpellingVariants(wordElement, record);

    return wordElement;
}