Beispiel #1
0
        /**
         * Determines the maximum position at which this modifier can occur.
         *
         * A modifier that is neither an adjective nor an adjective phrase gets
         * NOUN_POSITION. Otherwise the head word's CLASSIFYING, COLOUR and
         * QUALITATIVE features are tested in that order; an adjective carrying
         * none of them falls back to CLASSIFYING_POSITION.
         *
         * @param modifier
         *            the modifier to be checked.
         * @return the maximum position for this modifier.
         */
        private static int getMaxPos(NLGElement modifier)
        {
            bool isAdjectival = modifier.isA(new LexicalCategory(LexicalCategory.LexicalCategoryEnum.ADJECTIVE))
                                || modifier.isA(new PhraseCategory(PhraseCategory.PhraseCategoryEnum.ADJECTIVE_PHRASE));

            if (!isAdjectival)
            {
                return NOUN_POSITION;
            }

            WordElement head = getHeadWordElement(modifier);

            if (head.getFeatureAsBoolean(LexicalFeature.CLASSIFYING))
            {
                return CLASSIFYING_POSITION;
            }
            if (head.getFeatureAsBoolean(LexicalFeature.COLOUR))
            {
                return COLOUR_POSITION;
            }
            if (head.getFeatureAsBoolean(LexicalFeature.QUALITATIVE))
            {
                return QUALITATIVE_POSITION;
            }

            // No ordering feature is set: use the same slot as a classifying adjective.
            return CLASSIFYING_POSITION;
        }
Beispiel #2
0
        /// <summary>
        ///     Merges a multi-line comment into a single element.
        /// </summary>
        /// <remarks>
        ///     The last element of <paramref name="codeElement"/> is removed and its
        ///     text is appended to the element before it, which is then marked as
        ///     <c>Assist</c>/<c>Rem</c> and made the current element. Following words
        ///     are appended until a <c>'/'</c> directly preceded by a <c>'*'</c> is
        ///     consumed, or until the words run out.
        /// </remarks>
        /// <param name="codeElement">Element whose last two children open the comment.</param>
        private void MergeMulitRem(AnalyzeElement codeElement)
        {
            var trailing = codeElement.Elements[codeElement.Elements.Count - 1];

            codeElement.Elements.RemoveAt(codeElement.Elements.Count - 1);
            var comment = codeElement.Elements[codeElement.Elements.Count - 1];

            comment.ItemRace           = CodeItemRace.Assist;
            comment.ItemFamily         = CodeItemFamily.Rem;
            comment.Word              += trailing.Word;
            codeElement.CurrentElement = comment;

            // True when the previously consumed word was '*'.
            bool previousWasStar = false;

            CurWordIndex++;
            while (CurWordIndex < WordElements.Count)
            {
                WordElement word = WordElements[CurWordIndex];
                comment.Word += word.RealWord;

                // "*/" closes the comment; CurWordIndex is left on the '/'.
                if (previousWasStar && word.Char == '/')
                {
                    return;
                }

                previousWasStar = word.Char == '*';
                CurWordIndex++;
            }
        }
Beispiel #3
0
        /**
         * Extracts adjective information from an NIH AdjEntry record and stores
         * it on a simplenlg WordElement. For now only position info is used.
         *
         * Each of the QUALITATIVE, COLOUR, CLASSIFYING and PREDICATIVE features
         * is set to true when at least one position string of the entry starts
         * with "attrib(1)", "attrib(2)", "attrib(3)" or "pred" respectively,
         * and to false otherwise. Other positions are ignored.
         *
         * @param wordElement
         *            the word that receives the features
         * @param adjEntry
         *            the record whose positions are inspected
         */
        private void addAdjectiveInfo(WordElement wordElement, AdjEntry adjEntry)
        {
            List <string> positions = adjEntry.GetPosition();

            // The four prefixes are mutually exclusive, so each flag can be
            // derived independently of the others.
            Func <string, bool> occursAt =
                prefix => positions.Exists(p => p.StartsWith(prefix, StringComparison.Ordinal));

            bool isQualitative = occursAt("attrib(1)");
            bool isColour      = occursAt("attrib(2)");
            bool isClassifying = occursAt("attrib(3)");
            bool isPredicative = occursAt("pred");

            // ignore (for now) other info in record
            wordElement.setFeature(LexicalFeature.QUALITATIVE, isQualitative);
            wordElement.setFeature(LexicalFeature.COLOUR, isColour);
            wordElement.setFeature(LexicalFeature.CLASSIFYING, isClassifying);
            wordElement.setFeature(LexicalFeature.PREDICATIVE, isPredicative);
        }
        /// <summary>
        ///     Enumerates the ways <paramref name="word"/> can be split into a
        ///     sequence of entries from <paramref name="wordDict"/>, breadth first.
        /// </summary>
        /// <remarks>
        ///     Each queue node records the index where the next piece starts
        ///     (<c>nextindex</c>) and the space-separated pieces found so far
        ///     (<c>result</c>). Candidate pieces always contain at least one
        ///     character: an empty piece never advances the index, so a dictionary
        ///     containing <c>""</c> would otherwise re-enqueue the same node forever.
        ///     For an empty <paramref name="word"/> the seed node's own
        ///     <c>result</c> is reported.
        /// </remarks>
        /// <param name="word">The text to segment.</param>
        /// <param name="wordDict">The set of allowed pieces.</param>
        /// <returns>One string per segmentation, pieces joined by single spaces.</returns>
        private IList <string> breakWord(string word, HashSet <string> wordDict)
        {
            IList <string>      result  = new List <string>();
            Queue <WordElement> pending = new Queue <WordElement>();

            pending.Enqueue(new WordElement(0));

            while (pending.Count > 0)
            {
                WordElement current = pending.Dequeue();
                if (current.nextindex >= word.Length)
                {
                    // The whole word has been consumed: this node is a full segmentation.
                    result.Add(current.result);
                    continue;
                }

                // Start one past nextindex so that every candidate is non-empty.
                for (int end = current.nextindex + 1; end <= word.Length; end++)
                {
                    string candidate = word.Substring(current.nextindex, end - current.nextindex);
                    if (!wordDict.Contains(candidate))
                    {
                        continue;
                    }

                    WordElement next = new WordElement(end, current.result);
                    next.result = next.result != null ? next.result + " " + candidate : candidate;
                    pending.Enqueue(next);
                }
            }

            return(result);
        }
        /**
         * Checks that changing the default inflectional variant of the verb
         * "lie" changes its realised past tense: "lied" while regular, "lay"
         * while irregular, and "lied" again after switching back.
         */
        public virtual void verbInflectionalVariantsTest()
        {
            LexicalCategory verbCategory = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.VERB);
            WordElement     lie          = lexicon.getWord("lie", verbCategory);

            Assert.AreEqual(Inflection.REGULAR, lie.getDefaultInflectionalVariant());

            // regular is the default, so the past tense is "lied"
            InflectedWordElement regularPast = new InflectedWordElement(lie);
            regularPast.setFeature(Feature.TENSE, Tense.PAST);
            string regularForm = realiser.realise(regularPast).Realisation;
            Assert.AreEqual("lied", regularForm);

            // irregular variant
            lie.setDefaultInflectionalVariant(Inflection.IRREGULAR);
            InflectedWordElement irregularPast = new InflectedWordElement(lie);
            irregularPast.setFeature(Feature.TENSE, Tense.PAST);
            string irregularForm = realiser.realise(irregularPast).Realisation;
            Assert.AreEqual("lay", irregularForm);

            // back to regular: no past form may remain stored on the word
            lie.setDefaultInflectionalVariant(Inflection.REGULAR);
            Assert.AreEqual(null, lie.getFeature(LexicalFeature.PAST));
            InflectedWordElement restoredPast = new InflectedWordElement(lie);
            restoredPast.setFeature(Feature.TENSE, Tense.PAST);
            string restoredForm = realiser.realise(restoredPast).Realisation;
            Assert.AreEqual("lied", restoredForm);
        }
Beispiel #6
0
        /**
         * Extracts verb information from an NIH VerbEntry record and stores it
         * on a simplenlg WordElement. For now only the intransitive, transitive
         * and ditransitive flags and the inflectional variants are used.
         *
         * A null verbEntry (expected only for auxiliary verbs, which have an
         * auxEntry instead) is flagged as transitive only.
         *
         * Variant strings have the form "code" or "code|...". The code is
         * lower-cased with the invariant culture before lookup, because it is a
         * machine identifier: culture-sensitive lowering (for example Turkish
         * dotless i) would make codes such as "IRREG" unrecognisable.
         *
         * @param wordElement
         *            the word that receives the features
         * @param verbEntry
         *            the record to read, or null for auxiliary verbs
         */
        private void addVerbInfo(WordElement wordElement, VerbEntry verbEntry)
        {
            if (verbEntry == null)
            { // should only happen for aux verbs, which have
              // auxEntry instead of verbEntry in NIH Lex
              // just flag as transitive and return
                wordElement.setFeature(LexicalFeature.INTRANSITIVE, false);
                wordElement.setFeature(LexicalFeature.TRANSITIVE, true);
                wordElement.setFeature(LexicalFeature.DITRANSITIVE, false);
                return;
            }

            bool intransitiveVerb = verbEntry.GetIntran().Any();
            bool transitiveVerb   = verbEntry.GetTran().Any() || verbEntry.GetCplxtran().Any();
            bool ditransitiveVerb = verbEntry.GetDitran().Any();

            wordElement.setFeature(LexicalFeature.INTRANSITIVE, intransitiveVerb);
            wordElement.setFeature(LexicalFeature.TRANSITIVE, transitiveVerb);
            wordElement.setFeature(LexicalFeature.DITRANSITIVE, ditransitiveVerb);

            // add the inflectional variants
            List <string> variants = verbEntry.GetVariants();

            if (variants.Count > 0)
            {
                IList <Inflection> wordVariants = new List <Inflection>();

                foreach (string v in variants)
                {
                    // The code is everything before the first '|', or the whole string.
                    int    index = v.IndexOf("|", StringComparison.Ordinal);
                    string raw   = index > -1 ? v.Substring(0, index) : v;
                    string code  = raw.ToLowerInvariant().Trim();

                    Inflection?infl = Inflection.REGULAR.getInflCode(code);

                    if (infl != null)
                    {
                        wordElement.addInflectionalVariant(infl.Value);
                        wordVariants.Add(infl.Value);
                    }
                }

                // if the variants include "reg", this is the default, otherwise
                // just a random pick
                Inflection defaultVariant = wordVariants.Contains(Inflection.REGULAR) || wordVariants.Count == 0 ? Inflection.REGULAR : wordVariants[0];
                // Unlike nouns, the INFLECTIONS / DEFAULT_INFL features are not set
                // explicitly here; only the default variant is installed.
                wordElement.setDefaultInflectionalVariant(defaultVariant);
            }

            // ignore (for now) other info in record
        }
        /**
         * Checks that changing the default inflectional variant of the noun
         * "sanctum" changes its realised plural: "sanctums" while regular,
         * "sancta" while Greco-Latin regular, and "sanctums" again afterwards.
         */
        public virtual void nounInflectionalVariantsTest()
        {
            LexicalCategory nounCategory = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.NOUN);
            WordElement     sanctum      = lexicon.getWord("sanctum", nounCategory);

            Assert.AreEqual(Inflection.REGULAR, sanctum.getDefaultInflectionalVariant());

            // a regular plural is computed, not stored on the word
            Assert.AreEqual(null, sanctum.getFeature(LexicalFeature.PLURAL));
            InflectedWordElement regularPlural = new InflectedWordElement(sanctum);
            regularPlural.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
            string regularForm = realiser.realise(regularPlural).Realisation;
            Assert.AreEqual("sanctums", regularForm);

            // Greco-Latin regular variant
            sanctum.setDefaultInflectionalVariant(Inflection.GRECO_LATIN_REGULAR);
            InflectedWordElement latinPlural = new InflectedWordElement(sanctum);
            latinPlural.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
            string latinForm = realiser.realise(latinPlural).Realisation;
            Assert.AreEqual("sancta", latinForm);

            // and back to regular
            sanctum.setDefaultInflectionalVariant(Inflection.REGULAR);
            InflectedWordElement restoredPlural = new InflectedWordElement(sanctum);
            restoredPlural.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
            string restoredForm = realiser.realise(restoredPlural).Realisation;
            Assert.AreEqual("sanctums", restoredForm);
        }
Beispiel #8
0
        /**
         * Quick-and-dirty routine for collecting the morphological variants of
         * a word; should be replaced by something better.
         *
         * The base form is always included. For lexical categories NOUN,
         * ADJECTIVE and VERB the forms produced by getVariant for the relevant
         * features are added as well; every other category contributes only
         * the base form. Because a set is returned, identical forms collapse.
         *
         * @param word
         *            the word whose variants are collected
         * @return the set of variant strings
         */
        private ISet <string> getVariants(WordElement word)
        {
            ISet <string> forms = new HashSet <string>();

            forms.Add(word.BaseForm);

            ElementCategory category = word.Category;
            if (!(category is LexicalCategory))
            {
                // non-lexical categories only need the base form
                return(forms);
            }

            switch (((LexicalCategory)category).GetLexicalCategory())
            {
            case LexicalCategory.LexicalCategoryEnum.NOUN:
                forms.Add(getVariant(word, LexicalFeature.PLURAL, "s"));
                break;

            case LexicalCategory.LexicalCategoryEnum.ADJECTIVE:
                forms.Add(getVariant(word, LexicalFeature.COMPARATIVE, "er"));
                forms.Add(getVariant(word, LexicalFeature.SUPERLATIVE, "est"));
                break;

            case LexicalCategory.LexicalCategoryEnum.VERB:
                forms.Add(getVariant(word, LexicalFeature.PRESENT3S, "s"));
                forms.Add(getVariant(word, LexicalFeature.PAST, "ed"));
                forms.Add(getVariant(word, LexicalFeature.PAST_PARTICIPLE, "ed"));
                forms.Add(getVariant(word, LexicalFeature.PRESENT_PARTICIPLE, "ing"));
                break;

            default:
                // only base needed for other forms
                break;
            }

            return(forms);
        }
Beispiel #9
0
        /**
         * Adds a word to the internal indices: by base form, by ID and by
         * every variant returned from getVariants.
         *
         * A duplicate ID is reported on the console and the newer word replaces
         * the older one in the ID index.
         *
         * @param word
         *            the word to index
         */
        private void IndexWord(WordElement word)
        {
            // by base form (the null check should not really be needed,
            // as all words have base forms)
            string baseForm = word.BaseForm;
            if (baseForm != null)
            {
                updateIndex(word, baseForm, indexByBase);
            }

            // by ID, which should be unique when present
            string wordId = word.Id;
            if (wordId != null)
            {
                bool alreadyIndexed = indexByID.ContainsKey(wordId);
                if (alreadyIndexed)
                {
                    Console.WriteLine("Lexicon error: ID " + wordId + " occurs more than once");
                }
                indexByID[wordId] = word;
            }

            // by variant
            foreach (string form in getVariants(word))
            {
                updateIndex(word, form, indexByVariant);
            }
        }
        /**
         * Checks that the default spelling variant and the default inflectional
         * variant of "Adams-Stokes disease" are honoured when the word is
         * realised inside a noun phrase.
         */
        public virtual void spellingVariantsInNPTest()
        {
            const string hyphenated   = "Adams-Stokes disease";
            const string unhyphenated = "Adams Stokes disease";

            WordElement disease = lexicon.getWord(hyphenated);
            Assert.AreEqual(hyphenated, disease.DefaultSpellingVariant);

            NPPhraseSpec phrase = factory.createNounPhrase(disease);
            phrase.setSpecifier(lexicon.getWord("the"));
            Assert.AreEqual("the Adams-Stokes disease", realiser.realise(phrase).Realisation);

            // change the spelling variant
            disease.DefaultSpellingVariant = unhyphenated;
            Assert.AreEqual(unhyphenated, disease.DefaultSpellingVariant);
            Assert.AreEqual("the Adams Stokes disease", realiser.realise(phrase).Realisation);

            // the default inflection for this word is uncount, so plural changes nothing
            phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL);
            Assert.AreEqual("the Adams Stokes disease", realiser.realise(phrase).Realisation);

            // with a regular default inflection the plural is realised
            disease.setDefaultInflectionalVariant(Inflection.REGULAR);
            Assert.AreEqual("the Adams Stokes diseases", realiser.realise(phrase).Realisation);
        }
Beispiel #11
0
        ///// <summary>
        ///// 组合代码
        ///// </summary>
        ///// <param name="isCodeBlock"></param>
        ///// <param name="codeElement"></param>
        //void JoinCode(bool isCodeBlock, WordElement codeElement)
        //{
        //    if (contentElement != null)
        //    {
        //        if (isCodeBlock)
        //        {
        //            AppendContentCode(contentElement);
        //            contentElement = null;
        //        }
        //        else
        //        {
        //            contentElement.Append(codeElement.Element.End, $"{{{codeElement.RealWord.Trim()}}}");
        //        }
        //    }
        //    else if (!isCodeBlock)
        //    {
        //        Codes.Add($"strResult.Append({codeElement.RealWord.Trim()});");
        //    }
        //    if (isCodeBlock && codeElement != null)
        //    {
        //        Codes.Add(codeElement.RealWord);
        //    }
        //    if (CurWordIndex < this.WordElements.Count && this.WordElements[CurWordIndex].Element.IsSpace)
        //    {
        //        CurWordIndex--; //空白还回去
        //    }
        //}



        #endregion

        #region 特殊单元的组合

        /// <summary>
        ///     Processes the word at <c>CurWordIndex</c> for the current language:
        ///     appends it to <paramref name="codeElement"/> and, for punctuation,
        ///     dispatches to the comment and string merge routines.
        /// </summary>
        /// <param name="codeElement">The language node that receives the word.</param>
        /// <param name="preChar">
        ///     One-character look-behind state. It is set to <c>'/'</c> or <c>'@'</c>
        ///     when that character may start a two-character token, and reset to
        ///     <c>'\0'</c> on every other path.
        /// </param>
        public override void MergeLanWord(AnalyzeElement codeElement, ref char preChar)
        {
            WordElement cur = WordElements[CurWordIndex];

            codeElement.Append(cur);
            // Non-punctuation never starts or continues a two-character token.
            if (!cur.IsPunctuate)
            {
                preChar = '\0';
                return;
            }
            switch (cur.Char)
            {
                // Region: comments
                #region 注释

            case '/':
                // First '/': remember it and wait for the next word.
                if (preChar != '/')
                {
                    preChar = '/';
                    return;
                }
                // Second consecutive '/': presumably a single-line comment.
                MergeSinleRem(codeElement);
                break;

            case '*':
                // '*' directly after '/' opens a multi-line comment.
                if (preChar == '/')
                {
                    MergeMulitRem(codeElement);
                }
                break;

                #endregion

                // Region: strings
                #region 字符串

            case '@':
                // Remember '@' so that a following '"' can be handled specially.
                preChar = '@';
                return;

            case '\"':
                if (preChar == '@')
                {
                    // '@' followed by '"': fold the quote into the element before
                    // the last one, drop the last element, then merge the string
                    // (presumably a verbatim string) into that block.
                    var block = codeElement.Elements[codeElement.Elements.Count - 2];
                    block.Append(cur);
                    codeElement.Elements.RemoveAt(codeElement.Elements.Count - 1);
                    MergeMulitString(block);
                }
                else
                {
                    MergeString(cur, '\"');
                }
                break;

            case '\'':
                MergeString(cur, '\'');
                break;

                #endregion
            }
            // Every path that reaches here has finished its token.
            preChar = '\0';
        }
Beispiel #12
0
    /// <summary>
    ///     Loads word elements from an "Items" element.
    /// </summary>
    /// <remarks>
    ///     Every child tagged "Item" that <c>LoadWordElement</c> accepts is stored
    ///     in <c>_mWordElementList</c> under its <c>WordID</c>; other children and
    ///     rejected items are skipped.
    /// </remarks>
    /// <param name="element">The element to read.</param>
    /// <returns>
    ///     <c>false</c> when the tag is not "Items" or the element has no
    ///     children; otherwise <c>true</c>.
    /// </returns>
    public override bool Load(SecurityElement element)
    {
        if (element.Tag != "Items")
        {
            return(false);
        }

        if (element.Children == null)
        {
            return(false);
        }

        foreach (SecurityElement child in element.Children)
        {
            if (child.Tag != "Item")
            {
                continue;
            }

            WordElement loaded = null;
            if (LoadWordElement(child, out loaded))
            {
                _mWordElementList[loaded.WordID] = loaded;
            }
        }

        return(true);
    }
Beispiel #13
0
        /**
         * Extracts adverb information from an NIH AdvEntry record and stores it
         * on a simplenlg WordElement. For now only the modifier type is used.
         *
         * Each of the VERB_MODIFIER, SENTENCE_MODIFIER and INTENSIFIER features
         * is set to true when at least one modification string of the entry
         * starts with "verb_modifier", "sentence_modifier" or "intensifier"
         * respectively, and to false otherwise. Other types are ignored.
         *
         * @param wordElement
         *            the word that receives the features
         * @param advEntry
         *            the record whose modifications are inspected
         */
        private void addAdverbInfo(WordElement wordElement, AdvEntry advEntry)
        {
            List <string> modifications = advEntry.GetModification();

            // The three prefixes are mutually exclusive, so each flag can be
            // derived independently of the others.
            Func <string, bool> modifies =
                prefix => modifications.Exists(m => m.StartsWith(prefix, StringComparison.Ordinal));

            bool isVerbModifier     = modifies("verb_modifier");
            bool isSentenceModifier = modifies("sentence_modifier");
            bool isIntensifier      = modifies("intensifier");

            // ignore (for now) other info in record
            wordElement.setFeature(LexicalFeature.VERB_MODIFIER, isVerbModifier);
            wordElement.setFeature(LexicalFeature.SENTENCE_MODIFIER, isSentenceModifier);
            wordElement.setFeature(LexicalFeature.INTENSIFIER, isIntensifier);
        }
Beispiel #14
0
        /**
         * Extracts noun information from an NIH NounEntry record and stores it
         * on a simplenlg WordElement. For now only the inflectional variants
         * and the proper-noun flag are used.
         *
         * Variant strings have the form "code" or "code|...". The code is
         * lower-cased with the invariant culture before lookup, because it is a
         * machine identifier: culture-sensitive lowering (for example Turkish
         * dotless i) would make some codes unrecognisable.
         *
         * @param wordElement
         *            the word that receives the features
         * @param nounEntry
         *            the record to read
         */
        private void addNounInfo(WordElement wordElement, NounEntry nounEntry)
        {
            bool proper = nounEntry.IsProper();

            // add the inflectional variants
            List <string> variants = nounEntry.GetVariants();

            if (variants.Count > 0)
            {
                IList <Inflection> wordVariants = new List <Inflection>();

                foreach (string v in variants)
                {
                    // The code is everything before the first '|', or the whole string.
                    int    index = v.IndexOf("|", StringComparison.Ordinal);
                    string raw   = index > -1 ? v.Substring(0, index) : v;
                    string code  = raw.ToLowerInvariant().Trim();

                    Inflection?infl = Inflection.REGULAR.getInflCode(code);

                    if (infl != null)
                    {
                        wordVariants.Add(infl.Value);
                        wordElement.addInflectionalVariant(infl.Value);
                    }
                }

                // if the variants include "reg", this is the default, otherwise just a random pick
                Inflection defaultVariant = wordVariants.Contains(Inflection.REGULAR) || wordVariants.Count == 0 ? Inflection.REGULAR : wordVariants[0];
                wordElement.setFeature(LexicalFeature.DEFAULT_INFL, defaultVariant);
                wordElement.setDefaultInflectionalVariant(defaultVariant);
            }

            // NON_COUNT is deliberately not derived from the variants here: lots
            // of words have both "reg" and "uncount", indicating they can be used
            // either way, and an earlier approach that flagged a word as
            // non-count only when unambiguous has been disabled.
            wordElement.setFeature(LexicalFeature.PROPER, proper);
            // ignore (for now) other info in record
        }
Beispiel #15
0
 /**
  * Convenience method to update an index: appends the word to the list
  * stored under the given key, creating the list on first use.
  *
  * @param word
  *            the word to add
  * @param base
  *            the key under which the word is indexed
  * @param index
  *            the index to update
  */
 private void updateIndex(WordElement word, string @base, IDictionary <string, IList <WordElement> > index)
 {
     // A single TryGetValue replaces the ContainsKey + indexer + indexer lookups.
     IList <WordElement> entries;

     if (!index.TryGetValue(@base, out entries))
     {
         entries      = new List <WordElement>();
         index[@base] = entries;
     }
     entries.Add(word);
 }
Beispiel #16
0
        /// <summary>
        ///     Creates a <c>Phrase</c> for this instance and the given word.
        /// </summary>
        /// <param name="word">
        ///     The word to use; when <c>null</c>, the result of
        ///     <c>GetRandomWord()</c> is used instead.
        /// </param>
        /// <returns>The newly created phrase.</returns>
        public Phrase GetRandomSentence(WordElement word = null)
        {
            WordElement chosen = word == null ? GetRandomWord() : word;

            return(new Phrase(this, chosen));
        }
Beispiel #17
0
        /// <summary>
        ///     Processes the word at <c>CurWordIndex</c> for the current language:
        ///     appends it to <paramref name="codeElement"/> and, for punctuation,
        ///     dispatches to the comment and string merge routines.
        /// </summary>
        /// <param name="codeElement">The language node that receives the word.</param>
        /// <param name="preChar">
        ///     One-character look-behind state. It is set to <c>'-'</c> or <c>'['</c>
        ///     when that character may start a two-character token, and reset to
        ///     <c>'\0'</c> on every other path.
        /// </param>
        public override void MergeLanWord(AnalyzeElement codeElement, ref char preChar)
        {
            WordElement cur = WordElements[CurWordIndex];

            codeElement.Append(cur);
            // Non-punctuation never starts or continues a two-character token.
            if (!cur.IsPunctuate)
            {
                preChar = '\0';
                return;
            }
            switch (cur.Char)
            {
                // Region: comments
                #region 注释

            case '-':
                // First '-': remember it and wait for the next word.
                if (preChar != '-')
                {
                    preChar = '-';
                    return;
                }
                {
                    // Second consecutive '-': fold it into the element before the
                    // last one, drop the last element, merge the rest of the
                    // comment and mark the block as a comment.
                    var block = codeElement.Elements[codeElement.Elements.Count - 2];
                    block.Append(cur);
                    codeElement.Elements.RemoveAt(codeElement.Elements.Count - 1);
                    MergeLuaRem(block);
                    block.SetRace(CodeItemRace.Assist, CodeItemFamily.Rem);
                }
                break;

                #endregion

                // Region: strings
                #region 字符串

            case '\"':
            case '\'':
                // Either quote character opens a string closed by the same character.
                MergeString(cur, cur.Char);
                break;

            case '[':
                // First '[': remember it and wait for the next word.
                if (preChar != '[')
                {
                    preChar = '[';
                    return;
                }
                {
                    // Second consecutive '[': fold it into the element before the
                    // last one, drop the last element, merge the multi-line string
                    // and mark the block as a string constant.
                    var block = codeElement.Elements[codeElement.Elements.Count - 2];
                    block.Append(cur);
                    codeElement.Elements.RemoveAt(codeElement.Elements.Count - 1);
                    MergeMulitString(block);
                    block.SetRace(CodeItemRace.Value, CodeItemFamily.Constant, CodeItemType.String);
                }
                break;

                #endregion
            }
            // Every path that reaches here has finished its token.
            preChar = '\0';
        }
Beispiel #18
0
        /**
         * Performs the morphology for adjectives.
         *
         * For a comparative or superlative the form is taken, in order, from
         * the corresponding feature on the element, from the same feature on
         * the base word (when one is given), or built from the base form using
         * the doubling rule when the element's DEFAULT_INFL is REGULAR_DOUBLE
         * and the regular rule otherwise. In all other cases the base form is
         * used unchanged.
         *
         * @param element
         *            the <code>InflectedWordElement</code>.
         * @param baseWord
         *            the <code>WordElement</code> as created from the lexicon
         *            entry.
         * @return a <code>StringElement</code> representing the word after
         *         inflection, carrying the element's DISCOURSE_FUNCTION.
         */
        public static NLGElement doAdjectiveMorphology(InflectedWordElement element, WordElement baseWord)
        {
            object inflectionPattern = element.getFeature(LexicalFeature.DEFAULT_INFL);

            // base form from baseWord if it exists, otherwise from element
            string baseForm = getBaseForm(element, baseWord);
            string surface;

            if (element.getFeatureAsBoolean(Feature.IS_COMPARATIVE))
            {
                surface = element.getFeatureAsString(LexicalFeature.COMPARATIVE);
                if (surface == null && baseWord != null)
                {
                    surface = baseWord.getFeatureAsString(LexicalFeature.COMPARATIVE);
                }
                if (surface == null)
                {
                    surface = Inflection.REGULAR_DOUBLE.Equals(inflectionPattern)
                        ? buildDoubleCompAdjective(baseForm)
                        : buildRegularComparative(baseForm);
                }
            }
            else if (element.getFeatureAsBoolean(Feature.IS_SUPERLATIVE))
            {
                surface = element.getFeatureAsString(LexicalFeature.SUPERLATIVE);
                if (surface == null && baseWord != null)
                {
                    surface = baseWord.getFeatureAsString(LexicalFeature.SUPERLATIVE);
                }
                if (surface == null)
                {
                    surface = Inflection.REGULAR_DOUBLE.Equals(inflectionPattern)
                        ? buildDoubleSuperAdjective(baseForm)
                        : buildRegularSuperlative(baseForm);
                }
            }
            else
            {
                surface = baseForm;
            }

            StringElement inflected = new StringElement(surface);
            object        function  = element.getFeature(InternalFeature.DISCOURSE_FUNCTION);

            inflected.setFeature(InternalFeature.DISCOURSE_FUNCTION, function);
            return(inflected);
        }
        // note that addFrontModifier, addPostModifier, addPreModifier are inherited from PhraseElement
        // likewise getFrontModifiers, getPostModifiers, getPreModifiers


        /** Adds a modifier to a verb phrase, using heuristics to place it.
         *
         * An adverb becomes a pre-modifier. A string that is one word (non-empty,
         * no space) is first turned into a word via the factory and becomes a
         * pre-modifier if that word is an adverb. Everything else becomes a
         * post-modifier. A null modifier is ignored.
         *
         * @param modifier
         *            an NLGElement or a string
         */
        public override void addModifier(object modifier)
        {
            if (modifier == null)
            {
                return;
            }

            // get modifier as NLGElement if possible
            NLGElement modifierElement = null;
            string     text            = modifier as string;

            if (modifier is NLGElement)
            {
                modifierElement = (NLGElement)modifier;
            }
            else if (text != null && text.Length > 0 && !text.Contains(" "))
            {
                modifierElement = Factory.createWord(modifier, new LexicalCategory(LexicalCategory.LexicalCategoryEnum.ANY));
            }

            // if no modifier element, must be a complex string
            if (modifierElement == null)
            {
                addPostModifier((string)modifier);
                return;
            }

            // extract WordElement if modifier is a single word
            WordElement modifierWord = null;

            if (modifierElement is WordElement)
            {
                modifierWord = (WordElement)modifierElement;
            }
            else if (modifierElement is InflectedWordElement)
            {
                modifierWord = ((InflectedWordElement)modifierElement).BaseWord;
            }

            bool isAdverb = modifierWord != null && modifierWord.Category == LexicalCategory.LexicalCategoryEnum.ADVERB;

            if (isAdverb)
            {
                addPreModifier(modifierWord);
            }
            else
            {
                // default case
                addPostModifier(modifierElement);
            }
        }
Beispiel #20
0
        /**
         * Checks that the lexicon entry "UK" lists three ACRONYM_OF full forms
         * and that "United Kingdom" is one of them.
         */
        public virtual void acronymsTests()
        {
            WordElement acronym  = lexicon.getWord("UK");
            WordElement expected = lexicon.getWord("United Kingdom");

            IList <NLGElement> expansions = acronym.getFeatureAsElementList(LexicalFeature.ACRONYM_OF);

            Assert.AreEqual(3, expansions.Count);
            Assert.IsTrue(expansions.Contains(expected));
        }
Beispiel #21
0
        /**
         * Unwraps an XML word element into an NLG element.
         *
         * A canned element becomes a string element built from its base. For
         * any other element a word is created from the base form, using the
         * unwrapped category when it is lexical and ANY otherwise. On the
         * resulting WordElement the default inflectional variant is set from
         * the XML "var" value when that value parses as an Inflection, and the
         * given base form is installed as the default spelling variant when it
         * differs from the word's own base form.
         *
         * @param wordElement
         *            the word element, may be null
         * @return the NLG element, or null when wordElement is null, has no
         *         base form, or yields an inflected word with an empty base
         *         form
         */
        private NLGElement UnwrapWordElement(XmlWordElement wordElement)
        {
            if (wordElement == null)
            {
                return(null);
            }

            if (true.Equals(wordElement.Canned))
            {
                return(factory.createStringElement(wordElement.Base));
            }

            LexicalCategory lexCat = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.ANY);
            ElementCategory cat    = UnwrapCategory(wordElement.Cat);

            if (cat is LexicalCategory)
            {
                lexCat = (LexicalCategory)cat;
            }

            string baseForm = wordElement.Base;

            if (ReferenceEquals(baseForm, null))
            {
                return(null);
            }

            NLGElement word = factory.createWord(baseForm, lexCat);

            if (word is InflectedWordElement && ((InflectedWordElement)word).BaseWord.BaseForm.Length == 0)
            {
                return(null);                                // cch TESTING
            }

            if (word is WordElement)
            {
                WordElement we = (WordElement)word;

                // Inflection: apply it only when the value really names an
                // Inflection member. Ignoring a failed parse would silently
                // install default(Inflection).
                if (wordElement.Var != null)
                {
                    if (Enum.TryParse(wordElement.Var.ToString(), out Inflection defaultInflection))
                    {
                        we.setDefaultInflectionalVariant(defaultInflection);
                    }
                }

                // Spelling variant may have been given as base form in xml.
                // If so, use that variant. The comparison is a plain ordinal
                // one: splicing the base form into a regular expression breaks
                // on words containing metacharacters such as '+', '(' or '.'.
                if (!string.Equals(baseForm, we.BaseForm ?? string.Empty, StringComparison.Ordinal))
                {
                    we.DefaultSpellingVariant = baseForm;
                }
            }

            return(word);
        }
Beispiel #22
0
 /// <summary>
 /// Quick-and-dirty lookup of a morphological form; should be replaced by
 /// something better.
 /// </summary>
 /// <param name="word">The word whose form is wanted.</param>
 /// <param name="feature">Name of the feature that may hold the form explicitly.</param>
 /// <param name="suffix">Suffix handed to <c>getForm</c> when the feature is absent.</param>
 /// <returns>
 /// The feature value as a string when the word has the feature; otherwise the
 /// result of <c>getForm</c> applied to the word's base form and the suffix.
 /// </returns>
 private string getVariant(WordElement word, string feature, string suffix)
 {
     return word.hasFeature(feature)
         ? word.getFeatureAsString(feature)
         : getForm(word.BaseForm, suffix);
 }
Beispiel #23
0
        /// <summary>
        /// Copies the spelling variants of an NIH lexicon record onto a word element.
        /// </summary>
        /// <remarks>
        /// A non-empty variant list is stored under <c>LexicalFeature.SPELL_VARS</c>.
        /// <c>LexicalFeature.DEFAULT_SPELL</c> is always set to the word's base form,
        /// whether or not the record has variants.
        /// </remarks>
        /// <param name="wordElement">The word element to update.</param>
        /// <param name="record">The lexicon record supplying the spelling variants.</param>
        private void addSpellingVariants(WordElement wordElement, LexRecord record)
        {
            List<string> spellings = record.GetSpellingVars();
            bool hasSpellings = spellings != null && spellings.Count > 0;

            if (hasSpellings)
            {
                wordElement.setFeature(LexicalFeature.SPELL_VARS, spellings);
            }

            // The base form doubles as the default spelling variant.
            var defaultSpelling = wordElement.BaseForm;
            wordElement.setFeature(LexicalFeature.DEFAULT_SPELL, defaultSpelling);
        }
Beispiel #24
0
        /// <summary>
        /// Chooses the base form to use for an inflected word.
        /// </summary>
        /// <remarks>
        /// It is unclear what the right behaviour should be. For now, verbs prefer
        /// the base word (so "is" maps to "be") while every other category prefers
        /// the element's own base form (so "children" is not mapped to "child").
        /// AG: the base word contributes its default spelling variant rather than
        /// its base form, which is needed to preserve spelling changes in the VP.
        /// </remarks>
        /// <param name="element">The inflected word element.</param>
        /// <param name="baseWord">The underlying base word; may be null.</param>
        /// <returns>The chosen base form, or null when neither source supplies one.</returns>
        private static string getBaseForm(InflectedWordElement element, WordElement baseWord)
        {
            bool isVerb = element.Category == LexicalCategory.LexicalCategoryEnum.VERB;

            if (isVerb)
            {
                // Verbs: the base word's default spelling wins whenever it exists.
                bool baseWordHasSpelling = baseWord != null && baseWord.DefaultSpellingVariant != null;

                return baseWordHasSpelling ? baseWord.DefaultSpellingVariant : element.BaseForm;
            }

            // Other categories: the element's own base form wins whenever it exists.
            if (element.BaseForm != null)
            {
                return element.BaseForm;
            }

            return baseWord == null ? null : baseWord.DefaultSpellingVariant;
        }
        /// <summary>
        /// Verifies that overriding a verb's default spelling variant is reflected
        /// in the realised clause: "etherise" starts out with the default spelling
        /// "etherize", is switched to "etherise", and the clause is then expected
        /// to realise as "the doctor etherises the patient".
        /// </summary>
        public virtual void spellingVariantsInVPTest()
        {
            LexicalCategory verbCategory = new LexicalCategory(LexicalCategory.LexicalCategoryEnum.VERB);
            WordElement     verb         = (WordElement)factory.createWord("etherise", verbCategory);

            // Default spelling as reported for the freshly created word.
            Assert.AreEqual("etherize", verb.DefaultSpellingVariant);

            // Override the default spelling and confirm that it sticks.
            verb.DefaultSpellingVariant = "etherise";
            Assert.AreEqual("etherise", verb.DefaultSpellingVariant);

            var subject    = factory.createNounPhrase("the", "doctor");
            var complement = factory.createNounPhrase("the patient");
            SPhraseSpec clause = factory.createClause(subject, verb, complement);

            var realised = realiser.realise(clause).Realisation;
            Assert.AreEqual("the doctor etherises the patient", realised);
        }
Beispiel #26
0
        /// <summary>
        /// Compares two word elements by base form, id and features.
        /// </summary>
        /// <param name="o">The word element the comparison is invoked on; may be null.</param>
        /// <param name="d">The word element to compare against; may be null.</param>
        /// <returns>
        /// True when both elements are non-null, their base forms are equal, their
        /// ids are equal, and <c>d.features.equals(o.features)</c> holds; otherwise false.
        /// </returns>
        public static bool equals(this WordElement o, WordElement d)
        {
            // A missing operand never compares equal. Previously a null 'd' threw.
            if (o is null || d is null)
            {
                return false;
            }

            // object.Equals is null-safe on both sides. The earlier
            // 'a == b || a.Equals(b)' form threw when only the left value was null.
            return object.Equals(o.baseForm, d.baseForm) &&
                   object.Equals(o.id, d.id) &&
                   d.features.equals(o.features);
        }
Beispiel #27
0
        /// <summary>
        /// Adds special cases to the lexicon: when the verb "be" is available, its
        /// forms "is", "am", "are", "was" and "were" are registered for it in the
        /// variant index.
        /// </summary>
        private void addSpecialCases()
        {
            WordElement be = getWord("be", new LexicalCategory(LexicalCategory.LexicalCategoryEnum.VERB));

            if (be == null)
            {
                return;
            }

            // Variants of "be", registered in this order.
            string[] variantsOfBe = { "is", "am", "are", "was", "were" };

            foreach (string variant in variantsOfBe)
            {
                updateIndex(be, variant, indexByVariant);
            }
        }
Beispiel #28
0
        /// <summary>
        ///     Merges the words that make up a Lua comment into a single element.
        /// </summary>
        /// <remarks>
        ///     The next two words are appended first. If both are '[' the comment is
        ///     treated as multi-line and words are appended up to and including two
        ///     consecutive ']' words. Otherwise words are appended up to, but not
        ///     including, the next line word. <c>CurWordIndex</c> is advanced as
        ///     words are consumed.
        /// </remarks>
        /// <param name="element">The element that receives the merged comment words.</param>
        private void MergeLuaRem(WordElement element)
        {
            // NOTE(review): IsWithLineEnd presumably reports that no further word
            // is available on this line; confirm against its definition.
            if (IsWithLineEnd())
            {
                return;
            }
            element.Append(WordElements[++CurWordIndex]);
            var a = WordElements[CurWordIndex].Char;

            if (IsWithLineEnd())
            {
                return;
            }
            element.Append(WordElements[++CurWordIndex]);
            var b = WordElements[CurWordIndex].Char;

            if (a == '[' && b == '[') // multi-line comment
            {
                // True while the previously appended word was a ']'.
                bool isShift = false;

                // The loop header alone advances the index. The earlier extra
                // '++CurWordIndex' in the body skipped every other word, so the
                // closing "]]" was never seen as consecutive, and the index could
                // run past the end of WordElements.
                for (CurWordIndex++; CurWordIndex < WordElements.Count; CurWordIndex++)
                {
                    element.Append(WordElements[CurWordIndex]);
                    if (WordElements[CurWordIndex].Char == ']') // possible end of the multi-line comment
                    {
                        if (isShift)
                        {
                            return;
                        }
                        isShift = true;
                    }
                    else
                    {
                        isShift = false;
                    }
                }
            }
            else
            {
                for (CurWordIndex++; CurWordIndex < WordElements.Count; CurWordIndex++)
                {
                    if (WordElements[CurWordIndex].IsLine) // the line has ended
                    {
                        --CurWordIndex;                    // step back so the line word is not consumed here
                        return;
                    }

                    // Append the word that was just checked, not the one after it.
                    element.Append(WordElements[CurWordIndex]);
                }
            }
        }
Beispiel #29
0
        ///**
        // * convenience method to test that a list is not null and not empty
        // *
        // * @param list
        // * @return
        // */
        //private bool notEmpty<T1>(IList<T1> list)
        //{
        //    return list != null && list.Count > 0;
        //}

        /// <summary>
        /// Extracts information about acronyms from an NIH record and adds it to a
        /// simplenlg WordElement.
        /// </summary>
        /// <remarks>
        /// Acronyms are represented as lists of word elements. Any acronym has a
        /// list of full-form word elements, retrievable via
        /// <c>LexicalFeature.ACRONYM_OF</c>. Only record entries of the form
        /// "text|id" are used; the part after the first '|' is looked up with
        /// <c>getWordByID</c>. The feature is left untouched when the record lists
        /// no acronyms.
        /// </remarks>
        /// <param name="wordElement">The word element to update.</param>
        /// <param name="record">The lexicon record supplying the acronym data.</param>
        private void addAcronymInfo(WordElement wordElement, LexRecord record)
        {
            // NB: the "acronyms" are actually the full forms of which the word is
            // an acronym.
            List<string> acronyms = record.GetAcronyms();

            if (acronyms == null || acronyms.Count == 0)
            {
                return;
            }

            // The list of full forms of which this word is an acronym.
            List<NLGElement> acronymOf = wordElement.getFeatureAsElementList(LexicalFeature.ACRONYM_OF);

            // Keep all acronym full forms and set them up as word elements.
            foreach (string fullForm in acronyms)
            {
                int separator = fullForm.IndexOf("|", StringComparison.Ordinal);

                if (separator < 0)
                {
                    continue;
                }

                // Everything after the first '|' is the id of the full form.
                string      acronymID  = fullForm.Substring(separator + 1);
                WordElement fullFormWE = getWordByID(acronymID);

                // Skip ids that do not resolve to a word. The earlier check tested
                // the already-dereferenced string 'fullForm', so a null lookup
                // result was added to the list.
                if (fullFormWE != null)
                {
                    acronymOf.Add(fullFormWE);
                }
            }

            // Set all the full forms for this acronym.
            wordElement.setFeature(LexicalFeature.ACRONYM_OF, acronymOf);
        }
Beispiel #30
0
        /// <summary>
        /// Splits a line into a lazily produced chain of elements.
        /// </summary>
        /// <remarks>
        /// The line is matched repeatedly against a two-group pattern: a run of
        /// whitespace and '/' characters, followed by a run of anything else. Each
        /// capture group of each match yields one element, including groups that
        /// captured nothing: a whitespace element for null-or-whitespace text, a
        /// chord element when <c>ParseExact</c> returns a value, and a word element
        /// otherwise. Every element is linked to the one yielded before it through
        /// <c>PreviousElement</c> / <c>NextElement</c>.
        /// </remarks>
        /// <param name="line">The text line to tokenize.</param>
        /// <returns>The elements in the order they occur in the line.</returns>
        private IEnumerable<BaseElement> ProcessLine(string line)
        {
            BaseElement current = null;

            foreach (Match token in Regex.Matches(line, @"([\s/]*)([^\s/]*)"))
            {
                // Group 0 is the whole match; only the capture groups produce elements.
                for (int groupIndex = 1; groupIndex < token.Groups.Count; groupIndex++)
                {
                    string      text     = token.Groups[groupIndex].Value;
                    BaseElement previous = current;

                    if (string.IsNullOrWhiteSpace(text))
                    {
                        current = new WhitespaceElement(text);
                    }
                    else
                    {
                        var rawChord = ParseExact(text);

                        if (rawChord == null)
                        {
                            current = new WordElement(text);
                        }
                        else
                        {
                            current = new ChordElement(new Chord(rawChord));
                        }
                    }

                    // Link the new element into the chain in both directions.
                    current.PreviousElement = previous;

                    if (previous != null)
                    {
                        previous.NextElement = current;
                    }

                    yield return current;
                }
            }
        }
Beispiel #31
0
 /// <summary>
 /// Fills a quiz with a randomly drawn word or meaning and hands back the
 /// entries that go with it.
 /// </summary>
 /// <remarks>
 /// A number is drawn from twice the larger of the word and meaning counts, and
 /// redrawn until it differs from the previous draw. Its parity selects word
 /// (odd) or meaning (even); half the draw, scaled by the word/meaning ratio
 /// when that side is the smaller one, is used as the index.
 /// </remarks>
 /// <param name="q">The quiz whose <c>quest</c> text is set.</param>
 /// <param name="waitList">
 /// Receives the meanings of the drawn word, or the words of the drawn meaning.
 /// </param>
 /// <returns>True when a word was drawn, false when a meaning was drawn.</returns>
 private static bool NormalQuiz(Quiz q, out WordElement[] waitList)
 {
     int upperBound = 2 * Math.Max(DictService.WordsCount, DictService.MeansCount);
     float ratio = DictService.WordsCount * 1.0f / DictService.MeansCount;

     // Redraw until the value differs from the previous draw.
     int draw = lastRand;
     while (draw == lastRand)
     {
         draw = rand.Next(upperBound);
     }
     lastRand = draw;

     bool isWord = draw % 2 == 1;
     int slot = draw / 2;

     // get basic data
     if (!isWord)
     {
         int meanIndex = ratio > 1.0f ? (int)(slot / ratio) : slot;
         var mean = DictService.MeanAt(meanIndex);
         q.quest = mean.Meaning;
         waitList = DictService.GetWordsByMId(mean.Id);
         return false;
     }

     int wordIndex = ratio < 1.0f ? (int)(slot * ratio) : slot;
     var word = DictService.WordAt(wordIndex);
     q.quest = word.Letters;
     waitList = DictService.GetMeansByWId(word.Id);
     return true;
 }
Beispiel #32
0
        /// <summary>
        /// Fills a quiz from a randomly drawn entry of the wrong-answer list and
        /// hands back the entries that go with it.
        /// </summary>
        /// <remarks>
        /// The drawn entry is first looked up as a word via
        /// <c>DictService.GetMeansByWord</c>; when that returns null it is looked
        /// up as a meaning via <c>DictService.GetWordsByMean</c>.
        /// </remarks>
        /// <param name="q">The quiz whose <c>quest</c> text is set.</param>
        /// <param name="waitList">
        /// Receives the meanings of the drawn word, or the words of the drawn meaning.
        /// </param>
        /// <returns>True when the entry was resolved as a word, false otherwise.</returns>
        private static bool AdaptQuiz(Quiz q, out WordElement[] waitList)
        {
            int wrongCount = DictService.WrongCount;
            int r = 0;

            // Avoid repeating the previous draw only when another value exists.
            // With at most one candidate the only possible draw is 0, so the old
            // unconditional redraw loop never terminated once lastRand was 0.
            if (wrongCount > 1)
            {
                r = lastRand;
                while (r == lastRand)
                {
                    r = rand.Next(wrongCount);
                }
            }

            lastRand = r;
            var stat = DictService.EleAt(r);
            q.quest = stat.str;

            // Try the entry as a word first, then fall back to treating it as a meaning.
            waitList = DictService.GetMeansByWord(stat);
            if (waitList != null)
            {
                return true;
            }

            waitList = DictService.GetWordsByMean(stat);
            return false;
        }