Beispiel #1
0
        // Match Functions
        /// <summary>
        /// Matches the beginning of the input sentence against the current element and outputs the relevant parts. By design, an element matches only as much as it can recognize and does not require the input sentence to be of an exact length.
        /// bOptional should be checked by the caller.
        /// </summary>
        /// <param name="content">String to be matched from the beginning</param>
        /// <param name="vocabulary">Vocabulary used for the phrase, tag and category lookups</param>
        /// <param name="consumed">Actual number of characters consumed during the match</param>
        /// <returns>An element instance if a match is found, otherwise null; the caller might also want to remove trailing spaces</returns>
        /// <Debug>By design MatchElement doesn't consider English spacing, so the caller must be cautious about that since consumed doesn't count ending spaces</Debug>
        public PatternElementInstance MatchElement(string content, VocabularyManager vocabulary, out int consumed)
        {
            // Dispatches on Type and tries to match this element at the start of `content`.
            // On success the matched instance is returned and `consumed` holds the number of
            // characters of `content` that were used (including skipped leading whitespace for
            // the cases that trim). On failure null is returned and `consumed` is 0.
            switch (Type)
            {
                case PatternElementType.SpecificWord:
                {
                    string trimmed = content.TrimStart();
                    // Ordinal prefix test: the consumed count below is Key.Length, so the match
                    // must cover exactly that many characters (a culture-sensitive search may not).
                    if (trimmed.StartsWith(Key.ToLower(), StringComparison.Ordinal))
                    {
                        consumed = (content.Length - trimmed.Length) + Key.Length;
                        return new PatternElementInstance(Type, Key);
                    }
                    break;
                }

                case PatternElementType.VarietyWord:
                {
                    string trimmed = content.TrimStart();
                    // Start from the number of leading whitespace characters; the vocabulary call
                    // updates `consumed` through the ref parameter.
                    consumed = content.Length - trimmed.Length;
                    if (vocabulary.IsPraseVaryingFormOrSynonymUndetermined(trimmed, Key, ref consumed) == true)
                    {
                        return new PatternElementInstance(Type, content.Substring(0, consumed));
                    }
                    break;
                }

                case PatternElementType.WordAttribute:
                {
                    // We do not explicitly trim for this; leading white space is handled by
                    // locating each phrase inside the original string below.
                    if (Key == "any")
                    {
                        throw new InvalidOperationException("Any should be handled outside");
                    }

                    // Get attributes to match; attributes are guaranteed to be valid at load time
                    bool bInfinite = (Key[0] == '*');    // Leading '*' means "repeat while phrases keep matching"
                    WordAttribute attribute = 0;
                    foreach (string name in Key.Split(new char[] { '+', '*' }))
                    {
                        if (string.IsNullOrWhiteSpace(name))
                        {
                            continue;
                        }
                        attribute |= (WordAttribute)Enum.Parse(typeof(WordAttribute), name);
                    }

                    consumed = 0;
                    // The input must be recognizable so it's going to be a phrase of some kind
                    // <Improvement> Could we be matching the shortest attribute?
                    Phrase phrase = vocabulary.GetPhrase(content);
                    while (phrase != null
                           && ((phrase.Attribute & attribute) == attribute || attribute == WordAttribute.any))
                    {
                        // GetPhrase() works on trimmed input, so find where the phrase really sits.
                        // The search must start at `consumed`: the phrase was looked up in the
                        // remainder of the string, not in the whole string.
                        int phraseStart = content.IndexOf(phrase.Key, consumed, StringComparison.Ordinal);
                        if (phraseStart < 0)
                        {
                            // The phrase key is not literally present in the remaining text, so its
                            // extent cannot be measured; stop instead of producing a bogus count.
                            break;
                        }
                        consumed = phraseStart + phrase.Key.Length;

                        if (!bInfinite)
                        {
                            break;
                        }
                        phrase = vocabulary.GetPhrase(content.Substring(consumed));  // Continue with next phrase
                    }

                    if (consumed != 0)
                    {
                        // Return everything matched as one single phrase (which by itself may not exist in the library)
                        // <Development> This can be utilized by action handlers for learning new expressions e.g. "big shinny red juicy" apple
                        return new PatternElementInstance(Type, content.Substring(0, consumed).Trim());
                    }
                    break;
                }

                case PatternElementType.SubPattern:
                {
                    consumed = 0;
                    // Sub-patterns are matched non-exactly (bExact = false)
                    PatternInstance subPatternInstance = SubPattern.Match(content, vocabulary, ref consumed, false);
                    if (subPatternInstance != null)
                    {
                        return new PatternElementInstance(Type, subPatternInstance);
                    }
                    break;
                }

                case PatternElementType.Choice:
                {
                    // Emit a successful choice at the first match
                    // <Improvement> A more accurate way would be to match all options and use the longest match, e.g. Courtesy Interrupt
                    foreach (PatternElement choiceElement in Choices)
                    {
                        PatternElementInstance choiceInstance = choiceElement.MatchElement(content, vocabulary, out consumed);
                        if (choiceInstance != null)
                        {
                            return new PatternElementInstance(Type, choiceInstance.ElementValue);
                        }
                    }
                    break;
                }

                case PatternElementType.Tag:
                {
                    string trimmed = content.TrimStart();
                    string tagValue = MatchTag(Key, trimmed, vocabulary);
                    if (tagValue != null)
                    {
                        consumed = (content.Length - trimmed.Length) + tagValue.Length;
                        return new PatternElementInstance(Type, tagValue);
                    }
                    break;
                }

                case PatternElementType.CategoryInclude:
                case PatternElementType.CategoryExclude:
                {
                    // Both category kinds share the same flow; only the include flag differs
                    bool bInclude = (Type == PatternElementType.CategoryInclude);
                    string trimmed = content.TrimStart();
                    string match = MatchCategory(Key, bInclude, trimmed, vocabulary);
                    if (match != null)
                    {
                        consumed = (content.Length - trimmed.Length) + match.Length;
                        return new PatternElementInstance(Type, match);
                    }
                    break;
                }

                case PatternElementType.Punctuation:
                {
                    // Punctuation is matched at the very first character; no whitespace is skipped
                    if (content.StartsWith(Key, StringComparison.Ordinal))
                    {
                        consumed = Key.Length;
                        return new PatternElementInstance(Type, Key);
                    }
                    break;
                }

                case PatternElementType.UnknownPhrase:
                {
                    // Try to extract the unknown part from the known
                    string unknownString = vocabulary.GetUnknownPhrase(content);
                    if (unknownString != null)
                    {
                        consumed = unknownString.Length;
                        return new PatternElementInstance(Type, unknownString);
                    }
                    break;
                }

                default:
                    break;
            }

            // No match: nothing consumed
            consumed = 0;
            return null;
        }
Beispiel #2
0
        // Interaction Interface: Returns whether or not a given sentence matches this pattern: if a match is found a PatternInstance is returned, otherwise null
        // Notice that patterns provide EXACT matches by default; this gives designers flexibility in defining general or specific matches per need/context. For subpatterns the length cannot be decided in advance, so an option (bExact) is given
        // @content: the input sentence doesn't need to be well formed: auxiliary blanks, English or Chinese, punctuation or not -- those are handled within the pattern and the pattern elements themselves
        // <Improvement Note> The implementation of this function should be language neutral and shouldn't assume meaningful symbols are separated by spaces
        public PatternInstance Match(string content, VocabularyManager vocabulary, ref int processedLocation, bool bExact = true)
        {
            // Matches `content` against every element of this pattern in order.
            // Returns the populated PatternInstance, or null when a required element fails to
            // match or (with bExact) when the pattern does not cover the whole trimmed input.
            // On success `processedLocation` is the offset of the first non-whitespace character
            // of the caller's string plus the number of characters matched after it.

            // Number of leading whitespace characters in the caller's string
            processedLocation = content.Length - content.TrimStart().Length;

            // Basic format processing
            content = content.Trim().ToLower();

            PatternInstance instance        = new PatternInstance(this);
            int             currentLocation = 0;

            for (int i = 0; i < Elements.Count; i++)
            {
                PatternElement element = Elements[i];

                // Input exhausted: only optional elements may remain
                if (currentLocation >= content.Length)
                {
                    if (element.bOptional == false)
                    {
                        return null;
                    }
                    continue;
                }

                // Skip spaces; the bound is checked first so the indexer is never read out of range.
                // `content` was trimmed above, so this cannot run off the end of the string.
                while (currentLocation < content.Length && content[currentLocation] == ' ')
                {
                    currentLocation++;
                }

                int consumed = 0;
                PatternElementInstance eInstance = null;

                if (element.Key == "any")
                {
                    // <any> is handled here rather than in MatchElement: it swallows characters
                    // until the NEXT element matches. If the next element never matches, the
                    // pattern simply doesn't match.
                    if (i + 1 < Elements.Count)
                    {
                        PatternElement         nextElement  = Elements[i + 1];
                        PatternElementInstance nextInstance = null;
                        int                    nextConsumed;    // Not needed here; the next loop iteration re-matches nextElement
                        int                    anySize      = 0;

                        // Probe one character further each time, never past the end of the input
                        while (currentLocation + anySize < content.Length)
                        {
                            nextInstance = nextElement.MatchElement(content.Substring(currentLocation + anySize), vocabulary, out nextConsumed);
                            if (nextInstance != null)
                            {
                                break;
                            }
                            anySize++;
                        }

                        if (nextInstance == null)
                        {
                            return null;
                        }

                        // Everything skipped before the next element is the <any> value
                        eInstance = new PatternElementInstance(PatternElementType.WordAttribute, content.Substring(currentLocation, anySize));
                        consumed  = anySize;
                    }
                    else
                    {
                        // <any> is the last element: it takes the rest of the input
                        eInstance = new PatternElementInstance(PatternElementType.WordAttribute, content.Substring(currentLocation));
                        consumed  = content.Length - currentLocation;
                    }
                }
                else
                {
                    // Notice consumed is language neutral, i.e. in English it contains spaces skipped,
                    // while in other languages it's only the element length
                    eInstance = element.MatchElement(content.Substring(currentLocation), vocabulary, out consumed);
                }

                if (eInstance != null)
                {
                    instance.ComponentElements.Add(eInstance);
                    currentLocation += consumed;
                }
                else if (element.bOptional != true)
                {
                    return null;
                }
            }

            // Anything left over means the pattern only covered part of the input
            if (currentLocation != content.Length)
            {
                instance.bPartialMatch = true;
            }
            if (bExact && instance.bPartialMatch == true)
            {
                return null;
            }

            processedLocation += currentLocation;
            return instance;
        }