// Match Functions
/// <summary>
/// Match the beginning of the input sentence against this element and output the relevant part.
/// By design an element only matches as much as it can recognize and doesn't require the input sentence to be of exact length.
/// bOptional should be checked by the caller.
/// </summary>
/// <param name="content">string to be matched from the beginning</param>
/// <param name="vocabulary">vocabulary used to look up phrases, varying forms, tags and categories</param>
/// <param name="consumed">actual number of characters consumed during the match; set to 0 when no match is found</param>
/// <returns>An element instance if a match is found, otherwise null; the caller might also want to remove trailing space</returns>
/// <Debug> By design MatchElement doesn't consider English spacing, so the caller must be cautious about that since consumed doesn't count ending spaces</Debug>
public PatternElementInstance MatchElement(string content, VocabularyManager vocabulary, out int consumed)
{
    switch (Type)
    {
        case PatternElementType.SpecificWord:
            // Leading white space counts as consumed
            consumed = content.IndexOf(content.TrimStart());
            if (content.TrimStart().IndexOf(Key.ToLower()) == 0)
            {
                consumed += Key.Length;
                return new PatternElementInstance(Type, Key);
            }
            break;

        case PatternElementType.VarietyWord:
            consumed = content.IndexOf(content.TrimStart());
            if (vocabulary.IsPraseVaryingFormOrSynonymUndetermined(content.TrimStart(), Key, ref consumed) == true)
            {
                return new PatternElementInstance(Type, content.Substring(0, consumed));
            }
            break;

        case PatternElementType.WordAttribute:
            // We do not explicitly trim for this; handling of beginning white spaces is dealt with below
            if (Key == "any")
            {
                throw new Exception("Any should be handled outside");
            }
            else
            {
                // Get attributes to match; attributes are guaranteed to be valid at load time
                bool bInfinite = (Key.ElementAt(0) == '*');
                string[] attributes = Key.Split(new char[] { '+', '*' });
                WordAttribute attribute = 0;
                foreach (string a in attributes)
                {
                    if (string.IsNullOrWhiteSpace(a))
                    {
                        continue;
                    }
                    attribute |= (WordAttribute)Enum.Parse(typeof(WordAttribute), a);
                }

                consumed = 0;
                // The input must be recognizable so it's going to be a phrase of some kind
                Phrase phrase = vocabulary.GetPhrase(content);
                // <Improvement> Could we be matching the shortest attribute?
                // <Warning> GetPhrase() trimmed, so phrase.Length might not equal actual consumed characters
                while (phrase != null)
                {
                    // Try match against attributes
                    if ((phrase.Attribute & attribute) == attribute || attribute == WordAttribute.any)
                    {
                        // Locate the phrase in the not-yet-consumed remainder (there might be some spaces in front of it).
                        // Searching from `consumed` rather than from index 0 keeps the running total correct
                        // when several phrases are chained by a '*' key.
                        int phraseStart = content.IndexOf(phrase.Key, consumed);
                        if (phraseStart < 0)
                        {
                            // The phrase cannot be located in the remaining input, so nothing more can be consumed reliably
                            break;
                        }
                        consumed = phraseStart + phrase.Key.Length;
                        phrase = vocabulary.GetPhrase(content.Substring(consumed)); // Continue with next phrase
                    }
                    else
                    {
                        phrase = null;
                    }

                    if (!bInfinite)
                    {
                        break;
                    }
                }

                if (consumed != 0)
                {
                    // Return that many elements as one single phrase (which by itself may not exist in the library)
                    // <Development> This can be utilized by action handlers for learning new expressions e.g. "big shinny red juicy" apple
                    return new PatternElementInstance(Type, content.Substring(0, consumed).Trim());
                }
            }
            break;

        case PatternElementType.SubPattern:
            consumed = 0;
            PatternInstance subPatternInstance = SubPattern.Match(content, vocabulary, ref consumed, false);
            if (subPatternInstance != null)
            {
                return new PatternElementInstance(Type, subPatternInstance);
            }
            break;

        case PatternElementType.Choice:
            // Emitting a successful choice at the first matching
            // <Improvement> A more accurate way would be to match all options and use the longest match, e.g. Courtesy Interrupt
            PatternElementInstance choiceInstance = null;
            foreach (PatternElement choiceElement in Choices)
            {
                choiceInstance = choiceElement.MatchElement(content, vocabulary, out consumed);
                if (choiceInstance != null)
                {
                    return new PatternElementInstance(Type, choiceInstance.ElementValue);
                }
            }
            // Valid if we have at least one and only one choice
            break;

        case PatternElementType.Tag:
            consumed = content.IndexOf(content.TrimStart());
            string tagValue = MatchTag(Key, content.TrimStart(), vocabulary);
            if (tagValue != null)
            {
                consumed += tagValue.Length;
                return new PatternElementInstance(Type, tagValue);
            }
            break;

        case PatternElementType.CategoryInclude:
            {
                consumed = content.IndexOf(content.TrimStart());
                string match = MatchCategory(Key, true, content.TrimStart(), vocabulary);
                if (match != null)
                {
                    consumed += match.Length;
                    return new PatternElementInstance(Type, match);
                }
            }
            break;

        case PatternElementType.CategoryExclude:
            {
                consumed = content.IndexOf(content.TrimStart());
                string match = MatchCategory(Key, false, content.TrimStart(), vocabulary);
                if (match != null)
                {
                    consumed += match.Length;
                    return new PatternElementInstance(Type, match);
                }
            }
            break;

        case PatternElementType.Punctuation:
            // No trimming here: the punctuation must sit at the very beginning of content
            if (content.IndexOf(Key) == 0)
            {
                consumed = Key.Length;
                return new PatternElementInstance(Type, Key);
            }
            break;

        case PatternElementType.UnknownPhrase:
            // Try extract unknown from known
            string unknownString = vocabulary.GetUnknownPhrase(content);
            if (unknownString != null) // Commit only if we find no match
            {
                consumed = unknownString.Length;
                return new PatternElementInstance(Type, unknownString);
            }
            break;

        default:
            break;
    }

    // No match: report nothing consumed
    consumed = 0;
    return null;
}
/// <summary>
/// Creates an element instance that carries a matched sub-pattern.
/// </summary>
/// <param name="type">element type stored in ElementType; expected to be PatternElementType.SubPattern (not enforced here)</param>
/// <param name="subPattern">pattern instance stored in SubPattern</param>
public PatternElementInstance(PatternElementType type, PatternInstance subPattern)
{
    // Assert ElementType == PatternElementType.SubPattern
    this.ElementType = type;
    this.SubPattern = subPattern;
}
/// <summary>
/// Interaction Interface: return whether or not a given sentence matches this pattern.
/// If a match is found a PatternInstance is returned, otherwise null.
/// Notice that patterns provide EXACT matches by default, which gives designers flexibility in defining general or specific
/// matches per need/context; for sub-patterns the length cannot be decided in advance, so bExact can be turned off.
/// </summary>
/// <param name="content">input sentence; it doesn't need to be well formed (auxiliary blanks, English or Chinese, punctuation or not) -- those are handled within the pattern and the pattern elements themselves</param>
/// <param name="vocabulary">vocabulary passed through to each element's MatchElement</param>
/// <param name="processedLocation">overwritten with the offset of the trimmed content inside the original string, then increased by the number of characters processed on a successful match</param>
/// <param name="bExact">when true, a match that does not cover the whole trimmed content is rejected</param>
/// <returns>The matched pattern instance, or null when the sentence does not match</returns>
// <Improvement Note> The implementation of this function should be language neutral and doesn't assume meaningful symbols are separated by spaces
public PatternInstance Match(string content, VocabularyManager vocabulary, ref int processedLocation, bool bExact = true)
{
    processedLocation = content.IndexOf(content.Trim());

    // Basic format processing
    content = content.Trim().ToLower();

    // Match against elements
    PatternInstance instance = new PatternInstance(this);
    int currentLocation = 0;
    for (int i = 0; i < Elements.Count; i++)
    {
        // Get a reference
        PatternElement element = Elements[i];

        // Input string boundary check
        if (content.Length <= currentLocation)
        {
            if (element.bOptional == false)
            {
                return null;
            }
            else
            {
                continue;
            }
        }

        // Skip spaces; the bounds test comes first so the indexer is never evaluated past the end
        while (currentLocation < content.Length && content[currentLocation] == ' ')
        {
            currentLocation++;
        }

        // Continue checking
        int consumed = 0;
        PatternElementInstance eInstance = null;

        // Specially handle the <any> attribute
        if (element.Key == "any")
        {
            // Try matching the NEXT element at every following character position (be it a word or a phrase)
            // until it matches or the input runs out; in the former case the characters skipped are the <any>,
            // in the latter case the pattern simply doesn't match
            if (i + 1 < Elements.Count)
            {
                PatternElement nextElement = Elements[i + 1];
                int anySize = 0;
                PatternElementInstance tempEInstance = null;
                while (currentLocation + anySize < content.Length
                    && (tempEInstance = nextElement.MatchElement(content.Substring(currentLocation + anySize), vocabulary, out consumed)) == null)
                {
                    anySize++;
                }

                if (tempEInstance == null)
                {
                    return null;
                }
                else
                {
                    // Only the skipped text is committed here; the next element is matched again on the next iteration
                    eInstance = new PatternElementInstance(PatternElementType.WordAttribute, content.Substring(currentLocation, anySize));
                    consumed = anySize;
                }
            }
            else
            {
                // <any> is the last element: it takes everything that is left
                eInstance = new PatternElementInstance(PatternElementType.WordAttribute, content.Substring(currentLocation));
                consumed = content.Length - currentLocation;
            }
        }
        else
        {
            // Notice consumed is language neutral, i.e. in English it contains spaces skipped, while in other languages it's only element length
            eInstance = element.MatchElement(content.Substring(currentLocation), vocabulary, out consumed);
        }

        if (eInstance != null)
        {
            instance.ComponentElements.Add(eInstance);
            currentLocation += consumed;
        }
        else if (element.bOptional != true)
        {
            return null;
        }
    }

    // Anything left over after the last element makes this a partial match
    if (currentLocation != content.Length)
    {
        instance.bPartialMatch = true;
    }
    if (bExact && instance.bPartialMatch == true)
    {
        return null;
    }

    processedLocation += currentLocation;
    return instance;
}