// Splits the raw token stream into sentences, classifying each token along the way.
public static IEnumerable<IList<Token>> Parse(Parser parser)
{
    var result = new List<IList<Token>>();
    var sentence = new List<Token>();
    var tokens = parser.GetTokens();

    for (int i = 0; i < tokens.Count; i++)
    {
        var item = tokens[i];
        var token = _classifier.Classify(item);
        token.Index = i;

        // Skip tokens whose content was normalized away entirely.
        if (!string.IsNullOrEmpty(token.Content))
        {
            sentence.Add(token);
        }

        // A trailing punctuation mark closes the current sentence.
        if (TokenNormalizer.EndsWithPunctuation(item))
        {
            result.Add(sentence);
            sentence = new List<Token>();
        }
    }

    // Flush the last sentence if the input did not end with punctuation.
    if (!sentence.IsEmpty())
    {
        result.Add(sentence);
    }

    return result;
}
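The `Token` type, the `TokenKind` enum, and the `IsEmpty()` helper are not shown in this listing. The sketch below captures the minimal shapes the tokenizer appears to assume; names and members are inferred from usage, not confirmed by the source.

using System.Collections.Generic;

// Inferred from usage above; the real definitions may carry additional members.
public enum TokenKind
{
    None,
    Keyword
    // Presumably further kinds for classified words live here as well.
}

public class Token
{
    public string Content { get; set; }
    public TokenKind Kind { get; set; }
    public int Index { get; set; }
}

public static class ListExtensions
{
    // Hypothetical stand-in for the IsEmpty() call used by the tokenizer.
    public static bool IsEmpty<T>(this IList<T> list) => list.Count == 0;
}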
// Maps a raw token to a classified Token: keyword, known word kind, or unclassified content.
internal Token Classify(string token)
{
    var normalizedLemma = TokenNormalizer.NormalizeLemmaFull(token);

    if (_keywords.Contains(normalizedLemma))
    {
        return new Token { Content = token, Kind = TokenKind.Keyword };
    }

    if (_words.TryGetValue(normalizedLemma, out var kind))
    {
        return new Token { Content = token, Kind = kind };
    }

    // Unknown word: keep it, strip punctuation, and leave it unclassified.
    return new Token { Content = TokenNormalizer.RemovePunctuation(token), Kind = TokenKind.None };
}
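The classifier relies on two lookup tables, `_keywords` and `_words`, that are populated outside this listing. Because lookups happen after `NormalizeLemmaFull`, both tables are keyed by normalized lemmas. A hypothetical setup could look like this; the specific entries are illustrative only and not part of the source.

// Hypothetical contents; the actual tables are built elsewhere.
private readonly HashSet<string> _keywords = new HashSet<string>
{
    "show", "list", "find"   // assumed query verbs, stored as normalized lemmas
};

private readonly Dictionary<string, TokenKind> _words = new Dictionary<string, TokenKind>
{
    // Maps a normalized lemma to its kind,
    // e.g. ["red"] = TokenKind.Adjective (assumed kind name, not shown in the listing).
};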
public QueryDefinition Parse(IList<Token> tokens)
{
    var result = new QueryDefinition();
    var lemma = tokens[0];
    var startIndex = 1;

    // Note: Adjective comes first!
    if (tokens.Count > 1 && tokens[0].Kind == TokenKind.None)
    {
        //startIndex++;
        int index = 0;
        var attributeDefinition = FindNextAttributeDefinition(tokens, ref index);

        if (attributeDefinition != null)
        {
            result.Properties.Add(attributeDefinition);
            lemma = tokens[index + 1];
            startIndex = Math.Max(
                attributeDefinition.Key.Max(x => x.Index),
                attributeDefinition.Value.Max(x => x.Index)) + 2;
        }
        else if (_properties.TryGetValue(tokens[0].Content.ToLowerInvariant(), out var propertyName))
        {
            // The leading token maps to a known property; record it as a key/value pair.
            result.Properties.Add(new AttributeDefinition(
                MultipartToken.FromToken(new Token { Kind = TokenKind.None, Content = propertyName }),
                MultipartToken.FromToken(new Token { Kind = TokenKind.None, Content = tokens[0].Content.ToLowerInvariant() })));
        }
        else
        {
            // Todo: Determine whether it is a bool property or a non-bool property!
            result.Properties.Add(new AttributeDefinition(
                MultipartToken.FromToken(new Token { Kind = TokenKind.None, Content = tokens[0].Content })));
        }
    }

    // Note: Regular parsing -> property/value pairs!
    if (lemma.Kind == TokenKind.Keyword)
    {
        // Note: Now that we have a keyword, find the next two tokens if we can!
        var index = startIndex;
        var propertyValuePair = FindNextAttributeDefinition(tokens, ref index);
        result.Target = TokenNormalizer.CaseNormalizeLemma(lemma.Content).ToLowerInvariant();

        while (propertyValuePair != null)
        {
            result.Properties.Add(propertyValuePair);
            // Todo: We should be counting consumed tokens instead of always adding one!
            index += 1;
            propertyValuePair = FindNextAttributeDefinition(tokens, ref index);
        }
    }

    return result;
}
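Taken together, the three methods form a small pipeline: raw tokens are split into sentences, each token is classified, and each sentence is parsed into a `QueryDefinition`. The wiring below is a sketch only; the class names `Tokenizer` and `QueryParser`, and the `Parser` constructor, are assumptions, since the enclosing classes are not shown in this listing.

// Hypothetical driver; only Parse(Parser), Classify, and Parse(IList<Token>) come from the listing above.
var parser = new Parser("find red cars with four doors.");   // assumed constructor
var sentences = Tokenizer.Parse(parser);                      // sentence splitting + classification

var queryParser = new QueryParser();                          // assumed enclosing class of Parse(IList<Token>)
foreach (var sentence in sentences)
{
    QueryDefinition query = queryParser.Parse(sentence);
    Console.WriteLine($"Target: {query.Target}, properties: {query.Properties.Count}");
}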