Esempio n. 1
0
        /// <summary>
        /// Classifies every raw token produced by <paramref name="parser"/> and groups the
        /// resulting tokens into sentences.
        /// </summary>
        /// <param name="parser">Source of the raw tokens, read through <c>GetTokens()</c>.</param>
        /// <returns>
        /// The sentences in input order. A sentence is closed when a raw token ends with
        /// punctuation according to <c>TokenNormalizer.EndsWithPunctuation</c>; tokens left
        /// over after the last terminator form a final sentence. Empty sentences are never
        /// returned.
        /// </returns>
        public static IEnumerable <IList <Token> > Parse(Parser parser)
        {
            var result   = new List <IList <Token> >();
            var sentence = new List <Token>();
            var tokens   = parser.GetTokens();

            for (int i = 0; i < tokens.Count; i++)
            {
                var item  = tokens[i];
                var token = _classifier.Classify(item);

                // Index is the position in the complete raw token list, not in the sentence.
                token.Index = i;

                // Tokens whose classified content is null or empty are dropped.
                if (string.IsNullOrEmpty(token.Content) == false)
                {
                    sentence.Add(token);
                }

                // Close the sentence on a terminator, but only when it holds tokens. This
                // mirrors the check applied to the trailing sentence below, so consecutive
                // or leading terminators no longer produce empty sentences.
                if (TokenNormalizer.EndsWithPunctuation(item) && sentence.IsEmpty() == false)
                {
                    result.Add(sentence);
                    sentence = new List <Token>();
                }
            }

            // Flush whatever follows the last terminator.
            if (sentence.IsEmpty() == false)
            {
                result.Add(sentence);
            }

            return(result);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a <see cref="Token"/> for a raw token string, assigning its kind from the
        /// keyword set or the known-word table.
        /// </summary>
        /// <param name="token">The raw token text.</param>
        /// <returns>
        /// A keyword token when the normalized lemma is in <c>_keywords</c>; otherwise a
        /// token with the kind stored in <c>_words</c> for that lemma; otherwise a token of
        /// kind <c>TokenKind.None</c>. Only in the last case is the content passed through
        /// <c>TokenNormalizer.RemovePunctuation</c>; recognized tokens keep the raw text.
        /// </returns>
        internal Token Classify(string token)
        {
            // Both lookups are keyed by the fully normalized lemma, not by the raw text.
            var normalizedLemma = TokenNormalizer.NormalizeLemmaFull(token);

            if (_keywords.Contains(normalizedLemma))
            {
                return(new Token
                {
                    Content = token,
                    Kind = TokenKind.Keyword
                });
            }

            // Single lookup instead of ContainsKey followed by the indexer.
            if (_words.TryGetValue(normalizedLemma, out var knownKind))
            {
                return(new Token
                {
                    Content = token,
                    Kind = knownKind
                });
            }

            return(new Token
            {
                Content = TokenNormalizer.RemovePunctuation(token),
                Kind = TokenKind.None
            });
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a <see cref="QueryDefinition"/> from one sentence of classified tokens.
        /// </summary>
        /// <remarks>
        /// Two phases run in order. First, when the sentence has more than one token and
        /// starts with a token of kind <c>TokenKind.None</c>, a leading attribute is
        /// extracted. Second, when the lemma (the first token, or the token following the
        /// leading attribute) is a keyword, it becomes the target and every further
        /// attribute definition that can be found is appended to the properties.
        /// </remarks>
        /// <param name="tokens">The tokens of a single sentence.</param>
        /// <returns>
        /// The parsed definition. For an empty <paramref name="tokens"/> list an empty
        /// definition is returned.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
        public QueryDefinition Parse(IList <Token> tokens)
        {
            if (tokens == null)
            {
                throw new ArgumentNullException(nameof(tokens));
            }

            var result = new QueryDefinition();

            // Nothing to parse; avoids indexing into an empty list below.
            if (tokens.Count == 0)
            {
                return(result);
            }

            var first      = tokens[0];
            var lemma      = first;
            var startIndex = 1;

            // Note: Adjective comes first!
            if (tokens.Count > 1 && first.Kind == TokenKind.None)
            {
                int index = 0;
                var attributeDefinition = FindNextAttributeDefinition(tokens, ref index);

                if (attributeDefinition != null)
                {
                    result.Properties.Add(attributeDefinition);

                    // The lemma is the token right after the attribute. When the attribute
                    // consumed the final token there is none, so the lemma stays the first
                    // (non-keyword) token and the keyword phase below is skipped.
                    if (index + 1 < tokens.Count)
                    {
                        lemma = tokens[index + 1];
                    }

                    // NOTE(review): startIndex is derived from Token.Index, which only lines
                    // up with positions in `tokens` if the producer assigns Index per list
                    // position - confirm against the code that sets Token.Index.
                    startIndex = Math.Max(attributeDefinition.Key.Max(x => x.Index), attributeDefinition.Value.Max(x => x.Index)) + 2;
                }
                else
                {
                    var loweredContent = first.Content.ToLowerInvariant();

                    if (_properties.TryGetValue(loweredContent, out var mappedContent))
                    {
                        // Known word: the mapped entry and the lowered word form the two
                        // parts of the attribute (presumably key and value - confirm against
                        // the AttributeDefinition constructor).
                        result.Properties.Add(new AttributeDefinition(
                                                  MultipartToken.FromToken(new Token
                        {
                            Kind    = TokenKind.None,
                            Content = mappedContent
                        }),
                                                  MultipartToken.FromToken(new Token
                        {
                            Kind    = TokenKind.None,
                            Content = loweredContent
                        })));
                    }
                    else
                    {
                        // Todo: Determine if it's a bool property or if it's non-bool property!
                        result.Properties.Add(new AttributeDefinition(MultipartToken.FromToken(new Token
                        {
                            Kind    = TokenKind.None,
                            Content = first.Content
                        })));
                    }
                }
            }

            // Note: Regular Parsing -> Property - Value Pair!
            if (lemma.Kind == TokenKind.Keyword)
            {
                // Note: Now that we have a keyword find the next two tokens! If we can!
                var index             = startIndex;
                var propertyValuePair = FindNextAttributeDefinition(tokens, ref index);

                result.Target = TokenNormalizer.CaseNormalizeLemma(lemma.Content).ToLowerInvariant();

                while (propertyValuePair != null)
                {
                    result.Properties.Add(propertyValuePair);
                    // Todo: We should be counting, instead of always adding one!!
                    index            += 1;
                    propertyValuePair = FindNextAttributeDefinition(tokens, ref index);
                }
            }

            return(result);
        }