Example #1
0
        /// <summary>
        /// Splits <paramref name="query"/> into tokens. On every iteration leading whitespace is skipped and
        /// the configured tokenizers are tried in order at the current reader position; the first one that
        /// reports a successful or partially successful result contributes its token.
        /// </summary>
        /// <param name="query">Query text to tokenize.</param>
        /// <returns>A <see cref="LexerResult"/> whose <c>Tokens</c> contains the tokens in the order they were found.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="query"/> is <c>null</c>, empty or consists only of whitespace.
        /// </exception>
        public LexerResult Tokenize(string query)
        {
            if (string.IsNullOrWhiteSpace(query))
            {
                // Name the offending argument and the reason so that the failure is diagnosable by the caller.
                throw new ArgumentException(
                    "Query must not be null, empty or consist only of whitespace.",
                    nameof(query));
            }

            var finalResult = new LexerResult();
            var reader      = new LexerStringReader(query, 0);

            while (!reader.IsEndOfQuery())
            {
                reader.MoveBy(SkipWhitespaces(reader));

                var tokenFound = false;

                foreach (var tokenizer in _languageConfig.TokenizersSource.OrderedTokenizers)
                {
                    // Every tokenizer gets its own runtime info built from the reader's current state.
                    var runtimeInfo = new LexerRuntimeInfo(reader);
                    var result      = tokenizer.Tokenize(ref runtimeInfo);
                    if (result.Success || result.IsPartiallySuccessful)
                    {
                        // First matching tokenizer wins: record its token and step over the text it covers.
                        finalResult.Tokens.Add(result.Token);
                        reader.ForceMoveBy(result.Token.Length);
                        tokenFound = true;
                        break;
                    }
                }

                if (!tokenFound)
                {
                    // No tokenizer recognized the input here: consume up to the end of the current word
                    // and report that span as an unknown token.
                    var startPosition = reader.CurrentPosition;
                    var word          = reader.ReadTillEndOfWord();
                    var unknownToken  = new UnknownToken(startPosition, word.Length);
                    finalResult.Tokens.Add(unknownToken);
                }
            }

            return finalResult;
        }
Example #2
0
        /// <summary>
        /// Walks the node tree starting at <c>_root</c>, consuming characters from <paramref name="reader"/>
        /// for as long as a child of the current node matches the reader's current character, and reports
        /// the last visited node that carries a non-null <c>Value</c>.
        /// </summary>
        /// <param name="reader">
        /// Reader positioned at the first character to match. It is advanced while matching and is not
        /// moved back by this method, even when no node is found.
        /// </param>
        /// <param name="node">
        /// The last node with a non-null <c>Value</c> reached during the walk, or <c>null</c> when none was reached.
        /// </param>
        /// <param name="length">
        /// Distance between the reader position at entry and the position right after the returned node was
        /// matched (plus one when the walk stopped at the end of the query); 0 when nothing was found.
        /// </param>
        /// <returns><c>true</c> when a node with a value was reached; otherwise <c>false</c>.</returns>
        private bool TryFindNode(ref LexerStringReader reader, out Node node, out int length)
        {
            var  initialPosition      = reader.CurrentPosition;
            var  currentNode          = _root;
            Node lastSuccessfulNode   = null;
            var  lastSuccessfulLength = 0;

            var endOfQuery = false;

            while (currentNode != null)
            {
                var progressed = false;

                var currentChar = reader.CurrentChar;

                // PERF NOTE: bisection could be implemented here, but since number of children should remain small
                // then overhead could be larger than actual gains
                foreach (var child in currentNode.Children)
                {
                    var isWhitespace = false;

                    if (char.IsWhiteSpace(child.Key) && char.IsWhiteSpace(currentChar))
                    {
                        // Any whitespace character in the input matches a whitespace key.
                        isWhitespace = true;
                    }
                    else if (currentChar != child.Key &&
                             (_caseSensitive || char.ToLowerInvariant(currentChar) != char.ToLowerInvariant(child.Key)))
                    {
                        // Case folding is culture-invariant on purpose: with the current culture (e.g. tr-TR)
                        // 'I' lowers to dotless 'ı' and would never match 'i'.
                        // TODO: PERF: use zero cost extension point trick to implement case sensitivity flag (see Performance.md)
                        continue;
                    }

                    // Remember whether the reader reported end of query BEFORE consuming the matched character;
                    // the walk stops after this step in that case.
                    endOfQuery = reader.IsEndOfQuery();

                    // read characters
                    if (isWhitespace)
                    {
                        reader.ReadTillEndOfWhitespace();               // TODO: multi whitespace read opt in (perhaps use extension point trick?)
                    }
                    else
                    {
                        reader.MoveBy(1);
                    }

                    // assign results
                    currentNode = child;
                    if (currentNode.Value != null)
                    {
                        lastSuccessfulNode   = currentNode;
                        lastSuccessfulLength = reader.CurrentPosition - initialPosition;
                    }

                    // Only the first matching child is followed; keep descending unless the query ended.
                    progressed = !endOfQuery;
                    break;
                }

                // Stop when no child matched or when the end of the query was hit.
                if (!progressed)
                {
                    break;
                }
            }

            if (lastSuccessfulNode != null)
            {
                length = lastSuccessfulLength;
                if (endOfQuery)
                {
                    // NOTE(review): one extra position is counted when the walk ended at the end of the query,
                    // presumably because the reader does not advance past the final character - confirm
                    // against LexerStringReader.MoveBy.
                    length++;
                }

                node = lastSuccessfulNode;
                return true;
            }

            node   = null;
            length = 0;
            return false;
        }