Beispiel #1
0
        /// <inheritdoc />
        public IReadOnlyList <Token> Process(ReadOnlySpan <char> text)
        {
            // Working state handed to the shared Process overload: the store that collects
            // tokens, a scratch character buffer, and the token counter (passed by ref so the
            // overload can update it).
            var store = new TokenStore();
            var buffer = new StringBuilder();
            var nextTokenIndex = 0;

            // The literal 0 is presumably the offset of the text within its source - confirm
            // against the overload's signature.
            this.Process(text, ref nextTokenIndex, 0, store, buffer);

            return store.ToList();
        }
Beispiel #2
0
        /// <summary>
        /// Processes a single span of text and returns the tokens collected for it.
        /// </summary>
        /// <param name="input">The text to process.</param>
        /// <returns>The contents of a freshly created token store after processing.</returns>
        public IEnumerable <Token> Process(ReadOnlySpan <char> input)
        {
            // Working state for the shared Process overload; the two counters are passed by
            // ref so the overload can update them.
            var tokens = new TokenStore();
            var buffer = new StringBuilder();
            var nextWordIndex = 0;
            var wordStart = 0;

            // The literal 0 is presumably the end offset of any preceding text (none here) -
            // confirm against the overload's signature.
            Process(tokens, ref nextWordIndex, ref wordStart, 0, buffer, input);

            return tokens.ToList();
        }
Beispiel #3
0
        /// <summary>
        /// Breaks <paramref name="input"/> into words at every character for which
        /// <c>IsWordSplitCharacter</c> returns true and captures each non-empty word into a
        /// fresh token store. Every other character is run through the input preprocessor
        /// pipeline; each item the pipeline yields is appended to the current word and folded
        /// into the word's hash.
        /// </summary>
        /// <param name="input">The text to split into words.</param>
        /// <returns>The contents of the token store once the whole input has been scanned.</returns>
        public IEnumerable <Token> Process(ReadOnlySpan <char> input)
        {
            var tokens = new TokenStore(); // TODO Pool?

            var buffer = new StringBuilder();
            var runningHash = new TokenHash();
            var nextWordIndex = 0;
            var wordStart = 0;

            for (var position = 0; position < input.Length; position++)
            {
                var character = input[position];

                if (!this.IsWordSplitCharacter(character))
                {
                    // Part of a word: accumulate whatever the pipeline yields for this character.
                    foreach (var processed in this.inputPreprocessorPipeline.Process(character))
                    {
                        buffer.Append(processed);
                        runningHash = runningHash.Combine(processed);
                    }

                    continue;
                }

                // Split character: flush the pending word, if any, and reset the per-word state.
                if (buffer.Length > 0)
                {
                    CaptureWord(tokens, runningHash, nextWordIndex, wordStart, position, buffer);
                    nextWordIndex++;
                    buffer.Clear();
                    runningHash = new TokenHash();
                }

                // The next word can start no earlier than the character after this split.
                wordStart = position + 1;
            }

            // Input ended without a trailing split character: flush the last pending word.
            if (buffer.Length > 0)
            {
                CaptureWord(tokens, runningHash, nextWordIndex, wordStart, input.Length, buffer);
            }

            return tokens.ToList();
        }
Beispiel #4
0
        /// <summary>
        /// Processes a sequence of text fragments in order, sharing one token store, word
        /// builder and set of counters across all of them, and returns the collected tokens.
        /// </summary>
        /// <param name="inputs">
        /// The text fragments to process. A null sequence produces an empty result; a null
        /// entry within the sequence is treated as an empty string.
        /// </param>
        /// <returns>The contents of the token store after every fragment has been processed.</returns>
        public IEnumerable <Token> Process(IEnumerable <string> inputs)
        {
            if (inputs is null)
            {
                return(Enumerable.Empty <Token>());
            }

            var processedWords = new TokenStore();
            var wordIndex      = 0;
            var start          = 0;
            var wordBuilder    = new StringBuilder();
            var endOffset      = 0;

            foreach (var input in inputs)
            {
                // Treat a null entry exactly like an empty string: previously the span
                // conversion tolerated null but reading input.Length then threw a
                // NullReferenceException, discarding everything processed so far.
                var text = input ?? string.Empty;

                Process(processedWords, ref wordIndex, ref start, endOffset, wordBuilder, text.AsSpan());

                // Accumulate the combined length of the fragments handled so far; it is
                // passed to the next Process call as its offset argument.
                endOffset += text.Length;
            }

            return(processedWords.ToList());
        }
Beispiel #5
0
        /// <inheritdoc />
        public IReadOnlyList <Token> Process(IEnumerable <DocumentTextFragment> document)
        {
            // A missing document yields no tokens.
            if (document is null)
            {
                return Array.Empty <Token>();
            }

            // One store, buffer and token counter are shared across every fragment; the
            // counter is passed by ref so the overload can update it between fragments.
            var store = new TokenStore();
            var buffer = new StringBuilder();
            var nextTokenIndex = 0;

            foreach (var fragment in document)
            {
                var fragmentText = fragment.Text.Span;
                var fragmentOffset = fragment.Offset;

                this.Process(fragmentText, ref nextTokenIndex, fragmentOffset, store, buffer);
            }

            return store.ToList();
        }