示例#1
0
        /// <summary>
        /// Builds a rotated copy of one line of <c>_input</c>: the text from
        /// <c>GetFirst() + GetOffset()</c> up to the end of the line comes first,
        /// followed (when the offset is non-zero) by a single space and the text
        /// that preceded that position on the line.
        /// </summary>
        /// <param name="index">
        /// Supplies the position of the line start (<c>GetFirst()</c>) and the
        /// distance from it to the rotation point (<c>GetOffset()</c>).
        /// </param>
        /// <returns>The rotated line, without any line-terminator characters.</returns>
        private string GenerateStringLine(CharIndex index)
        {
            var lineStart  = index.GetFirst();
            var wordOffset = index.GetOffset();
            var wordStart  = lineStart + wordOffset;

            var rotated = new StringBuilder();

            // Copy from the rotation point up to the end of the line or of the input.
            for (var pos = wordStart; pos != _input.Length; pos++)
            {
                var current = _input[pos];
                if (current == '\r' || current == '\n')
                {
                    break;
                }

                rotated.Append(current);
            }

            // A zero offset means nothing precedes the rotation point on this line.
            if (wordOffset != 0)
            {
                rotated.Append(' ');
            }

            // Copy the leading part of the line, leaving out the single character
            // directly before the rotation point (presumably the separator).
            var headEnd = wordStart - 1;
            for (var pos = lineStart; pos < headEnd; pos++)
            {
                rotated.Append(_input[pos]);
            }

            return rotated.ToString();
        }
示例#2
0
        /// <summary>
        /// Builds a rotated copy of one line of <c>_input</c> and appends the URL
        /// associated with that line. The text from <c>GetFirst() + GetOffset()</c>
        /// up to the end of the line comes first, followed (when the offset is
        /// non-zero) by a space and the text that preceded that position, then a
        /// space and the URL read from <c>_urls</c>.
        /// </summary>
        /// <param name="index">
        /// Supplies the line start (<c>GetFirst()</c>), the distance to the rotation
        /// point (<c>GetOffset()</c>) and the line number (<c>GetLineNumber()</c>)
        /// used to look up the URL start position in <c>_urlIndices</c>.
        /// </param>
        /// <returns>
        /// The rotated line followed by a space and the URL, without any
        /// line-terminator characters.
        /// </returns>
        private string GenerateStringLineWithUrl(CharIndex index)
        {
            var stringBuilder = new StringBuilder();

            var first     = index.GetFirst();
            var offset    = index.GetOffset();
            var wordStart = first + offset;

            // Copy from the rotation point up to the end of the line or of the input.
            for (var i = wordStart; i != _input.Length && _input[i] != '\r' && _input[i] != '\n'; i++)
            {
                stringBuilder.Append(_input[i]);
            }

            // A zero offset means nothing precedes the rotation point on this line.
            if (offset != 0)
            {
                stringBuilder.Append(' ');
            }

            // Copy the leading part of the line, leaving out the single character
            // directly before the rotation point (presumably the separator).
            for (var k = first; k < wordStart - 1; k++)
            {
                stringBuilder.Append(_input[k]);
            }

            // The URL is always separated from the line text by one space.
            stringBuilder.Append(' ');

            var matchingUrlIndex = _urlIndices[index.GetLineNumber()];

            for (int j = matchingUrlIndex; j < _urls.Length; j++)
            {
                var current = _urls[j];

                // Stop at either line-terminator character, matching the line scan
                // above, so CRLF-separated URL data does not leave a stray '\r'
                // at the end of the result.
                if (current == '\r' || current == '\n')
                {
                    break;
                }

                stringBuilder.Append(current);
            }

            return stringBuilder.ToString();
        }
示例#3
0
        /// <summary>
        /// Decides whether the word starting at <c>GetOffset() + GetFirst()</c> in
        /// <paramref name="input"/> should be treated as a noise word.
        /// </summary>
        /// <param name="index">Supplies the line start and the offset of the word within the line.</param>
        /// <param name="input">The character buffer the index refers to.</param>
        /// <returns>
        /// <c>true</c> when the position holds whitespace or a line terminator
        /// (there is no word there), or when the lower-cased word equals an entry
        /// of <c>_noiseWords</c>; otherwise <c>false</c>. Entries of
        /// <c>_noiseWords</c> are presumably stored in lower case.
        /// </returns>
        private bool StartsWithNoiseWord(CharIndex index, char[] input)
        {
            // Words longer than this cannot be noise words.
            const int MaxNoiseWordLength = 3;

            var startingIndex = index.GetOffset() + index.GetFirst();
            var firstChar     = input[startingIndex];

            // No word at this position. '\r' is handled like '\n' so that LF and
            // CRLF input are classified the same way.
            if (firstChar == '\t' || firstChar == ' ' || firstChar == '\n' || firstChar == '\r')
            {
                return true;
            }

            firstChar = char.ToLowerInvariant(firstChar);

            // noise words only start with these characters
            if (firstChar != 'a' && firstChar != 'i' && firstChar != 'o' && firstChar != 't' && firstChar != 'b')
            {
                return false;
            }

            var stringBuilder = new StringBuilder();

            for (var i = startingIndex; i < input.Length; i++)
            {
                var current = input[i];

                // The word ends at whitespace or at either line-terminator character.
                if (current == ' ' || current == '\t' || current == '\r' || current == '\n')
                {
                    break;
                }

                // Invariant lower-casing keeps the match independent of the current
                // culture and consistent with the first-character check above.
                stringBuilder.Append(char.ToLowerInvariant(current));

                // too long to be a noise word
                if (stringBuilder.Length > MaxNoiseWordLength)
                {
                    return false;
                }
            }

            var possibleNoiseWord = stringBuilder.ToString();

            foreach (var noiseWord in _noiseWords)
            {
                if (possibleNoiseWord == noiseWord)
                {
                    return true;
                }
            }

            return false;
        }