/// <summary>
/// Builds the text of the line addressed by <paramref name="index"/>, rotated so
/// that it begins at the index's offset instead of at the start of the line.
/// </summary>
/// <param name="index">
/// Supplies the position of the line's first character (<c>GetFirst</c>) and the
/// rotation offset from that position (<c>GetOffset</c>), both relative to <c>_input</c>.
/// </param>
/// <returns>
/// The characters from the offset position up to the end of the line; when the offset
/// is non-zero, followed by a single space and the characters that precede the offset,
/// excluding the one character immediately before it.
/// </returns>
private string GenerateStringLine(CharIndex index)
{
    var lineStart = index.GetFirst();
    var shift = index.GetOffset();
    var rotationStart = lineStart + shift;
    var result = new StringBuilder();

    // Tail of the line: everything from the rotation point up to the line break
    // (or the end of the input, whichever comes first).
    for (var pos = rotationStart; pos != _input.Length; pos++)
    {
        var current = _input[pos];
        if (current == '\r' || current == '\n')
        {
            break;
        }

        result.Append(current);
    }

    // A rotated line gets a separator between its tail and the wrapped-around head.
    if (shift != 0)
    {
        result.Append(' ');
    }

    // Head of the line. The upper bound is one short of the rotation point so the
    // character directly in front of it (presumably the word separator) is dropped.
    for (var pos = lineStart; pos < rotationStart - 1; pos++)
    {
        result.Append(_input[pos]);
    }

    return result.ToString();
}
/// <summary>
/// Builds the rotated text of the line addressed by <paramref name="index"/> and
/// appends a space followed by the URL recorded for that line.
/// </summary>
/// <param name="index">
/// Supplies the position of the line's first character (<c>GetFirst</c>), the rotation
/// offset (<c>GetOffset</c>) and the line number (<c>GetLineNumber</c>) used to look up
/// the URL's starting position in <c>_urlIndices</c>.
/// </param>
/// <returns>
/// The characters from the offset position up to the end of the line; when the offset
/// is non-zero, a single space and the characters that precede the offset (excluding
/// the one immediately before it); then a space and the URL text read from <c>_urls</c>.
/// </returns>
private string GenerateStringLineWithUrl(CharIndex index)
{
    var stringBuilder = new StringBuilder();
    var first = index.GetFirst();
    var offset = index.GetOffset();
    var rotationStart = first + offset;

    // Tail of the line: from the rotation point up to the line break or end of input.
    for (var i = rotationStart; i != _input.Length; i++)
    {
        var current = _input[i];
        if (current == '\r' || current == '\n')
        {
            break;
        }

        stringBuilder.Append(current);
    }

    // A rotated line gets a separator between its tail and the wrapped-around head.
    if (offset != 0)
    {
        stringBuilder.Append(' ');
    }

    // Head of the line, stopping one short of the rotation point so the character
    // directly in front of it (presumably the word separator) is dropped.
    for (var k = first; k < rotationStart - 1; k++)
    {
        stringBuilder.Append(_input[k]);
    }

    // Separator between the line text and its URL.
    stringBuilder.Append(' ');

    var matchingUrlIndex = _urlIndices[index.GetLineNumber()];
    for (int j = matchingUrlIndex; j < _urls.Length; j++)
    {
        var current = _urls[j];

        // Stop at either line-break character, matching how the line text above is
        // terminated; otherwise CRLF-delimited URL data would leave a trailing '\r'
        // in the result.
        if (current == '\r' || current == '\n')
        {
            break;
        }

        stringBuilder.Append(current);
    }

    return stringBuilder.ToString();
}
/// <summary>
/// Determines whether the position addressed by <paramref name="index"/> begins with
/// whitespace or with one of the words listed in <c>_noiseWords</c>.
/// </summary>
/// <param name="index">
/// Supplies the position to inspect: <c>GetOffset() + GetFirst()</c> within
/// <paramref name="input"/>.
/// </param>
/// <param name="input">The character buffer to read the word from.</param>
/// <returns>
/// <c>true</c> when the character at the position is a space, tab or line-break
/// character, or when the lower-cased word starting there equals an entry of
/// <c>_noiseWords</c>; otherwise <c>false</c>. Words of four or more characters are
/// never reported as noise words.
/// </returns>
private bool StartsWithNoiseWord(CharIndex index, char[] input)
{
    var startingIndex = index.GetOffset() + index.GetFirst();
    var firstChar = input[startingIndex];

    // A position holding whitespace has no word at it. '\r' is included alongside
    // '\n' so CRLF and LF input are treated the same way.
    if (firstChar == '\t' || firstChar == ' ' || firstChar == '\r' || firstChar == '\n')
    {
        return true;
    }

    firstChar = char.ToLowerInvariant(firstChar);

    // noise words only start with these characters
    if (firstChar != 'a' && firstChar != 'i' && firstChar != 'o' && firstChar != 't' && firstChar != 'b')
    {
        return false;
    }

    var stringBuilder = new StringBuilder();
    for (int i = startingIndex; i < input.Length; i++)
    {
        var current = input[i];

        // The word ends at any whitespace, including '\n': without it a short word
        // at the end of an LF-terminated line would run on into the next line and
        // be rejected as too long.
        if (current == ' ' || current == '\t' || current == '\r' || current == '\n')
        {
            break;
        }

        // Invariant lower-casing, consistent with the first-character check above;
        // culture-sensitive lower-casing can map 'I' to a non-ASCII character and
        // prevent a match.
        stringBuilder.Append(char.ToLowerInvariant(current));

        // too long to be a noise word
        if (stringBuilder.Length >= 4)
        {
            return false;
        }
    }

    var possibleNoiseWord = stringBuilder.ToString();
    foreach (var noiseWord in _noiseWords)
    {
        if (possibleNoiseWord == noiseWord)
        {
            return true;
        }
    }

    return false;
}