/// <summary>
/// Lazily enumerates the word chunks of <paramref name="textData"/> that are
/// candidates for spell checking.
/// </summary>
/// <param name="textData">The text to split into sentence chunks and filter.</param>
/// <returns>
/// The trimmed chunks that are not wrapped in quotes, are at least
/// <c>_minWordSize</c> long, and pass <c>WordHasAllValidChars</c>.
/// </returns>
public IEnumerable<TextSubString> ParseSentenceWordsForSpellCheck(ITextSubString textData)
{
    foreach (var chunk in ParseSentenceChunks(textData))
    {
        var chunkText = chunk.SubText;

        // Quoted text is left alone: skip it before doing any trimming.
        if (StringUtil.IsWrappedInQuotes(chunkText))
        {
            continue;
        }

        // Strip leading/trailing special characters (periods, parentheses, ...)
        // since they can usually be ignored for spell checking purposes.
        var trimmedChunk = TrimWordEndCharacters(chunk);

        // Only re-read the text when trimming actually produced a different chunk;
        // otherwise the text fetched above is still the right one.
        var candidateText = ReferenceEquals(trimmedChunk, chunk)
            ? chunkText
            : trimmedChunk.SubText;

        // A chunk qualifies only if it is long enough to bother checking and is made
        // of valid characters, so that things like code or URIs are not spell checked.
        var isLongEnough = trimmedChunk.Length >= _minWordSize;
        if (isLongEnough && WordHasAllValidChars(candidateText))
        {
            yield return trimmedChunk;
        }
    }
}
/// <summary>
/// Enumerates the free-text segments that lie before, between, and after the
/// matches of <c>XmlNodeSplitRegex</c> (the XML tags) in the given text.
/// </summary>
/// <param name="textData">The text to parse.</param>
/// <returns>
/// One segment per tag match (the text preceding that tag) followed by one final
/// segment covering everything after the last tag. When no tag is matched, the
/// single segment returned spans the whole text. Segments are expressed relative
/// to <c>textData.Source</c> and may be zero-length.
/// </returns>
public IEnumerable<TextSubString> ParseXmlTextParts(ITextSubString textData)
{
    // Match indexes are relative to textData.SubText; this shifts them back into
    // the coordinate space of the underlying source.
    var baseOffset = textData.Offset;

    // Position (within SubText) where the next free-text segment begins.
    var cursor = 0;

    foreach (Match tag in XmlNodeSplitRegex.Matches(textData.SubText))
    {
        var tagStart = tag.Index;

        // Free text runs from the end of the previous tag (or position 0 for the
        // first iteration) up to the start of the current tag.
        yield return new TextSubString(textData.Source, baseOffset + cursor, tagStart - cursor);

        // Resume right after the current tag.
        cursor = tagStart + tag.Length;
    }

    // Trailing segment: from the end of the last tag to the end of the text.
    yield return new TextSubString(textData.Source, baseOffset + cursor, textData.Length - cursor);
}
/// <summary>
/// Projects every match of <c>StringLiteralWordParser</c> in the given text into
/// a <see cref="TextSubString"/> positioned relative to <c>textData.Source</c>.
/// </summary>
/// <param name="textData">The text to split into chunks.</param>
/// <returns>A lazily evaluated sequence with one chunk per regex match.</returns>
public IEnumerable<TextSubString> ParseSentenceChunks(ITextSubString textData)
{
    // The match collection is obtained up front; the projection below is deferred.
    var matches = StringLiteralWordParser.Matches(textData.SubText);

    // Match indexes are relative to SubText, so shift by the chunk's own offset.
    return from Match match in matches
           select new TextSubString(
               textData.Source,
               textData.Offset + match.Index,
               match.Length);
}