/// <summary>
/// Computes the index of the next character to examine in the tag's current line,
/// pulling the next line from the reader when the current line is exhausted while
/// the tag is still being parsed.
/// </summary>
/// <param name="i">Index of the character that has just been processed.</param>
/// <param name="reader">Reader used to fetch the following line(s).</param>
/// <param name="state">Current tag parsing state.</param>
/// <param name="tag">Tag being assembled; its current line may be replaced.</param>
/// <returns>
/// <c>i + 1</c> when parsing stays on the current line, or <c>0</c> when the tag
/// has been moved on to a new line.
/// </returns>
public virtual int IncrementCounter(int i, NodeReader reader, int state, Tag tag)
{
    bool stillInsideTag = state == TAG_BEGIN_PARSING_STATE
        || state == TAG_IGNORE_DATA_STATE
        || state == TAG_IGNORE_BEGIN_TAG_STATE;

    // Nothing special to do unless we are inside a tag and have just consumed
    // the last character of its current line.
    if (!stillInsideTag || i != tag.TagLine.Length - 1)
    {
        return i + 1;
    }

    // Skip over blank lines until real content (or the end of input) is found.
    // Original fix contributed by Annette Doyle - see testcase
    // HTMLImageScannerTest.testImageTagOnMultipleLines(); further modified by
    // Somik Raha for HTMLTagTest.testBrokenTag.
    int linesRead = 0;
    string following;
    do
    {
        following = reader.GetNextLine();
        linesRead++;
    }
    while (following != null && following.Length == 0);

    if (following != null)
    {
        // The tag simply continues on a new line: keep the line break in its text.
        tag.Append(Parser.LineSeparator);
    }
    else
    {
        // Input ended in the middle of the tag (broken tag): supply the closing symbol.
        following = ">";
    }

    // Register one empty line per blank line skipped, so that blank lines are
    // included in the tag's lines (every line read except the last was blank).
    for (int blank = 1; blank < linesRead; blank++)
    {
        tag.TagLine = "";
    }

    // Parsing resumes at the first character of the new line.
    tag.TagLine = following;
    return 0;
}
/// <summary>
/// Locates the StringNode within the input string, parsing from the given position
/// and continuing onto following lines supplied by the reader when the current
/// line runs out.
/// </summary>
/// <param name="reader">HTML reader, used to read the next line when needed.</param>
/// <param name="input">Input string (the current line).</param>
/// <param name="position">Position in <paramref name="input"/> to start parsing from.</param>
/// <param name="balance_quotes">
/// If <c>true</c>, enter the ignoring state on encountering a single or double
/// quote and leave it only when the same quote character is seen again.
/// </param>
/// <returns>
/// The node produced by <c>StringNode.CreateStringNode</c> from the collected text,
/// the start position and the computed end position.
/// </returns>
public virtual Node Find(NodeReader reader, string input, int position, bool balance_quotes)
{
    StringBuilder collected = new StringBuilder();
    string line = input;
    int lineLength = line.Length;
    int textBegin = position;
    int textEnd = position;
    int state = BEFORE_PARSE_BEGINS_STATE;
    char closingQuote = '\"';

    int index = position;
    while (index < lineLength && state != PARSE_COMPLETED_STATE)
    {
        char current = line[index];

        // A '<' outside of a quoted region that BeginTag accepts ends the text;
        // the text then stops on the character before it.
        if (current == '<' && state != PARSE_IGNORE_STATE && BeginTag(line, index))
        {
            state = PARSE_COMPLETED_STATE;
            textEnd = index - 1;
        }

        bool isQuote = current == '\'' || current == '"';
        if (balance_quotes && isQuote)
        {
            if (state != PARSE_IGNORE_STATE)
            {
                // Opening quote: remember which character closes the region.
                closingQuote = current;
                state = PARSE_IGNORE_STATE;
            }
            else if (current == closingQuote)
            {
                // Matching closing quote: resume normal parsing.
                state = PARSE_HAS_BEGUN_STATE;
            }
        }

        // The first character that does not end the text starts it. Because this
        // promotion is unconditional, it also covers the case where that character
        // is the last one on the line.
        if (state == BEFORE_PARSE_BEGINS_STATE)
        {
            state = PARSE_HAS_BEGUN_STATE;
        }

        if (state == PARSE_HAS_BEGUN_STATE || state == PARSE_IGNORE_STATE)
        {
            collected.Append(current);
        }

        // End of the current line reached while collecting text: move on to the
        // next non-empty line, keeping one line separator per blank line skipped.
        if (state == PARSE_HAS_BEGUN_STATE && index == lineLength - 1)
        {
            line = reader.GetNextLine();
            while (line != null && line.Length == 0)
            {
                collected.Append(Parser.LineSeparator);
                line = reader.GetNextLine();
            }

            if (line != null)
            {
                collected.Append(Parser.LineSeparator);
                lineLength = line.Length;
                // The increment below restarts scanning at index 0 of the new line.
                index = -1;
            }
            else
            {
                // No more input: the text ends at the last character processed.
                textEnd = index;
                state = PARSE_COMPLETED_STATE;
            }
        }

        index++;
    }

    return StringNode.CreateStringNode(collected, textBegin, textEnd, reader.Parser.ShouldDecodeNodes);
}