/// <summary>
        /// Moves the parse index forward by one. When the index sits on the last character of the
        /// tag's current line and the parser is in one of the tag-scanning states, the next
        /// non-empty line is pulled from the reader and parsing restarts at index 0 of that line.
        /// </summary>
        /// <param name="i">Index of the character just processed in the tag's current line.</param>
        /// <param name="reader">Reader that supplies the following lines.</param>
        /// <param name="state">Current parser state.</param>
        /// <param name="tag">Tag being assembled; its TagLine is replaced when a new line is fetched.</param>
        /// <returns>The index to process next: <c>i + 1</c>, or 0 after switching to a new line.</returns>
        public virtual int IncrementCounter(int i, NodeReader reader, int state, Tag tag)
        {
            bool inLineSpanningState = state == TAG_BEGIN_PARSING_STATE
                                       || state == TAG_IGNORE_DATA_STATE
                                       || state == TAG_IGNORE_BEGIN_TAG_STATE;

            // Common case: still inside the current line (or a state that never spans lines).
            if (!inLineSpanningState || i != tag.TagLine.Length - 1)
            {
                return i + 1;
            }

            // Read until a non-empty line turns up or the reader is exhausted, counting every read.
            // (Multi-line tag handling: see HTMLImageScannerTest.testImageTagOnMultipleLines and
            // HTMLTagTest.testBrokenTag.)
            string fetched   = reader.GetNextLine();
            int    linesRead = 1;
            while (fetched != null && fetched.Length == 0)
            {
                fetched = reader.GetNextLine();
                linesRead++;
            }

            if (fetched != null)
            {
                // An ordinary line break inside the tag: keep it in the tag contents.
                tag.Append(Parser.LineSeparator);
            }
            else
            {
                // Input ended in the middle of a tag (broken tag): synthesize the closing symbol.
                fetched = ">";
            }

            // One empty TagLine assignment per blank line that was skipped, so blank lines are
            // reflected in the tag's lines (presumably the setter records each line - confirm in Tag).
            for (int blank = 1; blank < linesRead; blank++)
            {
                tag.TagLine = "";
            }

            // Continue parsing at the start of the freshly fetched line.
            tag.TagLine = fetched;
            return 0;
        }
Beispiel #2
0
        /// <summary>
        /// Locates the StringNode within the input string by scanning from the given position
        /// until a tag begins or the reader has no more lines.
        /// </summary>
        /// <param name="reader">HTML reader used to fetch the next line when the text runs past
        /// the end of the current one.</param>
        /// <param name="input">Input string (the current line).</param>
        /// <param name="position">Position in <paramref name="input"/> to start parsing from.</param>
        /// <param name="balance_quotes">If <c>true</c>, a single or double quote switches to an
        /// ignoring state that lasts until the same quote character is seen again; while ignoring,
        /// a less-than sign is not tested as a tag start.</param>
        /// <returns>The node built by <c>StringNode.CreateStringNode</c> from the collected text
        /// and the begin/end positions.</returns>
        public virtual Node Find(NodeReader reader, string input, int position, bool balance_quotes)
        {
            StringBuilder textBuffer = new StringBuilder();
            int           state      = BEFORE_PARSE_BEGINS_STATE;
            int           textBegin  = position;
            int           textEnd    = position;
            int           inputLen   = input.Length;
            char          ch;
            char          ignore_ender = '\"';

            for (int i = position; (i < inputLen && state != PARSE_COMPLETED_STATE); i++)
            {
                ch = input[i];

                // A tag start outside of a quoted section terminates the text node.
                if (ch == '<' && state != PARSE_IGNORE_STATE)
                {
                    if (BeginTag(input, i))
                    {
                        state   = PARSE_COMPLETED_STATE;
                        textEnd = i - 1;
                    }
                }

                // Quote balancing: the quote that opened the ignoring state is the only one that closes it.
                if (balance_quotes && (ch == '\'' || ch == '"'))
                {
                    if (state == PARSE_IGNORE_STATE)
                    {
                        if (ch == ignore_ender)
                        {
                            state = PARSE_HAS_BEGUN_STATE;
                        }
                    }
                    else
                    {
                        ignore_ender = ch;
                        state        = PARSE_IGNORE_STATE;
                    }
                }

                // The first character that did not complete the parse starts the text. Because
                // this promotion is unconditional, the state can never still be
                // BEFORE_PARSE_BEGINS_STATE below; the former end-of-line re-check of that state
                // (patch by Cedric Rosa) was unreachable and has been removed.
                if (state == BEFORE_PARSE_BEGINS_STATE)
                {
                    state = PARSE_HAS_BEGUN_STATE;
                }

                if (state == PARSE_HAS_BEGUN_STATE || state == PARSE_IGNORE_STATE)
                {
                    textBuffer.Append(ch);
                }

                // NOTE(review): if the line ends while still in PARSE_IGNORE_STATE, this block is
                // skipped, the loop exits without reading further lines and textEnd keeps its
                // initial value - confirm this is intended.
                if (state == PARSE_HAS_BEGUN_STATE && i == inputLen - 1)
                {
                    // End of the current line reached: pull the next non-empty line, emitting one
                    // line separator for every blank line skipped on the way.
                    do
                    {
                        input = reader.GetNextLine();
                        if (input != null && input.Length == 0)
                        {
                            textBuffer.Append(Parser.LineSeparator);
                        }
                    } while (input != null && input.Length == 0);

                    if (input == null)
                    {
                        // No more input: the text ends at the last character processed.
                        textEnd = i;
                        state   = PARSE_COMPLETED_STATE;
                    }
                    else
                    {
                        // Separator for the line break itself, then restart scanning at index 0 of
                        // the new line (the loop increment turns -1 into 0).
                        textBuffer.Append(Parser.LineSeparator);
                        inputLen = input.Length;
                        i        = -1;
                    }
                }
            }
            return(StringNode.CreateStringNode(textBuffer, textBegin, textEnd, reader.Parser.ShouldDecodeNodes));
        }