/// <summary>
        /// Handles an opening tag: pushes a <c>null</c> entry onto
        /// <c>labelStacks</c>, looks up the <c>TagAction</c> registered for the
        /// node's name, updates <c>tagLevel</c> and the pending <c>flush</c> flag,
        /// and records the event as the most recent start tag.
        /// </summary>
        /// <param name="node">The element whose start is being processed.</param>
        public void StartElement(HtmlNode node)
        {
            labelStacks.AddItem(null);

            TagAction action = tagActions.Get(node.Name);

            if (action == null)
            {
                // No registered action: the tag always deepens the level and forces a flush.
                tagLevel++;
                flush = true;
            }
            else
            {
                if (action.ChangesTagLevel())
                {
                    tagLevel++;
                }

                // The action is always invoked; its result is OR-ed with the
                // flush flag as it stands after the call returns.
                bool actionRequestsFlush = action.Start(this, node.Name, node.Attributes);
                flush = actionRequestsFlush || flush;
            }

            lastStartTag = node.Name;
            lastEvent    = NBoilerpipeContentHandler.Event.START_TAG;
        }
        /// <summary>
        /// Handles a closing tag: runs the registered <c>TagAction</c>'s end
        /// hook (if any), decrements <c>tagLevel</c> where appropriate, flushes
        /// the current block when a flush is pending, records the event as the
        /// most recent end tag and removes the last entry of <c>labelStacks</c>.
        /// </summary>
        /// <param name="node">The element whose end is being processed.</param>
        public void EndElement(HtmlNode node)
        {
            TagAction action     = tagActions.Get(node.Name);
            bool      unknownTag = action == null;

            if (unknownTag)
            {
                flush = true;
            }
            else
            {
                // The end hook is always invoked; its result is OR-ed with the
                // flush flag as it stands after the call returns.
                bool actionRequestsFlush = action.End(this, node.Name);
                flush = actionRequestsFlush || flush;
            }

            // Tags without an action always count towards the nesting level.
            if (unknownTag || action.ChangesTagLevel())
            {
                tagLevel--;
            }

            // Note: the flush flag itself is not reset here.
            if (flush)
            {
                FlushBlock();
            }

            lastEndTag = node.Name;
            lastEvent  = NBoilerpipeContentHandler.Event.END_TAG;
            labelStacks.RemoveLast();
        }
        /// <summary>
        /// Handles a text node: decodes its HTML-encoded text, normalises every
        /// whitespace character to a single space, trims leading and trailing
        /// spaces, and appends the remaining characters to both
        /// <c>textBuilder</c> and <c>tokenBuilder</c>, emitting at most one
        /// separating space on either side.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: <c>node.Text</c> is replaced by
        /// an empty string when <c>IsTag</c> reports it as a tag;
        /// <c>textElementIdx</c> is incremented on every call; a pending flush
        /// is executed and cleared before any text is examined.
        /// </remarks>
        /// <param name="node">The text node to process.</param>
        public void HandleText(HtmlTextNode node)
        {
            if (IsTag(node.Text))
            {
                node.Text = "";
            }

            char[] chars = HttpUtility.HtmlDecode(node.Text).ToCharArray();

            textElementIdx++;

            if (flush)
            {
                FlushBlock();
                flush = false;
            }

            // Nothing to record inside ignorable elements or for empty text.
            if (inIgnorableElement != 0 || chars.Length == 0)
            {
                return;
            }

            // Collapse every kind of whitespace character into a plain space.
            for (int i = 0; i < chars.Length; i++)
            {
                if (IsWhiteSpace(chars[i]))
                {
                    chars[i] = ' ';
                }
            }

            // Index of the first non-space character (chars.Length if there is none).
            int first = 0;
            while (first < chars.Length && chars[first] == ' ')
            {
                first++;
            }

            // Exclusive end index of the trimmed range; never moves below 'first'.
            int limit = chars.Length;
            while (limit > first && chars[limit - 1] == ' ')
            {
                limit--;
            }

            bool leadingSpace  = first > 0;
            bool trailingSpace = limit < chars.Length;
            int  count         = limit - first;

            if (count == 0)
            {
                // Only whitespace remained after trimming.
                bool sawSpace = leadingSpace || trailingSpace;

                if (sawSpace && !sbLastWasWhitespace)
                {
                    textBuilder.Append(' ');
                    tokenBuilder.Append(' ');
                }

                sbLastWasWhitespace = sawSpace;
                lastEvent           = NBoilerpipeContentHandler.Event.WHITESPACE;
                return;
            }

            // Emit a single separator unless the builders already end with one.
            if (leadingSpace && !sbLastWasWhitespace)
            {
                textBuilder.Append(' ');
                tokenBuilder.Append(' ');
            }

            // -1 marks "not yet set": remember the level of the block's first text.
            if (blockTagLevel == -1)
            {
                blockTagLevel = tagLevel;
            }

            textBuilder.Append(chars, first, count);
            tokenBuilder.Append(chars, first, count);

            if (trailingSpace)
            {
                textBuilder.Append(' ');
                tokenBuilder.Append(' ');
            }

            sbLastWasWhitespace = trailingSpace;
            lastEvent           = NBoilerpipeContentHandler.Event.CHARACTERS;
            currentContainedTextElements.Add(textElementIdx);
        }
 /// <summary>
 /// Processes the end of an element. The matching <c>TagAction</c> (looked
 /// up by node name) gets its end hook called; the tag level is decremented
 /// for unknown tags and for actions that change the level; a pending flush
 /// triggers <c>FlushBlock</c>; finally the last-event bookkeeping is
 /// updated and the last <c>labelStacks</c> entry is removed.
 /// </summary>
 /// <param name="node">The element being closed.</param>
 public void EndElement(HtmlNode node)
 {
     string tagName = node.Name;
     TagAction handler = tagActions.Get (tagName);

     if (handler == null) {
         // Unknown tags always force a flush.
         flush = true;
     } else {
         // The hook always runs; its result is combined with the flag
         // as it stands once the hook has returned.
         bool wantsFlush = handler.End (this, tagName);
         flush = wantsFlush || flush;
     }

     bool affectsLevel = handler == null || handler.ChangesTagLevel ();
     if (affectsLevel) {
         tagLevel--;
     }

     // The flush flag is intentionally left as-is after flushing here.
     if (flush) {
         FlushBlock ();
     }

     lastEndTag = tagName;
     lastEvent = NBoilerpipeContentHandler.Event.END_TAG;
     labelStacks.RemoveLast ();
 }
 /// <summary>
 /// Processes the start of an element. A <c>null</c> entry is pushed onto
 /// <c>labelStacks</c>, then the <c>TagAction</c> registered for the node's
 /// name decides whether the tag level grows and whether a flush becomes
 /// pending. Tags without an action always raise the level and request a
 /// flush.
 /// </summary>
 /// <param name="node">The element being opened.</param>
 public void StartElement(HtmlNode node)
 {
     labelStacks.AddItem (null);

     string tagName = node.Name;
     TagAction handler = tagActions.Get (tagName);

     if (handler == null) {
         tagLevel++;
         flush = true;
     } else {
         if (handler.ChangesTagLevel ()) {
             tagLevel++;
         }
         // The hook always runs; its result is combined with the flag
         // as it stands once the hook has returned.
         bool wantsFlush = handler.Start (this, tagName, node.Attributes);
         flush = wantsFlush || flush;
     }

     lastStartTag = tagName;
     lastEvent = NBoilerpipeContentHandler.Event.START_TAG;
 }
        /// <summary>
        /// Processes a text node. The text is HTML-decoded, each whitespace
        /// character is replaced by a space, surrounding spaces are trimmed,
        /// and what is left is appended to <c>textBuilder</c> and
        /// <c>tokenBuilder</c> with at most one separating space before and
        /// after it.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: <c>node.Text</c> is set to an
        /// empty string when <c>IsTag</c> returns true for it;
        /// <c>textElementIdx</c> is incremented on every call; a pending flush
        /// is performed and reset before the text is inspected.
        /// </remarks>
        /// <param name="node">The text node to process.</param>
        public void HandleText(HtmlTextNode node)
        {
            if (IsTag (node.Text)) {
                node.Text = "";
            }

            char[] buffer = HttpUtility.HtmlDecode (node.Text).ToCharArray ();
            int total = buffer.Length;

            textElementIdx++;

            if (flush) {
                FlushBlock ();
                flush = false;
            }

            if (inIgnorableElement != 0) {
                return;
            }
            if (total == 0) {
                return;
            }

            // Normalise all whitespace characters to a plain space.
            for (int pos = 0; pos < total; pos++) {
                if (IsWhiteSpace (buffer [pos])) {
                    buffer [pos] = ' ';
                }
            }

            // Skip leading spaces; 'begin' ends up at the first non-space
            // character, or at 'total' when the text is all spaces.
            int begin = 0;
            while (begin < total && buffer [begin] == ' ') {
                begin++;
            }

            // Skip trailing spaces; 'stop' is exclusive and never passes 'begin'.
            int stop = total;
            while (stop > begin && buffer [stop - 1] == ' ') {
                stop--;
            }

            bool hadLeadingSpace = begin > 0;
            bool hadTrailingSpace = stop < total;
            int remaining = stop - begin;

            if (remaining == 0) {
                // Whitespace-only text: emit at most one separator.
                bool anySpace = hadLeadingSpace || hadTrailingSpace;
                if (anySpace && !sbLastWasWhitespace) {
                    textBuilder.Append (' ');
                    tokenBuilder.Append (' ');
                }
                sbLastWasWhitespace = anySpace;
                lastEvent = NBoilerpipeContentHandler.Event.WHITESPACE;
                return;
            }

            if (hadLeadingSpace && !sbLastWasWhitespace) {
                textBuilder.Append (' ');
                tokenBuilder.Append (' ');
            }

            // -1 means the block's tag level has not been captured yet.
            if (blockTagLevel == -1) {
                blockTagLevel = tagLevel;
            }

            textBuilder.Append (buffer, begin, remaining);
            tokenBuilder.Append (buffer, begin, remaining);

            if (hadTrailingSpace) {
                textBuilder.Append (' ');
                tokenBuilder.Append (' ');
            }

            sbLastWasWhitespace = hadTrailingSpace;
            lastEvent = NBoilerpipeContentHandler.Event.CHARACTERS;
            currentContainedTextElements.Add (textElementIdx);
        }