Exemplo n.º 1
0
 /// <summary>
 /// Handles a closing anchor tag: decrements the handler's <c>_inAnchor</c> counter and,
 /// when that counter reaches zero while <c>_inIgnorableElement</c> is zero, appends
 /// <c>ANCHOR_TEXT_END</c> followed by a single space to the token buffer.
 /// </summary>
 /// <param name="instance">Content handler whose counters and token buffer are updated.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // A closing tag without a matching open anchor must not drive the counter
     // below zero: once negative, every later anchor would close at -1 instead
     // of 0 and its end marker would never be written.
     if (instance._inAnchor <= 0)
     {
         return false;
     }

     instance._inAnchor--;

     // Only the outermost anchor, outside of ignorable content, emits the marker.
     if (instance._inAnchor != 0 || instance._inIgnorableElement != 0)
     {
         return false;
     }

     instance.AddWhitespaceIfNecessary();
     instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_END);
     instance._tokenBuffer.Append(' ');
     // Record that the buffer now ends in whitespace (the space appended above).
     instance._sbLastWasWhitespace = true;
     return false;
 }
Exemplo n.º 2
0
            /// <summary>
            /// Handles an opening anchor tag: increments the handler's <c>_inAnchor</c> counter
            /// and, while <c>_inIgnorableElement</c> is zero, appends <c>ANCHOR_TEXT_START</c>
            /// followed by a single space to the token buffer. If an anchor was already open,
            /// a warning is written to standard error and <c>End</c> is invoked first.
            /// </summary>
            /// <param name="instance">Content handler whose counters and token buffer are updated.</param>
            /// <param name="uri">Forwarded to <c>End</c> in the nested-anchor case.</param>
            /// <param name="localName">Forwarded to <c>End</c> in the nested-anchor case.</param>
            /// <param name="qName">Forwarded to <c>End</c> in the nested-anchor case.</param>
            /// <param name="attributes">Not used by this method.</param>
            /// <returns>Always <c>false</c>.</returns>
            public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
            {
                // Capture the state before counting this tag, then count it.
                bool alreadyInsideAnchor = instance._inAnchor > 0;
                instance._inAnchor++;

                if (alreadyInsideAnchor)
                {
                    // The specification forbids nested A elements, so landing here
                    // most likely points at a bug in the XML parser.
                    Console.Error.WriteLine(
                        "Warning: Input contains nested A elements -- You have probably hit a bug in your HTML parser. Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");

                    End(instance, uri, localName, qName);
                }

                // Inside ignorable content nothing is written to the token buffer.
                if (instance._inIgnorableElement != 0)
                {
                    return false;
                }

                instance.AddWhitespaceIfNecessary();
                instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_START);
                instance._tokenBuffer.Append(' ');
                // Record that the buffer now ends in whitespace (the space appended above).
                instance._sbLastWasWhitespace = true;
                return false;
            }
Exemplo n.º 3
0
 /// <summary>
 /// Handles a closing tag by asking the content handler to add whitespace if necessary.
 /// </summary>
 /// <param name="instance">Content handler that receives the call.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // The only effect is the whitespace request on the handler.
     instance.AddWhitespaceIfNecessary();

     return false;
 }
Exemplo n.º 4
0
 /// <summary>
 /// Handles an opening tag by asking the content handler to add whitespace if necessary.
 /// </summary>
 /// <param name="instance">Content handler that receives the call.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <param name="attributes">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     // The only effect is the whitespace request on the handler.
     instance.AddWhitespaceIfNecessary();

     return false;
 }