/// <summary>
/// Handles the end of an anchor element: decrements the handler's
/// <c>InAnchor</c> counter and, when the counter reaches zero outside of any
/// ignorable element, writes the anchor-text end marker to the token buffer.
/// </summary>
/// <param name="instance">Content handler whose counters and token buffer are updated.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="localName">Not used by this method.</param>
/// <param name="qName">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    // The counter is decremented unconditionally; everything below only
    // runs when this call brought it to exactly zero.
    var remainingAnchors = --instance.InAnchor;
    if (remainingAnchors != 0)
    {
        return false;
    }

    // No marker is written while inside an ignorable element.
    if (instance.InIgnorableElement != 0)
    {
        return false;
    }

    instance.AddWhitespaceIfNecessary();

    var buffer = instance.TokenBuffer;
    buffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_END);
    buffer.Append(' ');

    // A space was just appended, so record that the buffer ends in whitespace.
    instance.SbLastWasWhitespace = true;
    return false;
}
/// <summary>
/// Handles the start of an anchor element: increments the handler's
/// <c>InAnchor</c> counter and, outside of any ignorable element, writes the
/// anchor-text start marker to the token buffer. If an anchor was already
/// open, a warning is written to standard error and <c>End</c> is invoked
/// before continuing.
/// </summary>
/// <param name="instance">Content handler whose counters and token buffer are updated.</param>
/// <param name="uri">Forwarded to <c>End</c> when a nested anchor is detected.</param>
/// <param name="localName">Forwarded to <c>End</c> when a nested anchor is detected.</param>
/// <param name="qName">Forwarded to <c>End</c> when a nested anchor is detected.</param>
/// <param name="attributes">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    // Post-increment: the test below looks at the value *before* this call.
    var anchorsAlreadyOpen = instance.InAnchor++;
    if (anchorsAlreadyOpen > 0)
    {
        // As nested A elements are not allowed per specification, we are
        // probably reaching this branch due to a bug in the XML parser.
        Console.Error.WriteLine(
            "Warning: Input contains nested A elements -- You have probably hit a bug in your HTML parser. Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");
        End(instance, uri, localName, qName);
    }

    // No marker is written while inside an ignorable element.
    if (instance.InIgnorableElement != 0)
    {
        return false;
    }

    instance.AddWhitespaceIfNecessary();

    var buffer = instance.TokenBuffer;
    buffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_START);
    buffer.Append(' ');

    // A space was just appended, so record that the buffer ends in whitespace.
    instance.SbLastWasWhitespace = true;
    return false;
}
/// <summary>
/// Handles the end of an element by asking the content handler to add
/// whitespace if necessary; performs no other work.
/// </summary>
/// <param name="instance">Content handler on which <c>AddWhitespaceIfNecessary</c> is called.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="localName">Not used by this method.</param>
/// <param name="qName">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    instance.AddWhitespaceIfNecessary();

    return false;
}
/// <summary>
/// Handles the start of an element by asking the content handler to add
/// whitespace if necessary; performs no other work.
/// </summary>
/// <param name="instance">Content handler on which <c>AddWhitespaceIfNecessary</c> is called.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="localName">Not used by this method.</param>
/// <param name="qName">Not used by this method.</param>
/// <param name="attributes">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    instance.AddWhitespaceIfNecessary();

    return false;
}