/// <summary>
/// Handles the end of an anchor element. Decrements the handler's open-anchor
/// counter and, when the outermost anchor has just been closed and no
/// ignorable element is active, appends the anchor-text end marker followed
/// by a single space to the token buffer.
/// </summary>
/// <param name="instance">Content handler whose anchor counter and token buffer are updated.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="localName">Not used by this method.</param>
/// <param name="qName">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    // An end event without a matching open anchor (malformed input, or the
    // extra close that follows a nested anchor) must not push the counter
    // below zero: once negative, the "== 0" test below would never match
    // again and no further end markers would be written.
    if (instance._inAnchor <= 0)
    {
        return false;
    }

    if (--instance._inAnchor == 0 && instance._inIgnorableElement == 0)
    {
        instance.AddWhitespaceIfNecessary();
        instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_END);
        instance._tokenBuffer.Append(' ');
        // The buffer now ends with the space appended above.
        instance._sbLastWasWhitespace = true;
    }

    return false;
}
/// <summary>
/// Handles the start of an anchor element. Increments the handler's
/// open-anchor counter; if an anchor was already open, writes a warning to
/// standard error and calls <c>End</c> once as a recovery attempt. Unless an
/// ignorable element is active, appends the anchor-text start marker followed
/// by a single space to the token buffer.
/// </summary>
/// <param name="instance">Content handler whose anchor counter and token buffer are updated.</param>
/// <param name="uri">Forwarded to <c>End</c> on the recovery path only.</param>
/// <param name="localName">Forwarded to <c>End</c> on the recovery path only.</param>
/// <param name="qName">Forwarded to <c>End</c> on the recovery path only.</param>
/// <param name="attributes">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    // Capture the state before counting this element, then count it.
    bool anchorAlreadyOpen = instance._inAnchor > 0;
    instance._inAnchor++;

    if (anchorAlreadyOpen)
    {
        // Nested A elements are not allowed per specification, so getting
        // here most likely means the upstream parser produced a broken
        // event stream.
        Console.Error.WriteLine(
            "Warning: Input contains nested A elements -- You have probably hit a bug in your HTML parser. Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");
        End(instance, uri, localName, qName);
    }

    if (instance._inIgnorableElement != 0)
    {
        return false;
    }

    instance.AddWhitespaceIfNecessary();
    instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_START);
    instance._tokenBuffer.Append(' ');
    // The buffer now ends with the space appended above.
    instance._sbLastWasWhitespace = true;

    return false;
}
/// <summary>
/// Handles the end of an element by delegating to
/// <c>instance.AddWhitespaceIfNecessary()</c>; no other state is touched here.
/// </summary>
/// <param name="instance">Content handler that receives the call.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="localName">Not used by this method.</param>
/// <param name="qName">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    instance.AddWhitespaceIfNecessary();

    return false;
}
/// <summary>
/// Handles the start of an element by delegating to
/// <c>instance.AddWhitespaceIfNecessary()</c>; no other state is touched here.
/// </summary>
/// <param name="instance">Content handler that receives the call.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="localName">Not used by this method.</param>
/// <param name="qName">Not used by this method.</param>
/// <param name="attributes">Not used by this method.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    instance.AddWhitespaceIfNecessary();

    return false;
}