/// <summary>
/// Pushes an entry for the element's <c>size</c> attribute onto the handler's font-size stack.
/// </summary>
/// <remarks>
/// A null entry is pushed when the attribute is absent or does not match <c>PAT_FONT_SIZE</c>.
/// When the first captured group is empty the parsed number is pushed as is; otherwise it is
/// added to (leading '+') or subtracted from (any other prefix) the nearest non-null entry on
/// the stack, falling back to 3 when the stack holds no non-null entry.
/// </remarks>
/// <param name="instance">Handler whose font-size stack receives the new entry.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <param name="attributes">Attributes of the element; only <c>size</c> is read.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    string rawSize = attributes.GetValue("size");
    if (rawSize == null)
    {
        // No size attribute: push a placeholder so the stack stays in step with the elements.
        instance._fontSizeStack.AddFirst((int?)null);
        return false;
    }

    Match match = PAT_FONT_SIZE.Match(rawSize);
    if (!match.Success)
    {
        // Unrecognised size syntax is treated like a missing attribute.
        instance._fontSizeStack.AddFirst((int?)null);
        return false;
    }

    string prefix = match.Groups[1].Value;
    int magnitude = int.Parse(match.Groups[2].Value);

    int resolved;
    if (prefix.Length == 0)
    {
        // Absolute size.
        resolved = magnitude;
    }
    else
    {
        // Relative size: start from the closest enclosing entry that carries a value.
        int baseline = 3;
        foreach (var entry in instance._fontSizeStack)
        {
            if (entry != null)
            {
                baseline = entry.Value;
                break;
            }
        }

        resolved = prefix[0] == '+' ? baseline + magnitude : baseline - magnitude;
    }

    instance._fontSizeStack.AddFirst(resolved);
    return false;
}
/// <summary>
/// Decrements the handler's anchor depth and, when that depth reaches zero outside of any
/// ignorable element, appends the anchor-text end marker followed by a space to the token buffer.
/// </summary>
/// <param name="instance">Handler whose anchor counter and token buffer are updated.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    instance._inAnchor--;

    // Only the outermost closing anchor emits a marker.
    if (instance._inAnchor != 0)
    {
        return false;
    }

    // Nothing is written while inside an ignorable element.
    if (instance._inIgnorableElement != 0)
    {
        return false;
    }

    instance.AddWhitespaceIfNecessary();
    instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_END);
    instance._tokenBuffer.Append(' ');
    instance._sbLastWasWhitespace = true;
    return false;
}
/// <summary>
/// Increments the handler's anchor depth and, outside of any ignorable element, appends the
/// anchor-text start marker followed by a space to the token buffer.
/// </summary>
/// <remarks>
/// If an anchor was already open, a warning is written to standard error and <c>End</c> is
/// invoked first to close it before the new anchor is processed.
/// </remarks>
/// <param name="instance">Handler whose anchor counter and token buffer are updated.</param>
/// <param name="uri">Forwarded to <c>End</c> when recovering from a nested anchor.</param>
/// <param name="localName">Forwarded to <c>End</c> when recovering from a nested anchor.</param>
/// <param name="qName">Forwarded to <c>End</c> when recovering from a nested anchor.</param>
/// <param name="attributes">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    bool anchorAlreadyOpen = instance._inAnchor > 0;
    instance._inAnchor++;

    if (anchorAlreadyOpen)
    {
        // Nested A elements are not allowed by the specification, so reaching this point
        // most likely indicates a parser bug. Warn, then close the outer anchor to recover.
        Console.Error.WriteLine(
            "Warning: Input contains nested A elements -- You have probably hit a bug in your HTML parser. Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");

        End(instance, uri, localName, qName);
    }

    if (instance._inIgnorableElement != 0)
    {
        return false;
    }

    instance.AddWhitespaceIfNecessary();
    instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_START);
    instance._tokenBuffer.Append(' ');
    instance._sbLastWasWhitespace = true;
    return false;
}
/// <summary>
/// Creates a <see cref="BoilerpipeHtmlParser"/> bound to the given
/// <see cref="BoilerpipeHtmlContentHandler"/>.
/// </summary>
/// <param name="contentHandler">
/// The <see cref="BoilerpipeHtmlContentHandler"/> handed to <c>SetContentHandler</c>.
/// </param>
public BoilerpipeHtmlParser(BoilerpipeHtmlContentHandler contentHandler)
{
    this.SetContentHandler(contentHandler);
}
/// <summary>
/// Flushes the handler's current block and then decrements its body depth counter.
/// </summary>
/// <param name="instance">Handler to flush and update.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    // Flush first so pending content is handled before the counter changes.
    instance.FlushBlock();
    instance._inBody -= 1;

    return false;
}
/// <summary>
/// Flushes the handler's current block and then increments its body depth counter.
/// </summary>
/// <param name="instance">Handler to flush and update.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <param name="attributes">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    // Flush first so pending content is handled before the counter changes.
    instance.FlushBlock();
    instance._inBody += 1;

    return false;
}
/// <summary>
/// Asks the handler to add whitespace if necessary when the element closes.
/// </summary>
/// <param name="instance">Handler on which <c>AddWhitespaceIfNecessary</c> is called.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    instance.AddWhitespaceIfNecessary();

    return false;
}
/// <summary>
/// Runs the <c>Start</c> step of both wrapped actions, <c>t1</c> then <c>t2</c>, and combines
/// their results.
/// </summary>
/// <param name="instance">Handler forwarded to both actions.</param>
/// <param name="uri">Forwarded to both actions.</param>
/// <param name="localName">Forwarded to both actions.</param>
/// <param name="qName">Forwarded to both actions.</param>
/// <param name="attributes">Forwarded to both actions.</param>
/// <returns><c>true</c> if at least one of the two actions returned <c>true</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    // Both actions must always run, so each result is captured before they are combined.
    bool firstResult = t1.Start(instance, uri, localName, qName, attributes);
    bool secondResult = t2.Start(instance, uri, localName, qName, attributes);

    return firstResult || secondResult;
}
/// <summary>
/// Does nothing when the element closes.
/// </summary>
/// <param name="instance">Not used by this action.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    return false;
}
/// <summary>
/// Does nothing when the element opens.
/// </summary>
/// <param name="instance">Not used by this action.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <param name="attributes">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    return false;
}
/// <summary>
/// Decrements the handler's ignorable-element depth counter.
/// </summary>
/// <param name="instance">Handler whose counter is decremented.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <returns>Always <c>true</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    instance._inIgnorableElement -= 1;

    return true;
}
/// <summary>
/// Increments the handler's ignorable-element depth counter.
/// </summary>
/// <param name="instance">Handler whose counter is incremented.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <param name="attributes">Not used by this action.</param>
/// <returns>Always <c>true</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    instance._inIgnorableElement += 1;

    return true;
}
/// <summary>
/// Removes the first entry from the handler's font-size stack.
/// </summary>
/// <param name="instance">Handler whose font-size stack is popped.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    var fontSizes = instance._fontSizeStack;
    fontSizes.RemoveFirst();

    return false;
}
/// <summary>
/// Runs the <c>End</c> step of both wrapped actions, <c>t1</c> then <c>t2</c>, and combines
/// their results.
/// </summary>
/// <param name="instance">Handler forwarded to both actions.</param>
/// <param name="uri">Forwarded to both actions.</param>
/// <param name="localName">Forwarded to both actions.</param>
/// <param name="qName">Forwarded to both actions.</param>
/// <returns><c>true</c> if at least one of the two actions returned <c>true</c>.</returns>
public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
{
    // Both actions must always run, so each result is captured before they are combined.
    bool firstResult = t1.End(instance, uri, localName, qName);
    bool secondResult = t2.End(instance, uri, localName, qName);

    return firstResult || secondResult;
}
/// <summary>
/// Stores the given <see cref="BoilerpipeHtmlContentHandler"/> as this object's content handler.
/// </summary>
/// <param name="contentHandler">The handler to store.</param>
public void SetContentHandler(BoilerpipeHtmlContentHandler contentHandler)
{
    this._contentHandler = contentHandler;
}
/// <summary>
/// Asks the handler to add whitespace if necessary when the element opens.
/// </summary>
/// <param name="instance">Handler on which <c>AddWhitespaceIfNecessary</c> is called.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <param name="attributes">Not used by this action.</param>
/// <returns>Always <c>false</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    instance.AddWhitespaceIfNecessary();

    return false;
}
/// <summary>
/// Clears the stored content handler.
/// </summary>
/// <remarks>
/// NOTE(review): the argument is not stored anywhere; the field is simply reset to
/// <c>null</c>. Presumably a plain <see cref="IContentHandler"/> cannot take the place of the
/// specialised handler -- confirm this is intended.
/// </remarks>
/// <param name="contentHandler">Ignored by this overload.</param>
public void SetContentHandler(IContentHandler contentHandler)
{
    this._contentHandler = null;
}
/// <summary>
/// Registers this action's label action (<c>_action</c>) with the handler.
/// </summary>
/// <param name="instance">Handler on which <c>AddLabelAction</c> is called.</param>
/// <param name="uri">Not used by this action.</param>
/// <param name="localName">Not used by this action.</param>
/// <param name="qName">Not used by this action.</param>
/// <param name="attributes">Not used by this action.</param>
/// <returns>Always <c>true</c>.</returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
{
    instance.AddLabelAction(_action);

    return true;
}