/// <summary>
///   Reads the element's "size" attribute, resolves it to a font size and pushes the
///   result onto the front of <c>instance.FontSizeStack</c>.
/// </summary>
/// <remarks>
///   When the attribute is missing or is not matched by <c>FontSizeRegex</c>, a null
///   entry is pushed. An empty first capture group means the number is used as-is;
///   otherwise the number is added to (leading '+') or subtracted from the first
///   non-null entry on the stack, falling back to 3 when there is none.
/// </remarks>
/// <param name="instance">handler whose <c>FontSizeStack</c> receives the new entry</param>
/// <param name="uri">not used by this method</param>
/// <param name="localName">not used by this method</param>
/// <param name="qName">not used by this method</param>
/// <param name="attributes">attributes of the element; only "size" is read</param>
/// <returns>always <c>false</c></returns>
public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
            {
                // Stays null unless the attribute is present and matches FontSizeRegex.
                int? resolvedSize = null;

                string rawSize = attributes.GetValue("size");
                Match parsed = rawSize == null ? null : FontSizeRegex.Match(rawSize);

                if (parsed != null && parsed.Success)
                {
                    string sign = parsed.Groups[1].Value;
                    int magnitude = int.Parse(parsed.Groups[2].Value);

                    if (sign.Length == 0)
                    {
                        // No sign captured: the value is an absolute size.
                        resolvedSize = magnitude;
                    }
                    else
                    {
                        // Signed value: relative to the nearest enclosing known size.
                        // Iterating an empty stack simply leaves the default of 3.
                        int baseline = 3;
                        foreach (var entry in instance.FontSizeStack)
                        {
                            if (entry != null)
                            {
                                baseline = entry.Value;
                                break;
                            }
                        }

                        resolvedSize = sign[0] == '+'
                            ? baseline + magnitude
                            : baseline - magnitude;
                    }
                }

                instance.FontSizeStack.AddFirst(resolvedSize);
                return false;
            }
 /// <summary>
 ///   Decrements <c>instance.InAnchor</c>. When the counter reaches zero and
 ///   <c>instance.InIgnorableElement</c> is zero, appends the anchor-text end marker
 ///   followed by a space to <c>instance.TokenBuffer</c>.
 /// </summary>
 /// <param name="instance">handler whose anchor counter and token buffer are updated</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     var openAnchors = --instance.InAnchor;

     // Nothing to emit while the counter is non-zero or inside ignorable content.
     if (openAnchors != 0 || instance.InIgnorableElement != 0)
     {
         return false;
     }

     instance.AddWhitespaceIfNecessary();
     instance.TokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_END);
     instance.TokenBuffer.Append(' ');
     instance.SbLastWasWhitespace = true;
     return false;
 }
            /// <summary>
            ///   Increments <c>instance.InAnchor</c> and, unless
            ///   <c>instance.InIgnorableElement</c> is non-zero, appends the anchor-text
            ///   start marker followed by a space to <c>instance.TokenBuffer</c>.
            /// </summary>
            /// <remarks>
            ///   If the counter was already positive before the increment, a warning is
            ///   written to standard error and <c>End</c> is called with the same
            ///   arguments before continuing.
            /// </remarks>
            /// <param name="instance">handler whose anchor counter and token buffer are updated</param>
            /// <param name="uri">forwarded to <c>End</c> in the nested case</param>
            /// <param name="localName">forwarded to <c>End</c> in the nested case</param>
            /// <param name="qName">forwarded to <c>End</c> in the nested case</param>
            /// <param name="attributes">not used by this method</param>
            /// <returns>always <c>false</c></returns>
            public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
            {
                var anchorsAlreadyOpen = instance.InAnchor++;
                if (anchorsAlreadyOpen > 0)
                {
                    // Nested A elements are not allowed by the specification, so
                    // getting here most likely points at a bug in the XML parser.
                    Console.Error.WriteLine(
                        "Warning: Input contains nested A elements -- You have probably hit a bug in your HTML parser. Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");

                    End(instance, uri, localName, qName);
                }

                if (instance.InIgnorableElement != 0)
                {
                    return false;
                }

                instance.AddWhitespaceIfNecessary();
                instance.TokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_START);
                instance.TokenBuffer.Append(' ');
                instance.SbLastWasWhitespace = true;
                return false;
            }
Пример #4
0
 /// <summary>
 ///   Creates a <see cref="BoilerpipeHtmlParser"/> and registers the supplied
 ///   <see cref="BoilerpipeHtmlContentHandler"/> through <c>SetContentHandler</c>.
 /// </summary>
 /// <param name="contentHandler">the content handler to register with this parser</param>
 public BoilerpipeHtmlParser(BoilerpipeHtmlContentHandler contentHandler)
 {
     this.SetContentHandler(contentHandler);
 }
 /// <summary>
 ///   Flushes the current block on <paramref name="instance"/> and then decrements
 ///   its <c>InBody</c> counter.
 /// </summary>
 /// <param name="instance">handler to flush and whose <c>InBody</c> counter is decremented</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     instance.FlushBlock();
     --instance.InBody;

     return false;
 }
 /// <summary>
 ///   Flushes the current block on <paramref name="instance"/> and then increments
 ///   its <c>InBody</c> counter.
 /// </summary>
 /// <param name="instance">handler to flush and whose <c>InBody</c> counter is incremented</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <param name="attributes">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     instance.FlushBlock();
     ++instance.InBody;

     return false;
 }
 /// <summary>
 ///   Calls <c>AddWhitespaceIfNecessary</c> on <paramref name="instance"/> when the
 ///   element ends.
 /// </summary>
 /// <param name="instance">handler on which <c>AddWhitespaceIfNecessary</c> is called</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     instance.AddWhitespaceIfNecessary();

     return false;
 }
 /// <summary>
 ///   Forwards the start event to <c>t1</c> and then to <c>t2</c>, and combines
 ///   their results with a non-short-circuiting OR.
 /// </summary>
 /// <param name="instance">forwarded unchanged to both delegates</param>
 /// <param name="uri">forwarded unchanged to both delegates</param>
 /// <param name="localName">forwarded unchanged to both delegates</param>
 /// <param name="qName">forwarded unchanged to both delegates</param>
 /// <param name="attributes">forwarded unchanged to both delegates</param>
 /// <returns>the OR of the two delegate results</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     // Both delegates are always invoked, t1 first, regardless of t1's result.
     var firstResult = t1.Start(instance, uri, localName, qName, attributes);
     var secondResult = t2.Start(instance, uri, localName, qName, attributes);

     return firstResult | secondResult;
 }
 /// <summary>
 ///   End handler that performs no work.
 /// </summary>
 /// <param name="instance">not used by this method</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     return false;
 }
 /// <summary>
 ///   Start handler that performs no work.
 /// </summary>
 /// <param name="instance">not used by this method</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <param name="attributes">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     return false;
 }
 /// <summary>
 ///   Decrements <c>instance.InIgnorableElement</c> when the element ends.
 /// </summary>
 /// <param name="instance">handler whose <c>InIgnorableElement</c> counter is decremented</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <returns>always <c>true</c></returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     --instance.InIgnorableElement;

     return true;
 }
 /// <summary>
 ///   Increments <c>instance.InIgnorableElement</c> when the element starts.
 /// </summary>
 /// <param name="instance">handler whose <c>InIgnorableElement</c> counter is incremented</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <param name="attributes">not used by this method</param>
 /// <returns>always <c>true</c></returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     ++instance.InIgnorableElement;

     return true;
 }
 /// <summary>
 ///   Removes the first entry from <c>instance.FontSizeStack</c> when the element ends.
 /// </summary>
 /// <param name="instance">handler whose <c>FontSizeStack</c> loses its first entry</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // NOTE(review): presumably pairs with a Start that pushed one entry — confirm.
     instance.FontSizeStack.RemoveFirst();

     return false;
 }
 /// <summary>
 ///   Forwards the end event to <c>t1</c> and then to <c>t2</c>, and combines
 ///   their results with a non-short-circuiting OR.
 /// </summary>
 /// <param name="instance">forwarded unchanged to both delegates</param>
 /// <param name="uri">forwarded unchanged to both delegates</param>
 /// <param name="localName">forwarded unchanged to both delegates</param>
 /// <param name="qName">forwarded unchanged to both delegates</param>
 /// <returns>the OR of the two delegate results</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // Both delegates are always invoked, t1 first, regardless of t1's result.
     var firstResult = t1.End(instance, uri, localName, qName);
     var secondResult = t2.End(instance, uri, localName, qName);

     return firstResult | secondResult;
 }
Пример #15
0
 /// <summary>
 ///   Stores the given handler in the <c>_contentHandler</c> field.
 /// </summary>
 /// <param name="contentHandler">the handler to store; no null check is performed</param>
 public void SetContentHandler(BoilerpipeHtmlContentHandler contentHandler) => _contentHandler = contentHandler;
 /// <summary>
 ///   Calls <c>AddWhitespaceIfNecessary</c> on <paramref name="instance"/> when the
 ///   element starts.
 /// </summary>
 /// <param name="instance">handler on which <c>AddWhitespaceIfNecessary</c> is called</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <param name="attributes">not used by this method</param>
 /// <returns>always <c>false</c></returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     instance.AddWhitespaceIfNecessary();

     return false;
 }
Пример #17
0
 /// <summary>
 ///   Clears the <c>_contentHandler</c> field. The supplied handler is not stored.
 /// </summary>
 /// <remarks>
 ///   NOTE(review): the argument is ignored and the field is set to null. This looks
 ///   deliberate (a handler passed through this overload is not kept), but confirm
 ///   that callers do not expect the handler to be retained.
 /// </remarks>
 /// <param name="contentHandler">not used by this method</param>
 public void SetContentHandler(IContentHandler contentHandler) => _contentHandler = null;
 /// <summary>
 ///   Passes this object's <c>_action</c> to <c>instance.AddLabelAction</c> when the
 ///   element starts.
 /// </summary>
 /// <param name="instance">handler that receives the label action</param>
 /// <param name="uri">not used by this method</param>
 /// <param name="localName">not used by this method</param>
 /// <param name="qName">not used by this method</param>
 /// <param name="attributes">not used by this method</param>
 /// <returns>always <c>true</c></returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     instance.AddLabelAction(_action);

     return true;
 }