예제 #1
0
            /// <summary>
            ///   Reads the element's <c>size</c> attribute and pushes the resolved size, or a
            ///   <c>null</c> placeholder, onto the front of <c>instance._fontSizeStack</c>.
            /// </summary>
            /// <remarks>
            ///   Exactly one entry is pushed per call. When the attribute is missing or does not match
            ///   <c>PAT_FONT_SIZE</c>, the entry is <c>null</c>. A value without a sign prefix is taken
            ///   as-is; a signed value is applied to the nearest non-null entry already on the stack,
            ///   falling back to 3 when there is none.
            /// </remarks>
            /// <param name="instance">Handler whose font-size stack is updated.</param>
            /// <param name="uri">Not used by this method.</param>
            /// <param name="localName">Not used by this method.</param>
            /// <param name="qName">Not used by this method.</param>
            /// <param name="attributes">Attributes of the element; only <c>size</c> is read.</param>
            /// <returns>Always <c>false</c>.</returns>
            public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
            {
                string rawSize = attributes.GetValue("size");
                Match parsed = rawSize == null ? null : PAT_FONT_SIZE.Match(rawSize);

                if (parsed == null || !parsed.Success)
                {
                    // Nothing usable on this element: push a placeholder entry.
                    instance._fontSizeStack.AddFirst((int?)null);
                    return false;
                }

                string sign = parsed.Groups[1].Value;
                int magnitude = int.Parse(parsed.Groups[2].Value);

                int resolved;
                if (sign.Length == 0)
                {
                    // No sign prefix: the value is the size itself.
                    resolved = magnitude;
                }
                else
                {
                    // Signed value: offset from the closest non-null entry, or from 3 if none exists.
                    int baseline = 3;
                    foreach (var entry in instance._fontSizeStack)
                    {
                        if (entry != null)
                        {
                            baseline = entry.Value;
                            break;
                        }
                    }

                    resolved = sign[0] == '+' ? baseline + magnitude : baseline - magnitude;
                }

                instance._fontSizeStack.AddFirst(resolved);
                return false;
            }
예제 #2
0
 /// <summary>
 ///   Decrements <c>instance._inAnchor</c> and, when it reaches zero outside of an ignorable
 ///   element, appends the anchor-text end marker followed by a space to the token buffer.
 /// </summary>
 /// <param name="instance">Handler whose anchor counter and token buffer are updated.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     instance._inAnchor--;

     // Only emit the end marker once the counter is back to zero and content is not being ignored.
     if (instance._inAnchor != 0 || instance._inIgnorableElement != 0)
     {
         return false;
     }

     instance.AddWhitespaceIfNecessary();
     instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_END);
     instance._tokenBuffer.Append(' ');
     instance._sbLastWasWhitespace = true;
     return false;
 }
예제 #3
0
            /// <summary>
            ///   Increments <c>instance._inAnchor</c> and, outside of an ignorable element, appends the
            ///   anchor-text start marker followed by a space to the token buffer.
            /// </summary>
            /// <remarks>
            ///   If the counter was already positive before the increment, a warning is written to
            ///   standard error and <c>End</c> is invoked before continuing.
            /// </remarks>
            /// <param name="instance">Handler whose anchor counter and token buffer are updated.</param>
            /// <param name="uri">Forwarded to <c>End</c> in the nested case.</param>
            /// <param name="localName">Forwarded to <c>End</c> in the nested case.</param>
            /// <param name="qName">Forwarded to <c>End</c> in the nested case.</param>
            /// <param name="attributes">Not used by this method.</param>
            /// <returns>Always <c>false</c>.</returns>
            public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
            {
                bool alreadyInAnchor = instance._inAnchor > 0;
                instance._inAnchor++;

                if (alreadyInAnchor)
                {
                    // Nested A elements are not allowed per specification, so reaching this
                    // branch most likely indicates a bug in the upstream parser.
                    Console.Error.WriteLine(
                        "Warning: Input contains nested A elements -- You have probably hit a bug in your HTML parser. Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");

                    End(instance, uri, localName, qName);
                }

                if (instance._inIgnorableElement != 0)
                {
                    return false;
                }

                instance.AddWhitespaceIfNecessary();
                instance._tokenBuffer.Append(BoilerpipeHtmlContentHandler.ANCHOR_TEXT_START);
                instance._tokenBuffer.Append(' ');
                instance._sbLastWasWhitespace = true;
                return false;
            }
예제 #4
0
 /// <summary>
 ///   Creates a <see cref="BoilerpipeHtmlParser"/> and registers the supplied
 ///   <see cref="BoilerpipeHtmlContentHandler" /> through <c>SetContentHandler</c>.
 /// </summary>
 /// <param name="contentHandler">the <see cref="BoilerpipeHtmlContentHandler" /> to register</param>
 public BoilerpipeHtmlParser(BoilerpipeHtmlContentHandler contentHandler)
 {
     this.SetContentHandler(contentHandler);
 }
예제 #5
0
 /// <summary>
 ///   Flushes the current block on <paramref name="instance"/> and then decrements
 ///   <c>instance._inBody</c>.
 /// </summary>
 /// <param name="instance">Handler to flush and whose body counter is decremented.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // Flush first, so the block is emitted while the counter still has its pre-decrement value.
     instance.FlushBlock();
     instance._inBody -= 1;

     return false;
 }
예제 #6
0
 /// <summary>
 ///   Flushes the current block on <paramref name="instance"/> and then increments
 ///   <c>instance._inBody</c>.
 /// </summary>
 /// <param name="instance">Handler to flush and whose body counter is incremented.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <param name="attributes">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     // Flush first, so the block is emitted while the counter still has its pre-increment value.
     instance.FlushBlock();
     instance._inBody += 1;

     return false;
 }
예제 #7
0
 /// <summary>
 ///   Calls <c>AddWhitespaceIfNecessary</c> on <paramref name="instance"/> when the element ends.
 /// </summary>
 /// <param name="instance">Handler that receives the whitespace request.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     instance.AddWhitespaceIfNecessary();

     return false;
 }
예제 #8
0
 /// <summary>
 ///   Invokes <c>Start</c> on <c>t1</c> and then on <c>t2</c> with the same arguments and
 ///   combines their results with a logical OR.
 /// </summary>
 /// <param name="instance">Handler forwarded to both actions.</param>
 /// <param name="uri">Forwarded to both actions.</param>
 /// <param name="localName">Forwarded to both actions.</param>
 /// <param name="qName">Forwarded to both actions.</param>
 /// <param name="attributes">Forwarded to both actions.</param>
 /// <returns><c>true</c> if either action returned <c>true</c>; otherwise <c>false</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     // Both actions must always run, so each result is captured before combining.
     bool firstResult = t1.Start(instance, uri, localName, qName, attributes);
     bool secondResult = t2.Start(instance, uri, localName, qName, attributes);

     return firstResult | secondResult;
 }
예제 #9
0
 /// <summary>
 ///   No-op end handler: performs no work on <paramref name="instance"/>.
 /// </summary>
 /// <param name="instance">Not used by this method.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     return false;
 }
예제 #10
0
 /// <summary>
 ///   No-op start handler: performs no work on <paramref name="instance"/>.
 /// </summary>
 /// <param name="instance">Not used by this method.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <param name="attributes">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     return false;
 }
예제 #11
0
 /// <summary>
 ///   Decrements <c>instance._inIgnorableElement</c> when the element ends.
 /// </summary>
 /// <param name="instance">Handler whose ignorable-element counter is decremented.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     instance._inIgnorableElement -= 1;

     return true;
 }
예제 #12
0
 /// <summary>
 ///   Increments <c>instance._inIgnorableElement</c> when the element starts.
 /// </summary>
 /// <param name="instance">Handler whose ignorable-element counter is incremented.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <param name="attributes">Not used by this method.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     instance._inIgnorableElement += 1;

     return true;
 }
예제 #13
0
 /// <summary>
 ///   Removes the front entry of <c>instance._fontSizeStack</c> when the element ends.
 /// </summary>
 /// <param name="instance">Handler whose font-size stack is popped.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // NOTE(review): presumably pairs with a start handler that pushed one entry — confirm.
     instance._fontSizeStack.RemoveFirst();

     return false;
 }
예제 #14
0
 /// <summary>
 ///   Invokes <c>End</c> on <c>t1</c> and then on <c>t2</c> with the same arguments and
 ///   combines their results with a logical OR.
 /// </summary>
 /// <param name="instance">Handler forwarded to both actions.</param>
 /// <param name="uri">Forwarded to both actions.</param>
 /// <param name="localName">Forwarded to both actions.</param>
 /// <param name="qName">Forwarded to both actions.</param>
 /// <returns><c>true</c> if either action returned <c>true</c>; otherwise <c>false</c>.</returns>
 public bool End(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName)
 {
     // Both actions must always run, so each result is captured before combining.
     bool firstResult = t1.End(instance, uri, localName, qName);
     bool secondResult = t2.End(instance, uri, localName, qName);

     return firstResult | secondResult;
 }
예제 #15
0
 /// <summary>
 ///   Stores the given <see cref="BoilerpipeHtmlContentHandler" /> in <c>_contentHandler</c>.
 /// </summary>
 /// <param name="contentHandler">The handler to store; assigned as-is without validation.</param>
 public void SetContentHandler(BoilerpipeHtmlContentHandler contentHandler)
 {
     this._contentHandler = contentHandler;
 }
예제 #16
0
 /// <summary>
 ///   Calls <c>AddWhitespaceIfNecessary</c> on <paramref name="instance"/> when the element starts.
 /// </summary>
 /// <param name="instance">Handler that receives the whitespace request.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <param name="attributes">Not used by this method.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     instance.AddWhitespaceIfNecessary();

     return false;
 }
예제 #17
0
 /// <summary>
 ///   Sets the content handler from a general <see cref="IContentHandler" /> reference.
 /// </summary>
 /// <remarks>
 ///   If <paramref name="contentHandler"/> is a <see cref="BoilerpipeHtmlContentHandler" />, it is
 ///   stored in <c>_contentHandler</c>; any other handler (or <c>null</c>) clears the field.
 ///   This keeps a Boilerpipe handler that happens to be passed through the interface-typed
 ///   overload instead of silently discarding it.
 /// </remarks>
 /// <param name="contentHandler">The handler to register; may be <c>null</c>.</param>
 public void SetContentHandler(IContentHandler contentHandler)
 {
     // "as" yields null for non-Boilerpipe handlers, so those still clear the field.
     _contentHandler = contentHandler as BoilerpipeHtmlContentHandler;
 }
예제 #18
0
 /// <summary>
 ///   Registers this action's <c>_action</c> on <paramref name="instance"/> via
 ///   <c>AddLabelAction</c> when the element starts.
 /// </summary>
 /// <param name="instance">Handler that receives the label action.</param>
 /// <param name="uri">Not used by this method.</param>
 /// <param name="localName">Not used by this method.</param>
 /// <param name="qName">Not used by this method.</param>
 /// <param name="attributes">Not used by this method.</param>
 /// <returns>Always <c>true</c>.</returns>
 public bool Start(BoilerpipeHtmlContentHandler instance, string uri, string localName, string qName, IAttributes attributes)
 {
     instance.AddLabelAction(this._action);

     return true;
 }