SetIsContent() public method

public SetIsContent ( bool isContent ) : bool
isContent bool
return bool
 /// <summary>
 /// Splits every text block whose text contains line breaks into one new block
 /// per paragraph. When at least one block was split, the list returned by
 /// <c>doc.GetTextBlocks()</c> is cleared and refilled with the resulting blocks.
 /// </summary>
 /// <remarks>
 /// Each new block receives the <c>IsContent</c> flag and the labels of the
 /// block it was split from. Blocks that yield fewer than two paragraphs are
 /// carried over unchanged.
 /// </remarks>
 /// <param name="doc">Document whose text blocks are examined.</param>
 /// <returns><c>true</c> if at least one block was split; otherwise <c>false</c>.</returns>
 /// <exception cref="NBoilerpipe.BoilerpipeProcessingException"></exception>
 public bool Process(TextDocument doc)
 {
     bool changes = false;
     IList<TextBlock> blocks = doc.GetTextBlocks();
     IList<TextBlock> blocksNew = new AList<TextBlock>();
     foreach (TextBlock tb in blocks)
     {
         // Split with an explicit regular expression. A bare
         // text.Split("[\n\r]+") can bind to string.Split(string, ...), which
         // treats the pattern as a literal separator and never splits.
         string[] paragraphs = System.Text.RegularExpressions.Regex.Split(tb.GetText(), "[\n\r]+");

         // Ignore trailing empty pieces (text ending in a line break), as the
         // regex-based split of the Java original does, so that such text is
         // not turned into an extra, empty block.
         int paragraphCount = paragraphs.Length;
         while (paragraphCount > 0 && paragraphs[paragraphCount - 1].Length == 0)
         {
             paragraphCount--;
         }

         if (paragraphCount < 2)
         {
             // Nothing to split: keep the original block.
             blocksNew.AddItem(tb);
             continue;
         }

         bool isContent = tb.IsContent();
         ICollection<string> labels = tb.GetLabels();
         for (int i = 0; i < paragraphCount; i++)
         {
             TextBlock tbP = new TextBlock(paragraphs[i]);
             tbP.SetIsContent(isContent);
             tbP.AddLabels(labels);
             blocksNew.AddItem(tbP);
         }
         changes = true;
     }
     if (changes)
     {
         // Replace the contents only after enumeration of 'blocks' has finished.
         blocks.Clear();
         Sharpen.Collections.AddAll(blocks, blocksNew);
     }
     return changes;
 }
Example #2
0
 /// <summary>
 /// Decides from word counts and link densities of the previous, current and
 /// next block whether <paramref name="curr"/> is content, and stores the
 /// decision on <paramref name="curr"/>.
 /// </summary>
 /// <param name="prev">Block preceding <paramref name="curr"/>.</param>
 /// <param name="curr">Block being classified; its content flag is set.</param>
 /// <param name="next">Block following <paramref name="curr"/>.</param>
 /// <returns>The value returned by <c>curr.SetIsContent</c>.</returns>
 protected internal bool Classify(TextBlock prev, TextBlock curr, TextBlock next)
 {
     bool verdict;
     if (curr.GetLinkDensity() > 0 && next.GetNumWords() > 11)
     {
         // Current block has links and is followed by a block of 12+ words.
         verdict = true;
     }
     else if (curr.GetNumWords() > 19)
     {
         // Current block has 20+ words.
         verdict = true;
     }
     else if (next.GetNumWords() > 6 && next.GetLinkDensity() == 0 && prev.GetLinkDensity() == 0)
     {
         // Link-free neighbours and a next block of 7+ words: accept if any
         // one of the three blocks passes its word-count threshold.
         verdict = curr.GetNumWords() > 6 || prev.GetNumWords() > 7 || next.GetNumWords() > 19;
     }
     else
     {
         verdict = false;
     }
     return curr.SetIsContent(verdict);
 }
        /// <summary>
        /// Decides from link densities and text densities of the previous,
        /// current and next block whether <paramref name="curr"/> is content,
        /// and stores the decision on <paramref name="curr"/>.
        /// </summary>
        /// <param name="prev">Block preceding <paramref name="curr"/>.</param>
        /// <param name="curr">Block being classified; its content flag is set.</param>
        /// <param name="next">Block following <paramref name="curr"/>.</param>
        /// <returns>The value returned by <c>curr.SetIsContent</c>.</returns>
        protected internal virtual bool Classify(TextBlock prev, TextBlock curr, TextBlock next)
        {
            // Thresholds are tested as negated "<=" / "==" comparisons on purpose:
            // this keeps the exact comparison operators of the decision tree, so
            // values that are unordered (NaN) fall into the same branch as before.
            bool verdict;
            if (!(curr.GetLinkDensity() <= 0.333333))
            {
                // Current block's link density is above the limit: never content.
                verdict = false;
            }
            else if (!(prev.GetLinkDensity() <= 0.555556))
            {
                // Previous block's link density is above its limit: only the
                // next block's text density decides.
                verdict = !(next.GetTextDensity() <= 11);
            }
            else if (!(curr.GetTextDensity() <= 9))
            {
                // Current block's text density is above 9: content unless the
                // next block's text density is zero.
                verdict = !(next.GetTextDensity() == 0);
            }
            else
            {
                // Current block's text density is at most 9: rejected only when
                // both neighbours are at or below their thresholds too.
                verdict = !(next.GetTextDensity() <= 10 && prev.GetTextDensity() <= 4);
            }
            return curr.SetIsContent(verdict);
        }