/// <exception cref="NBoilerpipe.BoilerpipeProcessingException"></exception>
        /// <summary>
        /// Replaces every text block whose text splits into two or more paragraphs
        /// with one new block per paragraph.
        /// </summary>
        /// <param name="doc">
        /// Document to process. The list returned by <c>doc.GetTextBlocks()</c> is
        /// cleared and refilled when at least one block was split.
        /// </param>
        /// <returns>
        /// <c>true</c> if at least one block was split; <c>false</c> if the block
        /// list was left untouched.
        /// </returns>
        public bool Process(TextDocument doc)
        {
            IList <TextBlock> original = doc.GetTextBlocks();
            IList <TextBlock> rebuilt  = new AList <TextBlock>();
            bool anySplit              = false;

            foreach (TextBlock block in original)
            {
                // NOTE(review): the separator is written as a regex character class, so this
                // presumably binds to a regex-aware Split helper - confirm, because the BCL
                // string.Split(string) overload treats its argument as a literal separator.
                string[] parts = block.GetText().Split("[\n\r]+");

                if (parts.Length >= 2)
                {
                    // Every paragraph block inherits the content flag and labels of its source.
                    bool contentFlag = block.IsContent();
                    ICollection <string> sourceLabels = block.GetLabels();
                    foreach (string part in parts)
                    {
                        TextBlock piece = new TextBlock(part);
                        piece.SetIsContent(contentFlag);
                        piece.AddLabels(sourceLabels);
                        rebuilt.AddItem(piece);
                    }
                    anySplit = true;
                }
                else
                {
                    // Fewer than two paragraphs: carry the block over unchanged.
                    rebuilt.AddItem(block);
                }
            }

            if (!anySplit)
            {
                return(false);
            }

            // Swap the contents of the returned list for the rebuilt sequence.
            original.Clear();
            Sharpen.Collections.AddAll(original, rebuilt);
            return(true);
        }
        /// <summary>
        /// Labels <paramref name="tb"/> and then appends it to <c>textBlocks</c>.
        /// </summary>
        /// <remarks>
        /// A <c>"font-"</c> label is built from the first element enumerated from
        /// <c>fontSizeStack</c> (if there is one), after which every label action found
        /// in the non-null entries of <c>labelStacks</c> is applied to the block.
        /// </remarks>
        /// <param name="tb">The text block to label and store.</param>
        protected void AddTextBlock(TextBlock tb)
        {
            // Only the first enumerated font size is used; the loop exits right after it.
            foreach (int fontSize in fontSizeStack)
            {
                string fontLabel = "font-" + fontSize;
                tb.AddLabels(fontLabel);
                break;
            }

            foreach (List <LabelAction> actions in labelStacks)
            {
                // Entries of labelStacks may be null; those are skipped.
                if (actions == null)
                {
                    continue;
                }

                foreach (LabelAction action in actions)
                {
                    action.AddTo(tb);
                }
            }

            textBlocks.Add(tb);
        }
Exemple #3
0
        /// <summary>
        /// Splits every text block whose text contains two or more non-empty
        /// paragraphs (separated by runs of <c>\n</c> / <c>\r</c>) into one block
        /// per paragraph.
        /// </summary>
        /// <remarks>
        /// <c>Regex.Split</c> yields an empty string at the start or end of its result
        /// when the input begins or ends with a separator. Those zero-length fragments
        /// are discarded, so text that merely starts or ends with a line break is not
        /// replaced by itself plus empty blocks, and is not reported as a change.
        /// </remarks>
        /// <param name="doc">
        /// Document to process. <c>doc.TextBlocks</c> is cleared and refilled in place
        /// when at least one block was split.
        /// </param>
        /// <returns>
        /// <c>true</c> if at least one block was split; otherwise <c>false</c>.
        /// </returns>
        public bool Process(TextDocument doc)
        {
            bool changes = false;

            List <TextBlock> blocks = doc.TextBlocks;
            var blocksNew           = new List <TextBlock>();

            foreach (TextBlock tb in blocks)
            {
                // Drop zero-length fragments produced by leading/trailing line breaks;
                // the greedy separator pattern cannot produce empty fragments in between.
                string[] paragraphs = Regex.Split(tb.Text, "[\n\r]+")
                                      .Where(p => p.Length > 0)
                                      .ToArray();

                if (paragraphs.Length < 2)
                {
                    // Nothing to split: keep the original block (and its original text).
                    blocksNew.Add(tb);
                    continue;
                }

                bool          isContent = tb.IsContent;
                // Snapshot the labels once; a null label collection is treated as empty.
                List <string> labels    = (tb.Labels ?? Enumerable.Empty <string>()).ToList();
                foreach (string p in paragraphs)
                {
                    var tbP = new TextBlock(p)
                    {
                        IsContent = isContent
                    };
                    tbP.AddLabels(labels);
                    blocksNew.Add(tbP);
                }
                changes = true;
            }

            if (changes)
            {
                blocks.Clear();
                blocks.AddRange(blocksNew);
            }

            return(changes);
        }
Exemple #4
0
 /// <summary>
 /// Passes the <c>Labels</c> member to <c>AddLabels</c> on <paramref name="tb"/>.
 /// </summary>
 /// <param name="tb">The text block that receives the labels.</param>
 protected virtual void AddLabelsTo(TextBlock tb) => tb.AddLabels(Labels);
Exemple #5
0
 /// <summary>
 /// Passes the <c>labels</c> member to <c>AddLabels</c> on <paramref name="tb"/>.
 /// </summary>
 /// <param name="tb">The text block that receives the labels.</param>
 protected internal void AddLabelsTo(TextBlock tb) => tb.AddLabels(labels);