/// <summary>
/// Replaces every text block whose text splits into two or more pieces with one new
/// block per piece. Each new block receives the source block's content flag and labels.
/// Blocks that yield fewer than two pieces are carried over unchanged.
/// </summary>
/// <param name="doc">
/// Document to process; the list returned by <c>doc.GetTextBlocks()</c> is cleared and
/// refilled when at least one block was split.
/// </param>
/// <returns><c>true</c> if at least one block was split; otherwise <c>false</c>.</returns>
/// <exception cref="NBoilerpipe.BoilerpipeProcessingException"></exception>
public bool Process(TextDocument doc)
{
    IList<TextBlock> current = doc.GetTextBlocks();
    IList<TextBlock> rebuilt = new AList<TextBlock>();
    bool changes = false;

    foreach (TextBlock block in current)
    {
        // NOTE(review): assumes the Split overload in scope treats its argument as a
        // regular expression (one or more CR/LF characters) - confirm against the helper.
        string[] pieces = block.GetText().Split("[\n\r]+");

        if (pieces.Length >= 2)
        {
            bool contentFlag = block.IsContent();
            ICollection<string> sourceLabels = block.GetLabels();

            // At least two pieces exist here, so the document is definitely modified.
            changes = true;

            for (int i = 0; i < pieces.Length; i++)
            {
                TextBlock piece = new TextBlock(pieces[i]);
                piece.SetIsContent(contentFlag);
                piece.AddLabels(sourceLabels);
                rebuilt.AddItem(piece);
            }
        }
        else
        {
            // Nothing to split: keep the original block instance.
            rebuilt.AddItem(block);
        }
    }

    if (!changes)
    {
        return false;
    }

    // Swap the rebuilt sequence into the list obtained from the document.
    current.Clear();
    Sharpen.Collections.AddAll(current, rebuilt);
    return true;
}
/// <summary>
/// Labels the given text block and appends it to <c>textBlocks</c>.
/// </summary>
/// <remarks>
/// The block receives a <c>"font-"</c> label built from the first value enumerated from
/// <c>fontSizeStack</c> (none if it is empty). Every non-null list in <c>labelStacks</c>
/// then has each of its actions applied to the block through <c>AddTo</c>.
/// </remarks>
/// <param name="tb">The text block to label and store.</param>
protected void AddTextBlock(TextBlock tb)
{
    // Only the first enumerated font size is used; the loop exits after one pass.
    foreach (int fontSize in fontSizeStack)
    {
        tb.AddLabels("font-" + fontSize);
        break;
    }

    foreach (List<LabelAction> actions in labelStacks)
    {
        // Entries may be null; skip them.
        if (labels_IsMissing(actions))
        {
            continue;
        }

        foreach (LabelAction action in actions)
        {
            action.AddTo(tb);
        }
    }

    textBlocks.Add(tb);
}

// Returns true when a label-stack entry holds no action list.
private static bool labels_IsMissing(List<LabelAction> actions)
{
    return actions == null;
}
/// <summary>
/// Splits every text block that contains several paragraphs (separated by one or more
/// CR/LF characters) into one block per paragraph. Each new block copies the source
/// block's <c>IsContent</c> flag and labels. Blocks with fewer than two paragraphs are
/// kept unchanged.
/// </summary>
/// <param name="doc">
/// Document to process; its <c>TextBlocks</c> list is cleared and refilled in place when
/// at least one block was split.
/// </param>
/// <returns><c>true</c> if at least one block was split; otherwise <c>false</c>.</returns>
public bool Process(TextDocument doc)
{
    bool changes = false;
    List<TextBlock> blocks = doc.TextBlocks;
    var blocksNew = new List<TextBlock>();

    foreach (TextBlock tb in blocks)
    {
        string[] paragraphs = Regex.Split(tb.Text, "[\n\r]+");

        // Regex.Split keeps an empty trailing element when the text ends with a line
        // break. Ignore trailing empties so that a block which merely ends in a line
        // break is not replaced by itself plus an empty block (and falsely reported
        // as a change). Leading elements are left untouched.
        int paragraphCount = paragraphs.Length;
        while (paragraphCount > 0 && paragraphs[paragraphCount - 1].Length == 0)
        {
            paragraphCount--;
        }

        if (paragraphCount < 2)
        {
            blocksNew.Add(tb);
            continue;
        }

        bool isContent = tb.IsContent;
        // Snapshot the labels once; a null label collection is treated as empty.
        List<string> labels = (tb.Labels ?? Enumerable.Empty<string>()).ToList();

        for (int i = 0; i < paragraphCount; i++)
        {
            var tbP = new TextBlock(paragraphs[i]) { IsContent = isContent };
            tbP.AddLabels(labels);
            blocksNew.Add(tbP);
        }

        changes = true;
    }

    if (changes)
    {
        // Replace the document's blocks in place with the rebuilt sequence.
        blocks.Clear();
        blocks.AddRange(blocksNew);
    }

    return changes;
}
/// <summary>
/// Adds the value of <c>Labels</c> to the given text block by calling its
/// <c>AddLabels</c> method. Derived classes may override this.
/// </summary>
/// <param name="tb">The text block that receives the labels.</param>
protected virtual void AddLabelsTo(TextBlock tb)
{
    var labelsToApply = Labels;
    tb.AddLabels(labelsToApply);
}
/// <summary>
/// Adds the value of the <c>labels</c> member to the given text block by calling its
/// <c>AddLabels</c> method.
/// </summary>
/// <param name="tb">The text block that receives the labels.</param>
protected internal void AddLabelsTo(TextBlock tb)
{
    var labelsToApply = labels;
    tb.AddLabels(labelsToApply);
}