/**
 * <summary>Gets whether the specified content stream part is blank.</summary>
 * <remarks>A level is considered blank when none of its graphics objects (searched recursively
 * through nested containers) has a bounding box intersecting <paramref name="contentBox"/>.
 * Objects without a wrapper, or whose wrapper has no bounding box, are ignored.</remarks>
 * <param name="level">Content stream part to evaluate.</param>
 * <param name="contentBox">Area to evaluate within the page.</param>
 * <returns><c>true</c> if <paramref name="level"/> is null or nothing intersects the area;
 * otherwise <c>false</c>.</returns>
 */
private static bool IsBlank(ContentScanner level, SKRect contentBox)
{
    if (level == null)
    {
        return true;
    }

    while (level.MoveNext())
    {
        ContentObject content = level.Current;
        if (content is ContainerObject)
        {
            // Scan the inner level!
            if (!IsBlank(level.ChildLevel, contentBox))
            {
                return false;
            }
            continue;
        }

        var contentWrapper = level.CurrentWrapper;
        if (contentWrapper == null)
        {
            continue;
        }

        // The box is nullable: reading .Value on an object without a box would throw,
        // so an object without a box is treated as not occupying the area.
        var box = contentWrapper.Box;
        if (box.HasValue && box.Value.IntersectsWith(contentBox))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Walks a content level and appends every decoded text chunk to <c>_contentList</c>.
/// </summary>
/// <param name="level">Content level to walk; a null level is ignored.</param>
private void Extract(ContentScanner level)
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        var current = level.Current;
        if (current is ShowText showText)
        {
            // Decode the raw text chunk with the font currently set in the graphics state.
            var activeFont = level.State.Font;
            _contentList.Add(activeFont.Decode(showText.Text));
        }
        else if (current is Text || current is ContainerObject)
        {
            // Descend into the nested level.
            Extract(level.ChildLevel);
        }
    }
}
/**
 * <summary>Scans a content level looking for text.</summary>
 * <remarks>Every text string found in text objects at this level, or in any nested container,
 * is appended to <paramref name="extractedTextStrings"/> in scan order.</remarks>
 * <param name="level">Content level to scan; a null level is ignored.</param>
 * <param name="extractedTextStrings">List receiving the text strings found.</param>
 */
private void Extract(
    ContentScanner level,
    IList<ContentScanner.TextStringWrapper> extractedTextStrings
    )
{
    // Guard the recursion: a container may expose no child level.
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject content = level.Current;
        if (content is Text)
        {
            // Collect the text strings!
            var text = (ContentScanner.TextWrapper)level.CurrentWrapper;
            foreach (ContentScanner.TextStringWrapper textString in text.TextStrings)
            {
                extractedTextStrings.Add(textString);
            }
        }
        else if (content is ContainerObject)
        {
            // Scan the inner level!
            Extract(level.ChildLevel, extractedTextStrings);
        }
    }
}
/**
 * <summary>Scans a content level looking for text, printing each decoded chunk to the
 * console.</summary>
 * <param name="level">Content level to scan; a null level is ignored.</param>
 */
/*
 * NOTE: Page contents are represented by a sequence of content objects,
 * possibly nested into multiple levels.
 */
private void Extract(
    ContentScanner level
    )
{
    if (level == null)
        return;

    while (level.MoveNext())
    {
        ContentObject current = level.Current;
        if (current is ShowText showText)
        {
            // Decode the chunk with the font active in the current graphics state.
            Font activeFont = level.State.Font;
            string decoded = activeFont.Decode(showText.Text);
            Console.WriteLine(decoded);
            continue;
        }

        if (current is Text || current is ContainerObject)
        {
            // Descend into the nested level.
            Extract(level.ChildLevel);
        }
    }
}
/**
 * <summary>Scans a content level looking for text, logging each text string and outlining
 * its bounding boxes through the given composer.</summary>
 * <param name="level">Content level to scan; a null level is ignored.</param>
 * <param name="composer">Composer used to draw the character and string boxes.</param>
 */
/*
 * NOTE: Page contents are represented by a sequence of content objects,
 * possibly nested into multiple levels.
 */
private void Extract(
    ContentScanner level,
    PrimitiveComposer composer
    )
{
    if (level == null)
        return;

    while (level.MoveNext())
    {
        ContentObject current = level.Current;
        if (!(current is Text))
        {
            if (current is ContainerObject)
            {
                // Descend into the nested level.
                Extract(level.ChildLevel, composer);
            }
            continue;
        }

        var textWrapper = (ContentScanner.TextWrapper)level.CurrentWrapper;
        int paletteIndex = 0;
        foreach (ContentScanner.TextStringWrapper item in textWrapper.TextStrings)
        {
            RectangleF bounds = item.Box.Value;
            Console.WriteLine(
                $"Text [x:{Math.Round(bounds.X)},y:{Math.Round(bounds.Y)},w:{Math.Round(bounds.Width)},h:{Math.Round(bounds.Height)}] [font size:{Math.Round(item.Style.FontSize)}]: {item.Text}"
                );

            // Outline every character, cycling the stroke color once per text string.
            paletteIndex = (paletteIndex + 1) % textCharBoxColors.Length;
            composer.SetStrokeColor(textCharBoxColors[paletteIndex]);
            foreach (TextChar glyph in item.TextChars)
            {
                /*
                 * NOTE: You can get further text information
                 * (font, font size, text color, text rendering mode)
                 * through textChar.style.
                 */
                composer.DrawRectangle(glyph.Box);
                composer.Stroke();
            }

            // Outline the whole text string with a dashed line, in an isolated graphics state.
            composer.BeginLocalState();
            composer.SetLineDash(new LineDash(new double[] { 5, 5 }));
            composer.SetStrokeColor(textStringBoxColor);
            composer.DrawRectangle(item.Box.Value);
            composer.Stroke();
            composer.End();
        }
    }
}
/// <summary>
/// Extracts the text strings found in the given content level, storing each one in
/// <c>_PdfTextStrings</c> together with its fill color, stroke color, font, font size
/// and bounding box.
/// The scan proceeds level by level, since page contents are represented by a sequence
/// of content objects, possibly nested into multiple levels. External objects are scanned too.
/// </summary>
/// <param name="level">Level being iterated; a null level is ignored.</param>
private void Extract(ContentScanner level)
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject current = level.Current;
        if (current is Text)
        {
            // Store each text string along with its properties.
            var wrapper = (ContentScanner.TextWrapper)level.CurrentWrapper;
            foreach (ContentScanner.TextStringWrapper item in wrapper.TextStrings)
            {
                var entry = new PdfClownTextString(
                    item.Text,
                    item.Style.FillColor,
                    item.Style.StrokeColor,
                    item.Style.Font,
                    item.Style.FontSize)
                {
                    Rectangle = item.Box
                };
                _PdfTextStrings.Add(entry);
            }
        }
        else if (current is XObject external)
        {
            // Scan the external level; failures are reported on the console and the scan goes on.
            try
            {
                Extract(external.GetScanner(level));
            }
            catch (Exception e)
            {
                Console.WriteLine(e.StackTrace);
                Console.WriteLine("Soy la excepción de XObject");
            }
        }
        else if (current is ContainerObject)
        {
            // Scan the inner level; failures are reported on the console and the scan goes on.
            try
            {
                Extract(level.ChildLevel);
            }
            catch (Exception e)
            {
                Console.WriteLine(e.StackTrace);
                Console.WriteLine("Soy la excepción de ContainerObject");
            }
        }
    }
}
/**
  <summary>Scans a content level looking for text, reporting each text string on the console
  and drawing its bounding boxes with the given composer.</summary>
  <param name="level">Content level to scan; a null level is ignored.</param>
  <param name="composer">Composer used to draw the boxes.</param>
*/
/*
  NOTE: Page contents are represented by a sequence of content objects,
  possibly nested into multiple levels.
*/
private void Extract(
    ContentScanner level,
    PrimitiveComposer composer
    )
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject item = level.Current;
        if (item is Text)
        {
            ContentScanner.TextWrapper wrapper = (ContentScanner.TextWrapper)level.CurrentWrapper;
            int colorCursor = 0;
            foreach (ContentScanner.TextStringWrapper textString in wrapper.TextStrings)
            {
                RectangleF area = textString.Box.Value;
                string report =
                    "Text ["
                    + "x:" + Math.Round(area.X) + ","
                    + "y:" + Math.Round(area.Y) + ","
                    + "w:" + Math.Round(area.Width) + ","
                    + "h:" + Math.Round(area.Height)
                    + "] [font size:" + Math.Round(textString.Style.FontSize) + "]: "
                    + textString.Text;
                Console.WriteLine(report);

                // Character boxes: the stroke color advances once per text string.
                colorCursor = (colorCursor + 1) % textCharBoxColors.Length;
                composer.SetStrokeColor(textCharBoxColors[colorCursor]);
                foreach (TextChar textChar in textString.TextChars)
                {
                    /*
                      NOTE: You can get further text information
                      (font, font size, text color, text rendering mode)
                      through textChar.style.
                    */
                    composer.DrawRectangle(textChar.Box);
                    composer.Stroke();
                }

                // Text string box: dashed outline drawn inside a local graphics state.
                composer.BeginLocalState();
                composer.SetLineDash(new LineDash(new double[] { 5, 5 }));
                composer.SetStrokeColor(textStringBoxColor);
                composer.DrawRectangle(textString.Box.Value);
                composer.Stroke();
                composer.End();
            }
        }
        else if (item is ContainerObject)
        {
            // Descend into the nested level.
            Extract(level.ChildLevel, composer);
        }
    }
}
/// <summary>
/// Removes from a content level (and, recursively, from its nested levels) the contents
/// belonging to the given layers.
/// </summary>
/// <param name="level">Content level to process; a null level is ignored.</param>
/// <param name="layerEntityNames">Names of the layer entities whose marked content is targeted.</param>
/// <param name="layerXObjectNames">Names of the external objects to drop when contents are not preserved.</param>
/// <param name="preserveContent">
/// When true, a matching marked content block is replaced by an anonymous container holding
/// the same objects; when false, the block (and any matching external object) is removed.
/// </param>
private void RemoveLayerContents(
    ContentScanner level,
    ICollection<PdfName> layerEntityNames,
    ICollection<PdfName> layerXObjectNames,
    bool preserveContent
    )
{
    if (level == null)
        return;

    while (level.MoveNext())
    {
        ContentObject current = level.Current;
        if (current is MarkedContent block)
        {
            var header = (ContentMarker)block.Header;
            // NOTE: /OC tag identifies layer (aka optional content) markers.
            bool isTargetLayer = PdfName.OC.Equals(header.Tag)
                && layerEntityNames.Contains(header.Name);
            if (isTargetLayer)
            {
                if (!preserveContent)
                {
                    // Removes the layer marked content block along with its contents.
                    level.Remove();
                    continue;
                }

                // Replaces the layer marked content block with an anonymous container, preserving its contents.
                level.Current = new ContentPlaceholder(block.Objects);
            }
        }
        else if (!preserveContent && current is XObject external)
        {
            if (layerXObjectNames.Contains(external.Name))
            {
                level.Remove();
                continue;
            }
        }

        if (current is ContainerObject)
        {
            // Scan the inner level!
            RemoveLayerContents(level.ChildLevel, layerEntityNames, layerXObjectNames, preserveContent);
        }
    }
}
/**
 * <summary>Scans a content level looking for text.</summary>
 * <remarks>Text strings holding at least one character are appended to
 * <paramref name="extractedTextStrings"/>; external objects and nested containers are
 * scanned recursively.</remarks>
 * <param name="level">Content level to scan; a null level is ignored.</param>
 * <param name="extractedTextStrings">List receiving the text strings found.</param>
 */
private void Extract(
    ContentScanner level,
    IList<ContentScanner.TextStringWrapper> extractedTextStrings
    )
{
    if (level == null)
        return;

    while (level.MoveNext())
    {
        ContentObject current = level.Current;
        if (current is Text)
        {
            var wrapper = (ContentScanner.TextWrapper)level.CurrentWrapper;
            foreach (ContentScanner.TextStringWrapper candidate in wrapper.TextStrings)
            {
                // Skip text strings carrying no characters.
                if (candidate.TextChars.Count <= 0)
                    continue;

                extractedTextStrings.Add(candidate);
            }
        }
        else if (current is XObject external)
        {
            // Scan the external level.
            ContentScanner externalLevel = external.GetScanner(level);
            Extract(externalLevel, extractedTextStrings);
        }
        else if (current is ContainerObject)
        {
            // Scan the inner level.
            Extract(level.ChildLevel, extractedTextStrings);
        }
    }
}
/// <summary>
/// Collects into <c>textStrings</c> the wrapper of every show-text operation found in the
/// given level and in its nested containers.
/// </summary>
/// <param name="level">Content level to scan; a null level is ignored.</param>
private void Extract(ContentScanner level)
{
    if (level == null)
        return;

    // MoveStart is called before iterating (presumably it rewinds the scanner -- confirm).
    level.MoveStart();
    while (level.MoveNext())
    {
        ContentObject current = level.Current;
        if (current is ShowText)
        {
            var wrapper = (TextStringWrapper)level.CurrentWrapper;
            textStrings.Add(wrapper);
            continue;
        }

        if (current is ContainerObject)
        {
            // Descend into the nested level.
            Extract(level.ChildLevel);
        }
    }
}
/**
  <summary>Scans a content level looking for text, writing each decoded chunk to the
  console.</summary>
  <param name="level">Content level to scan; a null level is ignored.</param>
*/
/*
  NOTE: Page contents are represented by a sequence of content objects,
  possibly nested into multiple levels.
*/
private void Extract(
    ContentScanner level
    )
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject item = level.Current;
        bool isNested = item is Text || item is ContainerObject;
        if (item is ShowText)
        {
            // The font in the current graphics state decodes the raw text chunk.
            Font stateFont = level.State.Font;
            ShowText operation = (ShowText)item;
            Console.WriteLine(stateFont.Decode(operation.Text));
        }
        else if (isNested)
        {
            // Descend into the nested level.
            Extract(level.ChildLevel);
        }
    }
}
/**
 * <summary>Scans a content level looking for images, printing on the console the kind,
 * page and placement of each image found.</summary>
 * <param name="level">Content level to scan; a null level is ignored.</param>
 * <param name="page">Page the content belongs to (used for reporting only).</param>
 */
/*
 * NOTE: Page contents are represented by a sequence of content objects,
 * possibly nested into multiple levels.
 */
private void Scan(
    ContentScanner level,
    Page page
    )
{
    if (level == null)
        return;

    while (level.MoveNext())
    {
        ContentObject item = level.Current;
        if (item is ContainerObject)
        {
            // Descend into the nested level.
            Scan(level.ChildLevel, page);
            continue;
        }

        ContentScanner.GraphicsObjectWrapper wrapper = level.CurrentWrapper;
        if (wrapper == null)
            continue;

        /*
         * NOTE: Images can be represented on a page either as
         * external objects (XObject) or inline objects.
         */
        SizeF? nativeSize = null; // Stays null unless the object turns out to be an image.
        if (wrapper is ContentScanner.XObjectWrapper externalWrapper)
        {
            xObjects::XObject external = externalWrapper.XObject;
            // Only image external objects are reported.
            if (external is xObjects::ImageXObject)
            {
                // Image key and indirect reference.
                Console.Write("External Image '" + externalWrapper.Name + "' (" + external.BaseObject + ")");
                nativeSize = external.Size;
            }
        }
        else if (wrapper is ContentScanner.InlineImageWrapper inlineWrapper)
        {
            Console.Write("Inline Image");
            InlineImage inlineImage = inlineWrapper.InlineImage;
            nativeSize = inlineImage.Size;
        }

        if (!nativeSize.HasValue)
            continue;

        // Image position (location and size) on the page.
        RectangleF placement = wrapper.Box.Value;
        // Page number and indirect reference.
        Console.WriteLine(" on page " + page.Number + " (" + page.BaseObject + ")");
        Console.WriteLine(" Coordinates:");
        Console.WriteLine(" x: " + Math.Round(placement.X));
        Console.WriteLine(" y: " + Math.Round(placement.Y));
        Console.WriteLine(" width: " + Math.Round(placement.Width) + " (native: " + Math.Round(nativeSize.Value.Width) + ")");
        Console.WriteLine(" height: " + Math.Round(placement.Height) + " (native: " + Math.Round(nativeSize.Value.Height) + ")");
    }
}
/// <summary>
/// Scans a content level (including external objects and nested containers) and turns the
/// text found into skill entries.
/// </summary>
/// <remarks>
/// The entry being built is kept in the <c>currentSkill</c> / <c>currentDescription</c>
/// fields across calls; it is appended to <paramref name="entries"/> only when the next
/// heading is met, so the last entry is still pending when the scan ends.
/// </remarks>
/// <param name="level">Content level to scan; a null level is ignored.</param>
/// <param name="entries">List receiving the completed entries.</param>
private void Extract(ContentScanner level, List<Entry> entries)
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        var content = level.Current;
        if (content is Text)
        {
            ExtractSkillText((ContentScanner.TextWrapper)level.CurrentWrapper, entries);
        }
        else if (content is XObject xObject)
        {
            // Scan the external level.
            Extract(xObject.GetScanner(level), entries);
        }
        else if (content is ContainerObject)
        {
            // Scan the inner level.
            Extract(level.ChildLevel, entries);
        }
    }
}

/// <summary>Processes one text object: detects skill headings and accumulates description text.</summary>
private void ExtractSkillText(ContentScanner.TextWrapper text, List<Entry> entries)
{
    var strings = text.TextStrings;
    var i = 0;

    // A heading may open the text object.
    if (strings.Count > 5 && IsSkillHeading(text, 0))
    {
        i = BeginSkill(text, 0, entries);
    }

    // Nothing to describe until a first skill has been found.
    if (string.IsNullOrEmpty(currentSkill))
    {
        return;
    }

    for (; i < strings.Count; i++)
    {
        if (strings.Count > i + 5 && IsSkillHeading(text, i))
        {
            i = BeginSkill(text, i, entries);
            // The heading may run to the end of the text object, leaving nothing to read.
            if (i >= strings.Count)
            {
                break;
            }
        }

        var textString = strings[i];
        // Only 9pt strings other than a lone dash belong to the description.
        if (textString.Text == "-" || Math.Abs(textString.Style.FontSize - 9) > 0.1)
        {
            continue;
        }

        if (textString.Style.Font.Name.EndsWith("Italic"))
        {
            currentDescription += "<i>" + textString.Text + "</i>";
        }
        else if (textString.Style.Font.Name.EndsWith("Bold"))
        {
            currentDescription += "<b>" + textString.Text + "</b>";
        }
        else if (textString.BaseDataObject.Operator.Equals("TJ") && i > 0 && strings[i - 1].Text.EndsWith("."))
        {
            // A TJ string following a sentence end starts on a new line.
            currentDescription += "<br/>" + textString.Text;
        }
        else
        {
            currentDescription += textString.Text;
        }
    }
}

/// <summary>
/// Tells whether the string at <paramref name="index"/> looks like a skill heading: 12pt text
/// followed, within the next two strings, by one starting with an opening parenthesis.
/// The caller must ensure that <paramref name="index"/> + 2 is a valid position.
/// </summary>
private static bool IsSkillHeading(ContentScanner.TextWrapper text, int index)
{
    var strings = text.TextStrings;
    // Tolerance comparison, consistent with the 9pt check used for description text.
    if (Math.Abs(strings[index].Style.FontSize - 12) > 0.1)
    {
        return false;
    }

    return strings[index + 2].Text.StartsWith("(")
        || strings[index + 1].Text.StartsWith(" (")
        || strings[index + 1].Text.StartsWith("(");
}

/// <summary>
/// Flushes the pending entry (if any), starts a new one named after the heading at
/// <paramref name="headingIndex"/>, and returns the position just past the first italic string
/// found from the heading on (or the string count when there is none).
/// </summary>
private int BeginSkill(ContentScanner.TextWrapper text, int headingIndex, List<Entry> entries)
{
    // Do not emit an entry when no skill name has been collected yet (null or empty).
    if (!string.IsNullOrEmpty(currentSkill))
    {
        entries.Add(new Entry { Name = currentSkill, Description = currentDescription });
    }

    var strings = text.TextStrings;
    currentSkill = strings[headingIndex].Text;
    currentDescription = "";

    var i = headingIndex;
    for (; i < strings.Count; i++)
    {
        if (strings[i].Style.Font.Name.EndsWith("Italic"))
        {
            i++;
            break;
        }
    }
    return i;
}
/**
  <summary>Scans a content level looking for images, reporting each one (kind, page,
  coordinates and native size) on the console.</summary>
  <param name="level">Content level to scan; a null level is ignored.</param>
  <param name="page">Page the content belongs to (used for reporting only).</param>
*/
/*
  NOTE: Page contents are represented by a sequence of content objects,
  possibly nested into multiple levels.
*/
private void Scan(
    ContentScanner level,
    Page page
    )
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject entry = level.Current;
        if (entry is ContainerObject)
        {
            // Descend into the nested level.
            Scan(level.ChildLevel, page);
            continue;
        }

        ContentScanner.GraphicsObjectWrapper graphics = level.CurrentWrapper;
        if (graphics == null)
        {
            continue;
        }

        /*
          NOTE: Images can be represented on a page either as
          external objects (XObject) or inline objects.
        */
        SizeF? nativeSize = null; // Assigned only when the object is an image.
        if (graphics is ContentScanner.XObjectWrapper)
        {
            ContentScanner.XObjectWrapper externalWrapper = (ContentScanner.XObjectWrapper)graphics;
            xObjects::XObject external = externalWrapper.XObject;
            // Non-image external objects are skipped.
            if (external is xObjects::ImageXObject)
            {
                // Image key and indirect reference.
                Console.Write("External Image '" + externalWrapper.Name + "' (" + external.BaseObject + ")");
                nativeSize = external.Size;
            }
        }
        else if (graphics is ContentScanner.InlineImageWrapper)
        {
            Console.Write("Inline Image");
            InlineImage inline = ((ContentScanner.InlineImageWrapper)graphics).InlineImage;
            nativeSize = inline.Size;
        }

        if (!nativeSize.HasValue)
        {
            continue;
        }

        // Image position (location and size) on the page.
        RectangleF area = graphics.Box.Value;
        // One-based page position and indirect reference.
        Console.WriteLine(" on page " + (page.Index + 1) + " (" + page.BaseObject + ")");
        Console.WriteLine(" Coordinates:");
        Console.WriteLine(" x: " + Math.Round(area.X));
        Console.WriteLine(" y: " + Math.Round(area.Y));
        Console.WriteLine(" width: " + Math.Round(area.Width) + " (native: " + Math.Round(nativeSize.Value.Width) + ")");
        Console.WriteLine(" height: " + Math.Round(area.Height) + " (native: " + Math.Round(nativeSize.Value.Height) + ")");
    }
}
/**
 * <summary>Rebuilds the normal appearance of the field's first widget so that it shows the
 * current text.</summary>
 * <remarks>The method works in three steps:
 * 1. it obtains (or creates) the widget's normal appearance form;
 * 2. it resolves the font name and size from the default appearance state, creating that
 *    state first when it is missing;
 * 3. it walks the appearance content, refilling the marked content block tagged Tx with the
 *    text, removing text objects found elsewhere, and appending a new Tx block when none
 *    was refilled.</remarks>
 */
private void RefreshAppearance( )
{
    // Only the first widget is refreshed.
    Widget widget = Widgets[0];
    FormXObject normalAppearance;
    {
        // The appearance is looked up under the null state key and created, sized like the widget box, when absent.
        AppearanceStates normalAppearances = widget.Appearance.Normal;
        normalAppearance = normalAppearances[null];
        if (normalAppearance == null)
        {
            normalAppearances[null] = normalAppearance = new FormXObject(Document, widget.Box.Size);
        }
    }
    PdfName fontName = null;
    double fontSize = 0;
    {
        PdfString defaultAppearanceState = DefaultAppearanceState;
        if (defaultAppearanceState == null)
        {
            // Retrieving the font to define the default appearance...
            fonts::Font defaultFont = null;
            PdfName defaultFontName = null;
            {
                // Field fonts: the first non-symbolic font of the appearance resources is preferred.
                FontResources normalAppearanceFonts = normalAppearance.Resources.Fonts;
                foreach (KeyValuePair <PdfName, fonts::Font> entry in normalAppearanceFonts)
                {
                    if (!entry.Value.Symbolic)
                    {
                        defaultFont = entry.Value;
                        defaultFontName = entry.Key;
                        break;
                    }
                }
                if (defaultFontName == null)
                {
                    // Common fonts: fall back to the first non-symbolic font of the form resources.
                    FontResources formFonts = Document.Form.Resources.Fonts;
                    foreach (KeyValuePair <PdfName, fonts::Font> entry in formFonts)
                    {
                        if (!entry.Value.Symbolic)
                        {
                            defaultFont = entry.Value;
                            defaultFontName = entry.Key;
                            break;
                        }
                    }
                    if (defaultFontName == null)
                    {
                        // Last resort: register a standard Helvetica font under the name "default".
                        //TODO:manage name collision!
                        formFonts[ defaultFontName = new PdfName("default") ] = defaultFont = new fonts::StandardType1Font( Document, fonts::StandardType1Font.FamilyEnum.Helvetica, false, false );
                    }
                    // The font chosen from the form resources is also registered in the appearance resources.
                    normalAppearanceFonts[defaultFontName] = defaultFont;
                }
            }
            // Serialize a set-font operation (size 10 when multiline, 0 otherwise) and store it as the widget's DA entry.
            // NOTE(review): size 0 presumably requests automatic sizing -- confirm against the PDF specification.
            bytes::Buffer buffer = new bytes::Buffer();
            new SetFont(defaultFontName, IsMultiline ? 10 : 0).WriteTo(buffer, Document);
            widget.BaseDataObject[PdfName.DA] = defaultAppearanceState = new PdfString(buffer.ToByteArray());
        }
        // Retrieving the font to use: the first set-font operation in the default appearance state wins.
        ContentParser parser = new ContentParser(defaultAppearanceState.ToByteArray());
        foreach (ContentObject content in parser.ParseContentObjects())
        {
            if (content is SetFont)
            {
                SetFont setFontOperation = (SetFont)content;
                fontName = setFontOperation.Name;
                fontSize = setFontOperation.Size;
                break;
            }
        }
        // NOTE(review): fontName is still null here if the state holds no set-font operation -- confirm that this cannot happen.
        normalAppearance.Resources.Fonts[fontName] = Document.Form.Resources.Fonts[fontName];
    }
    // Refreshing the field appearance...
    /*
     * TODO: resources MUST be resolved both through the appearance stream resource dictionary and
     * from the DR-entry acroform resource dictionary
     */
    PrimitiveComposer baseComposer = new PrimitiveComposer(normalAppearance);
    BlockComposer composer = new BlockComposer(baseComposer);
    ContentScanner currentLevel = composer.Scanner;
    bool textShown = false;
    // Iterative walk of the content tree: descend through ChildLevel, climb back through ParentLevel.
    while (currentLevel != null)
    {
        if (!currentLevel.MoveNext())
        {
            // Level exhausted: resume from the parent (null ends the walk).
            currentLevel = currentLevel.ParentLevel;
            continue;
        }
        ContentObject content = currentLevel.Current;
        if (content is MarkedContent)
        {
            MarkedContent markedContent = (MarkedContent)content;
            if (PdfName.Tx.Equals(((BeginMarkedContent)markedContent.Header).Tag))
            {
                // Remove old text representation!
                markedContent.Objects.Clear();
                // Add new text representation!
                baseComposer.Scanner = currentLevel.ChildLevel; // Ensures the composer places new contents within the marked content block.
                ShowText(composer, fontName, fontSize);
                textShown = true;
            }
        }
        else if (content is Text)
        {
            // Text objects met outside marked content blocks are dropped.
            currentLevel.Remove();
        }
        else if (currentLevel.ChildLevel != null)
        {
            // Any other object exposing a child level is entered.
            currentLevel = currentLevel.ChildLevel;
        }
    }
    if (!textShown)
    {
        // No Tx block was refilled: add a new one holding the text.
        baseComposer.BeginMarkedContent(PdfName.Tx);
        ShowText(composer, fontName, fontSize);
        baseComposer.End();
    }
    baseComposer.Flush();
}
/// <summary>
/// Scans a content level (including external objects and nested containers) and turns the
/// text found after the "lista talentów" title into talent entries.
/// </summary>
/// <remarks>
/// The entry being built is kept in the <c>currentTalent</c>, <c>currentDescription</c> and
/// <c>test</c> fields across calls; it is appended to <paramref name="entries"/> only when
/// the next talent name is met, so the last entry is still pending when the scan ends.
/// </remarks>
/// <param name="level">Content level to scan; a null level is ignored.</param>
/// <param name="entries">List receiving the completed entries.</param>
private void Extract(ContentScanner level, List<TalentEntry> entries)
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        var content = level.Current;
        if (content is Text)
        {
            ExtractTalentText((ContentScanner.TextWrapper)level.CurrentWrapper, entries);
        }
        else if (content is XObject xObject)
        {
            // Scan the external level.
            Extract(xObject.GetScanner(level), entries);
        }
        else if (content is ContainerObject)
        {
            // Scan the inner level.
            Extract(level.ChildLevel, entries);
        }
    }
}

/// <summary>Processes one text object: detects the section title, talent names and description text.</summary>
private void ExtractTalentText(ContentScanner.TextWrapper text, List<TalentEntry> entries)
{
    var strings = text.TextStrings;

    // The 18pt section title switches extraction on; an empty text object cannot be the title.
    if (strings.Count > 0
        && Math.Abs(strings[0].Style.FontSize - 18) < 0.1
        && text.ToString() == "lista talentów")
    {
        talentsStarted = true;
        return;
    }

    if (!talentsStarted)
    {
        return;
    }

    for (var i = 0; i < strings.Count; i++)
    {
        var textString = strings[i];

        // An 11pt string is a talent name: flush the pending entry and start a new one.
        if (Math.Abs(textString.Style.FontSize - 11) < 0.1)
        {
            // Do not emit an entry when no talent name has been collected yet (null or empty).
            if (!string.IsNullOrEmpty(currentTalent))
            {
                entries.Add(new TalentEntry { Name = currentTalent, Description = currentDescription, Tests = test });
            }
            currentTalent = textString.Text;
            currentDescription = "";
            test = "";

            // Skip the two strings following the name (presumably the "Maximum" line -- confirm).
            i += 2;
            // The optional "Testy" label and its value are read only when they actually exist.
            if (i + 1 < strings.Count && strings[i + 1].Text.StartsWith("Testy"))
            {
                if (i + 2 < strings.Count)
                {
                    test = strings[i + 2].Text.TrimStart(':').TrimStart();
                }
                i += 2;
            }
            continue;
        }

        // Only 9pt strings other than a lone dash belong to the description.
        if (textString.Text == "-" || Math.Abs(textString.Style.FontSize - 9) > 0.1)
        {
            continue;
        }

        if (textString.Style.Font.Name.EndsWith("Italic"))
        {
            currentDescription += "<i>" + textString.Text + "</i>";
        }
        else if (textString.Style.Font.Name.EndsWith("Bold"))
        {
            currentDescription += "<b>" + textString.Text + "</b>";
        }
        else if (textString.BaseDataObject.Operator.Equals("TJ") && i > 0 && strings[i - 1].Text.EndsWith("."))
        {
            // A TJ string following a sentence end starts on a new line.
            currentDescription += "<br/>" + textString.Text;
        }
        else
        {
            currentDescription += textString.Text;
        }
    }
}