/**
  <summary>Scans a content level looking for text, writing a description of each text
  string to the console and outlining its bounding boxes through the composer.</summary>
  <remarks>Page contents are represented by a sequence of content objects, possibly nested
  into multiple levels; nested container objects are scanned recursively.</remarks>
  <param name="level">Content level to scan. A null level is ignored.</param>
  <param name="composer">Composer that receives the rectangle-drawing and stroking
  operations.</param>
*/
private void Extract(
    ContentScanner level,
    PrimitiveComposer composer
    )
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject content = level.Current;
        if (content is Text)
        {
            ContentScanner.TextWrapper text = (ContentScanner.TextWrapper)level.CurrentWrapper;
            int colorIndex = 0;
            foreach (ContentScanner.TextStringWrapper textString in text.TextStrings)
            {
                // Box is a nullable RectangleF: reading .Value on an empty one throws
                // InvalidOperationException and would abort the whole scan, so a text
                // string without a box is skipped (there is nothing to report or draw).
                RectangleF? nullableBox = textString.Box;
                if (!nullableBox.HasValue)
                {
                    continue;
                }
                RectangleF textStringBox = nullableBox.Value;

                Console.WriteLine(
                    "Text ["
                        + "x:" + Math.Round(textStringBox.X) + ","
                        + "y:" + Math.Round(textStringBox.Y) + ","
                        + "w:" + Math.Round(textStringBox.Width) + ","
                        + "h:" + Math.Round(textStringBox.Height)
                        + "] [font size:" + Math.Round(textString.Style.FontSize) + "]: " + textString.Text
                    );

                // Drawing text character bounding boxes...
                // The stroke color cycles through textCharBoxColors, one step per text string.
                colorIndex = (colorIndex + 1) % textCharBoxColors.Length;
                composer.SetStrokeColor(textCharBoxColors[colorIndex]);
                foreach (TextChar textChar in textString.TextChars)
                {
                    /*
                      NOTE: You can get further text information
                      (font, font size, text color, text rendering mode)
                      through textChar.style.
                    */
                    composer.DrawRectangle(textChar.Box);
                    composer.Stroke();
                }

                // Drawing text string bounding box...
                // The dash pattern and string-box color are set between BeginLocalState()
                // and End(), presumably so they do not leak into later drawing.
                composer.BeginLocalState();
                composer.SetLineDash(new LineDash(new double[] { 5, 5 }));
                composer.SetStrokeColor(textStringBoxColor);
                composer.DrawRectangle(textStringBox);
                composer.Stroke();
                composer.End();
            }
        }
        else if (content is ContainerObject)
        {
            // Scan the inner level!
            Extract(level.ChildLevel, composer);
        }
    }
}
/// <summary>
/// Collects the text strings found in the given content level, together with their
/// properties (text, fill color, stroke color, font, font size and bounding box),
/// into <c>_PdfTextStrings</c>.
/// The scan proceeds level by level, because page contents are represented by a
/// sequence of content objects, possibly nested into multiple levels.
/// </summary>
/// <param name="level">Level being iterated. A null level is ignored.</param>
private void Extract(ContentScanner level)
{
    if (level == null)
    {
        return;
    }

    while (level.MoveNext())
    {
        ContentObject current = level.Current;

        // NOTE: the checks below run in a fixed order (Text, XObject, ContainerObject);
        // keep it, since these types may be related by inheritance.
        if (current is Text)
        {
            // Store every text string of this text object with its properties.
            ContentScanner.TextWrapper wrapper = (ContentScanner.TextWrapper)level.CurrentWrapper;
            foreach (ContentScanner.TextStringWrapper run in wrapper.TextStrings)
            {
                PdfClownTextString entry = new PdfClownTextString(
                    run.Text,
                    run.Style.FillColor,
                    run.Style.StrokeColor,
                    run.Style.Font,
                    run.Style.FontSize)
                {
                    Rectangle = run.Box
                };
                _PdfTextStrings.Add(entry);
            }
            continue;
        }

        XObject external = current as XObject;
        if (external != null)
        {
            // Scan the external level; a failure is logged and the scan goes on.
            try
            {
                Extract(external.GetScanner(level));
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.StackTrace);
                Console.WriteLine("Soy la excepción de XObject");
            }
            continue;
        }

        if (current is ContainerObject)
        {
            // Scan the inner level; a failure is logged and the scan goes on.
            try
            {
                Extract(level.ChildLevel);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.StackTrace);
                Console.WriteLine("Soy la excepción de ContainerObject");
            }
        }
    }
}