/// <summary>
/// Fills <c>_widths</c> with one entry per character, starting at the font's
/// "FirstChar" code and taking consecutive values from its "Widths" array.
/// </summary>
/// <remarks>
/// Each width is divided by 1000 before being stored. Entries that cannot be read,
/// or that are (almost) zero, fall back to 500. When the font dictionary carries no
/// "Widths" array the table is simply left empty.
/// </remarks>
private void ParseSizes()
{
    // TODO: PdfFont.ParseSizes: SubType:Type3 Uses a FontMatrix that may not correspond to 1/1000th
    const double glyphSpaceToTextSpace = 1000.0;
    const double defaultWidth = 500;
    const double zeroTolerance = 0.0001;

    _widths = new Dictionary<char, double>();

    // Not every font dictionary carries a "Widths" array; without this guard the
    // lookup (or the loop below) would throw.
    PdfArray widths = null;
    if (_baseData.Values.ContainsKey("Widths"))
    {
        widths = _baseData.Values["Widths"] as PdfArray;
    }
    if (widths == null)
    {
        return;
    }

    char actualChar = (char)_baseData.GetParamAsInt("FirstChar");
    foreach (IPdfElement elem in widths.Values)
    {
        double width = PdfElementUtils.GetReal(elem, defaultWidth);
        if (width < zeroTolerance && width > -zeroTolerance)
        {
            // Treat a zero width as "unknown" and use the default instead.
            width = defaultWidth;
        }

        // Indexer assignment instead of Add: a repeated character code overwrites
        // the previous entry rather than throwing ArgumentException.
        _widths[actualChar] = width / glyphSpaceToTextSpace;
        actualChar++;
    }

    // FIXME: Calculate real height
}
/// <summary>
/// Processes the elements of <paramref name="array"/> in order: strings are passed to
/// <c>OpTextPut</c>, numbers shift <c>_textWidth</c>, and nested arrays are handled
/// recursively. Does nothing when <c>inText</c> is false.
/// </summary>
/// <param name="array">Array whose <c>Values</c> are processed one by one.</param>
private void OpTextPutMultiple(PdfArray array)
{
    if (!inText)
    {
        return;
    }

    foreach (IPdfElement item in array.Values)
    {
        if (item is PdfString)
        {
            PdfString shownString = (PdfString)item;
            OpTextPut(shownString.Value);
            continue;
        }

        if (item is PdfInteger || item is PdfReal)
        {
            // Numeric entries are expressed in thousandths and scaled by the font size;
            // the result is subtracted from the accumulated text width.
            double adjustment = PdfElementUtils.GetReal(item, 0);
            double delta = (adjustment / 1000) * _fontSize;
            _textWidth -= delta;
            continue;
        }

        if (item is PdfArray)
        {
            PdfArray nested = (PdfArray)item;
            OpTextPutMultiple(nested);
        }
    }
}
/// <summary>
/// Walks the page's content actions in order, dispatches each operator token to the
/// matching graphics-state or text handler, and finally calls <c>FlushTextElement</c>.
/// </summary>
/// <remarks>
/// Operators that are recognised but not interpreted yet, as well as unknown operators,
/// are skipped.
/// </remarks>
private void ProcessPageContent()
{
    for (int i = 0; i < _page.ContentActions.Count; i++)
    {
        PdfContentAction action = _page.ContentActions[i];

        switch (action.Token)
        {
            // Special graphics state
            case "q":
                OpPushGraphState();
                break;

            case "Q":
                OpPopGraphState();
                break;

            case "cm":
            {
                double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                OpSetGraphMatrix(a, b, c, d, e, f);
                break;
            }

            // Text operations
            case "BT":
                OpBeginText();
                break;

            case "ET":
                OpEndText();
                break;

            case "Tc":
                OpTextCharSpacing(PdfElementUtils.GetReal(action.Parameters[0], 0));
                break;

            case "Tw":
                OpTextWordSpacing(PdfElementUtils.GetReal(action.Parameters[0], 0));
                break;

            case "Tf":
            {
                string fontName = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                double fontSize = PdfElementUtils.GetReal(action.Parameters[1], 0);
                OpTextFont(fontName, fontSize);
                break;
            }

            case "TL":
                OpTextLeading(PdfElementUtils.GetReal(action.Parameters[0], 0));
                break;

            case "Td":
            {
                double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                OpTextDisplace(x, y);
                break;
            }

            case "TD":
            {
                // Same as Td, but the leading is first set to -y.
                double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                OpTextLeading(-y);
                OpTextDisplace(x, y);
                break;
            }

            case "Tm":
            {
                double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                OpSetTextMatrix(a, b, c, d, e, f);
                break;
            }

            case "T*":
                OpTextLineFeed();
                break;

            case "Tj":
                OpTextPut(PdfElementUtils.GetString(action.Parameters[0], string.Empty));
                break;

            case "'":
            {
                string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                OpTextLineFeed();
                OpTextPut(text);
                break;
            }

            case "\"":
            {
                // Operand order is: word spacing, character spacing, string.
                // The string is the THIRD operand; reading it from index 0 would
                // pick up the word-spacing number instead of the text.
                double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double charSpacing = PdfElementUtils.GetReal(action.Parameters[1], 0);
                string text = PdfElementUtils.GetString(action.Parameters[2], string.Empty);
                OpTextCharSpacing(charSpacing);
                OpTextWordSpacing(wordSpacing);
                // Like the ' operator, " moves to the next line before showing the text.
                OpTextLineFeed();
                OpTextPut(text);
                break;
            }

            case "TJ":
            {
                // Skip the operator instead of crashing when the operand is not an array.
                PdfArray pieces = action.Parameters[0] as PdfArray;
                if (pieces != null)
                {
                    OpTextPutMultiple(pieces);
                }
                break;
            }

            // Text state operators that are recognised but not interpreted yet.
            case "Tz": // TODO: PdfTextExtractor: Horizontal Scale
            case "Tr": // TODO: PdfTextExtractor: Rendering mode
            case "Ts": // TODO: PdfTextExtractor: Text rise
                break;

            // Painting, colour, image and path operators that are recognised but
            // not interpreted yet.
            // TODO: PdfTextExtractor: Interpret these
            case "re":
            case "f":
            case "g":
            case "rg":
            case "BI":
            case "ID":
            case "EI":
            case "n":
            case "Do":
            case "m":  // moveto: Begin new subpath
            case "l":  // lineto: Append straight line segment to path
            case "h":  // closepath: Close subpath
            case "W":  // clip: Set clipping path using nonzero winding number rule
            case "W*": // eoclip: Set clipping path using even-odd rule
            case "w":  // setlinewidth: Set line width
            case "G":  // setgray: Set gray level for stroking operations
            case "S":  // stroke: Stroke path
            case "M":  // setmiterlimit: Set miter limit
                break;

            default:
                // Unknown operator: ignored.
                break;
        }
    }

    FlushTextElement();
}
/// <summary>
/// Click handler: loads the PDF whose path is in <c>txtPdfPath</c> and fills
/// <c>txtOutput</c> with a summary (object, root, stream and page counts) followed by
/// one header line per page and one line per extracted text element.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnProcess_Click(object sender, EventArgs e)
{
    string pdfPath = txtPdfPath.Text;
    if (System.IO.File.Exists(pdfPath) == false)
    {
        MessageBox.Show("File does not exist");
        return;
    }

    PdfDocument doc = PdfDocument.Load(pdfPath);

    int nObjects = doc.Objects.Count;
    int nRootObject = doc.Objects.Count(obj => obj.UsageCount == 0);
    // Only the number of streams is reported, so count them directly instead of
    // materialising a list.
    int nStreams = doc.Objects.Count(obj => obj.Data.Type == PdfElementTypes.Stream);
    int nPages = doc.Pages.Count;

    List<string> lines = new List<string>();
    lines.Add(string.Format("Filename : {0}", System.IO.Path.GetFileNameWithoutExtension(pdfPath)));
    lines.Add(string.Format("Number of Objects : {0}", nObjects));
    lines.Add(string.Format("Number of Roots : {0}", nRootObject));
    lines.Add(string.Format("Number of Streams : {0}", nStreams));
    lines.Add(string.Format("Number of Pages : {0}", nPages));

    int pageNumber = 1;
    foreach (PdfDocumentPage page in doc.Pages)
    {
        lines.Add("-----------------------------------------------------------------------------------------");

        // The CropBox entry may be absent, may not be an array, or may be too short;
        // in all of those cases fall back to the header without coordinates.
        PdfArray cropBox = null;
        if (page.BaseData.Values.ContainsKey("CropBox"))
        {
            cropBox = page.BaseData.Values["CropBox"] as PdfArray;
        }

        if (cropBox != null && cropBox.Values.Count() >= 4)
        {
            lines.Add(string.Format("Page({0} of {1}): {2} {3} {4} {5}",
                pageNumber,
                nPages,
                PdfElementUtils.GetReal(cropBox.Values[0], 0),
                PdfElementUtils.GetReal(cropBox.Values[1], 0),
                PdfElementUtils.GetReal(cropBox.Values[2], 0),
                PdfElementUtils.GetReal(cropBox.Values[3], 0)));
        }
        else
        {
            lines.Add(string.Format("Page({0} of {1}): ", pageNumber, nPages));
        }
        pageNumber++;

        PdfTextExtractor extractor = new PdfTextExtractor(page);
        foreach (PdfTextElement textElement in extractor.Elements)
        {
            string fontName;
            if (textElement.Font != null)
            {
                fontName = textElement.Font.Name;
            }
            else if (textElement.Childs.Count > 0)
            {
                // No font on the element itself: list the fonts of its children.
                fontName = string.Join(";", textElement.Childs
                    .Select(c => c.Font == null ? "#NULL#" : c.Font.Name)
                    .ToArray());
            }
            else
            {
                fontName = "#NULL#";
            }

            lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
                Math.Round(textElement.Matrix.Matrix[0, 2], 2),
                Math.Round(textElement.Matrix.Matrix[1, 2], 2),
                Math.Round(textElement.VisibleWidth, 2),
                Math.Round(textElement.VisibleHeight, 2),
                fontName,
                textElement.VisibleText));
        }
    }

    txtOutput.Lines = lines.ToArray();
}