예제 #1
0
        /// <summary>
        /// Rebuilds <c>_widths</c> from the "FirstChar" and "Widths" entries of
        /// <c>_baseData</c>.
        /// </summary>
        /// <remarks>
        /// The entries of "Widths" are assigned to consecutive character codes
        /// starting at "FirstChar". Every width is divided by 1000 before being
        /// stored. Widths that are (almost) zero are replaced by 500, the same
        /// fallback that is used when an element cannot be read as a number.
        /// When "Widths" is absent or is not a <c>PdfArray</c>, <c>_widths</c> is
        /// left empty instead of failing.
        /// </remarks>
        private void ParseSizes()
        {
            double glyphSpaceToTextSpace = 1000.0; // TODO: PdfFont.ParseSizes: SubType:Type3 Uses a FontMatrix that may not correspond to 1/1000th
            const double defaultWidth = 500;

            _widths = new Dictionary<char, double>();
            char actualChar = (char)_baseData.GetParamAsInt("FirstChar");

            // "Widths" may be missing or may not be a direct array; in both
            // cases there is nothing to enumerate.
            PdfArray widths = _baseData.Values.ContainsKey("Widths")
                ? _baseData.Values["Widths"] as PdfArray
                : null;
            if (widths == null)
            {
                return;
            }

            foreach (IPdfElement elem in widths.Values)
            {
                double width = PdfElementUtils.GetReal(elem, defaultWidth);
                if (Math.Abs(width) < 0.0001f)
                {
                    // A zero width would collapse the glyph; use the fallback.
                    width = defaultWidth;
                }

                // Indexer instead of Add: a repeated key (e.g. the char counter
                // wrapping around) overwrites rather than throwing.
                _widths[actualChar] = width / glyphSpaceToTextSpace;
                actualChar++;
            }
            // FIXME: Calculate real height
        }
예제 #2
0
 /// <summary>
 /// Processes every element of <paramref name="array"/> in order:
 /// strings are forwarded to <c>OpTextPut</c>, numbers reduce
 /// <c>_textWidth</c> by (value / 1000) * <c>_fontSize</c>, and nested
 /// arrays are processed recursively. Other element types are ignored.
 /// </summary>
 /// <param name="array">Array whose <c>Values</c> are processed.</param>
 private void OpTextPutMultiple(PdfArray array)
 {
     // Does nothing while the inText flag is not set.
     if (!inText)
     {
         return;
     }

     foreach (IPdfElement item in array.Values)
     {
         if (item is PdfString)
         {
             PdfString str = (PdfString)item;
             OpTextPut(str.Value);
             continue;
         }

         if (item is PdfInteger || item is PdfReal)
         {
             // Numeric entry: expressed in thousandths, scaled by the font size.
             double adjustment = PdfElementUtils.GetReal(item, 0);
             double shift = (adjustment / 1000) * _fontSize;
             _textWidth -= shift;
             continue;
         }

         PdfArray nested = item as PdfArray;
         if (nested != null)
         {
             OpTextPutMultiple(nested);
         }
     }
 }
예제 #3
0
        /// <summary>
        /// Iterates over <c>_page.ContentActions</c> in order and dispatches each
        /// action to the matching <c>Op*</c> handler based on its operator token,
        /// then calls <c>FlushTextElement</c> once all actions are processed.
        /// </summary>
        /// <remarks>
        /// Operators that are recognised but not yet interpreted, and operators
        /// that are not recognised at all, are skipped without any effect.
        /// </remarks>
        private void ProcessPageContent()
        {
            for (int i = 0; i < _page.ContentActions.Count; i++)
            {
                PdfContentAction action = _page.ContentActions[i];

                switch (action.Token)
                {
                    // Special graphics state
                    case "q":
                        OpPushGraphState();
                        break;

                    case "Q":
                        OpPopGraphState();
                        break;

                    case "cm":
                    {
                        double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                        double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                        double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                        double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                        OpSetGraphMatrix(a, b, c, d, e, f);
                        break;
                    }

                    // Text Operations
                    case "BT":
                        OpBeginText();
                        break;

                    case "ET":
                        OpEndText();
                        break;

                    case "Tc":
                    {
                        double charSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        OpTextCharSpacing(charSpacing);
                        break;
                    }

                    case "Tw":
                    {
                        double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        OpTextWordSpacing(wordSpacing);
                        break;
                    }

                    case "Tf":
                    {
                        string fontName = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                        double fontSize = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        OpTextFont(fontName, fontSize);
                        break;
                    }

                    case "TL":
                    {
                        double leading = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        OpTextLeading(leading);
                        break;
                    }

                    case "Td":
                    {
                        double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        OpTextDisplace(x, y);
                        break;
                    }

                    case "TD":
                    {
                        // Same as Td, but the leading is set to -y first.
                        double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        OpTextLeading(-y);
                        OpTextDisplace(x, y);
                        break;
                    }

                    case "Tm":
                    {
                        double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                        double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                        double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                        double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                        OpSetTextMatrix(a, b, c, d, e, f);
                        break;
                    }

                    case "T*":
                        OpTextLineFeed();
                        break;

                    case "Tj":
                    {
                        string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                        OpTextPut(text);
                        break;
                    }

                    case "'":
                    {
                        string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                        OpTextLineFeed();
                        OpTextPut(text);
                        break;
                    }

                    case "\"":
                    {
                        // Operand order is: wordSpacing charSpacing string.
                        // The PDF specification defines this operator as
                        // "aw Tw, ac Tc, string '", so the string is the THIRD
                        // operand and a line feed precedes the text.
                        double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double charSpacing = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        string text        = PdfElementUtils.GetString(action.Parameters[2], string.Empty);
                        OpTextWordSpacing(wordSpacing);
                        OpTextCharSpacing(charSpacing);
                        OpTextLineFeed();
                        OpTextPut(text);
                        break;
                    }

                    case "TJ":
                        OpTextPutMultiple(((PdfArray)action.Parameters[0]));
                        break;

                    // Recognised text-state operators that are not interpreted yet.
                    case "Tz": // TODO: PdfTextExtractor: Horizontal Scale
                    case "Tr": // TODO: PdfTextExtractor: Rendering mode
                    case "Ts": // TODO: PdfTextExtractor: Text rise
                        break;

                    // Recognised graphics/image operators that are not interpreted yet.
                    case "re": // TODO: PdfTextExtractor: Interpret this
                    case "f":  // TODO: PdfTextExtractor: Interpret this
                    case "g":  // TODO: PdfTextExtractor: Interpret this
                    case "rg": // TODO: PdfTextExtractor: Interpret this
                    case "BI": // TODO: PdfTextExtractor: Interpret this
                    case "ID": // TODO: PdfTextExtractor: Interpret this
                    case "EI": // TODO: PdfTextExtractor: Interpret this
                    case "n":  // TODO: PdfTextExtractor: Interpret this
                    case "Do": // TODO: PdfTextExtractor: Interpret this
                    case "m":  // TODO: PdfTextExtractor: Interpret this "moveto: Begin new subpath"
                    case "l":  // TODO: PdfTextExtractor: Interpret this "lineto: Append straight line segment to path"
                    case "h":  // TODO: PdfTextExtractor: Interpret this "closepath: Close subpath"
                    case "W":  // TODO: PdfTextExtractor: Interpret this "clip: Set clipping path using nonzero winding number rule"
                    case "W*": // TODO: PdfTextExtractor: Interpret this "eoclip: Set clipping path using even-odd rule"
                    case "w":  // TODO: PdfTextExtractor: Interpret this "setlinewidth: Set line width"
                    case "G":  // TODO: PdfTextExtractor: Interpret this "setgray: Set gray level for stroking operations"
                    case "S":  // TODO: PdfTextExtractor: Interpret this "stroke: Stroke path"
                    case "M":  // TODO: PdfTextExtractor: Interpret this "setmiterlimit: Set miter limit"
                        break;

                    default:
                        // Unknown operator: ignored.
                        break;
                }
            }
            FlushTextElement();
        }
예제 #4
0
        /// <summary>
        /// Click handler: loads the PDF whose path is in <c>txtPdfPath</c> and
        /// writes a textual report to <c>txtOutput</c>. The report contains
        /// document statistics followed, for each page, by its CropBox values
        /// (when available) and every text element found by
        /// <c>PdfTextExtractor</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnProcess_Click(object sender, EventArgs e)
        {
            const string nullFontName = "#NULL#";

            if (!System.IO.File.Exists(txtPdfPath.Text))
            {
                MessageBox.Show("File does not exist");
                return;
            }

            PdfDocument doc = PdfDocument.Load(txtPdfPath.Text);

            int nObjects    = doc.Objects.Count;
            int nRootObject = doc.Objects.Count(obj => obj.UsageCount == 0);
            int nStreams    = doc.Objects.Count(obj => obj.Data.Type == PdfElementTypes.Stream);
            int nPages      = doc.Pages.Count;

            List<string> lines = new List<string>();

            lines.Add(string.Format("Filename : {0}", System.IO.Path.GetFileNameWithoutExtension(txtPdfPath.Text)));
            lines.Add(string.Format("Number of Objects : {0}", nObjects));
            lines.Add(string.Format("Number of Roots   : {0}", nRootObject));
            lines.Add(string.Format("Number of Streams : {0}", nStreams));
            lines.Add(string.Format("Number of Pages   : {0}", nPages));

            int pageNumber = 1;

            foreach (PdfDocumentPage page in doc.Pages)
            {
                lines.Add("-----------------------------------------------------------------------------------------");

                // The CropBox entry may exist without being a PdfArray; in that
                // case fall back to the header without box values instead of
                // dereferencing null.
                PdfArray cropBox = page.BaseData.Values.ContainsKey("CropBox")
                    ? page.BaseData.Values["CropBox"] as PdfArray
                    : null;
                if (cropBox != null)
                {
                    lines.Add(string.Format("Page({0} of {1}): {2} {3} {4} {5}", pageNumber, nPages,
                                            PdfElementUtils.GetReal(cropBox.Values[0], 0),
                                            PdfElementUtils.GetReal(cropBox.Values[1], 0),
                                            PdfElementUtils.GetReal(cropBox.Values[2], 0),
                                            PdfElementUtils.GetReal(cropBox.Values[3], 0)));
                }
                else
                {
                    lines.Add(string.Format("Page({0} of {1}): ", pageNumber, nPages));
                }
                pageNumber++;

                PdfTextExtractor extractor = new PdfTextExtractor(page);
                foreach (PdfTextElement textElement in extractor.Elements)
                {
                    string fontName = textElement.Font == null ? nullFontName : textElement.Font.Name;
                    if (fontName == nullFontName && textElement.Childs.Count > 0)
                    {
                        // No font on the element itself: list the fonts of its
                        // children, separated by ';'.
                        fontName = string.Join(
                            ";",
                            textElement.Childs
                                       .Select(c => c.Font == null ? nullFontName : c.Font.Name)
                                       .ToArray());
                    }

                    lines.Add(string.Format("Text({0}, {1})({2}, {3})[{4}]: \"{5}\"",
                                            Math.Round(textElement.Matrix.Matrix[0, 2], 2),
                                            Math.Round(textElement.Matrix.Matrix[1, 2], 2),
                                            Math.Round(textElement.VisibleWidth, 2),
                                            Math.Round(textElement.VisibleHeight, 2),
                                            fontName,
                                            textElement.VisibleText));
                }
            }

            txtOutput.Lines = lines.ToArray();
        }