Example #1
0
        /// <summary>
        /// Reads the content being parsed until the end of the stream (or until no
        /// further token can be read) and groups it into content actions.
        /// Every element returned by <c>ParseElement</c> is collected as a pending
        /// operand; when no element can be parsed, the next token is taken as the
        /// operator and combined with the pending operands into one action.
        /// </summary>
        /// <returns>The actions in the order they were encountered.</returns>
        public List <PdfContentAction> ParseContent()
        {
            List <PdfContentAction> result   = new List <PdfContentAction>();
            List <IPdfElement>      operands = new List <IPdfElement>();

            do
            {
                SkipWhitespace();

                // Anything that parses as an element is an operand of the operator that follows.
                IPdfElement operand = ParseElement();
                if (operand != null)
                {
                    operands.Add(operand);
                    continue;
                }

                // Not an element: read it as an operator token. No token means nothing is left to read.
                string operatorToken = ParseToken();
                if (string.IsNullOrEmpty(operatorToken))
                {
                    break;
                }

                PdfContentAction action = new PdfContentAction(operatorToken, operands);
                operands = new List <IPdfElement>();
                result.Add(action);

                if (action.Token != "ID")
                {
                    continue;
                }

                // Embedded inline image: the raw image bytes follow "ID" and run up to an
                // end-of-line (LF or CR LF) immediately followed by "EI".
                char[][] endMarkers = new char[][] {
                    new char[] { '\n', 'E', 'I' },
                    new char[] { '\r', '\n', 'E', 'I' },
                };
                long   imageLength = MeasureToMarkers(endMarkers);
                byte[] imageBytes  = GetRawData(imageLength);
                SkipEndOfLine();

                // Consume the token that closes the image data (presumably "EI"); it is
                // not recorded as an action of its own.
                ParseToken();

                // The raw image body is attached to the "ID" action as an extra stream parameter.
                PdfStream imageStream = new PdfStream {
                    OriginalData = imageBytes,
                };
                action.Parameters.Add(imageStream);
            } while (IsEndOfStream() == false);

            return(result);
        }
Example #2
0
        /// <summary>
        /// Walks <c>_page.ContentActions</c> in order and dispatches each operator that
        /// affects text extraction (graphics state, text state, text positioning and
        /// text showing) to the corresponding <c>Op*</c> handler. Operators that are
        /// recognised but not interpreted yet, as well as unknown operators, are
        /// skipped. Once every action has been handled the pending text element is
        /// flushed with <c>FlushTextElement</c>.
        /// </summary>
        /// <remarks>
        /// Operands are read positionally from <c>action.Parameters</c>; an action that
        /// carries fewer operands than its operator requires makes the indexer throw.
        /// </remarks>
        private void ProcessPageContent()
        {
            for (int i = 0; i < _page.ContentActions.Count; i++)
            {
                PdfContentAction action = _page.ContentActions[i];

                switch (action.Token)
                {
                    // ---- Special graphics state ----
                    case "q":
                    {
                        OpPushGraphState();
                        break;
                    }

                    case "Q":
                    {
                        OpPopGraphState();
                        break;
                    }

                    case "cm":
                    {
                        // a b c d e f cm
                        double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                        double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                        double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                        double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                        OpSetGraphMatrix(a, b, c, d, e, f);
                        break;
                    }

                    // ---- Text objects ----
                    case "BT":
                    {
                        OpBeginText();
                        break;
                    }

                    case "ET":
                    {
                        OpEndText();
                        break;
                    }

                    // ---- Text state ----
                    case "Tc":
                    {
                        double charSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        OpTextCharSpacing(charSpacing);
                        break;
                    }

                    case "Tw":
                    {
                        double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        OpTextWordSpacing(wordSpacing);
                        break;
                    }

                    case "Tf":
                    {
                        // font size Tf
                        string fontName = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                        double fontSize = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        OpTextFont(fontName, fontSize);
                        break;
                    }

                    case "TL":
                    {
                        double leading = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        OpTextLeading(leading);
                        break;
                    }

                    // ---- Text positioning ----
                    case "Td":
                    {
                        double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        OpTextDisplace(x, y);
                        break;
                    }

                    case "TD":
                    {
                        // Same as Td, but the leading is set to -ty first.
                        double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        OpTextLeading(-y);
                        OpTextDisplace(x, y);
                        break;
                    }

                    case "Tm":
                    {
                        // a b c d e f Tm
                        double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                        double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                        double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                        double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                        OpSetTextMatrix(a, b, c, d, e, f);
                        break;
                    }

                    case "T*":
                    {
                        OpTextLineFeed();
                        break;
                    }

                    // ---- Text showing ----
                    case "Tj":
                    {
                        string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                        OpTextPut(text);
                        break;
                    }

                    case "'":
                    {
                        // string ' : move to the next line, then show the string.
                        string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                        OpTextLineFeed();
                        OpTextPut(text);
                        break;
                    }

                    case "\"":
                    {
                        // aw ac string " : equivalent to "aw Tw", "ac Tc", "string '".
                        // The string is the THIRD operand, and the operator also moves to
                        // the next line before showing the text.
                        double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                        double charSpacing = PdfElementUtils.GetReal(action.Parameters[1], 0);
                        string text        = PdfElementUtils.GetString(action.Parameters[2], string.Empty);
                        OpTextCharSpacing(charSpacing);
                        OpTextWordSpacing(wordSpacing);
                        OpTextLineFeed();
                        OpTextPut(text);
                        break;
                    }

                    case "TJ":
                    {
                        // The single operand is expected to be an array; the cast throws otherwise.
                        OpTextPutMultiple(((PdfArray)action.Parameters[0]));
                        break;
                    }

                    // ---- Recognised operators that are not interpreted yet ----
                    case "Tz": // TODO: PdfTextExtractor: Horizontal Scale
                    case "Tr": // TODO: PdfTextExtractor: Rendering mode
                    case "Ts": // TODO: PdfTextExtractor: Text rise
                    case "re": // TODO: PdfTextExtractor: Interpret this
                    case "f":  // TODO: PdfTextExtractor: Interpret this
                    case "g":  // TODO: PdfTextExtractor: Interpret this
                    case "rg": // TODO: PdfTextExtractor: Interpret this
                    case "BI": // TODO: PdfTextExtractor: Interpret this
                    case "ID": // TODO: PdfTextExtractor: Interpret this
                    case "EI": // TODO: PdfTextExtractor: Interpret this
                    case "n":  // TODO: PdfTextExtractor: Interpret this
                    case "Do": // TODO: PdfTextExtractor: Interpret this
                    case "m":  // TODO: PdfTextExtractor: Interpret this "moveto: Begin new subpath"
                    case "l":  // TODO: PdfTextExtractor: Interpret this "lineto: Append straight line segment to path"
                    case "h":  // TODO: PdfTextExtractor: Interpret this "closepath: Close subpath"
                    case "W":  // TODO: PdfTextExtractor: Interpret this "clip: Set clipping path using nonzero winding number rule"
                    case "W*": // TODO: PdfTextExtractor: Interpret this "eoclip: Set clipping path using even-odd rule"
                    case "w":  // TODO: PdfTextExtractor: Interpret this "setlinewidth: Set line width"
                    case "G":  // TODO: PdfTextExtractor: Interpret this "setgray: Set gray level for stroking operations"
                    case "S":  // TODO: PdfTextExtractor: Interpret this "stroke: Stroke path"
                    case "M":  // TODO: PdfTextExtractor: Interpret this "setmiterlimit: Set miter limit"
                    {
                        break;
                    }

                    // Unknown operator (or a null token): nothing to do for text extraction.
                    default:
                    {
                        break;
                    }
                }
            }

            // Emit whatever text is still buffered once all actions have been processed.
            FlushTextElement();
        }