/// <summary>
/// Reads the content stream from the current position and groups it into actions:
/// every operator token is paired with the elements that were parsed before it.
/// </summary>
/// <returns>
/// The actions in the order they were read. Parsing stops at the end of the stream or
/// as soon as neither an element nor a non-empty token can be read.
/// </returns>
public List<PdfContentAction> ParseContent()
{
    var parsedActions = new List<PdfContentAction>();
    var pendingOperands = new List<IPdfElement>();

    do
    {
        SkipWhitespace();

        // Operands come first; keep collecting them until an operator token shows up.
        IPdfElement operand = ParseElement();
        if (operand != null)
        {
            pendingOperands.Add(operand);
            continue;
        }

        string operatorToken = ParseToken();
        if (string.IsNullOrEmpty(operatorToken))
        {
            break;
        }

        // The action takes the collected operands; the next operator starts with a fresh list.
        var action = new PdfContentAction(operatorToken, pendingOperands);
        pendingOperands = new List<IPdfElement>();
        parsedActions.Add(action);

        if (action.Token != "ID")
        {
            continue;
        }

        // Embedded inline image: the raw bytes run up to an end-of-line followed by "EI".
        char[] lineFeedEnd = { '\n', 'E', 'I' };
        char[] carriageReturnLineFeedEnd = { '\r', '\n', 'E', 'I' };
        long imageLength = MeasureToMarkers(new[] { lineFeedEnd, carriageReturnLineFeedEnd });
        byte[] imageBytes = GetRawData(imageLength);

        SkipEndOfLine();

        // Consume the token that follows the image data; its value is not needed.
        ParseToken();

        var imageStream = new PdfStream();
        imageStream.OriginalData = imageBytes;
        action.Parameters.Add(imageStream);
    }
    while (!IsEndOfStream());

    return parsedActions;
}
/// <summary>
/// Walks the content actions of the current page in order, dispatches each one to the
/// matching operator handler and finally flushes the text element.
/// </summary>
/// <remarks>
/// Operators that are recognised but not interpreted yet are listed explicitly so they
/// remain distinguishable from unknown operators, which are ignored.
/// Operands are read by position without checking how many are present.
/// </remarks>
private void ProcessPageContent()
{
    for (int i = 0; i < _page.ContentActions.Count; i++)
    {
        PdfContentAction action = _page.ContentActions[i];

        switch (action.Token)
        {
            // Special graphics state
            case "q":
                OpPushGraphState();
                break;

            case "Q":
                OpPopGraphState();
                break;

            case "cm":
            {
                double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                OpSetGraphMatrix(a, b, c, d, e, f);
                break;
            }

            // Text operations
            case "BT":
                OpBeginText();
                break;

            case "ET":
                OpEndText();
                break;

            case "Tc":
            {
                double charSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                OpTextCharSpacing(charSpacing);
                break;
            }

            case "Tw":
            {
                double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                OpTextWordSpacing(wordSpacing);
                break;
            }

            case "Tz":
                // TODO: PdfTextExtractor: Horizontal Scale
                break;

            case "Tf":
            {
                string fontName = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                double fontSize = PdfElementUtils.GetReal(action.Parameters[1], 0);
                OpTextFont(fontName, fontSize);
                break;
            }

            case "TL":
            {
                double leading = PdfElementUtils.GetReal(action.Parameters[0], 0);
                OpTextLeading(leading);
                break;
            }

            case "Tr":
                // TODO: PdfTextExtractor: Rendering mode
                break;

            case "Ts":
                // TODO: PdfTextExtractor: Text rise
                break;

            case "Td":
            {
                double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                OpTextDisplace(x, y);
                break;
            }

            case "TD":
            {
                // Same displacement as Td, but the leading is set to -y first.
                double x = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double y = PdfElementUtils.GetReal(action.Parameters[1], 0);
                OpTextLeading(-y);
                OpTextDisplace(x, y);
                break;
            }

            case "Tm":
            {
                double a = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double b = PdfElementUtils.GetReal(action.Parameters[1], 0);
                double c = PdfElementUtils.GetReal(action.Parameters[2], 0);
                double d = PdfElementUtils.GetReal(action.Parameters[3], 0);
                double e = PdfElementUtils.GetReal(action.Parameters[4], 0);
                double f = PdfElementUtils.GetReal(action.Parameters[5], 0);
                OpSetTextMatrix(a, b, c, d, e, f);
                break;
            }

            case "T*":
                OpTextLineFeed();
                break;

            case "Tj":
            {
                string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                OpTextPut(text);
                break;
            }

            case "'":
            {
                string text = PdfElementUtils.GetString(action.Parameters[0], string.Empty);
                OpTextLineFeed();
                OpTextPut(text);
                break;
            }

            case "\"":
            {
                // Operand order is "aw ac string": word spacing, character spacing and
                // then the text, so the string is the third operand (index 2).
                double wordSpacing = PdfElementUtils.GetReal(action.Parameters[0], 0);
                double charSpacing = PdfElementUtils.GetReal(action.Parameters[1], 0);
                string text = PdfElementUtils.GetString(action.Parameters[2], string.Empty);
                OpTextCharSpacing(charSpacing);
                OpTextWordSpacing(wordSpacing);

                // Like the ' operator, this one moves to the next line before showing the text.
                OpTextLineFeed();
                OpTextPut(text);
                break;
            }

            case "TJ":
                OpTextPutMultiple((PdfArray)action.Parameters[0]);
                break;

            // Recognised operators that are not interpreted yet.
            // TODO: PdfTextExtractor: Interpret this
            case "re":
            case "f":
            case "g":
            case "rg":
            case "BI":
            case "ID":
            case "EI":
            case "n":
            case "Do":
            case "m":  // moveto: Begin new subpath
            case "l":  // lineto: Append straight line segment to path
            case "h":  // closepath: Close subpath
            case "W":  // clip: Set clipping path using nonzero winding number rule
            case "W*": // eoclip: Set clipping path using even-odd rule
            case "w":  // setlinewidth: Set line width
            case "G":  // setgray: Set gray level for stroking operations
            case "S":  // stroke: Stroke path
            case "M":  // setmiterlimit: Set miter limit
                break;

            default:
                // Unknown operator: ignored.
                break;
        }
    }

    FlushTextElement();
}