/// <summary>
/// Hands an operator to the parser, calling the parser's marked-content
/// start/end checks around it for the BDC, BMC and EMC operators.
/// </summary>
/// <param name="parser">the parser that processes the operator</param>
/// <param name="operator">the operator being processed</param>
/// <param name="operands">the operands of the operator</param>
/// <seealso cref= "PdfOperator.Process(OCGParser, PdfLiteral, List{T})"> </seealso>
virtual public void Process(OCGParser parser, PdfLiteral @operator, IList<PdfObject> operands)
{
    string opName = @operator.ToString();

    // A BDC whose first operand is /OC passes its second operand to the start check.
    bool startsOptionalContent = "BDC".Equals(opName)
        && operands.Count > 1
        && PdfName.OC.Equals(operands[0]);

    if (startsOptionalContent)
    {
        parser.CheckMarkedContentStart((PdfName)operands[1]);
    }
    else if ("BMC".Equals(opName))
    {
        parser.CheckMarkedContentStart(null);
    }

    parser.Process(@operator, operands, true);

    // The end check runs only after the EMC operator itself has been processed.
    if ("EMC".Equals(opName))
    {
        parser.CheckMarkedContentEnd();
    }
}
/// <summary>
/// Rewrites string operands of the Tj / TJ operators before delegating to the base writer.
/// A string containing <c>_containsMatch</c> is replaced wholesale by <c>_replacePattern</c>;
/// otherwise a string matched by <c>_matcher</c> has its matches replaced.
/// </summary>
/// <param name="processor">the processor forwarded to the base implementation</param>
/// <param name="oper">the operator being written</param>
/// <param name="operands">the operands; matching string entries are replaced in place</param>
protected override void Write(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
    var opName = oper.ToString();
    var showsText = "Tj".Equals(opName) || "TJ".Equals(opName);

    if (showsText)
    {
        // NOTE(review): only operands that are themselves strings are inspected; strings
        // nested inside an array operand are presumably skipped - confirm this is intended for TJ.
        for (var index = 0; index < operands.Count; index++)
        {
            var operand = operands[index];
            if (operand.IsString())
            {
                // remove zero bytes (used in allwinner watermarks)
                var withoutZeros = operand.GetBytes().Where(b => b != 0).ToArray();
                var text = Encoding.UTF8.GetString(withoutZeros);

                var containsHit = _containsMatch != null
                    && !string.IsNullOrWhiteSpace(text)
                    && text.Contains(_containsMatch);

                if (containsHit)
                {
                    operands[index] = new PdfString(_replacePattern);
                }
                else if (_matcher != null && _matcher.IsMatch(text))
                {
                    operands[index] = new PdfString(_matcher.Replace(text, _replacePattern));
                }
            }
        }
    }

    base.Write(processor, oper, operands);
}
/// <summary>
/// Processes PDF syntax.
/// <b>Note:</b> If you re-use a given PdfContentStreamProcessor, you must call its reset method.
/// </summary>
/// <param name="contentBytes">the bytes of a content stream</param>
/// <param name="resources">the resources that come with the content stream (may be null)</param>
public void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
    this.resources.Push(resources);
    try
    {
        PRTokeniser tokeniser = new PRTokeniser(contentBytes);
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<iTextSharp.text.pdf.PdfObject> operands = new List<iTextSharp.text.pdf.PdfObject>();

        while (ps.Parse(operands).Count > 0)
        {
            // The parser places the operator last, after its operands.
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];

            if ("BI".Equals(oper.ToString()))
            {
                // we don't call invokeOperator for embedded images - this is one area of the PDF spec
                // that is particularly nasty and inconsistent
                PdfDictionary colorSpaceDic = resources != null ? resources.GetAsDict(PdfName.COLORSPACE) : null;

                // NOTE(review): an earlier comment here recorded that ImageRenderInfo.CreateForEmbeddedImage
                // was reported as inaccessible due to its protection level - confirm it is reachable from this class.
                ImageRenderInfo renderInfo = ImageRenderInfo.CreateForEmbeddedImage(
                    Gs().ctm,
                    InlineImageUtils.ParseInlineImage(ps, colorSpaceDic),
                    colorSpaceDic);
                renderListener.RenderImage(renderInfo);
            }
            else
            {
                InvokeOperator(oper, operands);
            }
        }
    }
    finally
    {
        // Always undo the Push above so a failing operator cannot leave the resource stack unbalanced.
        this.resources.Pop();
    }
}
/// <summary>
/// Processes PDF syntax.
/// <b>Note:</b> If you re-use a given PdfContentStreamProcessor, you must call its reset method.
/// </summary>
/// <param name="contentBytes">the bytes of a content stream</param>
/// <param name="resources">the resources that come with the content stream (may be null)</param>
public void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
    this.resources.Push(resources);
    try
    {
        PRTokeniser tokeniser = new PRTokeniser(contentBytes);
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<PdfObject> operands = new List<PdfObject>();

        while (ps.Parse(operands).Count > 0)
        {
            // The operator is the last element of the parsed list.
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];

            if ("BI".Equals(oper.ToString()))
            {
                // we don't call invokeOperator for embedded images - this is one area of the PDF spec
                // that is particularly nasty and inconsistent
                PdfDictionary colorSpaceDic = resources != null ? resources.GetAsDict(PdfName.COLORSPACE) : null;
                ImageRenderInfo renderInfo = ImageRenderInfo.CreateForEmbeddedImage(
                    Gs().ctm,
                    InlineImageUtils.ParseInlineImage(ps, colorSpaceDic),
                    colorSpaceDic);
                renderListener.RenderImage(renderInfo);
            }
            else
            {
                InvokeOperator(oper, operands);
            }
        }
    }
    finally
    {
        // Balance the Push above even when parsing or an operator throws.
        this.resources.Pop();
    }
}
/// <summary>
/// Parses a content stream, lets the registered operator handlers rewrite each operator's
/// operand list, and returns the bytes collected by a fresh content stream builder.
/// </summary>
/// <param name="contentBytes">the bytes of the content stream to rewrite</param>
/// <param name="resourcesDictionary">the resources dictionary pushed for the duration of the call</param>
/// <returns>the bytes produced by the builder that was pushed for this call</returns>
public byte[] Modify(byte[] contentBytes, PdfDictionary resourcesDictionary)
{
    _contentStreamBuilderStack.Push(new PdfContentStreamBuilder());
    _resourceDictionaryStack.Push(resourcesDictionary);

    bool completed = false;
    try
    {
        PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<PdfObject> operands = new List<PdfObject>();

        while (ps.Parse(operands).Count > 0)
        {
            // The operator is the last element of the parsed list.
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];

            PdfContentOperatorHandler operHandler = null;
            if (_operators.TryGetValue(oper.ToString(), out operHandler))
            {
                // The handler's return value replaces the operand list that is emitted
                // (and that the parser refills on the next iteration).
                operands = operHandler(oper, operands);
            }

            _contentStreamBuilderStack.Peek().Push(operands);
        }

        completed = true;
    }
    finally
    {
        _resourceDictionaryStack.Pop();
        if (!completed)
        {
            // On failure, discard the half-built stream so nested or later calls see balanced stacks.
            _contentStreamBuilderStack.Pop();
        }
    }

    return _contentStreamBuilderStack.Pop().GetBytes();
}
/// <summary>
/// Drops every operator listed in <c>CAP_AND_JOIN_OPERATORS</c> and forwards all others to the
/// base writer. <c>initializeCapAndJoin</c> is called once before the first operator and again
/// after each operator equal to <c>GSTATE_OPERATOR</c> has been written.
/// </summary>
/// <param name="processor">the processor forwarded to the base implementation</param>
/// <param name="operatorLit">the operator being written</param>
/// <param name="operands">the operands of the operator</param>
protected override void Write(PdfContentStreamProcessor processor, PdfLiteral operatorLit, List<PdfObject> operands)
{
    // One-time initialisation on the very first operator seen.
    if (start)
    {
        initializeCapAndJoin(processor);
        start = false;
    }

    var opName = operatorLit.ToString();
    if (!CAP_AND_JOIN_OPERATORS.Contains(opName))
    {
        base.Write(processor, operatorLit, operands);

        if (GSTATE_OPERATOR == opName)
        {
            initializeCapAndJoin(processor);
        }
    }
}
/// <summary>
/// Runs the wrapped original operator (except for "Do") and then asks the editor to write the operator.
/// </summary>
/// <param name="processor">the processor; cast to <c>PdfContentStreamEditor</c> for writing</param>
/// <param name="oper">the operator being invoked</param>
/// <param name="operands">the operands of the operator</param>
public void Invoke(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
    if (_originalOperator != null)
    {
        // "Do" is written but never delegated to the original operator.
        bool isDo = "Do".Equals(oper.ToString());
        if (!isDo)
        {
            _originalOperator.Invoke(processor, oper, operands);
        }
    }

    PdfContentStreamEditor editor = (PdfContentStreamEditor)processor;
    editor.Write(processor, oper, operands);
}
/// <summary>
/// Processes an operator with the handler registered for it, falling back to the
/// handler registered under <c>DEFAULTOPERATOR</c> when none (or a null one) is found.
/// </summary>
/// <param name="parser"> the parser that needs to process the operator </param>
/// <param name="operator"> the operator </param>
/// <param name="operands"> its operands </param>
protected internal static void ProcessOperator(OCGParser parser, PdfLiteral @operator, IList<PdfObject> operands)
{
    PdfOperator handler;
    bool registered = operators.TryGetValue(@operator.ToString(), out handler);

    if (!registered || handler == null)
    {
        handler = operators[DEFAULTOPERATOR];
    }

    handler.Process(parser, @operator, operands);
}
// Operator actions

/// <summary>
/// Processes an operator by passing it, with this instance, to the handler registered
/// for it; the handler stored under <c>DEFAULTOPERATOR</c> is used when none is registered.
/// </summary>
/// <param name="operatora">the operator</param>
/// <param name="operands">the operator's operands</param>
protected void ProcessOperator(PdfLiteral operatora, List<PdfObject> operands)
{
    PdfOperator handler;
    bool found = operators.TryGetValue(operatora.ToString(), out handler);

    if (!found || handler == null)
    {
        // Fall back to the default handler lookup.
        operators.TryGetValue(DEFAULTOPERATOR, out handler);
    }

    handler.Process(this, operatora, operands);
}
/// <summary>
/// Invokes the content operator registered for <paramref name="oper"/>, or the one
/// registered under <c>DEFAULTOPERATOR</c> when no specific operator is known.
/// </summary>
/// <param name="oper">the PDF syntax of the operator</param>
/// <param name="operands">a list with operands</param>
private void InvokeOperator(PdfLiteral oper, List<PdfObject> operands)
{
    IContentOperator handler;
    bool known = operators.TryGetValue(oper.ToString(), out handler);

    if (!known || handler == null)
    {
        handler = operators[DEFAULTOPERATOR];
    }

    handler.Invoke(this, oper, operands);
}
/// <summary>
/// Looks up the content operator for the given operator name and invokes it with this
/// processor; unknown names are routed to the entry stored under <c>DEFAULTOPERATOR</c>.
/// </summary>
/// <param name="oper">the PDF syntax of the operator</param>
/// <param name="operands">a list with operands</param>
private void InvokeOperator(PdfLiteral oper, List<iTextSharp.text.pdf.PdfObject> operands)
{
    string key = oper.ToString();

    IContentOperator resolved;
    operators.TryGetValue(key, out resolved);

    // The default entry is only read when the lookup produced nothing.
    IContentOperator target = resolved ?? operators[DEFAULTOPERATOR];
    target.Invoke(this, oper, operands);
}
/// <summary>
/// Forwards operators to the base writer, except text-showing operators whose font size,
/// after applying the text matrix and the current transformation matrix, exceeds 40.
/// </summary>
/// <param name="processor">the processor forwarded to the base implementation</param>
/// <param name="operatorLit">the operator being written</param>
/// <param name="operands">the operands of the operator</param>
protected override void Write(PdfContentStreamProcessor processor, PdfLiteral operatorLit, List<PdfObject> operands)
{
    bool suppress = false;

    if (TEXT_SHOWING_OPERATORS.Contains(operatorLit.ToString()))
    {
        // A vertical vector of length "font size" is pushed through both matrices;
        // its resulting length is compared against the threshold.
        Vector unscaled = new Vector(0, Gs().FontSize, 0);
        Matrix currentTextMatrix = (Matrix)textMatrixField.GetValue(this);
        Matrix ctm = Gs().GetCtm();
        float effectiveFontSize = unscaled.Cross(currentTextMatrix).Cross(ctm).Length;

        suppress = effectiveFontSize > 40;
    }

    if (!suppress)
    {
        base.Write(processor, operatorLit, operands);
    }
}
/// <summary>
/// Tracks XObject nesting around "Do", flags an XObject as a watermark when a Tj / TJ operand
/// inside it matches the editor's <c>Watermark</c> pattern, delegates to the original operator,
/// and writes the operator only at nesting level 0 (skipping flagged XObjects when
/// <c>RemoveXObjects</c> is set).
/// </summary>
/// <param name="processor">the processor; must be a <c>PdfContentStreamEditor</c></param>
/// <param name="oper">the operator being invoked</param>
/// <param name="operands">the operands of the operator</param>
public void Invoke(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
    var editor = (PdfContentStreamEditor)processor;
    var invokesXObject = "Do" == oper.ToString();

    if (invokesXObject)
    {
        // Entering a top-level XObject resets the watermark flag.
        if (editor.XObjectLevel == 0)
        {
            editor.IsWatermark = false;
        }
        editor.XObjectLevel++;
    }
    else if (operands.Count == 2 && editor.XObjectLevel > 0 && editor.RemoveXObjects)
    {
        var name = operands[1].ToString();
        var showsText = name == "Tj" || name == "TJ";
        if (showsText && editor.Watermark.IsMatch(operands[0].ToString()))
        {
            editor.IsWatermark = true;
            editor.WatermarkCount++;
        }
    }

    OriginalOperator?.Invoke(processor, oper, operands);

    if (invokesXObject)
    {
        editor.XObjectLevel--;

        // A flagged XObject is not written when removal is enabled.
        if (editor.IsWatermark && editor.RemoveXObjects)
        {
            return;
        }
    }

    if (editor.XObjectLevel == 0)
    {
        editor.Write(processor, oper, operands);
    }
}
/// <summary>
/// For the Tj / TJ operators, replaces every string operand that matches <c>_matchPattern</c>
/// with the result of substituting <c>_replacePattern</c>, then delegates to the base writer.
/// </summary>
/// <param name="processor">the processor forwarded to the base implementation</param>
/// <param name="oper">the operator being written</param>
/// <param name="operands">the operands; matching string entries are replaced in place</param>
protected override void Write(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
    var opName = oper.ToString();
    var showsText = "Tj".Equals(opName) || "TJ".Equals(opName);

    if (!showsText)
    {
        base.Write(processor, oper, operands);
        return;
    }

    for (var index = 0; index < operands.Count; index++)
    {
        var candidate = operands[index];
        if (candidate.IsString())
        {
            var text = candidate.ToString();
            if (Regex.IsMatch(text, _matchPattern))
            {
                var rewritten = Regex.Replace(text, _matchPattern, _replacePattern);
                operands[index] = new PdfString(rewritten);
            }
        }
    }

    base.Write(processor, oper, operands);
}
/// <summary>
/// Runs the wrapped content operator and then decides what is emitted on the clean-up canvas:
/// form XObjects get their own context whose canvas replaces the stream data, images are
/// updated or dropped, partially hidden text is re-written chunk by chunk, paths are written
/// through <c>WritePath</c>, and the stroke colour stack follows q / Q and stroke colour operators.
/// Everything not suppressed is echoed with <c>WriteOperands</c>.
/// </summary>
/// <param name="pdfContentStreamProcessor">the processor running the content stream</param>
/// <param name="oper">the operator being invoked</param>
/// <param name="operands">the operands of the operator</param>
public virtual void Invoke(PdfContentStreamProcessor pdfContentStreamProcessor, PdfLiteral oper, List<PdfObject> operands)
{
    String opName = oper.ToString();
    PdfContentByte target = cleanUpStrategy.Context.Canvas;
    PRStream formStream = null;

    // Path construction, painting and clipping operators are not echoed through WriteOperands.
    bool suppressEcho = pathConstructionOperators.Contains(opName)
        || pathPaintingOperators.Contains(opName)
        || clippingPathOperators.Contains(opName);

    GraphicsState state = pdfContentStreamProcessor.Gs();
    bool isDo = "Do" == opName;

    // A "Do" that refers to a form XObject gets a fresh clean-up context before it is processed.
    if (isDo && operands.Count == 2 && operands[0].IsName())
    {
        PdfDictionary xObjects = cleanUpStrategy.Context.Resources.GetAsDict(PdfName.XOBJECT);
        PdfStream referenced = xObjects != null ? xObjects.GetAsStream((PdfName)operands[0]) : null;
        PRStream readStream = referenced as PRStream;
        if (readStream != null)
        {
            PdfName subtype = referenced.GetAsName(PdfName.SUBTYPE);
            if (subtype != null && subtype.CompareTo(PdfName.FORM) == 0)
            {
                formStream = readStream;
                cleanUpStrategy.RegisterNewContext(referenced.GetAsDict(PdfName.RESOURCES), null);
            }
        }
    }

    originalContentOperator.Invoke(pdfContentStreamProcessor, oper, operands);
    IList<PdfCleanUpContentChunk> chunks = cleanUpStrategy.Chunks;

    if (formStream != null)
    {
        // Store the form's canvas content in the stream, then return to the outer context.
        PdfContentByte formCanvas = cleanUpStrategy.Context.Canvas;
        formStream.SetData(formCanvas.ToPdf(formCanvas.PdfWriter));
        cleanUpStrategy.PopContext();
        target = cleanUpStrategy.Context.Canvas;
    }

    if (isDo)
    {
        PdfCleanUpContentChunk.Image image =
            chunks.Count > 0 ? chunks[0] as PdfCleanUpContentChunk.Image : null;
        if (image != null)
        {
            if (image.Visible)
            {
                PdfDictionary xObjects = cleanUpStrategy.Context.Resources.GetAsDict(PdfName.XOBJECT);
                PRStream imageStream = (PRStream)xObjects.GetAsStream((PdfName)operands[0]);
                UpdateImageStream(imageStream, image.NewImageData);
            }
            else
            {
                suppressEcho = true;
            }
        }
    }
    else if (lineStyleOperators.Contains(opName))
    {
        suppressEcho = true;
    }
    else if (textShowingOperators.Contains(opName) && !AllChunksAreVisible(cleanUpStrategy.Chunks))
    {
        suppressEcho = true;

        // key - number of a string in the TJ operator, value - number following the string;
        // the first number without string (if it's presented) is stored under 0.
        // BE AWARE: zero-length strings are ignored!!!
        IDictionary<int, float> tjNumbers = null;

        switch (opName)
        {
            case "'":
                target.InternalBuffer.Append(TStar);
                break;
            case "\"":
                operands[0].ToPdf(target.PdfWriter, target.InternalBuffer);
                target.InternalBuffer.Append(Tw);
                operands[1].ToPdf(target.PdfWriter, target.InternalBuffer);
                target.InternalBuffer.Append(TcTStar);
                break;
            case "TJ":
                tjNumbers = StructureTJarray((PdfArray)operands[0]);
                break;
        }

        WriteTextChunks(tjNumbers, chunks, target, state.CharacterSpacing, state.WordSpacing, state.FontSize, state.HorizontalScaling);
    }
    else if (pathPaintingOperators.Contains(opName))
    {
        WritePath(opName, target, state.ColorSpaceStroke);
    }
    else if (strokeColorOperators.Contains(opName))
    {
        // Replace current color with the new one.
        cleanUpStrategy.Context.PopStrokeColor();
        cleanUpStrategy.Context.PushStrokeColor(operands);
    }
    else if ("q" == opName)
    {
        // Duplicate the current stroke colour on the stack.
        cleanUpStrategy.Context.PushStrokeColor(cleanUpStrategy.Context.PeekStrokeColor());
    }
    else if ("Q" == opName)
    {
        cleanUpStrategy.Context.PopStrokeColor();
    }

    if (!suppressEcho)
    {
        WriteOperands(target, operands);
    }

    cleanUpStrategy.ClearChunks();
}
/// <summary>
/// Runs the wrapped content operator and then decides what is emitted on the clean-up canvas.
/// Form XObjects get their own context whose canvas replaces the stream data, images are updated
/// or dropped, the text state kept in the clean-up context follows q / Q / Tf / Tc / Tw / Tz / ",
/// and partially hidden text is re-rendered chunk by chunk. Everything not suppressed is echoed
/// operand by operand.
/// </summary>
/// <param name="pdfContentStreamProcessor">the processor running the content stream</param>
/// <param name="operator">the operator being invoked</param>
/// <param name="operands">the operands of the operator</param>
public void Invoke(PdfContentStreamProcessor pdfContentStreamProcessor, PdfLiteral @operator, List<PdfObject> operands)
{
    String operatorStr = @operator.ToString();
    PdfContentByte canvas = cleanUpStrategy.Context.Canvas;
    PRStream xFormStream = null;

    // key - number of a string in the TJ operator, value - number following the string;
    // the first number without string (if it's presented) is stored under 0.
    // BE AWARE: zero-length strings are ignored!!!
    IDictionary<int, float> structuredTJoperands = null;

    // A "Do" that refers to a form XObject gets a fresh clean-up context before it is processed.
    if ("Do" == operatorStr)
    {
        if (operands.Count == 2 && operands[0].IsName())
        {
            PdfDictionary xObjResources = cleanUpStrategy.Context.Resources.GetAsDict(PdfName.XOBJECT);
            if (xObjResources != null)
            {
                PdfStream xObj = xObjResources.GetAsStream((PdfName)operands[0]);
                if (xObj is PRStream && xObj.GetAsName(PdfName.SUBTYPE) != null
                    && xObj.GetAsName(PdfName.SUBTYPE).CompareTo(PdfName.FORM) == 0)
                {
                    xFormStream = (PRStream)xObj;
                    cleanUpStrategy.RegisterNewContext(xObj.GetAsDict(PdfName.RESOURCES), null);
                }
            }
        }
    }

    originalContentOperator.Invoke(pdfContentStreamProcessor, @operator, operands);
    IList<PdfCleanUpContentChunk> chunks = cleanUpStrategy.Chunks;
    bool disableOutput = false;

    if (xFormStream != null)
    {
        // Store the form's canvas content in the stream, then return to the outer context.
        xFormStream.SetData(cleanUpStrategy.Context.Canvas.ToPdf(cleanUpStrategy.Context.Canvas.PdfWriter));
        cleanUpStrategy.PopContext();
        canvas = cleanUpStrategy.Context.Canvas;
    }

    if ("Do" == operatorStr)
    {
        if (chunks.Count > 0 && chunks[0].IsImage())
        {
            PdfCleanUpContentChunk chunk = chunks[0];
            if (chunk.IsVisible())
            {
                PdfDictionary xObjResources = cleanUpStrategy.Context.Resources.GetAsDict(PdfName.XOBJECT);
                PRStream imageStream = (PRStream)xObjResources.GetAsStream((PdfName)operands[0]);
                UpdateImage(imageStream, chunk.NewImageData);
            }
            else
            {
                disableOutput = true;
            }
        }
    }
    else if ("q" == operatorStr)
    {
        cleanUpStrategy.Context.SaveGraphicsState();
    }
    else if ("Q" == operatorStr)
    {
        cleanUpStrategy.Context.RestoreGraphicsState();
    }
    else if ("Tf" == operatorStr)
    {
        // Tf operands are: font name, font size.
        cleanUpStrategy.Context.FontSize = ((PdfNumber)operands[1]).FloatValue;
    }
    else if ("Tc" == operatorStr)
    {
        cleanUpStrategy.Context.CharacterSpacing = ((PdfNumber)operands[0]).FloatValue;
    }
    else if ("Tw" == operatorStr)
    {
        cleanUpStrategy.Context.WordSpacing = ((PdfNumber)operands[0]).FloatValue;
    }
    else if ("Tz" == operatorStr)
    {
        cleanUpStrategy.Context.HorizontalScaling = ((PdfNumber)operands[0]).FloatValue;
    }
    else if (textShowingOperators.Contains(operatorStr) && !AllChunksAreVisible(cleanUpStrategy.Chunks))
    {
        disableOutput = true;

        if ("'" == operatorStr)
        {
            canvas.InternalBuffer.Append(TStar);
        }
        else if ("\"" == operatorStr)
        {
            // The " operator is emitted as its three parts: aw Tw, ac Tc, T*.
            operands[0].ToPdf(canvas.PdfWriter, canvas.InternalBuffer);
            canvas.InternalBuffer.Append(Tw);
            operands[1].ToPdf(canvas.PdfWriter, canvas.InternalBuffer);
            canvas.InternalBuffer.Append(TcTStar);

            // " sets BOTH word spacing (first operand) and character spacing (second operand);
            // mirror both in the context so it agrees with the Tw / Tc just written.
            cleanUpStrategy.Context.WordSpacing = ((PdfNumber)operands[0]).FloatValue;
            cleanUpStrategy.Context.CharacterSpacing = ((PdfNumber)operands[1]).FloatValue;
        }
        else if ("TJ" == operatorStr)
        {
            structuredTJoperands = StructureTJarray((PdfArray)operands[0]);
        }

        RenderChunks(structuredTJoperands, chunks, canvas);
    }
    else if ("\"" == operatorStr)
    {
        // Fully visible " still changes the text state: word spacing, then character spacing.
        cleanUpStrategy.Context.WordSpacing = ((PdfNumber)operands[0]).FloatValue;
        cleanUpStrategy.Context.CharacterSpacing = ((PdfNumber)operands[1]).FloatValue;
    }

    if (!disableOutput)
    {
        // Echo the operands separated by spaces, ending the line after the last one.
        int index = 0;
        foreach (PdfObject o in operands)
        {
            ToPdf(o, canvas.PdfWriter, canvas.InternalBuffer);
            canvas.InternalBuffer.Append(operands.Count > ++index ? (byte)' ' : (byte)'\n');
        }
    }

    cleanUpStrategy.ClearChunks();
}
/// <summary>
/// Parses a stream object and removes OCGs.
/// XObjects whose /OC dictionary names one of the layers in <c>ocgs</c> are removed from the
/// resources, the content stream is re-written operator by operator through
/// <c>ProcessOperator</c>, and the result replaces the data of <paramref name="stream"/>.
/// </summary>
/// <param name="stream"> a stream object </param>
/// <param name="resources"> the resources dictionary of that object (containing info about the OCGs) </param>
public virtual void Parse(PRStream stream, PdfDictionary resources)
{
    baos = new MemoryStream();
    properties = resources.GetAsDict(PdfName.PROPERTIES);
    xobj = new HashSet2<PdfName>();
    PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);
    if (xobjects != null)
    {
        // remove XObject (form or image) that belong to an OCG that needs to be removed
        foreach (PdfName name in xobjects.Keys)
        {
            // Entries that do not resolve to a stream cannot carry an /OC entry; skip them
            // instead of failing on a null reference or an invalid cast.
            PdfStream xobject = xobjects.GetAsStream(name);
            if (xobject == null)
            {
                continue;
            }
            PdfDictionary oc = xobject.GetAsDict(PdfName.OC);
            if (oc != null)
            {
                PdfString ocname = oc.GetAsString(PdfName.NAME);
                if (ocname != null && ocgs.Contains(ocname.ToString()))
                {
                    xobj.Add(name);
                }
            }
        }
        // Removal happens in a second pass so the dictionary is not modified while enumerating it.
        foreach (PdfName name in xobj)
        {
            xobjects.Remove(name);
        }
    }

    // parse the content stream
    byte[] contentBytes = PdfReader.GetStreamBytes(stream);
    PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
    PdfContentParser ps = new PdfContentParser(tokeniser);
    List<PdfObject> operands = new List<PdfObject>();
    while (ps.Parse(operands).Count > 0)
    {
        PdfLiteral @operator = (PdfLiteral)operands[operands.Count - 1];
        ProcessOperator(this, @operator, operands);
        if ("BI".Equals(@operator.ToString()))
        {
            // Copy the raw inline image bytes up to and including <ws>EI<ws>.
            // 'found' counts how much of that terminator has been seen so far.
            int found = 0;
            int ch;
            bool immediateAfterBI = true;
            while ((ch = tokeniser.Read()) != -1)
            {
                // The single whitespace directly after BI is not copied.
                if (!immediateAfterBI || !PRTokeniser.IsWhitespace(ch))
                {
                    baos.WriteByte((byte)ch);
                }
                immediateAfterBI = false;
                if (found == 0 && PRTokeniser.IsWhitespace(ch))
                {
                    found++;
                }
                else if (found == 1 && ch == 'E')
                {
                    found++;
                }
                else if (found == 1 && PRTokeniser.IsWhitespace(ch))
                {
                    // this clause is needed if we have a white space character that is part of the image data
                    // followed by a whitespace character that precedes the EI operator.  In this case, we need
                    // to flush the first whitespace, then treat the current whitespace as the first potential
                    // character for the end of stream check. Note that we don't increment 'found' here.
                }
                else if (found == 2 && ch == 'I')
                {
                    found++;
                }
                else if (found == 3 && PRTokeniser.IsWhitespace(ch))
                {
                    break;
                }
                else
                {
                    found = 0;
                }
            }
        }
    }
    baos.Flush();
    baos.Close();
    // ToArray copies exactly the bytes written; GetBuffer would also expose the unused
    // tail of the internal buffer and append garbage to the stream.
    stream.SetData(baos.ToArray());
}
/// <summary>
/// Dispatches an operator to its registered content operator, using the one stored
/// under <c>DEFAULTOPERATOR</c> when the lookup yields nothing.
/// </summary>
/// <param name="oper">the PDF syntax of the operator</param>
/// <param name="operands">a list with operands</param>
private void InvokeOperator(PdfLiteral oper, List<PdfObject> operands)
{
    IContentOperator registered;
    operators.TryGetValue(oper.ToString(), out registered);

    IContentOperator chosen = registered != null ? registered : operators[DEFAULTOPERATOR];
    chosen.Invoke(this, oper, operands);
}
// Operator actions

/// <summary>
/// Processes an operator: the handler registered for the operator (or, failing that, the one
/// registered under <c>DEFAULTOPERATOR</c>) is asked to process it with this instance.
/// </summary>
/// <param name="operatora">the operator</param>
/// <param name="operands">the operator's operands</param>
virtual protected void ProcessOperator(PdfLiteral operatora, List<PdfObject> operands)
{
    string key = operatora.ToString();

    PdfOperator handler;
    if (!operators.TryGetValue(key, out handler) || handler == null)
    {
        // No specific handler: look up the default one instead.
        operators.TryGetValue(DEFAULTOPERATOR, out handler);
    }

    handler.Process(this, operatora, operands);
}
/// <summary>
/// Processes PDF syntax.
/// </summary>
/// <param name="contentBytes">the bytes of a content stream</param>
/// <param name="resources">the resources that come with the content stream</param>
public void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
    this.resources.Push(resources);
    try
    {
        PRTokeniser tokeniser = new PRTokeniser(contentBytes);
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<PdfObject> operands = new List<PdfObject>();
        while (ps.Parse(operands).Count > 0)
        {
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];

            // special handling for embedded images.  If we hit an ID oper, we need
            // to skip all content until we reach an EI oper surrounded by whitespace.
            // The following algorithm has one potential issue: what if the image stream
            // contains <ws>EI<ws> ?
            // it sounds like we would have to actually decode the content stream, which
            // I'd rather avoid right now.
            if ("ID".Equals(oper.ToString()))
            {
                MemoryStream baos = new MemoryStream();
                // Bytes that might belong to the <ws>EI<ws> terminator are held back here
                // until it is clear whether they are image data.
                MemoryStream accumulated = new MemoryStream();
                int ch;
                int found = 0;
                while ((ch = tokeniser.Read()) != -1)
                {
                    if (found == 0 && PRTokeniser.IsWhitespace(ch))
                    {
                        found++;
                        accumulated.WriteByte((byte)ch);
                    }
                    else if (found == 1 && ch == 'E')
                    {
                        found++;
                        accumulated.WriteByte((byte)ch);
                    }
                    else if (found == 1 && PRTokeniser.IsWhitespace(ch))
                    {
                        // A whitespace that is part of the image data followed by the whitespace
                        // that precedes EI: flush the first one as data and treat the current one
                        // as the new potential start of the terminator ('found' stays at 1).
                        accumulated.WriteTo(baos);
                        accumulated.SetLength(0);
                        accumulated.WriteByte((byte)ch);
                    }
                    else if (found == 2 && ch == 'I')
                    {
                        found++;
                        accumulated.WriteByte((byte)ch);
                    }
                    else if (found == 3 && PRTokeniser.IsWhitespace(ch))
                    {
                        operands = new List<PdfObject>();
                        operands.Add(new PdfLiteral("ID"));
                        InvokeOperator((PdfLiteral)operands[operands.Count - 1], operands);
                        // we should probably eventually do something to make the accumulated image content stream available
                        operands = new List<PdfObject>();
                        operands.Add(new PdfLiteral("EI"));
                        InvokeOperator((PdfLiteral)operands[operands.Count - 1], operands);
                        break;
                    }
                    else
                    {
                        // Not the terminator after all: everything held back is image data.
                        accumulated.WriteTo(baos);
                        accumulated.SetLength(0);
                        baos.WriteByte((byte)ch);
                        found = 0;
                    }
                }
            }

            // NOTE(review): after an inline image this invokes "ID" once more, with whatever
            // 'operands' currently holds - kept for backward compatibility; confirm it is intended.
            InvokeOperator(oper, operands);
        }
    }
    finally
    {
        // Balance the Push above even when parsing or an operator throws.
        this.resources.Pop();
    }
}