/**
 * Parses the CMap called <code>cmapName</code>, obtained from <code>location</code>, and
 * feeds its contents into <code>cmap</code>.
 * Header entries (registry, ordering, name, supplement) are only taken from the top-level
 * CMap (<code>level == 0</code>); character and range mappings are taken at every level.
 * A <code>usecmap</code> reference is followed recursively with <code>level + 1</code>.
 * @param cmapName the name of the CMap to load
 * @param cmap the CMap that receives the parsed entries
 * @param location supplies the tokeniser for a CMap name
 * @param level the current recursion depth; nothing is parsed once it reaches MAXLEVEL
 */
private static void ParseCid(String cmapName, AbstractCMap cmap, ICidLocation location, int level) {
    if (level >= MAXLEVEL) {
        return;
    }
    PRTokeniser inp = location.GetLocation(cmapName);
    try {
        List<PdfObject> operands = new List<PdfObject>();
        PdfContentParser parser = new PdfContentParser(inp);
        int failuresLeft = 50;
        for (;;) {
            try {
                parser.Parse(operands);
            }
            catch {
                // Skip over unparsable input, but give up after too many failures.
                failuresLeft--;
                if (failuresLeft < 0) {
                    break;
                }
                continue;
            }

            int count = operands.Count;
            if (count == 0) {
                break;
            }
            String op = operands[count - 1].ToString();

            // "<key> <value> def" header entries, honoured for the top-level CMap only.
            if (level == 0 && count == 3 && op.Equals(DEF)) {
                PdfObject key = operands[0];
                PdfObject val = operands[1];
                if (PdfName.REGISTRY.Equals(key)) {
                    cmap.Registry = val.ToString();
                }
                else if (PdfName.ORDERING.Equals(key)) {
                    cmap.Ordering = val.ToString();
                }
                else if (CMAPNAME.Equals(key)) {
                    cmap.Name = val.ToString();
                }
                else if (PdfName.SUPPLEMENT.Equals(key)) {
                    try {
                        cmap.Supplement = ((PdfNumber)val).IntValue;
                    }
                    catch {
                        // best effort: a supplement that cannot be read is ignored
                    }
                }
                continue;
            }

            // Pairs of (code, value) closed by endcidchar / endbfchar.
            if ((op.Equals(ENDCIDCHAR) || op.Equals(ENDBFCHAR)) && count >= 3) {
                for (int k = 0; k < count - 2; k += 2) {
                    PdfString code = operands[k] as PdfString;
                    if (code != null) {
                        cmap.AddChar(code, operands[k + 1]);
                    }
                }
                continue;
            }

            // Triples of (from, to, value) closed by endcidrange / endbfrange.
            if ((op.Equals(ENDCIDRANGE) || op.Equals(ENDBFRANGE)) && count >= 4) {
                for (int k = 0; k < count - 3; k += 3) {
                    PdfString from = operands[k] as PdfString;
                    PdfString to = operands[k + 1] as PdfString;
                    if (from != null && to != null) {
                        cmap.AddRange(from, to, operands[k + 2]);
                    }
                }
                continue;
            }

            // "/Name usecmap": pull in the referenced CMap one level deeper.
            if (op.Equals(USECMAP) && count == 2 && operands[0] is PdfName) {
                ParseCid(PdfName.DecodeName(operands[0].ToString()), cmap, location, level + 1);
            }
        }
    }
    finally {
        inp.Close();
    }
}
/**
 * Scans the CMap bytes in <code>touni</code> for <code>beginbfchar</code> and
 * <code>beginbfrange</code> sections and fills <code>metrics</code>: for every mapping whose
 * decoded Unicode string is exactly one char, <code>metrics[unicode] = {cid, width}</code>.
 * Scanning stops at the end of the data or once the outermost <code>begin</code>/<code>end</code>
 * pair has been closed. Only the first char of each decoded CID string is used.
 * A section that is cut off before its closing operator ends the section quietly instead of
 * failing with a NullReferenceException.
 * @param touni the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)
 * @param widths widths keyed by CID
 * @param dw the width used when <code>widths</code> has no entry for a CID
 */
private void FillMetrics(byte[] touni, IntHashtable widths, int dw) {
    PdfContentParser ps = new PdfContentParser(new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(touni))));
    PdfObject ob = null;
    bool notFound = true;
    int nestLevel = 0;
    int maxExc = 50;
    while (notFound || nestLevel > 0) {
        try {
            ob = ps.ReadPRObject();
        }
        catch {
            // Tolerate a limited number of unreadable objects before giving up.
            if (--maxExc < 0) {
                break;
            }
            continue;
        }
        if (ob == null) {
            break;
        }
        if (ob.Type != PdfContentParser.COMMAND_TYPE) {
            continue;
        }
        String command = ob.ToString();
        if (command.Equals("begin")) {
            notFound = false;
            nestLevel++;
        }
        else if (command.Equals("end")) {
            nestLevel--;
        }
        else if (command.Equals("beginbfchar")) {
            while (true) {
                PdfObject nx = ps.ReadPRObject();
                // null: the data ended before "endbfchar"
                if (nx == null || nx.ToString().Equals("endbfchar")) {
                    break;
                }
                PdfObject uniObj = ps.ReadPRObject();
                if (uniObj == null) {
                    break;
                }
                String cid = DecodeString((PdfString)nx);
                String uni = DecodeString((PdfString)uniObj);
                if (uni.Length == 1) {
                    int cidc = (int)cid[0];
                    int unic = (int)uni[0];
                    int w = widths.ContainsKey(cidc) ? widths[cidc] : dw;
                    metrics[unic] = new int[] { cidc, w };
                }
            }
        }
        else if (command.Equals("beginbfrange")) {
            while (true) {
                PdfObject nx = ps.ReadPRObject();
                // null: the data ended before "endbfrange"
                if (nx == null || nx.ToString().Equals("endbfrange")) {
                    break;
                }
                PdfObject endObj = ps.ReadPRObject();
                PdfObject ob2 = ps.ReadPRObject();
                if (endObj == null || ob2 == null) {
                    break;
                }
                String cid1 = DecodeString((PdfString)nx);
                String cid2 = DecodeString((PdfString)endObj);
                int cid1c = (int)cid1[0];
                int cid2c = (int)cid2[0];
                if (ob2.IsString()) {
                    // A single start value: consecutive CIDs map to consecutive Unicode values.
                    String uni = DecodeString((PdfString)ob2);
                    if (uni.Length == 1) {
                        int unic = (int)uni[0];
                        for (; cid1c <= cid2c; cid1c++, unic++) {
                            int w = widths.ContainsKey(cid1c) ? widths[cid1c] : dw;
                            metrics[unic] = new int[] { cid1c, w };
                        }
                    }
                }
                else {
                    // An array: one destination string per CID, starting at the first CID.
                    PdfArray a = (PdfArray)ob2;
                    for (int j = 0; j < a.Size; ++j, ++cid1c) {
                        String uni = DecodeString(a.GetAsString(j));
                        if (uni.Length == 1) {
                            int unic = (int)uni[0];
                            int w = widths.ContainsKey(cid1c) ? widths[cid1c] : dw;
                            metrics[unic] = new int[] { cid1c, w };
                        }
                    }
                }
            }
        }
    }
}
/**
 * Reads the CMap bytes in <code>touni</code> and fills <code>metrics</code> from its
 * <code>beginbfchar</code> and <code>beginbfrange</code> sections. The entry count of a section
 * is taken from the object that precedes the section operator. For every mapping whose decoded
 * Unicode string is exactly one char, <code>metrics[unicode] = {cid, width}</code> is stored.
 * @param touni the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)
 * @param widths widths keyed by CID
 * @param dw the width used when <code>widths</code> has no entry for a CID
 */
private void FillMetrics(byte[] touni, IntHashtable widths, int dw) {
    PdfContentParser parser = new PdfContentParser(new PRTokeniser(touni));
    PdfObject previous = null;
    PdfObject current;
    while ((current = parser.ReadPRObject()) != null) {
        if (current.Type != PdfContentParser.COMMAND_TYPE) {
            // Remember the latest operand: it is the entry count when a section starts.
            previous = current;
            continue;
        }

        if (current.ToString().Equals("beginbfchar")) {
            int entries = ((PdfNumber)previous).IntValue;
            for (int i = 0; i < entries; ++i) {
                String cidText = DecodeString((PdfString)parser.ReadPRObject());
                String uniText = DecodeString((PdfString)parser.ReadPRObject());
                if (uniText.Length != 1) {
                    continue;
                }
                int cidCode = (int)cidText[0];
                int uniCode = (int)uniText[0];
                int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                metrics[uniCode] = new int[] { cidCode, width };
            }
        }
        else if (current.ToString().Equals("beginbfrange")) {
            int entries = ((PdfNumber)previous).IntValue;
            for (int i = 0; i < entries; ++i) {
                String firstText = DecodeString((PdfString)parser.ReadPRObject());
                String lastText = DecodeString((PdfString)parser.ReadPRObject());
                int cidCode = (int)firstText[0];
                int cidLimit = (int)lastText[0];
                PdfObject target = parser.ReadPRObject();
                if (target.IsString()) {
                    // One start value: walk CIDs and Unicode values in lock step.
                    String uniText = DecodeString((PdfString)target);
                    if (uniText.Length == 1) {
                        int uniCode = (int)uniText[0];
                        while (cidCode <= cidLimit) {
                            int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                            metrics[uniCode] = new int[] { cidCode, width };
                            cidCode++;
                            uniCode++;
                        }
                    }
                }
                else {
                    // An array: one destination string per CID.
                    ArrayList items = ((PdfArray)target).ArrayList;
                    for (int j = 0; j < items.Count; ++j, ++cidCode) {
                        String uniText = DecodeString((PdfString)items[j]);
                        if (uniText.Length != 1) {
                            continue;
                        }
                        int uniCode = (int)uniText[0];
                        int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                        metrics[uniCode] = new int[] { cidCode, width };
                    }
                }
            }
        }
    }
}
/**
 * Parses the samples of the image from the underlying content parser, accounting for filters.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * <b>Note:</b> This implementation does not actually apply the filters at this time.
 * When the dictionary has no filter entry the work is delegated to ParseUnfilteredSamples.
 * Otherwise the data is read until the sequence &lt;ws&gt;EI&lt;ws&gt; is met. Known limitations:
 * image data that itself contains &lt;ws&gt;EI&lt;ws&gt; ends the image early, and streams without
 * whitespace in front of EI are not recognised.
 * @param imageDictionary the dictionary of the inline image
 * @param colorSpaceDic passed through to ParseUnfilteredSamples when no filter is present
 * @param ps the content parser
 * @return the samples of the image
 * @throws InlineImageParseException if the data ends before the EI operator is found
 */
private static byte[] ParseInlineImageSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps) {
    // by the time we get to here, we have already parsed the ID operator
    if (!imageDictionary.Contains(PdfName.FILTER)) {
        return ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps);
    }

    MemoryStream baos = new MemoryStream();
    // Holds the bytes of a possible "<ws>EI" terminator until it is confirmed or rejected.
    MemoryStream accumulated = new MemoryStream();
    // Number of terminator characters matched so far: 0 none, 1 <ws>, 2 <ws>E, 3 <ws>EI.
    int found = 0;
    int ch;
    PRTokeniser tokeniser = ps.GetTokeniser();
    while ((ch = tokeniser.Read()) != -1) {
        bool isWhitespace = PRTokeniser.IsWhitespace(ch);
        if (found == 3 && isWhitespace) {
            // <ws>EI<ws>: the pending terminator bytes are not part of the image data.
            return baos.ToArray();
        }
        if (isWhitespace) {
            // Any whitespace may be the one that precedes EI. Whatever was pending turned out
            // to be image data, so flush it and start a new candidate with this character.
            // This also covers <ws>E<ws>EI<ws>, where the second whitespace used to be
            // written straight to the output and the terminator was missed.
            accumulated.WriteTo(baos);
            accumulated.SetLength(0);
            accumulated.WriteByte((byte)ch);
            found = 1;
        }
        else if (found == 1 && ch == 'E') {
            found = 2;
            accumulated.WriteByte((byte)ch);
        }
        else if (found == 2 && ch == 'I') {
            found = 3;
            accumulated.WriteByte((byte)ch);
        }
        else {
            // Not a terminator: everything pending plus this byte is image data.
            accumulated.WriteTo(baos);
            accumulated.SetLength(0);
            baos.WriteByte((byte)ch);
            found = 0;
        }
    }
    throw new InlineImageParseException("Could not find image data or EI");
}
/**
 * Parses the samples of the image from the underlying content parser, ignoring all filters.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * This is primarily useful if no filters have been applied.
 * @param imageDictionary the dictionary of the inline image
 * @param colorSpaceDic passed to ComputeBytesPerRow together with the image dictionary
 * @param ps the content parser
 * @return the samples of the image
 * @throws ArgumentException if the dictionary has a filter entry
 * @throws InlineImageParseException if the content ends early or EI does not follow the data
 */
private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps) {
    // special case: when no filter is specified, we just read the number of bits
    // per component, multiplied by the width and height.
    if (imageDictionary.Contains(PdfName.FILTER)) {
        throw new ArgumentException("Dictionary contains filters");
    }

    PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);
    int bytesToRead = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * h.IntValue;
    byte[] bytes = new byte[bytesToRead];
    PRTokeniser tokeniser = ps.GetTokeniser();

    // From the PDF spec: unless the image uses ASCIIHexDecode or ASCII85Decode as one of its
    // filters, the ID operator shall be followed by a single white-space character, and the
    // next character shall be interpreted as the first byte of image data.
    int shouldBeWhiteSpace = tokeniser.Read();
    if (shouldBeWhiteSpace == -1) {
        // End of content right after ID: do not store the EOF marker as a data byte.
        throw new InlineImageParseException("End of content stream reached before end of image data");
    }

    // Unfortunately some PDFs have no space following the ID, so that character is already
    // image data. The tokeniser treats 0 as whitespace, but for our purposes we shouldn't.
    int startIndex = 0;
    bool isSeparator = PRTokeniser.IsWhitespace(shouldBeWhiteSpace) && shouldBeWhiteSpace != 0;
    if (!isSeparator && bytesToRead > 0) {
        bytes[0] = (byte)shouldBeWhiteSpace;
        startIndex++;
    }

    for (int i = startIndex; i < bytesToRead; i++) {
        int ch = tokeniser.Read();
        if (ch == -1) {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }
        bytes[i] = (byte)ch;
    }

    // ReadPRObject yields null when nothing is left to read; treat that as a missing EI.
    PdfObject ei = ps.ReadPRObject();
    if (ei == null || !ei.ToString().Equals("EI")) {
        throw new InlineImageParseException("EI not found after end of image data");
    }
    return bytes;
}
/**
 * Parses the samples of the image from the underlying content parser, ignoring all filters.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * This is primarily useful if no filters have been applied.
 * @param imageDictionary the dictionary of the inline image
 * @param ps the content parser
 * @return the samples of the image
 * @throws ArgumentException if the dictionary has a filter entry
 * @throws InlineImageParseException if the content ends early or EI does not follow the data
 */
private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfContentParser ps) {
    // special case: when no filter is specified, we just read the number of bits
    // per component, multiplied by the width and height.
    if (imageDictionary.Contains(PdfName.FILTER)) {
        throw new ArgumentException("Dictionary contains filters");
    }

    PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);
    int bytesToRead = ComputeBytesPerRow(imageDictionary) * h.IntValue;
    byte[] bytes = new byte[bytesToRead];
    PRTokeniser tokeniser = ps.GetTokeniser();

    // skip next character (which better be a whitespace character - this is not checked)
    tokeniser.Read();

    for (int i = 0; i < bytesToRead; i++) {
        int ch = tokeniser.Read();
        if (ch == -1) {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }
        bytes[i] = (byte)ch;
    }

    // ReadPRObject yields null when nothing is left to read; treat that as a missing EI
    // instead of dereferencing null.
    PdfObject ei = ps.ReadPRObject();
    if (ei == null || !ei.ToString().Equals("EI")) {
        throw new InlineImageParseException("EI not found after end of image data");
    }
    return bytes;
}
/**
 * Reads the key/value pairs of an inline image dictionary from the parser, up to the ID operator.
 * The BI operator must already have been consumed. Each key is replaced by its entry in
 * inlineImageEntryAbbreviationMap when there is one, and each value is passed through
 * GetAlternateValue before being stored.
 * After the ID operator a single character is read, which has to be whitespace.
 * @param ps the parser to extract the embedded image information from
 * @return the dictionary for the inline image, with the resolved keys and values
 * @throws IOException if the character that follows ID is not whitespace
 */
private static PdfDictionary ParseInlineImageDictionary(PdfContentParser ps) {
    PdfDictionary result = new PdfDictionary();
    while (true) {
        PdfObject key = ps.ReadPRObject();
        // Stop at the end of the content or at the ID operator.
        if (key == null || "ID".Equals(key.ToString())) {
            break;
        }
        PdfObject value = ps.ReadPRObject();

        PdfName rawKey = (PdfName)key;
        PdfName resolvedKey;
        bool known = inlineImageEntryAbbreviationMap.TryGetValue(rawKey, out resolvedKey);
        if (!known || resolvedKey == null) {
            // Not an abbreviation: keep the key as written.
            resolvedKey = rawKey;
        }
        result.Put(resolvedKey, GetAlternateValue(resolvedKey, value));
    }

    int ch = ps.GetTokeniser().Read();
    if (PRTokeniser.IsWhitespace(ch)) {
        return result;
    }
    throw new IOException("Unexpected character " + ch + " found after ID in inline image");
}
/**
 * Processes PDF syntax.
 * <b>Note:</b> If you re-use a given {@link PdfContentStreamProcessor}, you must call {@link PdfContentStreamProcessor#reset()}
 * The given resources are pushed for the duration of the call and are popped again even when
 * parsing, an operator or the render listener throws, so the resource stack stays balanced.
 * @param contentBytes the bytes of a content stream
 * @param resources the resources that come with the content stream
 */
public void ProcessContent(byte[] contentBytes, PdfDictionary resources){
    this.resources.Push(resources);
    try {
        PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(contentBytes)));
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<PdfObject> operands = new List<PdfObject>();
        while (ps.Parse(operands).Count > 0){
            // The operator is the last object of each parsed group.
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];
            if ("BI".Equals(oper.ToString())){
                // we don't call invokeOperator for embedded images - this is one area of the PDF spec that is particularly nasty and inconsistent
                PdfDictionary colorSpaceDic = resources != null ? resources.GetAsDict(PdfName.COLORSPACE) : null;
                ImageRenderInfo renderInfo = ImageRenderInfo.CreateForEmbeddedImage(Gs().ctm, InlineImageUtils.ParseInlineImage(ps, colorSpaceDic), colorSpaceDic);
                renderListener.RenderImage(renderInfo);
            } else {
                InvokeOperator(oper, operands);
            }
        }
    }
    finally {
        this.resources.Pop();
    }
}
/**
 * Parses an inline image from the provided content parser: first its dictionary, then its samples.
 * The parser must be positioned immediately following the BI operator in the content stream.
 * The parser will be left with current position immediately following the EI operator that terminates the inline image.
 * @param ps the content parser to use for reading the image
 * @param colorSpaceDic handed to the sample parser together with the image dictionary
 * @return the parsed image
 * @throws IOException if anything goes wrong with the parsing
 * @throws InlineImageParseException if parsing of the inline image failed due to issues specific to inline image processing
 */
public static PdfImageObject ParseInlineImage(PdfContentParser ps, PdfDictionary colorSpaceDic) {
    PdfDictionary dictionary = ParseInlineImageDictionary(ps);
    return new PdfImageObject(dictionary, ParseInlineImageSamples(dictionary, colorSpaceDic, ps));
}
/**
 * Reads the CMap bytes in <code>touni</code> and fills <code>metrics</code> from its
 * <code>beginbfchar</code> and <code>beginbfrange</code> sections. The number of entries in a
 * section is the value of the object read just before the section operator. Mappings whose
 * decoded Unicode string is not exactly one char are skipped; all others are stored as
 * <code>metrics[unicode] = {cid, width}</code>.
 * @param touni the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)
 * @param widths widths keyed by CID
 * @param dw the width used when <code>widths</code> has no entry for a CID
 */
private void FillMetrics(byte[] touni, IntHashtable widths, int dw) {
    PdfContentParser parser = new PdfContentParser(new PRTokeniser(touni));
    PdfObject operand = null;
    for (PdfObject token = parser.ReadPRObject(); token != null; token = parser.ReadPRObject()) {
        if (token.Type != PdfContentParser.COMMAND_TYPE) {
            // Keep the most recent operand; a section operator uses it as its entry count.
            operand = token;
            continue;
        }

        if (token.ToString().Equals("beginbfchar")) {
            int total = ((PdfNumber)operand).IntValue;
            for (int n = 0; n < total; ++n) {
                String source = DecodeString((PdfString)parser.ReadPRObject());
                String dest = DecodeString((PdfString)parser.ReadPRObject());
                if (dest.Length != 1) {
                    continue;
                }
                int cid = (int)source[0];
                int unicode = (int)dest[0];
                int width = widths.ContainsKey(cid) ? widths[cid] : dw;
                metrics[unicode] = new int[] { cid, width };
            }
        }
        else if (token.ToString().Equals("beginbfrange")) {
            int total = ((PdfNumber)operand).IntValue;
            for (int n = 0; n < total; ++n) {
                String low = DecodeString((PdfString)parser.ReadPRObject());
                String high = DecodeString((PdfString)parser.ReadPRObject());
                int cid = (int)low[0];
                int lastCid = (int)high[0];
                PdfObject dest = parser.ReadPRObject();
                if (dest.IsString()) {
                    // A single start value: CIDs and Unicode values advance together.
                    String start = DecodeString((PdfString)dest);
                    if (start.Length == 1) {
                        int unicode = (int)start[0];
                        while (cid <= lastCid) {
                            int width = widths.ContainsKey(cid) ? widths[cid] : dw;
                            metrics[unicode] = new int[] { cid, width };
                            cid++;
                            unicode++;
                        }
                    }
                }
                else {
                    // An array with one destination string per CID.
                    ArrayList entries = ((PdfArray)dest).ArrayList;
                    for (int j = 0; j < entries.Count; ++j, ++cid) {
                        String single = DecodeString((PdfString)entries[j]);
                        if (single.Length != 1) {
                            continue;
                        }
                        int unicode = (int)single[0];
                        int width = widths.ContainsKey(cid) ? widths[cid] : dw;
                        metrics[unicode] = new int[] { cid, width };
                    }
                }
            }
        }
    }
}
/// <summary>
/// Parses a stream object and removes OCGs.
/// XObjects whose /OC dictionary carries a /Name found in <c>ocgs</c> are removed from the
/// resources first; then every operator of the content stream is handed to
/// <c>ProcessOperator</c>, and whatever was written to <c>baos</c> replaces the stream data.
/// </summary>
/// <param name="stream"> a stream object </param>
/// <param name="resources"> the resources dictionary of that object (containing info about the OCGs) </param>
public virtual void Parse(PRStream stream, PdfDictionary resources) {
    baos = new MemoryStream();
    properties = resources.GetAsDict(PdfName.PROPERTIES);
    xobj = new HashSet2<PdfName>();
    PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);
    if (xobjects != null) {
        // remove XObject (form or image) that belong to an OCG that needs to be removed
        foreach (PdfName name in xobjects.Keys) {
            PRStream xobject = (PRStream) xobjects.GetAsStream(name);
            if (xobject == null) {
                // presumably the entry does not resolve to a stream; nothing to inspect
                continue;
            }
            PdfDictionary oc = xobject.GetAsDict(PdfName.OC);
            if (oc != null) {
                PdfString ocname = oc.GetAsString(PdfName.NAME);
                if (ocname != null && ocgs.Contains(ocname.ToString())) {
                    xobj.Add(name);
                }
            }
        }
        // Removal happens in a second pass so the key collection is not changed while enumerated.
        foreach (PdfName name in xobj) {
            xobjects.Remove(name);
        }
    }
    // parse the content stream
    byte[] contentBytes = PdfReader.GetStreamBytes(stream);
    PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
    PdfContentParser ps = new PdfContentParser(tokeniser);
    List<PdfObject> operands = new List<PdfObject>();
    while (ps.Parse(operands).Count > 0) {
        PdfLiteral @operator = (PdfLiteral) operands[operands.Count - 1];
        ProcessOperator(this, @operator, operands);
    }
    baos.Flush();
    // ToArray returns exactly the bytes written. GetBuffer would hand back the whole backing
    // array, including unused capacity, and append junk bytes to the content stream.
    byte[] rewritten = baos.ToArray();
    baos.Close();
    stream.SetData(rewritten);
}
/**
 * Reads the CMap bytes in <code>touni</code> and fills <code>metrics</code> from its
 * <code>beginbfchar</code> and <code>beginbfrange</code> sections. Reading stops at the end of
 * the data or as soon as the outermost <code>begin</code>/<code>end</code> pair is closed.
 * The entry count of a section is the object read just before the section operator.
 * For every mapping whose decoded Unicode string is exactly one char,
 * <code>metrics[unicode] = {cid, width}</code> is stored.
 * @param touni the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)
 * @param widths widths keyed by CID
 * @param dw the width used when <code>widths</code> has no entry for a CID
 */
private void FillMetrics(byte[] touni, IntHashtable widths, int dw) {
    PdfContentParser parser = new PdfContentParser(new PRTokeniser(touni));
    PdfObject previous = null;
    bool started = false;
    int depth = 0;
    // Keep going until a "begin" has been seen and all nested blocks are closed again.
    while (!started || depth > 0) {
        PdfObject current = parser.ReadPRObject();
        if (current == null) {
            break;
        }
        if (current.Type != PdfContentParser.COMMAND_TYPE) {
            // Remember the latest operand: it is the entry count when a section starts.
            previous = current;
            continue;
        }

        String command = current.ToString();
        if (command.Equals("begin")) {
            started = true;
            depth++;
        }
        else if (command.Equals("end")) {
            depth--;
        }
        else if (command.Equals("beginbfchar")) {
            int entries = ((PdfNumber)previous).IntValue;
            for (int i = 0; i < entries; ++i) {
                String cidText = DecodeString((PdfString)parser.ReadPRObject());
                String uniText = DecodeString((PdfString)parser.ReadPRObject());
                if (uniText.Length != 1) {
                    continue;
                }
                int cidCode = (int)cidText[0];
                int uniCode = (int)uniText[0];
                int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                metrics[uniCode] = new int[] { cidCode, width };
            }
        }
        else if (command.Equals("beginbfrange")) {
            int entries = ((PdfNumber)previous).IntValue;
            for (int i = 0; i < entries; ++i) {
                String firstText = DecodeString((PdfString)parser.ReadPRObject());
                String lastText = DecodeString((PdfString)parser.ReadPRObject());
                int cidCode = (int)firstText[0];
                int cidLimit = (int)lastText[0];
                PdfObject target = parser.ReadPRObject();
                if (target.IsString()) {
                    // One start value: walk CIDs and Unicode values in lock step.
                    String uniText = DecodeString((PdfString)target);
                    if (uniText.Length == 1) {
                        int uniCode = (int)uniText[0];
                        while (cidCode <= cidLimit) {
                            int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                            metrics[uniCode] = new int[] { cidCode, width };
                            cidCode++;
                            uniCode++;
                        }
                    }
                }
                else {
                    // An array: one destination string per CID.
                    PdfArray targets = (PdfArray)target;
                    for (int j = 0; j < targets.Size; ++j, ++cidCode) {
                        String uniText = DecodeString(targets.GetAsString(j));
                        if (uniText.Length != 1) {
                            continue;
                        }
                        int uniCode = (int)uniText[0];
                        int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                        metrics[uniCode] = new int[] { cidCode, width };
                    }
                }
            }
        }
    }
}
/**
 * Processes PDF syntax.
 * The given resources are pushed for the duration of the call and are popped again even when
 * parsing, an operator or the render listener throws, so the resource stack stays balanced.
 * @param contentBytes the bytes of a content stream
 * @param resources the resources that come with the content stream
 */
public void ProcessContent(byte[] contentBytes, PdfDictionary resources){
    this.resources.Push(resources);
    try {
        PRTokeniser tokeniser = new PRTokeniser(contentBytes);
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<PdfObject> operands = new List<PdfObject>();
        while (ps.Parse(operands).Count > 0){
            // The operator is the last object of each parsed group.
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];
            if ("BI".Equals(oper.ToString())){
                // we don't call invokeOperator for embedded images - this is one area of the PDF spec that is particularly nasty and inconsistent
                ImageRenderInfo renderInfo = ImageRenderInfo.CreatedForEmbeddedImage(Gs().ctm, InlineImageUtils.ParseInlineImage(ps));
                renderListener.RenderImage(renderInfo);
            } else {
                InvokeOperator(oper, operands);
            }
        }
    }
    finally {
        this.resources.Pop();
    }
}
/**
 * Processes PDF syntax.
 * <b>Note:</b> If you re-use a given {@link PdfContentStreamProcessor}, you must call {@link PdfContentStreamProcessor#reset()}
 * The given resources are pushed for the duration of the call and are popped again even when
 * parsing, an operator or the render listener throws, so the resource stack stays balanced.
 * @param contentBytes the bytes of a content stream
 * @param resources the resources that come with the content stream
 */
public void ProcessContent(byte[] contentBytes, PdfDictionary resources){
    this.resources.Push(resources);
    try {
        PRTokeniser tokeniser = new PRTokeniser(contentBytes);
        PdfContentParser ps = new PdfContentParser(tokeniser);
        List<iTextSharp.text.pdf.PdfObject> operands = new List<iTextSharp.text.pdf.PdfObject>();
        while (ps.Parse(operands).Count > 0){
            // The operator is the last object of each parsed group.
            PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];
            if ("BI".Equals(oper.ToString())){
                // we don't call invokeOperator for embedded images - this is one area of the PDF spec that is particularly nasty and inconsistent
                PdfDictionary colorSpaceDic = resources != null ? resources.GetAsDict(PdfName.COLORSPACE) : null;
                // NOTE(review): an earlier comment here quoted a compiler message saying that
                // ImageRenderInfo.CreateForEmbeddedImage(Matrix, InlineImageInfo, PdfDictionary)
                // is inaccessible due to its protection level - confirm this call compiles
                // from wherever this copy of the class lives.
                ImageRenderInfo renderInfo = ImageRenderInfo.CreateForEmbeddedImage(Gs().ctm, InlineImageUtils.ParseInlineImage(ps, colorSpaceDic), colorSpaceDic);
                renderListener.RenderImage(renderInfo);
            } else {
                InvokeOperator(oper, operands);
            }
        }
    }
    finally {
        this.resources.Pop();
    }
}
/**
 * Fills <code>metrics</code> from the <code>beginbfchar</code> and <code>beginbfrange</code>
 * sections of the CMap bytes in <code>touni</code>. Objects are read until the data ends or
 * the outermost <code>begin</code>/<code>end</code> pair has been closed. A section's entry
 * count is the object read just before its operator. Mappings whose decoded Unicode string is
 * exactly one char are stored as <code>metrics[unicode] = {cid, width}</code>; others are skipped.
 * @param touni the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)
 * @param widths widths keyed by CID
 * @param dw the width used when <code>widths</code> has no entry for a CID
 */
private void FillMetrics(byte[] touni, IntHashtable widths, int dw) {
    PdfContentParser parser = new PdfContentParser(new PRTokeniser(touni));
    PdfObject operand = null;
    bool seenBegin = false;
    int openBlocks = 0;
    for (;;) {
        // Done once a "begin" was seen and every block opened since then is closed.
        if (seenBegin && openBlocks <= 0) {
            break;
        }
        PdfObject token = parser.ReadPRObject();
        if (token == null) {
            break;
        }
        if (token.Type != PdfContentParser.COMMAND_TYPE) {
            // Keep the most recent operand; a section operator uses it as its entry count.
            operand = token;
            continue;
        }

        String name = token.ToString();
        if (name.Equals("begin")) {
            seenBegin = true;
            openBlocks++;
        }
        else if (name.Equals("end")) {
            openBlocks--;
        }
        else if (name.Equals("beginbfchar")) {
            int total = ((PdfNumber)operand).IntValue;
            for (int n = 0; n < total; ++n) {
                String source = DecodeString((PdfString)parser.ReadPRObject());
                String dest = DecodeString((PdfString)parser.ReadPRObject());
                if (dest.Length != 1) {
                    continue;
                }
                int cid = (int)source[0];
                int unicode = (int)dest[0];
                int width = widths.ContainsKey(cid) ? widths[cid] : dw;
                metrics[unicode] = new int[] { cid, width };
            }
        }
        else if (name.Equals("beginbfrange")) {
            int total = ((PdfNumber)operand).IntValue;
            for (int n = 0; n < total; ++n) {
                String low = DecodeString((PdfString)parser.ReadPRObject());
                String high = DecodeString((PdfString)parser.ReadPRObject());
                int cid = (int)low[0];
                int lastCid = (int)high[0];
                PdfObject dest = parser.ReadPRObject();
                if (dest.IsString()) {
                    // A single start value: CIDs and Unicode values advance together.
                    String start = DecodeString((PdfString)dest);
                    if (start.Length == 1) {
                        int unicode = (int)start[0];
                        while (cid <= lastCid) {
                            int width = widths.ContainsKey(cid) ? widths[cid] : dw;
                            metrics[unicode] = new int[] { cid, width };
                            cid++;
                            unicode++;
                        }
                    }
                }
                else {
                    // An array with one destination string per CID.
                    PdfArray entries = (PdfArray)dest;
                    for (int j = 0; j < entries.Size; ++j, ++cid) {
                        String single = DecodeString(entries.GetAsString(j));
                        if (single.Length != 1) {
                            continue;
                        }
                        int unicode = (int)single[0];
                        int width = widths.ContainsKey(cid) ? widths[cid] : dw;
                        metrics[unicode] = new int[] { cid, width };
                    }
                }
            }
        }
    }
}
/**
 * Parses an inline image from the provided content parser: first its dictionary, then its samples.
 * The parser must be positioned immediately following the BI operator in the content stream.
 * The parser will be left with current position immediately following the EI operator that terminates the inline image.
 * @param ps the content parser to use for reading the image
 * @param colorSpaceDic handed to the sample parser together with the image dictionary
 * @return the samples and the dictionary of the parsed image
 * @throws IOException if anything goes wrong with the parsing
 * @throws InlineImageParseException if parsing of the inline image failed due to issues specific to inline image processing
 */
public static InlineImageInfo ParseInlineImage(PdfContentParser ps, PdfDictionary colorSpaceDic) {
    PdfDictionary dictionary = ParseInlineImageDictionary(ps);
    return new InlineImageInfo(ParseInlineImageSamples(dictionary, colorSpaceDic, ps), dictionary);
}
/**
 * Fills <code>metrics</code> from the <code>beginbfchar</code> and <code>beginbfrange</code>
 * sections of the CMap bytes in <code>touni</code>. Each section is read until its closing
 * operator (<code>endbfchar</code> / <code>endbfrange</code>). Mappings whose decoded Unicode
 * string is exactly one char are stored as <code>metrics[unicode] = {cid, width}</code>.
 * Scanning stops at the end of the data or once the outermost <code>begin</code>/<code>end</code>
 * pair has been closed; up to 50 unreadable objects are skipped at the top level.
 * A section that is cut off before its closing operator simply ends there, rather than
 * failing with a NullReferenceException.
 * @param touni the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)
 * @param widths widths keyed by CID
 * @param dw the width used when <code>widths</code> has no entry for a CID
 */
private void FillMetrics(byte[] touni, IntHashtable widths, int dw) {
    PdfContentParser ps = new PdfContentParser(new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(touni))));
    PdfObject ob = null;
    bool notFound = true;
    int nestLevel = 0;
    int maxExc = 50;
    while (notFound || nestLevel > 0) {
        try {
            ob = ps.ReadPRObject();
        }
        catch {
            // Skip unreadable objects, but only a bounded number of times.
            if (--maxExc < 0) {
                break;
            }
            continue;
        }
        if (ob == null) {
            break;
        }
        if (ob.Type != PdfContentParser.COMMAND_TYPE) {
            continue;
        }
        String command = ob.ToString();
        if (command.Equals("begin")) {
            notFound = false;
            nestLevel++;
        }
        else if (command.Equals("end")) {
            nestLevel--;
        }
        else if (command.Equals("beginbfchar")) {
            while (true) {
                PdfObject nx = ps.ReadPRObject();
                // A null object means the data ran out before "endbfchar".
                if (nx == null || nx.ToString().Equals("endbfchar")) {
                    break;
                }
                PdfObject uniObj = ps.ReadPRObject();
                if (uniObj == null) {
                    break;
                }
                String cid = DecodeString((PdfString)nx);
                String uni = DecodeString((PdfString)uniObj);
                if (uni.Length == 1) {
                    int cidc = (int)cid[0];
                    int unic = (int)uni[0];
                    int w = widths.ContainsKey(cidc) ? widths[cidc] : dw;
                    metrics[unic] = new int[] { cidc, w };
                }
            }
        }
        else if (command.Equals("beginbfrange")) {
            while (true) {
                PdfObject nx = ps.ReadPRObject();
                // A null object means the data ran out before "endbfrange".
                if (nx == null || nx.ToString().Equals("endbfrange")) {
                    break;
                }
                PdfObject endObj = ps.ReadPRObject();
                PdfObject ob2 = ps.ReadPRObject();
                if (endObj == null || ob2 == null) {
                    break;
                }
                String cid1 = DecodeString((PdfString)nx);
                String cid2 = DecodeString((PdfString)endObj);
                int cid1c = (int)cid1[0];
                int cid2c = (int)cid2[0];
                if (ob2.IsString()) {
                    // One start value: consecutive CIDs map to consecutive Unicode values.
                    String uni = DecodeString((PdfString)ob2);
                    if (uni.Length == 1) {
                        int unic = (int)uni[0];
                        for (; cid1c <= cid2c; cid1c++, unic++) {
                            int w = widths.ContainsKey(cid1c) ? widths[cid1c] : dw;
                            metrics[unic] = new int[] { cid1c, w };
                        }
                    }
                }
                else {
                    // An array: one destination string per CID, starting at the first CID.
                    PdfArray a = (PdfArray)ob2;
                    for (int j = 0; j < a.Size; ++j, ++cid1c) {
                        String uni = DecodeString(a.GetAsString(j));
                        if (uni.Length == 1) {
                            int unic = (int)uni[0];
                            int w = widths.ContainsKey(cid1c) ? widths[cid1c] : dw;
                            metrics[unic] = new int[] { cid1c, w };
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// Reads the CMap bytes in <paramref name="touni"/> and fills <c>_metrics</c> from its
/// <c>beginbfchar</c> and <c>beginbfrange</c> sections. The entry count of a section is the
/// object read just before the section operator. For every mapping whose decoded Unicode
/// string is exactly one char, <c>_metrics[unicode] = {cid, width}</c> is stored.
/// </summary>
/// <param name="touni">the raw CMap bytes (presumably a ToUnicode CMap - confirm against caller)</param>
/// <param name="widths">widths keyed by CID</param>
/// <param name="dw">the width used when <paramref name="widths"/> has no entry for a CID</param>
private void fillMetrics(byte[] touni, IntHashtable widths, int dw)
{
    PdfContentParser parser = new PdfContentParser(new PrTokeniser(touni));
    PdfObject previous = null;
    PdfObject current;
    while ((current = parser.ReadPrObject()) != null)
    {
        if (current.Type != PdfContentParser.COMMAND_TYPE)
        {
            // Remember the latest operand: it is the entry count when a section starts.
            previous = current;
            continue;
        }

        if (current.ToString().Equals("beginbfchar"))
        {
            int entries = ((PdfNumber)previous).IntValue;
            for (int i = 0; i < entries; ++i)
            {
                string cidText = decodeString((PdfString)parser.ReadPrObject());
                string uniText = decodeString((PdfString)parser.ReadPrObject());
                if (uniText.Length != 1)
                {
                    continue;
                }
                int cidCode = cidText[0];
                int uniCode = uniText[0];
                int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                _metrics[uniCode] = new int[] { cidCode, width };
            }
        }
        else if (current.ToString().Equals("beginbfrange"))
        {
            int entries = ((PdfNumber)previous).IntValue;
            for (int i = 0; i < entries; ++i)
            {
                string firstText = decodeString((PdfString)parser.ReadPrObject());
                string lastText = decodeString((PdfString)parser.ReadPrObject());
                int cidCode = firstText[0];
                int cidLimit = lastText[0];
                PdfObject target = parser.ReadPrObject();
                if (target.IsString())
                {
                    // One start value: walk CIDs and Unicode values in lock step.
                    string uniText = decodeString((PdfString)target);
                    if (uniText.Length == 1)
                    {
                        int uniCode = uniText[0];
                        while (cidCode <= cidLimit)
                        {
                            int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                            _metrics[uniCode] = new int[] { cidCode, width };
                            cidCode++;
                            uniCode++;
                        }
                    }
                }
                else
                {
                    // An array: one destination string per CID.
                    PdfArray targets = (PdfArray)target;
                    for (int j = 0; j < targets.Size; ++j, ++cidCode)
                    {
                        string uniText = decodeString(targets.GetAsString(j));
                        if (uniText.Length != 1)
                        {
                            continue;
                        }
                        int uniCode = uniText[0];
                        int width = widths.ContainsKey(cidCode) ? widths[cidCode] : dw;
                        _metrics[uniCode] = new int[] { cidCode, width };
                    }
                }
            }
        }
    }
}