/// <summary>
/// Reads author, title, subject and keywords from the document information
/// of a PDF file.
/// </summary>
/// <param name="fileName">Path of the PDF file to load.</param>
/// <returns>
/// An <c>Info</c> whose Author, Title, Summary and Keywords are copied from the
/// document information. If loading or reading fails, the instance is returned
/// exactly as <c>new Info()</c> created it (best-effort read, no exception escapes).
/// </returns>
public static Info ReadDocInfo(string fileName)
{
    Info result = new Info();
    PDDocument pDoc = null;
    try
    {
        pDoc = PDDocument.load(fileName);
        PDDocumentInformation docInfo = pDoc.getDocumentInformation();
        if (docInfo != null)
        {
            // Read every value first so that a failing getter leaves result untouched.
            var author = docInfo.getAuthor();
            var title = docInfo.getTitle();
            var summary = docInfo.getSubject();
            var keywords = docInfo.getKeywords();

            result.Author = author;
            result.Title = title;
            result.Summary = summary;
            result.Keywords = keywords;
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: callers still get an Info back, but the
        // failure is no longer invisible.
        System.Diagnostics.Debug.WriteLine("ReadDocInfo failed for '" + fileName + "': " + ex.Message);
    }
    finally
    {
        // The loaded document holds on to the underlying file; always release it.
        if (pDoc != null)
        {
            try
            {
                pDoc.close();
            }
            catch (Exception closeEx)
            {
                System.Diagnostics.Debug.WriteLine("ReadDocInfo could not close '" + fileName + "': " + closeEx.Message);
            }
        }
    }
    return result;
}
/// <summary>
/// Copies the document information of a PDFBox document into a new
/// <c>PDFDocumentInformation</c>.
/// </summary>
/// <param name="pdfDocument">Document whose information is read.</param>
/// <returns>
/// A new <c>PDFDocumentInformation</c> carrying author, creation date, creator,
/// keywords, modification date, producer, subject, title and trapped value.
/// Both dates are passed through <c>ConvertJavaDateToCSharp</c>.
/// </returns>
internal PDFDocumentInformation GetDocumentInformation(PDDocument pdfDocument)
{
    PDDocumentInformation documentInformation = pdfDocument.getDocumentInformation();

    // Initializer members are evaluated top to bottom, i.e. in the same order
    // as the individual assignments this replaces. The former stand-alone
    // getCreationDate() call, whose result was discarded, has been dropped.
    return new PDFDocumentInformation()
    {
        Author = documentInformation.getAuthor(),
        CreationDate = this.ConvertJavaDateToCSharp(documentInformation.getCreationDate()),
        Creator = documentInformation.getCreator(),
        Keywords = documentInformation.getKeywords(),
        ModifiedDate = this.ConvertJavaDateToCSharp(documentInformation.getModificationDate()),
        Producer = documentInformation.getProducer(),
        Subject = documentInformation.getSubject(),
        Title = documentInformation.getTitle(),
        Trapped = documentInformation.getTrapped()
    };
}
/// <summary>
/// Completes the document information of <c>this.doc</c>, builds an XMP metadata
/// packet that mirrors it (PDF/A identification, Dublin Core, Adobe PDF and XMP
/// Basic schemas), attaches the packet to the document catalog and saves the
/// document to <c>this.tempFileName</c> (once after the information update and
/// once after the XMP packet is attached).
/// </summary>
private void DoMetadata()
{
    // Calendar set to the GMT time zone; used for the modification date and,
    // further down, re-used to carry the creation date.
    java.util.TimeZone timeZone = java.util.TimeZone.getTimeZone("GMT");
    Calendar instance = Calendar.getInstance();
    instance.setTimeZone(timeZone);
    PDDocumentInformation documentInformation = this.doc.getDocumentInformation();
    documentInformation.setModificationDate(instance);
    // Fill in defaults for author, producer and keywords when they are missing.
    if (documentInformation.getAuthor() == null)
    {
        documentInformation.setAuthor("Aquaforest");
    }
    // Producer is replaced when it is null, empty, or exactly one space.
    if ((string.IsNullOrEmpty(documentInformation.getProducer()) ? true : documentInformation.getProducer() == " "))
    {
        documentInformation.setProducer("Aquaforest PDFA - http://www.aquaforest.com");
    }
    if (documentInformation.getKeywords() == null)
    {
        documentInformation.setKeywords("");
    }
    // The result of this call is discarded; the same getter is called again on the next line.
    documentInformation.getCreationDate();
    // Re-write the creation date from whatever this.GetDate derives from the current value.
    // NOTE(review): 'instance' is the same Calendar object already passed to
    // setModificationDate above; this relies on that call having captured the
    // value rather than the reference - confirm.
    instance.setTime(this.GetDate(documentInformation.getCreationDate()));
    documentInformation.setCreationDate(instance);
    this.doc.setDocumentInformation(documentInformation);
    // First save: the updated document information is written out before the XMP packet is built.
    this.doc.save(this.tempFileName);
    documentInformation = this.doc.getDocumentInformation();
    PDDocumentCatalog documentCatalog = this.doc.getDocumentCatalog();
    XMPMetadata xMPMetadatum = XMPMetadata.createXMPMetadata();
    // Conformance level "a" (case-insensitive, surrounding whitespace ignored)
    // additionally gets a MarkInfo entry with Marked = true on the catalog.
    if (this.ConformanceLevel.Trim().ToLower() == "a")
    {
        PDMarkInfo pDMarkInfo = new PDMarkInfo();
        pDMarkInfo.setMarked(true);
        documentCatalog.setMarkInfo(pDMarkInfo);
    }
    // PDF/A identification schema: conformance level and part number.
    PDFAIdentificationSchema pDFAIdentificationSchema = xMPMetadatum.createAndAddPFAIdentificationSchema();
    pDFAIdentificationSchema.setConformance(this.ConformanceLevel);
    pDFAIdentificationSchema.setPart(new Integer(this.pdfaversion));
    // Dublin Core schema: title, description (from the subject) and creator (from the author).
    DublinCoreSchema dublinCoreSchema = xMPMetadatum.createAndAddDublinCoreSchema();
    // 'title' is re-used below as a scratch variable for each string read from
    // the document information; only non-null values are copied into the schemas.
    string title = documentInformation.getTitle();
    if (title != null)
    {
        dublinCoreSchema.setTitle(title);
    }
    title = documentInformation.getSubject();
    if (title != null)
    {
        dublinCoreSchema.setDescription(title);
    }
    title = documentInformation.getAuthor();
    if (title != null)
    {
        dublinCoreSchema.addCreator(title);
    }
    // Adobe PDF schema: producer and keywords.
    AdobePDFSchema adobePDFSchema = xMPMetadatum.createAndAddAdobePDFSchema();
    title = 
documentInformation.getProducer();
    if (title != null)
    {
        adobePDFSchema.setProducer(title);
    }
    title = documentInformation.getKeywords();
    if (title != null)
    {
        adobePDFSchema.setKeywords(title);
    }
    // XMP Basic schema: creator tool, creation date, modification date and metadata date.
    XMPBasicSchema xMPBasicSchema = xMPMetadatum.createAndAddXMPBasicSchema();
    title = documentInformation.getCreator();
    if (title != null)
    {
        xMPBasicSchema.setCreatorTool(title);
    }
    if (documentInformation.getCreationDate() != null)
    {
        xMPBasicSchema.setCreateDate(documentInformation.getCreationDate());
    }
    if (documentInformation.getModificationDate() != null)
    {
        xMPBasicSchema.setModifyDate(documentInformation.getModificationDate());
    }
    xMPBasicSchema.setMetadataDate(new GregorianCalendar());
    XmpSerializer xmpSerializer = new XmpSerializer();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    // NOTE(review): the two factory instances below are never used afterwards;
    // presumably they are constructed only to force the corresponding classes
    // to be loaded - confirm before removing.
    SAXParserFactoryImpl sAXParserFactoryImpl = new SAXParserFactoryImpl();
    TransformerFactoryImpl transformerFactoryImpl = new TransformerFactoryImpl();
    // Serialize the XMP packet into memory and attach it to the catalog as the document metadata.
    xmpSerializer.serialize(xMPMetadatum, byteArrayOutputStream, false);
    PDMetadata pDMetadatum = new PDMetadata(this.doc);
    pDMetadatum.importXMPMetadata(byteArrayOutputStream.toByteArray());
    documentCatalog.setMetadata(pDMetadatum);
    // Second save: the document now carries the XMP packet.
    this.doc.save(this.tempFileName);
    // This final re-read assigns a local that is not used again before the method returns.
    documentInformation = this.doc.getDocumentInformation();
}
/// <summary>
/// Creates an instance whose <c>documentInformation</c> member is a newly
/// constructed <c>PDDocumentInformation</c>.
/// </summary>
public PDFDocumentInformation()
{
    documentInformation = new PDDocumentInformation();
}
/// <summary>
/// Builds the Lucene document for a PDF file from the text and document
/// information extracted from the PDF plus the values of its <c>GEN_FILE</c> record.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <param name="doc">Record supplying file name, short name, title, summary, id and keywords.</param>
/// <returns>
/// A Lucene <c>Document</c>. FILE_NAME, SHORT_NAME, TITLE, DOC_ID and RESOURCE_TYPE
/// are always added; AUTHOR, KEYWORDS, SUMMARY, HEADER and TEXT are added only when
/// their value is not null, empty or whitespace.
/// </returns>
/// <remarks>
/// Reading the PDF is best-effort: if it fails, the failure is written to the
/// debug output and the document is built from the record values alone.
/// </remarks>
public static Document ParseDocument(string filePath, GEN_FILE doc)
{
    string author = null;
    string keywords = null;
    string summary = null;
    string text = null;

    PDDocument document = null;
    try
    {
        PDFTextStripper stripper = new PDFTextStripper();
        document = PDDocument.load(filePath);
        text = stripper.getText(document);
        PDDocumentInformation info = document.getDocumentInformation();
        author = info.getAuthor();
        keywords = info.getKeywords();
        summary = info.getSubject();
    }
    catch (Exception ex)
    {
        Debug.WriteLine("Exception in reading file: " + filePath + " ex: " + ex.Message);
    }
    finally
    {
        // Close in finally so the file is released even when text extraction
        // or metadata reading throws part-way through.
        if (document != null)
        {
            try
            {
                document.close();
            }
            catch (Exception ex)
            {
                Debug.WriteLine("Exception in reading file: " + filePath + " ex: " + ex.Message);
            }
        }
    }

    Document lucDoc = new Document();
    string filename = Path.GetFileNameWithoutExtension(doc.File_Name);
    string short_name = doc.Short_Name;
    string title = doc.Title;
    string header = doc.Summary;
    string doc_id = doc.Gen_File_Id.ToString();
    Debug.WriteLine("DocID: " + doc_id);

    // NOTE(review): the record's keywords are concatenated here but the result
    // is never added to the index; only the keywords read from the PDF are
    // indexed below. Kept as-is - confirm whether these should be indexed too.
    StringBuilder keyTextBuilder = new StringBuilder();
    foreach (FILE_KEYWORDS keywordobj in doc.FILE_KEYWORDS.ToList())
    {
        keyTextBuilder.Append(keywordobj.Keyword + " ");
    }
    string keyword = keyTextBuilder.ToString();

    lucDoc.Add(new Field(FieldNames.FILE_NAME, filename, Field.Store.YES, Field.Index.ANALYZED));
    if (!String.IsNullOrWhiteSpace(author))
    {
        lucDoc.Add(new Field(FieldNames.AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (!String.IsNullOrWhiteSpace(keywords))
    {
        lucDoc.Add(new Field(FieldNames.KEYWORDS, keywords, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (!String.IsNullOrWhiteSpace(summary))
    {
        lucDoc.Add(new Field(FieldNames.SUMMARY, summary, Field.Store.YES, Field.Index.ANALYZED));
    }
    lucDoc.Add(new Field(FieldNames.SHORT_NAME, short_name, Field.Store.YES, Field.Index.ANALYZED));
    lucDoc.Add(new Field(FieldNames.TITLE, title, Field.Store.YES, Field.Index.ANALYZED));
    if (!String.IsNullOrWhiteSpace(header))
    {
        lucDoc.Add(new Field(FieldNames.HEADER, header, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (!String.IsNullOrWhiteSpace(text))
    {
        lucDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
    }
    // The id is stored for retrieval only (Field.Index.NO); the resource type is
    // indexed as a single un-analyzed term.
    lucDoc.Add(new Field(FieldNames.DOC_ID, doc_id, Field.Store.YES, Field.Index.NO));
    lucDoc.Add(new Field(FieldNames.RESOURCE_TYPE, ResourceTypeEnum.Resource_Doc.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    return lucDoc;
}
/// <summary>
/// Attaches a fresh XMP metadata packet (PDF/A id, Dublin Core, XMP Basic and
/// PDF schemas) to the catalog of <c>doc</c> and replaces the document
/// information with one carrying the producer and author.
/// </summary>
/// <param name="producer">Producer name; " (via mustangproject.org " + versionStr + ")" is appended to it.</param>
/// <param name="creator">Used as Dublin Core creator, XMP creator tool and document author.</param>
/// <param name="attachZugferdHeaders">When true, <c>addZugferdXMP</c> is called on the XMP packet.</param>
/// <returns>The document catalog of <c>doc</c>, with the new metadata set on it.</returns>
private PDDocumentCatalog makeDocPDFA3compliant(String producer, String creator, bool attachZugferdHeaders)
{
    string producerWithVersion = producer + " (via mustangproject.org " + versionStr + ")";

    PDDocumentCatalog catalog = doc.getDocumentCatalog();
    PDMetadata catalogMetadata = new PDMetadata(doc);
    catalog.setMetadata(catalogMetadata);

    // This is the jempbox org.apache.jempbox.xmp.XMPMetadata, not the xmpbox class.
    XMPMetadata xmpPacket = new XMPMetadata();

    XMPSchemaPDFAId pdfaIdSchema = new XMPSchemaPDFAId(xmpPacket);
    pdfaIdSchema.setAbout("");
    xmpPacket.addSchema(pdfaIdSchema);

    XMPSchemaDublinCore dublinCore = xmpPacket.addDublinCoreSchema();
    dublinCore.addCreator(creator);
    dublinCore.setAbout("");

    XMPSchemaBasic basicSchema = xmpPacket.addBasicSchema();
    basicSchema.setAbout("");
    basicSchema.setCreatorTool(creator);
    basicSchema.setCreateDate(GregorianCalendar.getInstance());

    // A new document information object is used instead of the existing
    // doc.getDocumentInformation(), so only producer and author are set on it.
    PDDocumentInformation docInfo = new PDDocumentInformation();
    docInfo.setProducer(producerWithVersion);
    docInfo.setAuthor(creator);
    doc.setDocumentInformation(docInfo);

    XMPSchemaPDF pdfSchema = xmpPacket.addPDFSchema();
    pdfSchema.setProducer(producerWithVersion);
    pdfSchema.setAbout("");

    // Intentionally disabled: PDF/A-3a is tagged PDF, which has to be expressed
    // with a MarkInfo dictionary (PDF/A-3 standard, sec. 6.7.2.2):
    //
    //   PDMarkInfo markinfo = new PDMarkInfo();
    //   markinfo.setMarked(true);
    //   doc.getDocumentCatalog().setMarkInfo(markinfo);
    //
    // To be on the safe side, level B without MarkInfo is used, because it
    // cannot be guaranteed that the user correctly tagged the PDF templates.
    pdfaIdSchema.setConformance(conformanceLevel);
    pdfaIdSchema.setPart(new java.lang.Integer(3));

    if (attachZugferdHeaders)
    {
        // The only ZUGFeRD-specific step: adds PDF metadata specifically for
        // ZUGFeRD rather than generically for an embedded file.
        addZugferdXMP(xmpPacket);
    }

    catalogMetadata.importXMPMetadata(xmpPacket);
    return catalog;
}