/// <summary>
/// Reads the author, title, subject and keywords entries from a PDF's document
/// information and returns them in an <c>Info</c>.
/// </summary>
/// <param name="fileName">Path of the PDF file to load.</param>
/// <returns>
/// A new <c>Info</c>. When loading or reading fails, the exception is written to the
/// debug output and the <c>Info</c> built so far is returned instead of throwing.
/// </returns>
public static Info ReadDocInfo(string fileName)
{
    Info result = new Info();
    try
    {
        PDDocument pDoc = PDDocument.load(fileName);
        try
        {
            PDDocumentInformation docInfo = pDoc.getDocumentInformation();
            if (docInfo != null)
            {
                // Read every entry before assigning any, so a failing getter
                // leaves the result untouched rather than half-populated.
                var author = docInfo.getAuthor();
                var title = docInfo.getTitle();
                var summary = docInfo.getSubject();
                var keywords = docInfo.getKeywords();

                result.Author = author;
                result.Title = title;
                result.Summary = summary;
                result.Keywords = keywords;
            }
        }
        finally
        {
            // Release the loaded document even when reading the metadata fails.
            pDoc.close();
        }
    }
    catch (Exception ex)
    {
        // Best-effort read: callers still get a result, but the failure is no longer invisible.
        System.Diagnostics.Debug.WriteLine("Exception in reading document info: " + fileName + " ex: " + ex.Message);
    }
    return result;
}
/// <summary>
/// Copies the entries of a PDFBox document information object into a new
/// <c>PDFDocumentInformation</c>.
/// </summary>
/// <param name="pdfDocument">The document whose information is read.</param>
/// <returns>
/// A <c>PDFDocumentInformation</c> holding the author, creator, keywords, producer,
/// subject, title and trapped values as returned by PDFBox, with the creation and
/// modification dates passed through <c>ConvertJavaDateToCSharp</c>.
/// </returns>
internal PDFDocumentInformation GetDocumentInformation(PDDocument pdfDocument)
{
    PDDocumentInformation source = pdfDocument.getDocumentInformation();

    // Initializers run in textual order, matching the original assignment sequence.
    // The creation date is read exactly once; the earlier discarded read was redundant.
    return new PDFDocumentInformation()
    {
        Author = source.getAuthor(),
        CreationDate = this.ConvertJavaDateToCSharp(source.getCreationDate()),
        Creator = source.getCreator(),
        Keywords = source.getKeywords(),
        ModifiedDate = this.ConvertJavaDateToCSharp(source.getModificationDate()),
        Producer = source.getProducer(),
        Subject = source.getSubject(),
        Title = source.getTitle(),
        Trapped = source.getTrapped()
    };
}
/// <summary>
/// Fills in missing document information entries, then builds an XMP packet
/// (PDF/A identification, Dublin Core, Adobe PDF and XMP Basic schemas) from that
/// information and attaches it to the document catalog.
/// </summary>
/// <remarks>
/// The document is saved to <c>tempFileName</c> twice: once after the information
/// update and once after the XMP metadata has been attached.
/// </remarks>
private void DoMetadata()
{
    java.util.TimeZone gmt = java.util.TimeZone.getTimeZone("GMT");
    Calendar calendar = Calendar.getInstance();
    calendar.setTimeZone(gmt);

    // --- Document information: stamp the modification date and fill in defaults ---
    PDDocumentInformation info = this.doc.getDocumentInformation();
    info.setModificationDate(calendar);

    if (info.getAuthor() == null)
    {
        info.setAuthor("Aquaforest");
    }

    // A producer of null, "" or a single space is treated as missing.
    string existingProducer = info.getProducer();
    if (string.IsNullOrEmpty(existingProducer) || existingProducer == " ")
    {
        info.setProducer("Aquaforest PDFA - http://www.aquaforest.com");
    }

    if (info.getKeywords() == null)
    {
        info.setKeywords("");
    }

    // Rewrite the creation date through the same GMT calendar instance.
    calendar.setTime(this.GetDate(info.getCreationDate()));
    info.setCreationDate(calendar);

    this.doc.setDocumentInformation(info);
    this.doc.save(this.tempFileName);

    // --- XMP metadata, built from the information as it stands after the save ---
    info = this.doc.getDocumentInformation();
    PDDocumentCatalog catalog = this.doc.getDocumentCatalog();
    XMPMetadata xmp = XMPMetadata.createXMPMetadata();

    // Conformance level "a" (case-insensitive, surrounding whitespace ignored)
    // additionally gets a MarkInfo entry with Marked set to true.
    if (this.ConformanceLevel.Trim().ToLower() == "a")
    {
        PDMarkInfo markInfo = new PDMarkInfo();
        markInfo.setMarked(true);
        catalog.setMarkInfo(markInfo);
    }

    PDFAIdentificationSchema pdfaId = xmp.createAndAddPFAIdentificationSchema();
    pdfaId.setConformance(this.ConformanceLevel);
    pdfaId.setPart(new Integer(this.pdfaversion));

    // Each schema entry is written only when the matching information entry exists.
    DublinCoreSchema dublinCore = xmp.createAndAddDublinCoreSchema();
    string docTitle = info.getTitle();
    if (docTitle != null)
    {
        dublinCore.setTitle(docTitle);
    }
    string docSubject = info.getSubject();
    if (docSubject != null)
    {
        dublinCore.setDescription(docSubject);
    }
    string docAuthor = info.getAuthor();
    if (docAuthor != null)
    {
        dublinCore.addCreator(docAuthor);
    }

    AdobePDFSchema adobePdf = xmp.createAndAddAdobePDFSchema();
    string docProducer = info.getProducer();
    if (docProducer != null)
    {
        adobePdf.setProducer(docProducer);
    }
    string docKeywords = info.getKeywords();
    if (docKeywords != null)
    {
        adobePdf.setKeywords(docKeywords);
    }

    XMPBasicSchema xmpBasic = xmp.createAndAddXMPBasicSchema();
    string docCreator = info.getCreator();
    if (docCreator != null)
    {
        xmpBasic.setCreatorTool(docCreator);
    }
    if (info.getCreationDate() != null)
    {
        xmpBasic.setCreateDate(info.getCreationDate());
    }
    if (info.getModificationDate() != null)
    {
        xmpBasic.setModifyDate(info.getModificationDate());
    }
    xmpBasic.setMetadataDate(new GregorianCalendar());

    // --- Serialize the packet and attach it to the catalog ---
    XmpSerializer serializer = new XmpSerializer();
    ByteArrayOutputStream xmpBytes = new ByteArrayOutputStream();

    // NOTE(review): these two instances are never used afterwards. Presumably they are
    // constructed only to force the XML parser/transformer implementations to load
    // before serialization - confirm before removing.
    SAXParserFactoryImpl saxParserFactory = new SAXParserFactoryImpl();
    TransformerFactoryImpl transformerFactory = new TransformerFactoryImpl();

    serializer.serialize(xmp, xmpBytes, false);

    PDMetadata catalogMetadata = new PDMetadata(this.doc);
    catalogMetadata.importXMPMetadata(xmpBytes.toByteArray());
    catalog.setMetadata(catalogMetadata);

    this.doc.save(this.tempFileName);
}
/// <summary>
/// Builds a search-index <c>Document</c> for a file by combining text and metadata
/// extracted from the PDF at <paramref name="filePath"/> with the values stored on
/// <paramref name="doc"/>.
/// </summary>
/// <param name="filePath">Path of the PDF file to extract text and metadata from.</param>
/// <param name="doc">Record supplying the file name, short name, title, summary and id.</param>
/// <returns>
/// The populated <c>Document</c>. If reading the PDF fails, the exception is written to
/// the debug output and the document is built from whatever was extracted before the
/// failure plus the values on <paramref name="doc"/>.
/// </returns>
public static Document ParseDocument(string filePath, GEN_FILE doc)
{
    string author = null;
    string keywords = null;
    string summary = null;
    string text = null;

    try
    {
        PDDocument document = null;
        try
        {
            PDFTextStripper stripper = new PDFTextStripper();
            document = PDDocument.load(filePath);
            text = stripper.getText(document);

            PDDocumentInformation info = document.getDocumentInformation();
            author = info.getAuthor();
            keywords = info.getKeywords();
            summary = info.getSubject();
        }
        finally
        {
            // Close the PDF even when extraction throws; a failure in close() itself
            // is still caught and logged by the outer handler.
            if (document != null)
            {
                document.close();
            }
        }
    }
    catch (Exception ex)
    {
        Debug.WriteLine("Exception in reading file: " + filePath + " ex: " + ex.Message);
    }

    Document lucDoc = new Document();
    string filename = Path.GetFileNameWithoutExtension(doc.File_Name);
    string short_name = doc.Short_Name;
    string title = doc.Title;
    string header = doc.Summary;
    string doc_id = doc.Gen_File_Id.ToString();
    Debug.WriteLine("DocID: " + doc_id);

    // NOTE(review): the concatenated keyword string below is built but never added to
    // the index document - confirm whether a field for it is missing or this is dead code.
    StringBuilder keyTextBuilder = new StringBuilder();
    foreach (FILE_KEYWORDS keywordobj in doc.FILE_KEYWORDS.ToList())
    {
        keyTextBuilder.Append(keywordobj.Keyword + " ");
    }
    string keyword = keyTextBuilder.ToString();

    lucDoc.Add(new Field(FieldNames.FILE_NAME, filename, Field.Store.YES, Field.Index.ANALYZED));

    // Values extracted from the PDF are optional: blank ones are left out of the index.
    if (!String.IsNullOrWhiteSpace(author))
    {
        lucDoc.Add(new Field(FieldNames.AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (!String.IsNullOrWhiteSpace(keywords))
    {
        lucDoc.Add(new Field(FieldNames.KEYWORDS, keywords, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (!String.IsNullOrWhiteSpace(summary))
    {
        lucDoc.Add(new Field(FieldNames.SUMMARY, summary, Field.Store.YES, Field.Index.ANALYZED));
    }

    lucDoc.Add(new Field(FieldNames.SHORT_NAME, short_name, Field.Store.YES, Field.Index.ANALYZED));
    lucDoc.Add(new Field(FieldNames.TITLE, title, Field.Store.YES, Field.Index.ANALYZED));

    if (!String.IsNullOrWhiteSpace(header))
    {
        lucDoc.Add(new Field(FieldNames.HEADER, header, Field.Store.YES, Field.Index.ANALYZED));
    }
    if (!String.IsNullOrWhiteSpace(text))
    {
        lucDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
    }

    // The id is stored for retrieval only; the resource type is indexed as a single exact term.
    lucDoc.Add(new Field(FieldNames.DOC_ID, doc_id, Field.Store.YES, Field.Index.NO));
    lucDoc.Add(new Field(FieldNames.RESOURCE_TYPE, ResourceTypeEnum.Resource_Doc.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    return lucDoc;
}