コード例 #1
0
        /// <summary>
        /// Reads the author, title, subject and keywords from a PDF's document
        /// information dictionary.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to open.</param>
        /// <returns>
        /// An <c>Info</c> populated from the document information. If the file
        /// cannot be loaded or read, the object is returned as constructed by
        /// <c>new Info()</c>; this method is best-effort and does not throw.
        /// </returns>
        public static Info ReadDocInfo(string fileName)
        {
            Info       result = new Info();
            PDDocument pDoc   = null;

            try
            {
                pDoc = PDDocument.load(fileName);

                PDDocumentInformation docInfo = pDoc.getDocumentInformation();

                if (docInfo != null)
                {
                    result.Author   = docInfo.getAuthor();
                    result.Title    = docInfo.getTitle();
                    result.Summary  = docInfo.getSubject();
                    result.Keywords = docInfo.getKeywords();
                }
            }
            catch (Exception ex)
            {
                // Best-effort read: callers still get a result object, but the
                // failure is no longer completely silent in debug builds.
                System.Diagnostics.Debug.WriteLine("ReadDocInfo failed for " + fileName + ": " + ex.Message);
            }
            finally
            {
                // The loaded document holds the underlying file open until it is
                // closed, so always release it, even when reading failed.
                if (pDoc != null)
                {
                    try
                    {
                        pDoc.close();
                    }
                    catch (Exception closeEx)
                    {
                        // A failure while closing must not break the never-throws contract.
                        System.Diagnostics.Debug.WriteLine("ReadDocInfo could not close " + fileName + ": " + closeEx.Message);
                    }
                }
            }
            return result;
        }
コード例 #2
0
        /// <summary>
        /// Copies the fields of a PDFBox document information dictionary into a
        /// new <c>PDFDocumentInformation</c>.
        /// </summary>
        /// <param name="pdfDocument">The loaded PDF document to read from.</param>
        /// <returns>
        /// A new <c>PDFDocumentInformation</c> carrying author, creation date,
        /// creator, keywords, modification date, producer, subject, title and
        /// trapped state. The two dates are passed through
        /// <c>ConvertJavaDateToCSharp</c>.
        /// </returns>
        internal PDFDocumentInformation GetDocumentInformation(PDDocument pdfDocument)
        {
            PDDocumentInformation documentInformation = pdfDocument.getDocumentInformation();

            // The getters are evaluated in the same order as before; the only change
            // is that the creation date is no longer fetched once and thrown away.
            return new PDFDocumentInformation()
            {
                Author       = documentInformation.getAuthor(),
                CreationDate = this.ConvertJavaDateToCSharp(documentInformation.getCreationDate()),
                Creator      = documentInformation.getCreator(),
                Keywords     = documentInformation.getKeywords(),
                ModifiedDate = this.ConvertJavaDateToCSharp(documentInformation.getModificationDate()),
                Producer     = documentInformation.getProducer(),
                Subject      = documentInformation.getSubject(),
                Title        = documentInformation.getTitle(),
                Trapped      = documentInformation.getTrapped()
            };
        }
コード例 #3
0
        /// <summary>
        /// Normalises the document information dictionary of <c>this.doc</c>, builds a
        /// matching XMP metadata packet (PDF/A identification, Dublin Core, Adobe PDF
        /// and XMP Basic schemas), attaches it to the document catalog, and saves the
        /// document to <c>this.tempFileName</c> (once after the info update and once
        /// after the XMP packet is attached).
        /// </summary>
        private void DoMetadata()
        {
            // Calendar for "now", switched to the GMT time zone.
            java.util.TimeZone timeZone = java.util.TimeZone.getTimeZone("GMT");
            Calendar           instance = Calendar.getInstance();

            instance.setTimeZone(timeZone);
            PDDocumentInformation documentInformation = this.doc.getDocumentInformation();

            // Stamp the modification date, then fill in defaults for missing fields.
            documentInformation.setModificationDate(instance);
            if (documentInformation.getAuthor() == null)
            {
                documentInformation.setAuthor("Aquaforest");
            }
            // Producer is replaced when it is null, empty, or exactly one space.
            if ((string.IsNullOrEmpty(documentInformation.getProducer()) ? true : documentInformation.getProducer() == " "))
            {
                documentInformation.setProducer("Aquaforest PDFA - http://www.aquaforest.com");
            }
            if (documentInformation.getKeywords() == null)
            {
                documentInformation.setKeywords("");
            }
            // NOTE(review): the result of this call is discarded; it looks redundant
            // with the call on the next line - confirm before removing.
            documentInformation.getCreationDate();
            // Re-write the creation date from whatever GetDate derives from the existing
            // value (GetDate is defined elsewhere). The same Calendar object that was
            // passed to setModificationDate above is mutated and reused here.
            // NOTE(review): this is only safe if setModificationDate does not keep a
            // reference to the Calendar - verify against the PDFBox version in use.
            instance.setTime(this.GetDate(documentInformation.getCreationDate()));
            documentInformation.setCreationDate(instance);
            this.doc.setDocumentInformation(documentInformation);
            // First save: persists the updated information dictionary.
            this.doc.save(this.tempFileName);
            documentInformation = this.doc.getDocumentInformation();
            PDDocumentCatalog documentCatalog = this.doc.getDocumentCatalog();
            XMPMetadata       xMPMetadatum    = XMPMetadata.createXMPMetadata();

            // Only conformance level "a" (compared trimmed and lower-cased) gets a
            // MarkInfo dictionary with Marked = true.
            if (this.ConformanceLevel.Trim().ToLower() == "a")
            {
                PDMarkInfo pDMarkInfo = new PDMarkInfo();
                pDMarkInfo.setMarked(true);
                documentCatalog.setMarkInfo(pDMarkInfo);
            }
            PDFAIdentificationSchema pDFAIdentificationSchema = xMPMetadatum.createAndAddPFAIdentificationSchema();

            // NOTE(review): the raw ConformanceLevel is written here, whereas the check
            // above used the trimmed, lower-cased value - confirm that is intended.
            pDFAIdentificationSchema.setConformance(this.ConformanceLevel);
            pDFAIdentificationSchema.setPart(new Integer(this.pdfaversion));
            DublinCoreSchema dublinCoreSchema = xMPMetadatum.createAndAddDublinCoreSchema();
            // "title" is reused below as a scratch variable for each string field in turn.
            string           title            = documentInformation.getTitle();

            // Dublin Core: title, description (from Subject) and creator (from Author),
            // each only when the source field is non-null.
            if (title != null)
            {
                dublinCoreSchema.setTitle(title);
            }
            title = documentInformation.getSubject();
            if (title != null)
            {
                dublinCoreSchema.setDescription(title);
            }
            title = documentInformation.getAuthor();
            if (title != null)
            {
                dublinCoreSchema.addCreator(title);
            }
            AdobePDFSchema adobePDFSchema = xMPMetadatum.createAndAddAdobePDFSchema();

            // Adobe PDF schema: producer and keywords.
            title = documentInformation.getProducer();
            if (title != null)
            {
                adobePDFSchema.setProducer(title);
            }
            title = documentInformation.getKeywords();
            if (title != null)
            {
                adobePDFSchema.setKeywords(title);
            }
            XMPBasicSchema xMPBasicSchema = xMPMetadatum.createAndAddXMPBasicSchema();

            // XMP Basic schema: creator tool plus creation / modification dates.
            title = documentInformation.getCreator();
            if (title != null)
            {
                xMPBasicSchema.setCreatorTool(title);
            }
            if (documentInformation.getCreationDate() != null)
            {
                xMPBasicSchema.setCreateDate(documentInformation.getCreationDate());
            }
            if (documentInformation.getModificationDate() != null)
            {
                xMPBasicSchema.setModifyDate(documentInformation.getModificationDate());
            }
            // Metadata date is a freshly constructed GregorianCalendar.
            xMPBasicSchema.setMetadataDate(new GregorianCalendar());
            XmpSerializer          xmpSerializer          = new XmpSerializer();
            ByteArrayOutputStream  byteArrayOutputStream  = new ByteArrayOutputStream();
            // NOTE(review): these two factory instances are never used in this method.
            // Presumably they are instantiated only to force the XML implementation
            // assemblies to load under IKVM - confirm before removing.
            SAXParserFactoryImpl   sAXParserFactoryImpl   = new SAXParserFactoryImpl();
            TransformerFactoryImpl transformerFactoryImpl = new TransformerFactoryImpl();

            // Serialise the XMP packet to memory and attach it to the catalog.
            // NOTE(review): the meaning of the trailing "false" flag is not visible
            // here - check the XmpSerializer.serialize documentation.
            xmpSerializer.serialize(xMPMetadatum, byteArrayOutputStream, false);
            PDMetadata pDMetadatum = new PDMetadata(this.doc);

            pDMetadatum.importXMPMetadata(byteArrayOutputStream.toByteArray());
            documentCatalog.setMetadata(pDMetadatum);
            // Second save: persists the document with the XMP metadata attached.
            this.doc.save(this.tempFileName);
            // NOTE(review): this assignment to a local is not read afterwards.
            documentInformation = this.doc.getDocumentInformation();
        }
コード例 #4
0
 /// <summary>
 /// Creates an instance whose <c>documentInformation</c> member is a new,
 /// empty <c>PDDocumentInformation</c>.
 /// </summary>
 public PDFDocumentInformation()
 {
     documentInformation = new PDDocumentInformation();
 }
コード例 #5
0
ファイル: PDFParser.cs プロジェクト: tmcmil/cset
        /// <summary>
        /// Builds a Lucene document for a PDF file, combining text and metadata
        /// extracted from the PDF with fields taken from its database record.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to read.</param>
        /// <param name="doc">
        /// Database record supplying file name, short name, title, summary and id.
        /// </param>
        /// <returns>
        /// A Lucene <c>Document</c>. If the PDF cannot be read, the failure is
        /// logged and the document is built from the database record alone.
        /// </returns>
        public static Document ParseDocument(string filePath, GEN_FILE doc)
        {
            string author   = null;
            string keywords = null;
            string summary  = null;
            string text     = null;

            PDDocument document = null;

            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                document = PDDocument.load(filePath);
                text     = stripper.getText(document);
                PDDocumentInformation info = document.getDocumentInformation();
                author   = info.getAuthor();
                keywords = info.getKeywords();
                summary  = info.getSubject();
            }
            catch (Exception ex)
            {
                Debug.WriteLine("Exception in reading file: " + filePath + " ex: " + ex.Message);
            }
            finally
            {
                // Close in finally so the PDF is released even when text extraction or
                // metadata access throws; previously close() was skipped on failure.
                if (document != null)
                {
                    try
                    {
                        document.close();
                    }
                    catch (Exception closeEx)
                    {
                        Debug.WriteLine("Exception in closing file: " + filePath + " ex: " + closeEx.Message);
                    }
                }
            }

            Document lucDoc     = new Document();
            string   filename   = Path.GetFileNameWithoutExtension(doc.File_Name);
            string   short_name = doc.Short_Name;
            string   title      = doc.Title;
            string   header     = doc.Summary;
            string   doc_id     = doc.Gen_File_Id.ToString();

            Debug.WriteLine("DocID: " + doc_id);

            // NOTE(review): the concatenated keyword string below is built but never
            // added to the index. It is kept because enumerating doc.FILE_KEYWORDS may
            // have side effects (for example a lazy load) - confirm whether a field was
            // meant to be added here or whether this can be removed.
            StringBuilder keyTextBuilder = new StringBuilder();

            foreach (FILE_KEYWORDS keywordobj in doc.FILE_KEYWORDS.ToList())
            {
                keyTextBuilder.Append(keywordobj.Keyword + " ");
            }
            string keyword = keyTextBuilder.ToString();

            lucDoc.Add(new Field(FieldNames.FILE_NAME, filename, Field.Store.YES, Field.Index.ANALYZED));

            // Optional PDF metadata is indexed only when it carries non-blank content.
            if (!String.IsNullOrWhiteSpace(author))
            {
                lucDoc.Add(new Field(FieldNames.AUTHOR, author, Field.Store.YES, Field.Index.ANALYZED));
            }

            if (!String.IsNullOrWhiteSpace(keywords))
            {
                lucDoc.Add(new Field(FieldNames.KEYWORDS, keywords, Field.Store.YES, Field.Index.ANALYZED));
            }

            if (!String.IsNullOrWhiteSpace(summary))
            {
                lucDoc.Add(new Field(FieldNames.SUMMARY, summary, Field.Store.YES, Field.Index.ANALYZED));
            }
            lucDoc.Add(new Field(FieldNames.SHORT_NAME, short_name, Field.Store.YES, Field.Index.ANALYZED));
            lucDoc.Add(new Field(FieldNames.TITLE, title, Field.Store.YES, Field.Index.ANALYZED));

            if (!String.IsNullOrWhiteSpace(header))
            {
                lucDoc.Add(new Field(FieldNames.HEADER, header, Field.Store.YES, Field.Index.ANALYZED));
            }
            if (!String.IsNullOrWhiteSpace(text))
            {
                lucDoc.Add(new Field(FieldNames.TEXT, text, Field.Store.YES, Field.Index.ANALYZED));
            }

            // The id is stored but not indexed; the resource type is indexed verbatim.
            lucDoc.Add(new Field(FieldNames.DOC_ID, doc_id, Field.Store.YES, Field.Index.NO));
            lucDoc.Add(new Field(FieldNames.RESOURCE_TYPE, ResourceTypeEnum.Resource_Doc.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

            return lucDoc;
        }
        /// <summary>
        /// Attaches PDF/A-3 XMP metadata (PDF/A id, Dublin Core, XMP Basic and PDF
        /// schemas) to <c>doc</c> and replaces its document information with a
        /// fresh one carrying the producer and author.
        /// </summary>
        /// <param name="producer">
        /// Producer name; a "(via mustangproject.org ...)" suffix containing
        /// <c>versionStr</c> is appended to it.
        /// </param>
        /// <param name="creator">Used as creator, creator tool and author.</param>
        /// <param name="attachZugferdHeaders">
        /// When true, <c>addZugferdXMP</c> is called on the XMP packet before import.
        /// </param>
        /// <returns>The document catalog of <c>doc</c>, with the metadata stream set.</returns>
        private PDDocumentCatalog makeDocPDFA3compliant(String producer,
                                                        String creator, bool attachZugferdHeaders)
        {
            String producerWithVersion = producer + " (via mustangproject.org "
                                         + versionStr + ")";

            PDDocumentCatalog catalog        = doc.getDocumentCatalog();
            PDMetadata        metadataStream = new PDMetadata(doc);

            catalog.setMetadata(metadataStream);

            // Note: this is the jempbox XMPMetadata
            // (org.apache.jempbox.xmp.XMPMetadata), not the xmpbox class.
            XMPMetadata xmpPacket = new XMPMetadata();

            // PDF/A identification schema (part and conformance are set further down).
            XMPSchemaPDFAId pdfaIdSchema = new XMPSchemaPDFAId(xmpPacket);
            pdfaIdSchema.setAbout("");
            xmpPacket.addSchema(pdfaIdSchema);

            // Dublin Core schema.
            XMPSchemaDublinCore dublinCore = xmpPacket.addDublinCoreSchema();
            dublinCore.addCreator(creator);
            dublinCore.setAbout("");

            // XMP Basic schema.
            XMPSchemaBasic basicSchema = xmpPacket.addBasicSchema();
            basicSchema.setAbout("");
            basicSchema.setCreatorTool(creator);
            basicSchema.setCreateDate(GregorianCalendar.getInstance());

            // A brand-new information dictionary is installed rather than updating
            // the one already on the document (doc.getDocumentInformation()).
            PDDocumentInformation freshInfo = new PDDocumentInformation();
            freshInfo.setProducer(producerWithVersion);
            freshInfo.setAuthor(creator);
            doc.setDocumentInformation(freshInfo);

            // PDF schema.
            XMPSchemaPDF pdfSchema = xmpPacket.addPDFSchema();
            pdfSchema.setProducer(producerWithVersion);
            pdfSchema.setAbout("");

            // PDF/A-3a is tagged PDF and would need a MarkInfo dictionary with
            // Marked = true (PDF/A-3 standard, sec. 6.7.2.2), i.e.:
            //   PDMarkInfo markinfo = new PDMarkInfo();
            //   markinfo.setMarked(true);
            //   doc.getDocumentCatalog().setMarkInfo(markinfo);
            // That is deliberately not done: to be on the safe side level B without
            // MarkInfo is used, because it cannot be guaranteed that the user tagged
            // the PDF templates correctly.
            pdfaIdSchema.setConformance(conformanceLevel);
            pdfaIdSchema.setPart(new java.lang.Integer(3));

            if (attachZugferdHeaders)
            {
                // The only Zugferd-specific step: adds PDF metadata specifically for
                // Zugferd rather than generically for an embedded file.
                addZugferdXMP(xmpPacket);
            }

            metadataStream.importXMPMetadata(xmpPacket);
            return catalog;
        }