예제 #1
0
        /// <summary>
        /// Extracts the metadata of the PDF document read from <c>fileStream</c>.
        /// </summary>
        /// <remarks>
        /// In order, this method: inspects embedded images, calls <c>ReadXMPMetadata</c>, reads the
        /// document information entries (title, subject, author, keywords, creator, producer and
        /// dates), calls <c>SearchPathsLinksAndEmails</c> on the stream, derives users from the paths
        /// found and finally looks for known software names in the title.
        /// Exceptions are not propagated to the caller: a <c>PdfReaderException</c> is ignored and any
        /// other exception is written to the debug output. <c>fileStream</c> is always disposed.
        /// </remarks>
        /// <returns>
        /// The metadata collected so far; never <c>null</c> (an empty instance is returned when
        /// nothing could be created).
        /// </returns>
        public override FileMetadata AnalyzeFile()
        {
            try
            {
                this.foundMetadata = new FileMetadata();
                using (PdfDocument doc = PdfReader.Open(this.fileStream, PdfDocumentOpenMode.InformationOnly))
                {
                    ExtractEmbeddedImagesMetadata(doc);

                    ReadXMPMetadata(doc);

                    string title = doc.Info.Title;
                    if (!String.IsNullOrEmpty(title))
                    {
                        this.foundMetadata.Title = Functions.ToPlainText(title);
                        // A title that is an absolute URI is also recorded as a path.
                        if (Uri.IsWellFormedUriString(title, UriKind.Absolute))
                        {
                            this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(title), true));
                        }
                    }

                    string subject = doc.Info.Subject;
                    if (!String.IsNullOrEmpty(subject))
                    {
                        this.foundMetadata.Subject = Functions.ToPlainText(subject);
                    }

                    string author = doc.Info.Author;
                    if (!String.IsNullOrEmpty(author))
                    {
                        this.foundMetadata.Add(new User(Functions.ToPlainText(author), true));
                    }

                    string keywords = doc.Info.Keywords;
                    if (!String.IsNullOrEmpty(keywords))
                    {
                        this.foundMetadata.Keywords = Functions.ToPlainText(keywords);
                    }

                    // Creator and Producer are both treated as software names.
                    AddApplicationFromInfoValue(doc.Info.Creator);
                    AddApplicationFromInfoValue(doc.Info.Producer);

                    try
                    {
                        DateTime creationDate = doc.Info.CreationDate;
                        if (creationDate != DateTime.MinValue)
                        {
                            this.foundMetadata.Dates.CreationDate = creationDate;
                        }
                    }
                    catch (InvalidCastException)
                    {
                        // Reading the date failed with a cast error (presumably a malformed
                        // date entry); leave the creation date unset.
                    }

                    try
                    {
                        DateTime modificationDate = doc.Info.ModificationDate;
                        if (modificationDate != DateTime.MinValue)
                        {
                            this.foundMetadata.Dates.ModificationDate = modificationDate;
                        }
                    }
                    catch (InvalidCastException)
                    {
                        // Same as above: leave the modification date unset.
                    }
                }

                SearchPathsLinksAndEmails(this.fileStream);

                // Find users in paths
                foreach (Diagrams.Path path in this.foundMetadata.Paths)
                {
                    string userName = PathAnalysis.ExtractUserFromPath(path.Value);
                    this.foundMetadata.Add(new User(userName, path.IsComputerFolder));
                }

                // Also search software in the title (only pdf). It is added only if the software is known.
                if (!String.IsNullOrEmpty(this.foundMetadata.Title))
                {
                    string titleSoftware = ApplicationAnalysis.GetApplicationsFromString(this.foundMetadata.Title);
                    if (!String.IsNullOrWhiteSpace(titleSoftware))
                    {
                        this.foundMetadata.Add(new Application(titleSoftware));
                    }
                }
            }
            catch (PdfReaderException)
            {
                // The PDF reader rejected the file; return whatever was collected so far.
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(ex.ToString());
            }
            finally
            {
                // Guarantee a non-null result even if the initial allocation failed.
                if (this.foundMetadata == null)
                {
                    this.foundMetadata = new FileMetadata();
                }

                this.fileStream?.Dispose();
            }

            return this.foundMetadata;
        }

        /// <summary>
        /// Analyzes every image stream embedded in the document and stores the metadata found in it.
        /// </summary>
        /// <param name="doc">The opened PDF document whose objects are inspected.</param>
        private void ExtractEmbeddedImagesMetadata(PdfDocument doc)
        {
            int imageNumber = 0;
            var imageObjects = doc.Internals.GetAllObjects()
                .OfType<PdfDictionary>()
                .Where(d => d.Stream != null && "/Image".Equals(d.Elements["/Subtype"]?.ToString()));

            foreach (PdfDictionary imageObject in imageObjects)
            {
                try
                {
                    using (MemoryStream imageStream = new MemoryStream(imageObject.Stream.Value))
                    using (EXIFDocument exifDocument = new EXIFDocument(imageStream))
                    {
                        FileMetadata exifMetadata = exifDocument.AnalyzeFile();

                        // Ignore images which only contain 'Adobe JPEG' makernotes.
                        // NOTE(review): All() is also true for an empty Makernotes collection, so
                        // images without any makernotes are skipped as well - confirm this is intended.
                        if (exifMetadata != null && exifMetadata.HasMetadata() && !exifMetadata.Makernotes.All(p => p.Key == "Adobe JPEG"))
                        {
                            this.foundMetadata.EmbeddedImages.Add(imageNumber.ToString(), exifMetadata);
                            imageNumber++;
                            this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                            this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a single unreadable image must not abort the whole analysis.
                    System.Diagnostics.Debug.WriteLine(ex.ToString());
                }
            }
        }

        /// <summary>
        /// Adds an application to the found metadata from a document information value.
        /// </summary>
        /// <param name="rawValue">
        /// Raw value of the information entry (Creator or Producer); null, empty or
        /// whitespace-only values are ignored.
        /// </param>
        private void AddApplicationFromInfoValue(string rawValue)
        {
            if (String.IsNullOrWhiteSpace(rawValue))
            {
                return;
            }

            string plainValue = Functions.ToPlainText(rawValue);
            string knownSoftware = ApplicationAnalysis.GetApplicationsFromString(plainValue);
            if (!String.IsNullOrWhiteSpace(knownSoftware))
            {
                this.foundMetadata.Add(new Application(knownSoftware));
            }
            // No known application was recognized; still report the application name as found.
            else if (!String.IsNullOrWhiteSpace(plainValue))
            {
                this.foundMetadata.Add(new Application(plainValue.Trim()));
            }
        }