Пример #1
0
        /// <summary>
        /// Walks the records of the "Pictures" stream of an OLE document and extracts
        /// EXIF metadata from every embedded picture it can parse.
        /// </summary>
        /// <remarks>
        /// Each record is read as: 4 skipped bytes, a 4-byte unsigned length, then that many
        /// payload bytes. The image handed to the EXIF parser starts 0x11 bytes into the payload.
        /// NOTE(review): the meaning of the skipped bytes is not visible here — confirm against the format spec.
        /// </remarks>
        /// <param name="doc">OLE document whose "Pictures" stream is inspected.</param>
        private void GetImagesPpt(OleDocument doc)
        {
            // Offset inside a record payload at which the image data starts.
            const int pictureDataOffset = 0x11;

            using (Stream stmPictures = doc.OpenStream("Pictures"))
            {
                if (stmPictures == null)
                {
                    return;
                }
                int ImagesFound = 0;
                stmPictures.Seek(0, SeekOrigin.Begin);

                // A single reader is enough for the whole stream; it is not disposed on its own
                // because the enclosing using already owns (and closes) the stream.
                BinaryReader brData = new BinaryReader(stmPictures);
                while (stmPictures.Position < stmPictures.Length - 0x19)
                {
                    stmPictures.Seek(0x4, SeekOrigin.Current);
                    UInt32 PICLength = brData.ReadUInt32();

                    // Stop on an empty record, on a length that cannot be represented as an
                    // array size, or on a length that runs past the end of the stream.
                    if (PICLength == 0 || PICLength > int.MaxValue || stmPictures.Position + PICLength > stmPictures.Length)
                    {
                        break;
                    }
                    byte[] bufferPIC = brData.ReadBytes((int)PICLength);

                    // A payload shorter than the data offset cannot contain an image; building
                    // the MemoryStream below with a negative count would throw, so skip the record.
                    if (bufferPIC.Length < pictureDataOffset)
                    {
                        continue;
                    }
                    string strImageName = "Image" + ImagesFound++;

                    using (MemoryStream msJPG = new MemoryStream(bufferPIC, pictureDataOffset, bufferPIC.Length - pictureDataOffset))
                    {
                        FileMetadata exifMetadata = null;
                        using (EXIFDocument eDoc = new EXIFDocument(msJPG, ".jpg"))
                        {
                            exifMetadata = eDoc.AnalyzeFile();
                        }
                        if (exifMetadata != null)
                        {
                            foundMetadata.EmbeddedImages.Add(strImageName, exifMetadata);

                            // Users and applications found in the image also belong to the document.
                            this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                            this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                        }
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Builds the metadata extractor that corresponds to a file extension.
        /// </summary>
        /// <param name="extension">
        /// Extension of the document. It is normalized with <c>NormalizeExtension</c> to choose the
        /// extractor, but the value is forwarded as received to the extractors that take an extension.
        /// </param>
        /// <param name="file">Stream with the document contents.</param>
        /// <returns>The extractor instance created for the extension.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        /// <exception cref="ArgumentException">The extension is not supported.</exception>
        public static DocumentExtractor Create(string extension, Stream file)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            string extensionKey = NormalizeExtension(extension);
            if (!IsSupportedExtension(extensionKey))
            {
                throw new ArgumentException("Extension not supported", nameof(extension));
            }

            switch (extensionKey)
            {
                // OpenOffice / OpenDocument family
                case ".sxw":
                case ".odt":
                case ".ods":
                case ".odg":
                case ".odp":
                    return new OpenOfficeDocument(file, extension);

                // Office Open XML family
                case ".docx":
                case ".xlsx":
                case ".pptx":
                case ".ppsx":
                    return new OfficeOpenXMLDocument(file, extension);

                // Binary Office 97-2003 family
                case ".doc":
                case ".xls":
                case ".ppt":
                case ".pps":
                    return new Office972003(file);

                case ".pdf":
                    return new PDFDocument(file);

                case ".wpd":
                    return new WPDDocument(file);

                // Images carrying EXIF data
                case ".raw":
                case ".cr2":
                case ".crw":
                case ".jpg":
                case ".jpeg":
                    return new EXIFDocument(file, extension);

                case ".svg":
                case ".svgz":
                    return new SVGDocument(file);

                case ".indd":
                    return new InDDDocument(file);

                case ".rdp":
                    return new RDPDocument(file);

                case ".ica":
                    return new ICADocument(file);

                // Reported as supported but without an extractor wired in here.
                default:
                    throw new ArgumentException("Extension not supported", nameof(extension));
            }
        }
Пример #3
0
        /// <summary>
        /// Extracts the metadata of the PDF document: embedded image EXIF data, XMP metadata,
        /// the document information entries (title, subject, author, keywords, creator, producer,
        /// dates) and the paths, links and e-mails found by <c>SearchPathsLinksAndEmails</c>.
        /// </summary>
        /// <returns>
        /// The metadata collected so far; never null. Errors are logged to the debug output and
        /// whatever was gathered before the failure is returned.
        /// </returns>
        public override FileMetadata AnalyzeFile()
        {
            try
            {
                this.foundMetadata = new FileMetadata();
                using (PdfDocument doc = PdfReader.Open(this.fileStream, PdfDocumentOpenMode.InformationOnly))
                {
                    int imageNumber = 0;
                    //Read embedded images
                    foreach (PdfDictionary item in doc.Internals.GetAllObjects().Where(p => p is PdfDictionary d && d.Stream != null && "/Image".Equals(d.Elements["/Subtype"]?.ToString())))
                    {
                        try
                        {
                            using (MemoryStream msJPG = new MemoryStream(item.Stream.Value))
                            {
                                using (EXIFDocument eDoc = new EXIFDocument(msJPG))
                                {
                                    FileMetadata exifMetadata = eDoc.AnalyzeFile();
                                    //Ignore images which only contain 'Adobe JPEG' makernotes
                                    if (exifMetadata != null && exifMetadata.HasMetadata() && !exifMetadata.Makernotes.All(p => p.Key == "Adobe JPEG"))
                                    {
                                        foundMetadata.EmbeddedImages.Add(imageNumber.ToString(), exifMetadata);
                                        imageNumber++;
                                        this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                                        this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                                    }
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            //A broken image must not abort the analysis, but leave a trace of the failure
                            System.Diagnostics.Debug.WriteLine(ex.ToString());
                        }
                    }

                    ReadXMPMetadata(doc);

                    string title = doc.Info.Title;
                    if (!String.IsNullOrEmpty(title))
                    {
                        this.foundMetadata.Title = Functions.ToPlainText(title);
                        //A title that is an absolute URI is also recorded as a path
                        if (Uri.IsWellFormedUriString(title, UriKind.Absolute))
                        {
                            this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(title), true));
                        }
                    }

                    string subject = doc.Info.Subject;
                    if (!String.IsNullOrEmpty(subject))
                    {
                        this.foundMetadata.Subject = Functions.ToPlainText(subject);
                    }

                    string author = doc.Info.Author;
                    if (!String.IsNullOrEmpty(author))
                    {
                        this.foundMetadata.Add(new User(Functions.ToPlainText(author), true));
                    }

                    string keywords = doc.Info.Keywords;
                    if (!String.IsNullOrEmpty(keywords))
                    {
                        this.foundMetadata.Keywords = Functions.ToPlainText(keywords);
                    }

                    AddApplicationFromInfoValue(doc.Info.Creator);
                    AddApplicationFromInfoValue(doc.Info.Producer);

                    //Reading a date can throw InvalidCastException; in that case the date is left unset
                    try
                    {
                        if (doc.Info.CreationDate != DateTime.MinValue)
                        {
                            this.foundMetadata.Dates.CreationDate = doc.Info.CreationDate;
                        }
                    }
                    catch (InvalidCastException)
                    {
                    }

                    try
                    {
                        if (doc.Info.ModificationDate != DateTime.MinValue)
                        {
                            this.foundMetadata.Dates.ModificationDate = doc.Info.ModificationDate;
                        }
                    }
                    catch (InvalidCastException)
                    {
                    }
                }

                SearchPathsLinksAndEmails(this.fileStream);

                //Find users in paths
                foreach (Diagrams.Path path in this.foundMetadata.Paths)
                {
                    string strUser = PathAnalysis.ExtractUserFromPath(path.Value);
                    //Only add a user when the path actually yielded one
                    if (!String.IsNullOrEmpty(strUser))
                    {
                        this.foundMetadata.Add(new User(strUser, path.IsComputerFolder));
                    }
                }

                //Also search software in the title (only pdf). It is added only if the software is known.
                if (!String.IsNullOrEmpty(foundMetadata.Title))
                {
                    string strSoftware = ApplicationAnalysis.GetApplicationsFromString(foundMetadata.Title);
                    if (!String.IsNullOrWhiteSpace(strSoftware))
                    {
                        this.foundMetadata.Add(new Application(strSoftware));
                    }
                }
            }
            catch (PdfReaderException ex)
            {
                //The reader rejected the file: return whatever was collected
                System.Diagnostics.Debug.WriteLine(ex.ToString());
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(ex.ToString());
            }
            finally
            {
                if (foundMetadata == null)
                {
                    this.foundMetadata = new FileMetadata();
                }

                if (fileStream != null)
                {
                    this.fileStream.Dispose();
                }
            }
            return(this.foundMetadata);
        }

        /// <summary>
        /// Adds the application described by a document information entry (Creator/Producer).
        /// A known application name is preferred; when none is recognized, the plain text of the
        /// entry itself is added so the information is not lost.
        /// </summary>
        /// <param name="rawValue">Raw value of the information entry; null or blank values are ignored.</param>
        private void AddApplicationFromInfoValue(string rawValue)
        {
            if (String.IsNullOrWhiteSpace(rawValue))
            {
                return;
            }

            string plainText = Functions.ToPlainText(rawValue);
            string strSoftware = ApplicationAnalysis.GetApplicationsFromString(plainText);
            if (!String.IsNullOrWhiteSpace(strSoftware))
            {
                this.foundMetadata.Add(new Application(strSoftware));
            }
            //No known application was found; still show the application that was found
            else if (!String.IsNullOrWhiteSpace(plainText))
            {
                this.foundMetadata.Add(new Application(plainText.Trim()));
            }
        }
Пример #4
0
 /// <summary>
 /// Extracts the metadata of the OpenOffice document: the meta, settings, content and
 /// version-list XML entries of the archive, the EXIF data of the JPEG images stored under
 /// "Pictures/", and the users that can be derived from the paths found.
 /// </summary>
 /// <returns>
 /// The metadata collected. Any error is logged to the debug output and whatever was gathered
 /// before the failure is returned.
 /// </returns>
 public override FileMetadata AnalyzeFile()
 {
     try
     {
         this.foundMetadata = new FileMetadata();
         using (ZipFile zip = ZipFile.Read(this.fileStream))
         {
             using (Stream stmXML = ExtractEntryToMemory(zip, "meta.xml"))
             {
                 if (stmXML != null)
                 {
                     AnalizeFileMeta(stmXML);
                 }
             }
             using (Stream stmXML = ExtractEntryToMemory(zip, "settings.xml"))
             {
                 if (stmXML != null)
                 {
                     analizeFileSettings(stmXML);
                 }
             }
             using (Stream stmXML = ExtractEntryToMemory(zip, "content.xml"))
             {
                 if (stmXML != null)
                 {
                     AnalizeFileContent(stmXML);
                 }
             }
             using (Stream stmXML = ExtractEntryToMemory(zip, "VersionList.xml"))
             {
                 if (stmXML != null)
                 {
                     AnalizeFileVersionList(stmXML, zip);
                 }
             }

             //Extract EXIF information from the images embedded in the document
             foreach (string strFileName in zip.EntryFileNames)
             {
                 //Keep only *.jpg / *.jpeg entries inside the "Pictures/" folder. Ordinal comparison
                 //keeps the match independent of the current culture.
                 bool isJpegPicture =
                     strFileName.StartsWith("pictures/", StringComparison.OrdinalIgnoreCase) &&
                     (strFileName.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) ||
                      strFileName.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase));
                 if (!isJpegPicture)
                 {
                     continue;
                 }

                 using (Stream stmImage = new MemoryStream())
                 {
                     zip.Extract(strFileName, stmImage);
                     stmImage.Seek(0, SeekOrigin.Begin);
                     string strExtension = System.IO.Path.GetExtension(strFileName).ToLowerInvariant();
                     using (EXIFDocument eDoc = new EXIFDocument(stmImage, strExtension))
                     {
                         FileMetadata exifMetadata = eDoc.AnalyzeFile();
                         if (exifMetadata == null)
                         {
                             continue;
                         }

                         //Register the image with its EXIF information. Two pictures in different
                         //folders may share a file name, so fall back to the full entry name
                         //instead of failing on a duplicate key.
                         string strImageName = System.IO.Path.GetFileName(strFileName);
                         if (this.foundMetadata.EmbeddedImages.ContainsKey(strImageName))
                         {
                             strImageName = strFileName;
                         }
                         if (!this.foundMetadata.EmbeddedImages.ContainsKey(strImageName))
                         {
                             this.foundMetadata.EmbeddedImages.Add(strImageName, exifMetadata);
                         }

                         //Users and applications from the EXIF information are added to the document
                         this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                         this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                     }
                 }
             }
         }
         //Look for users in the paths of the document
         foreach (Diagrams.Path ri in this.foundMetadata.Paths)
         {
             string strUser = PathAnalysis.ExtractUserFromPath(ri.Value);
             if (!string.IsNullOrEmpty(strUser))
             {
                 this.foundMetadata.Add(new User(strUser, ri.IsComputerFolder, "Path: " + ri.Value));
             }
         }
     }
     catch (Exception e)
     {
         System.Diagnostics.Debug.WriteLine(String.Format("Error analyzing OpenOffice document ({0})", e.ToString()));
     }
     return(this.foundMetadata);
 }

 /// <summary>
 /// Extracts an archive entry into a new in-memory stream positioned at its beginning.
 /// </summary>
 /// <param name="zip">Archive to read from.</param>
 /// <param name="entryName">Exact name of the entry.</param>
 /// <returns>The stream with the entry contents (owned by the caller), or null when the entry does not exist.</returns>
 private static Stream ExtractEntryToMemory(ZipFile zip, string entryName)
 {
     if (!zip.EntryFileNames.Contains(entryName))
     {
         return null;
     }

     Stream stmEntry = new MemoryStream();
     try
     {
         zip.Extract(entryName, stmEntry);
         stmEntry.Seek(0, SeekOrigin.Begin);
         return stmEntry;
     }
     catch
     {
         //Do not leak the buffer when extraction fails
         stmEntry.Dispose();
         throw;
     }
 }
Пример #5
0
        /// <summary>
        /// Extracts the metadata of the Office Open XML document: core and application
        /// properties, format-specific information (document body, settings and links for
        /// .docx; printers and links for .xlsx; slide links for .pptx/.ppsx) and the EXIF data
        /// of the JPEG images stored in the media folders.
        /// </summary>
        /// <returns>
        /// The metadata collected. Any error is logged to the debug output and whatever was
        /// gathered before the failure is returned.
        /// </returns>
        public override FileMetadata AnalyzeFile()
        {
            try
            {
                this.foundMetadata = new FileMetadata();
                using (Package pZip = Package.Open(this.fileStream))
                {
                    AnalyzePartIfExists(pZip, "/docProps/core.xml", stm => AnalizeFileCore(stm));
                    AnalyzePartIfExists(pZip, "/docProps/app.xml", stm => AnalizeFileApp(stm));

                    //Version control
                    if (strExtlo == ".docx")
                    {
                        AnalyzePartIfExists(pZip, "/word/document.xml", stm => AnalizeFileDocument(stm));

                        //Read the settings file to recover the language of the document
                        if (foundMetadata.Language == string.Empty)
                        {
                            AnalyzePartIfExists(pZip, "/word/settings.xml", stm => AnalizeFileSettings(stm));
                        }

                        //Read document.xml.rels to obtain the links of the document
                        AnalyzePartIfExists(pZip, "/word/_rels/document.xml.rels", stm => AnalizeLinks(stm));
                    }
                    //Get the printer names and the links of xlsx documents
                    else if (strExtlo == ".xlsx")
                    {
                        foreach (PackagePart pp in pZip.GetParts())
                        {
                            string strPartUri = pp.Uri.ToString();
                            if (strPartUri.StartsWith("/xl/printerSettings/printerSettings", StringComparison.Ordinal))
                            {
                                //The first 32 UTF-16 characters of the part are used as the printer name;
                                //positions that are not filled stay as '\0' and are removed below
                                char[] name = new char[32];
                                using (StreamReader sr = new StreamReader(pp.GetStream(FileMode.Open, FileAccess.Read), Encoding.Unicode))
                                {
                                    sr.Read(name, 0, 32);
                                }
                                this.foundMetadata.Add(new Printer(Functions.FilterPrinter((new string(name).Replace("\0", "")))));
                            }
                            if (strPartUri.StartsWith("/xl/worksheets/_rels/", StringComparison.Ordinal))
                            {
                                using (Stream stmDoc = pp.GetStream(FileMode.Open, FileAccess.Read))
                                {
                                    AnalizeLinks(stmDoc);
                                }
                            }
                        }
                    }
                    //Slide shows (.ppsx) use the same "/ppt/slides/_rels/" layout as presentations
                    else if (strExtlo == ".pptx" || strExtlo == ".ppsx")
                    {
                        foreach (PackagePart pp in pZip.GetParts())
                        {
                            if (pp.Uri.ToString().StartsWith("/ppt/slides/_rels/", StringComparison.Ordinal))
                            {
                                using (Stream stmDoc = pp.GetStream(FileMode.Open, FileAccess.Read))
                                {
                                    AnalizeLinks(stmDoc);
                                }
                            }
                        }
                    }

                    //Extract EXIF information from every image
                    foreach (PackagePart pp in pZip.GetParts())
                    {
                        string strFileName = pp.Uri.ToString();
                        //Keep all *.jpg and *.jpeg images of the 3 possible folders. Ordinal comparison
                        //keeps the match independent of the current culture.
                        bool isInMediaFolder =
                            strFileName.StartsWith("/word/media/", StringComparison.OrdinalIgnoreCase) ||
                            strFileName.StartsWith("/ppt/media/", StringComparison.OrdinalIgnoreCase) ||
                            strFileName.StartsWith("/xl/media/", StringComparison.OrdinalIgnoreCase);
                        bool isJpeg =
                            strFileName.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) ||
                            strFileName.EndsWith(".jpeg", StringComparison.OrdinalIgnoreCase);
                        if (!isInMediaFolder || !isJpeg)
                        {
                            continue;
                        }

                        string strExtension = System.IO.Path.GetExtension(strFileName).ToLowerInvariant();
                        //The part stream is owned here so it is released even if the EXIF reader does not close it
                        using (Stream stmImage = pp.GetStream(FileMode.Open, FileAccess.Read))
                        using (EXIFDocument eDoc = new EXIFDocument(stmImage, strExtension))
                        {
                            FileMetadata exifMetadata = eDoc.AnalyzeFile();
                            if (exifMetadata == null)
                            {
                                continue;
                            }

                            //Fall back to the full part URI when another image already uses the file name
                            string strImageName = System.IO.Path.GetFileName(strFileName);
                            if (foundMetadata.EmbeddedImages.ContainsKey(strImageName))
                            {
                                strImageName = strFileName;
                            }
                            if (!foundMetadata.EmbeddedImages.ContainsKey(strImageName))
                            {
                                foundMetadata.EmbeddedImages.Add(strImageName, exifMetadata);
                            }

                            //Copy the user and application metadata of the image to the document
                            this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                            this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                        }
                    }
                }
            }
            catch (Exception e)
            {
                System.Diagnostics.Debug.WriteLine(e.ToString());
            }

            return(this.foundMetadata);
        }

        /// <summary>
        /// Opens a package part for reading and hands its stream to an analyzer; does nothing
        /// when the part is not present in the package.
        /// </summary>
        /// <param name="package">Package that may contain the part.</param>
        /// <param name="partPath">Relative URI of the part, e.g. "/docProps/core.xml".</param>
        /// <param name="analyze">Callback that consumes the part stream; the stream is closed afterwards.</param>
        private static void AnalyzePartIfExists(Package package, string partPath, System.Action<Stream> analyze)
        {
            Uri uriFile = new Uri(partPath, UriKind.Relative);
            if (!package.PartExists(uriFile))
            {
                return;
            }

            PackagePart pDocument = package.GetPart(uriFile);
            using (Stream stmDoc = pDocument.GetStream(FileMode.Open, FileAccess.Read))
            {
                analyze(stmDoc);
            }
        }
Пример #6
0
        /// <summary>
        /// Scans the "WordDocument" stream of an OLE document in 0x200-byte pages for picture
        /// references and, for each one, reads the referenced block of the "Data" stream to
        /// collect file paths and the EXIF metadata of any JPEG found inside it.
        /// </summary>
        /// <remarks>
        /// A picture reference is located by the byte pair 0x03 0x6A; the four bytes that follow
        /// it are read as a little-endian offset into the "Data" stream.
        /// NOTE(review): the names (fcMin, fcMac, FKP, PIC) suggest the Word binary format
        /// structures — confirm the layout against the format specification.
        /// </remarks>
        /// <param name="doc">OLE document whose "WordDocument" and "Data" streams are inspected.</param>
        private void GetImagesDoc(OleDocument doc)
        {
            using (Stream WordDocument = doc.OpenStream("WordDocument"))
            {
                using (Stream stmData = doc.OpenStream("Data"))
                {
                    if (WordDocument == null || stmData == null)
                    {
                        return;
                    }
                    // The two 32-bit values read below live at 0x18..0x1F; a shorter stream has nothing to scan.
                    if (WordDocument.Length < 0x20)
                    {
                        return;
                    }
                    WordDocument.Seek(0x18, SeekOrigin.Begin);
                    BinaryReader br = new BinaryReader(WordDocument);
                    br.ReadInt32(); // fcMin: not needed, read only to advance to fcMac
                    Int32 fcMac = br.ReadInt32();
                    // Scanning starts at fcMac rounded up to the next multiple of 0x200.
                    Int32 FKPStart = fcMac % 0x200 == 0 ? fcMac : (fcMac - fcMac % 0x200) + 0x200;
                    if (FKPStart < 0)
                    {
                        // Corrupt header (negative or overflowing value): seeking there would throw.
                        return;
                    }
                    WordDocument.Seek(FKPStart, SeekOrigin.Begin);
                    int imagesFound = 0;

                    // One reader serves every picture; the enclosing using owns the stream.
                    BinaryReader brData = new BinaryReader(stmData);

                    while (WordDocument.Position + 0x200 < WordDocument.Length)
                    {
                        byte[] FKP = br.ReadBytes(0x200);
                        // Stop at a short read or at a page whose last byte is zero.
                        if (FKP.Length < 0x200 || FKP[0x1FF] == 00)
                        {
                            break;
                        }
                        foreach (int offset in Functions.SearchBytesInBytes(FKP, new byte[] { 0x03, 0x6A }))
                        {
                            // The marker needs 4 more bytes after it inside the page.
                            if (offset >= 0x200 - 5)
                            {
                                continue;
                            }

                            // Little-endian 32-bit value; may come out negative when the top bit is set.
                            int PICOffset = FKP[offset + 2] | (FKP[offset + 3] << 8) | (FKP[offset + 4] << 16) | (FKP[offset + 5] << 24);
                            // There must be room for the 4-byte length field at that offset.
                            if (PICOffset < 0 || PICOffset > stmData.Length - 4)
                            {
                                continue;
                            }

                            stmData.Seek(PICOffset, SeekOrigin.Begin);
                            UInt32 PICLength = brData.ReadUInt32();
                            long remaining = stmData.Length - stmData.Position;
                            // The declared length is reduced by the 4 bytes just read; when it does not
                            // fit, everything up to the end of the stream is taken.
                            long wantedLen = PICLength < remaining ? (long)PICLength - 4 : remaining;
                            if (wantedLen <= 0 || wantedLen > int.MaxValue)
                            {
                                continue;
                            }
                            byte[] bufferPIC = brData.ReadBytes((int)wantedLen);

                            string strImageName = String.Empty;

                            // The block is decoded as UTF-16 text to look for file paths; every path found is
                            // recorded and the last one provides the image name.
                            using (StreamReader sr = new StreamReader(new MemoryStream(bufferPIC), Encoding.Unicode))
                            {
                                String sRead = sr.ReadToEnd();
                                foreach (Match m in Regex.Matches(sRead, @"([a-z]:|\\)\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+", RegexOptions.IgnoreCase))
                                {
                                    String path = m.Value.Trim();
                                    this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(path), true));
                                    strImageName = System.IO.Path.GetFileName(path);
                                }
                            }

                            if (String.IsNullOrEmpty(strImageName) || foundMetadata.EmbeddedImages.ContainsKey(strImageName))
                            {
                                // Generated names must be unique too, otherwise the Add below would throw.
                                do
                                {
                                    strImageName = "Image" + imagesFound++;
                                }
                                while (foundMetadata.EmbeddedImages.ContainsKey(strImageName));
                            }

                            // The JPEG, if any, starts at the first FF D8 FF signature inside the block.
                            List <int> lstJPEG = Functions.SearchBytesInBytes(bufferPIC, new byte[] { 0xFF, 0xD8, 0xFF });
                            if (lstJPEG.Count > 0)
                            {
                                using (MemoryStream msJPG = new MemoryStream(bufferPIC, lstJPEG[0], bufferPIC.Length - lstJPEG[0]))
                                {
                                    using (EXIFDocument eDoc = new EXIFDocument(msJPG, ".jpg"))
                                    {
                                        FileMetadata exifMetadata = eDoc.AnalyzeFile();
                                        if (exifMetadata != null)
                                        {
                                            foundMetadata.EmbeddedImages.Add(strImageName, exifMetadata);
                                            this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                                            this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }