/// <summary>
/// Walks the "Pictures" stream of an OLE document, runs EXIF analysis on the payload of
/// every record found and merges the users and applications reported by each image into
/// this document's metadata.
/// </summary>
/// <param name="doc">OLE document from which the "Pictures" stream is opened.</param>
private void GetImagesPpt(OleDocument doc)
{
    // Bytes skipped at the start of every record payload before the data handed to the
    // EXIF analyzer (presumably a per-picture header - TODO confirm against the format spec).
    const int PayloadHeaderSize = 0x11;

    using (Stream stmPictures = doc.OpenStream("Pictures"))
    {
        if (stmPictures == null)
        {
            return;
        }

        int ImagesFound = 0;
        stmPictures.Seek(0, SeekOrigin.Begin);

        // Intentionally not disposed: disposing the reader would close stmPictures,
        // which is owned by the enclosing using statement.
        BinaryReader brData = new BinaryReader(stmPictures);

        while (stmPictures.Position < stmPictures.Length - 0x19)
        {
            // Skip the first four bytes of the record; the next four hold the payload length.
            stmPictures.Seek(0x4, SeekOrigin.Current);
            UInt32 PICLength = brData.ReadUInt32();
            if (PICLength == 0 || stmPictures.Position + PICLength > stmPictures.Length)
            {
                // Empty or truncated record: stop scanning.
                break;
            }

            byte[] bufferPIC = brData.ReadBytes((int)PICLength);
            if (bufferPIC.Length < PayloadHeaderSize)
            {
                // Too short to contain the skipped header; building the MemoryStream below
                // would throw because of the negative count. Move on to the next record.
                continue;
            }

            string strImageName = "Image" + ImagesFound++;
            using (MemoryStream msJPG = new MemoryStream(bufferPIC, PayloadHeaderSize, bufferPIC.Length - PayloadHeaderSize))
            {
                EXIFDocument eDoc = new EXIFDocument(msJPG, ".jpg");
                eDoc.analyzeFile();
                eDoc.Close();

                // Running totals kept in fields and only reported through debug output.
                if (eDoc.Thumbnail != null)
                {
                    lon += eDoc.Thumbnail.Length;
                }
                cont++;
                System.Diagnostics.Debug.WriteLine(cont.ToString());
                System.Diagnostics.Debug.WriteLine(lon / (1024 * 1024) + " Megacas");

                dicPictureEXIF.Add(strImageName, eDoc);

                // Copy the users found in the image to the document.
                foreach (UserItem uiEXIF in eDoc.FoundUsers.Items)
                {
                    FoundUsers.AddUniqueItem(uiEXIF.Name, false, uiEXIF.Notes);
                }

                // Copy the applications found in the image, skipping blank names and duplicates.
                foreach (ApplicationsItem Application in eDoc.FoundMetaData.Applications.Items)
                {
                    string strApplication = Application.Name == null ? string.Empty : Application.Name.Trim();
                    if (strApplication.Length == 0)
                    {
                        continue;
                    }
                    if (!FoundMetaData.Applications.Items.Any(A => A.Name == strApplication))
                    {
                        FoundMetaData.Applications.Items.Add(new ApplicationsItem(strApplication));
                    }
                }
            }
        }
    }
}
/// <summary>
/// Extracts metadata from an Office Open XML package: the core and app property parts,
/// format-specific parts (document, settings and links for .docx; printer settings and
/// worksheet links for .xlsx; slide links for .pptx/.ppsx) and the EXIF data of every
/// embedded JPEG image. Any exception is written to the debug output and swallowed.
/// </summary>
public override void analyzeFile()
{
    try
    {
        using (Package pZip = Package.Open(stm))
        {
            AnalyzePackagePart(pZip, "/docProps/core.xml", s => analizeFileCore(s));
            AnalyzePackagePart(pZip, "/docProps/app.xml", s => analizeFileApp(s));

            if (strExtlo == ".docx")
            {
                // Version control
                AnalyzePackagePart(pZip, "/word/document.xml", s => analizeFileDocument(s));

                // Read the settings part to recover the document language
                if (FoundMetaData.Language == string.Empty)
                {
                    AnalyzePackagePart(pZip, "/word/settings.xml", s => analizeFileSettings(s));
                }

                // Read document.xml.rels to obtain the links of the document
                AnalyzePackagePart(pZip, "/word/_rels/document.xml.rels", s => analizeLinks(s));
            }
            else if (strExtlo == ".xlsx")
            {
                // Get the printer names and the links of xlsx documents
                foreach (PackagePart pp in pZip.GetParts())
                {
                    string strPartUri = pp.Uri.ToString();
                    if (strPartUri.StartsWith("/xl/printerSettings/printerSettings", StringComparison.Ordinal))
                    {
                        // Only the first 32 UTF-16 characters of the part are used as the printer name.
                        char[] name = new char[32];
                        using (StreamReader sr = new StreamReader(pp.GetStream(FileMode.Open, FileAccess.Read), Encoding.Unicode))
                        {
                            // ReadBlock keeps reading until the buffer is full or the stream ends,
                            // whereas Read may legitimately return fewer characters.
                            sr.ReadBlock(name, 0, name.Length);
                        }
                        FoundPrinters.AddUniqueItem(Functions.FilterPrinter((new string(name).Replace("\0", ""))));
                    }
                    if (strPartUri.StartsWith("/xl/worksheets/_rels/", StringComparison.Ordinal))
                    {
                        using (Stream stmDoc = pp.GetStream(FileMode.Open, FileAccess.Read))
                        {
                            analizeLinks(stmDoc);
                        }
                    }
                }
            }
            else if (strExtlo == ".pptx" || strExtlo == ".ppsx")
            {
                // .ppsx packages keep their slides in the same /ppt/slides/ folder as .pptx.
                foreach (PackagePart pp in pZip.GetParts())
                {
                    if (pp.Uri.ToString().StartsWith("/ppt/slides/_rels/", StringComparison.Ordinal))
                    {
                        using (Stream stmDoc = pp.GetStream(FileMode.Open, FileAccess.Read))
                        {
                            analizeLinks(stmDoc);
                        }
                    }
                }
            }

            // Extract the EXIF information of every image
            foreach (PackagePart pp in pZip.GetParts())
            {
                string strFileName = pp.Uri.ToString();
                // Invariant lower-casing so the filter does not depend on the current culture.
                string strFileNameLo = strFileName.ToLowerInvariant();

                // Keep only *.jpg and *.jpeg images from the three possible media folders
                bool inMediaFolder = strFileNameLo.StartsWith("/word/media/", StringComparison.Ordinal)
                                     || strFileNameLo.StartsWith("/ppt/media/", StringComparison.Ordinal)
                                     || strFileNameLo.StartsWith("/xl/media/", StringComparison.Ordinal);
                bool isJpeg = strFileNameLo.EndsWith(".jpg", StringComparison.Ordinal)
                              || strFileNameLo.EndsWith(".jpeg", StringComparison.Ordinal);
                if (!inMediaFolder || !isJpeg)
                {
                    continue;
                }

                // Dispose the part stream as soon as the image has been analyzed.
                using (Stream stmImage = pp.GetStream(FileMode.Open, FileAccess.Read))
                {
                    EXIFDocument eDoc = new EXIFDocument(stmImage, Path.GetExtension(strFileNameLo));
                    eDoc.analyzeFile();
                    dicPictureEXIF.Add(Path.GetFileName(strFileName), eDoc);

                    // Copy the user and application metadata of the image to the document
                    foreach (UserItem uiEXIF in eDoc.FoundUsers.Items)
                    {
                        FoundUsers.AddUniqueItem(uiEXIF.Name, false, uiEXIF.Notes);
                    }
                    foreach (ApplicationsItem Application in eDoc.FoundMetaData.Applications.Items)
                    {
                        string strApplication = Application.Name == null ? string.Empty : Application.Name.Trim();
                        if (strApplication.Length == 0)
                        {
                            // Blank names carry no information.
                            continue;
                        }
                        if (!FoundMetaData.Applications.Items.Any(A => A.Name == strApplication))
                        {
                            FoundMetaData.Applications.Items.Add(new ApplicationsItem(strApplication));
                        }
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(e.ToString());
    }
}

/// <summary>
/// Opens the part at <paramref name="partPath"/> read-only, if the package contains it,
/// and passes its stream to <paramref name="analyze"/>. Does nothing when the part is missing.
/// </summary>
/// <param name="package">Open package to look the part up in.</param>
/// <param name="partPath">Package-relative URI of the part, e.g. "/docProps/core.xml".</param>
/// <param name="analyze">Callback that consumes the part stream; the stream is disposed afterwards.</param>
private static void AnalyzePackagePart(Package package, string partPath, Action<Stream> analyze)
{
    Uri uriFile = new Uri(partPath, UriKind.Relative);
    if (!package.PartExists(uriFile))
    {
        return;
    }
    using (Stream stmDoc = package.GetPart(uriFile).GetStream(FileMode.Open, FileAccess.Read))
    {
        analyze(stmDoc);
    }
}
/// <summary>
/// Creates the metadata extractor that handles files with the given extension.
/// </summary>
/// <param name="extension">File extension including the leading dot; case and surrounding whitespace are ignored.</param>
/// <param name="file">Stream with the contents of the file to analyze.</param>
/// <returns>An extractor instance matching the extension.</returns>
/// <exception cref="ArgumentNullException"><paramref name="extension"/> is null, empty or whitespace, or <paramref name="file"/> is null.</exception>
/// <exception cref="ArgumentException">The extension is not one of the handled ones.</exception>
public static MetaExtractor Create(string extension, Stream file)
{
    if (String.IsNullOrWhiteSpace(extension))
    {
        throw new ArgumentNullException(nameof(extension));
    }
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    // The same normalized value drives the dispatch AND is handed to the extractors,
    // so an input such as " .DOCX " reaches the constructor as ".docx".
    string normalizedExtension = extension.ToLowerInvariant().Trim();

    switch (normalizedExtension)
    {
        case ".sxw":
        case ".odt":
        case ".ods":
        case ".odg":
        case ".odp":
            return new OpenOfficeDocument(file, normalizedExtension);

        case ".docx":
        case ".xlsx":
        case ".pptx":
        case ".ppsx":
            return new OfficeOpenXMLDocument(file, normalizedExtension);

        case ".doc":
        case ".xls":
        case ".ppt":
        case ".pps":
            return new Office972003(file);

        case ".pdf":
            return new PDFDocument(file);

        case ".wpd":
            return new WPDDocument(file);

        case ".raw":
        case ".cr2":
        case ".crw":
        case ".jpg":
        case ".jpeg":
            return new EXIFDocument(file, normalizedExtension);

        case ".svg":
        case ".svgz":
            return new SVGDocument(file);

        case ".indd":
            return new InDDDocument(file);

        case ".rdp":
            return new RDPDocument(file);

        case ".ica":
            return new ICADocument(file);

        default:
            throw new ArgumentException("Extension not allowed", nameof(extension));
    }
}
/// <summary>
/// Extracts the metadata of the document: analyzes meta.xml, settings.xml, content.xml and
/// VersionList.xml when present in the zip container, runs EXIF analysis on the JPEG images
/// stored under "Pictures/", and finally derives users from the paths found in the document.
/// Any exception is written to the debug output and swallowed.
/// </summary>
public override void analyzeFile()
{
    try
    {
        using (ZipFile zip = ZipFile.Read(stm))
        {
            AnalyzeZipEntry(zip, "meta.xml", s => analizeFileMeta(s));
            AnalyzeZipEntry(zip, "settings.xml", s => analizeFileSettings(s));
            AnalyzeZipEntry(zip, "content.xml", s => analizeFileContent(s));
            AnalyzeZipEntry(zip, "VersionList.xml", s => analizeFileVersionList(s, zip));

            // Extract the EXIF information of the images embedded in the document
            foreach (string strFileName in zip.EntryFileNames)
            {
                // Invariant lower-casing so the filter does not depend on the current culture.
                string strFileNameLo = strFileName.ToLowerInvariant();

                // Keep only *.jpg / *.jpeg images inside the "Pictures/" folder
                bool isEmbeddedJpeg = strFileNameLo.StartsWith("pictures/", StringComparison.Ordinal)
                                      && (strFileNameLo.EndsWith(".jpg", StringComparison.Ordinal)
                                          || strFileNameLo.EndsWith(".jpeg", StringComparison.Ordinal));
                if (!isEmbeddedJpeg)
                {
                    continue;
                }

                using (Stream stmXML = new MemoryStream())
                {
                    zip.Extract(strFileName, stmXML);
                    stmXML.Seek(0, SeekOrigin.Begin);
                    EXIFDocument eDoc = new EXIFDocument(stmXML, Path.GetExtension(strFileNameLo));
                    eDoc.analyzeFile();

                    // Register the image together with its EXIF information
                    dicPictureEXIF.Add(Path.GetFileName(strFileName), eDoc);

                    // Users found in the EXIF information are added to the document users
                    foreach (UserItem uiEXIF in eDoc.FoundUsers.Items)
                    {
                        FoundUsers.AddUniqueItem(uiEXIF.Name, false, "EXIF");
                    }

                    // Software found in the EXIF information is added to the software used
                    // to generate the document
                    foreach (ApplicationsItem Application in eDoc.FoundMetaData.Applications.Items)
                    {
                        string strApplication = Application.Name == null ? string.Empty : Application.Name.Trim();
                        if (strApplication.Length == 0)
                        {
                            // A missing or blank name carries no information; skipping it also
                            // keeps a null name from aborting the whole analysis.
                            continue;
                        }
                        if (!FoundMetaData.Applications.Items.Any(A => A.Name == strApplication))
                        {
                            FoundMetaData.Applications.Items.Add(new ApplicationsItem(strApplication));
                        }
                    }
                }
            }
        }

        // Look for users in the paths of the document
        foreach (PathsItem ri in FoundPaths.Items)
        {
            string strUser = PathAnalysis.ExtractUserFromPath(ri.Path);
            if (!string.IsNullOrEmpty(strUser))
            {
                FoundUsers.AddUniqueItem(strUser, ri.IsComputerFolder, "Path: " + ri.Path);
            }
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error analyzing OpenOffice document ({0})", e.ToString()));
    }
}

/// <summary>
/// Extracts the entry named <paramref name="entryName"/> into memory, if the archive contains
/// it, rewinds the buffer and passes it to <paramref name="analyze"/>. Does nothing when the
/// entry is missing.
/// </summary>
/// <param name="zip">Open archive to read the entry from.</param>
/// <param name="entryName">Exact entry name, e.g. "meta.xml".</param>
/// <param name="analyze">Callback that consumes the extracted content; the buffer is disposed afterwards.</param>
private static void AnalyzeZipEntry(ZipFile zip, string entryName, Action<Stream> analyze)
{
    if (!zip.EntryFileNames.Contains(entryName))
    {
        return;
    }
    using (Stream stmXML = new MemoryStream())
    {
        zip.Extract(entryName, stmXML);
        stmXML.Seek(0, SeekOrigin.Begin);
        analyze(stmXML);
    }
}
/// <summary>
/// Creates the metadata extractor that handles files with the given extension.
/// </summary>
/// <param name="extension">File extension; it is passed through NormalizeExtension before dispatching.</param>
/// <param name="file">Stream with the contents of the file to analyze.</param>
/// <returns>An extractor instance matching the normalized extension.</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
/// <exception cref="ArgumentException">The normalized extension is not supported.</exception>
public static MetaExtractor Create(string extension, Stream file)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    string normalizedExtension = NormalizeExtension(extension);
    if (!IsSupportedExtension(normalizedExtension))
    {
        throw new ArgumentException("Extension not supported", nameof(extension));
    }

    // Extractors receive the normalized value - the exact string matched by the case labels
    // below - rather than the caller's raw input, so they always see the canonical form.
    switch (normalizedExtension)
    {
        case ".sxw":
        case ".odt":
        case ".ods":
        case ".odg":
        case ".odp":
            return new OpenOfficeDocument(file, normalizedExtension);

        case ".docx":
        case ".xlsx":
        case ".pptx":
        case ".ppsx":
            return new OfficeOpenXMLDocument(file, normalizedExtension);

        case ".doc":
        case ".xls":
        case ".ppt":
        case ".pps":
            return new Office972003(file);

        case ".pdf":
            return new PDFDocument(file);

        case ".wpd":
            return new WPDDocument(file);

        case ".raw":
        case ".cr2":
        case ".crw":
        case ".jpg":
        case ".jpeg":
            return new EXIFDocument(file, normalizedExtension);

        case ".svg":
        case ".svgz":
            return new SVGDocument(file);

        case ".indd":
            return new InDDDocument(file);

        case ".rdp":
            return new RDPDocument(file);

        case ".ica":
            return new ICADocument(file);

        default:
            // Reached only if IsSupportedExtension accepts a value this switch does not handle.
            throw new ArgumentException("Extension not supported", nameof(extension));
    }
}
/// <summary>
/// Searches the "WordDocument" stream of an OLE document for picture references, reads each
/// referenced picture block from the "Data" stream, collects file paths found in the block
/// and runs EXIF analysis on any JPEG data it contains, merging the resulting users and
/// applications into this document's metadata.
/// </summary>
/// <param name="doc">OLE document from which the "WordDocument" and "Data" streams are opened.</param>
private void GetImagesDoc(OleDocument doc)
{
    using (Stream WordDocument = doc.OpenStream("WordDocument"))
    using (Stream stmData = doc.OpenStream("Data"))
    {
        if (WordDocument == null || stmData == null)
        {
            return;
        }

        // Readers are intentionally not disposed: disposing them would close the underlying
        // streams, which are owned by the using statements above.
        BinaryReader br = new BinaryReader(WordDocument);
        BinaryReader brData = new BinaryReader(stmData);

        WordDocument.Seek(0x18, SeekOrigin.Begin);
        br.ReadInt32(); // first value (fcMin in the original code) is not needed; read only to advance
        Int32 fcMac = br.ReadInt32();

        // Scanning starts at fcMac rounded up to the next 0x200-byte boundary.
        Int32 FKPStart = fcMac % 0x200 == 0 ? fcMac : (fcMac - fcMac % 0x200) + 0x200;
        WordDocument.Seek(FKPStart, SeekOrigin.Begin);

        int ImagesFound = 0;
        while (WordDocument.Position + 0x200 < WordDocument.Length)
        {
            byte[] FKP = br.ReadBytes(0x200);
            if (FKP[0x1FF] == 00)
            {
                // A zero in the last byte of the 0x200-byte page ends the scan.
                break;
            }

            // Each 0x03 0x6A marker is followed by a 4-byte little-endian offset into the Data stream.
            foreach (int offset in Functions.SearchBytesInBytes(FKP, new byte[] { 0x03, 0x6A }))
            {
                if (offset >= 0x200 - 5)
                {
                    // Marker too close to the end of the page to be followed by a full offset.
                    continue;
                }

                int PICOffset = FKP[offset + 5] * 0x1000000 + FKP[offset + 4] * 0x10000 + FKP[offset + 3] * 0x100 + FKP[offset + 2];
                // The 4-byte length field must fit entirely inside the Data stream,
                // otherwise ReadUInt32 would run past the end.
                if (PICOffset < 0 || (long)PICOffset + sizeof(UInt32) > stmData.Length)
                {
                    continue;
                }

                stmData.Seek(PICOffset, SeekOrigin.Begin);
                UInt32 PICLength = brData.ReadUInt32();

                // When the declared length fits, it is reduced by the 4 bytes just read;
                // otherwise everything up to the end of the stream is taken.
                long remaining = stmData.Length - stmData.Position;
                int bufferLen = PICLength < remaining ? (int)PICLength - 4 : (int)remaining;
                if (bufferLen <= 0)
                {
                    // Declared lengths below 4 give a negative size, which ReadBytes rejects.
                    continue;
                }

                byte[] bufferPIC = brData.ReadBytes(bufferLen);
                string strImageName = "Image" + ImagesFound++;

                // Decode the block as UTF-16 text and collect anything that looks like a file path;
                // the last path found also supplies the image name.
                using (StreamReader sr = new StreamReader(new MemoryStream(bufferPIC), Encoding.Unicode))
                {
                    String sRead = sr.ReadToEnd();
                    foreach (Match m in Regex.Matches(sRead, @"([a-z]:|\\)\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+", RegexOptions.IgnoreCase))
                    {
                        String path = m.Value.Trim();
                        FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(path), true);
                        strImageName = Path.GetFileName(path);
                    }
                }

                // JPEG data, if any, starts at the first 0xFF 0xD8 marker inside the block.
                List<int> lstJPEG = Functions.SearchBytesInBytes(bufferPIC, new byte[] { 0xFF, 0xD8 });
                if (lstJPEG.Count == 0)
                {
                    continue;
                }

                using (MemoryStream msJPG = new MemoryStream(bufferPIC, lstJPEG[0], bufferPIC.Length - lstJPEG[0]))
                {
                    EXIFDocument eDoc = new EXIFDocument(msJPG, ".jpg");
                    eDoc.analyzeFile();
                    dicPictureEXIF.Add(strImageName, eDoc);

                    // Copy the users found in the image to the document.
                    foreach (UserItem uiEXIF in eDoc.FoundUsers.Items)
                    {
                        FoundUsers.AddUniqueItem(uiEXIF.Name, false, uiEXIF.Notes);
                    }

                    // Copy the applications found in the image, skipping blank names and duplicates.
                    foreach (ApplicationsItem Application in eDoc.FoundMetaData.Applications.Items)
                    {
                        string strApplication = Application.Name == null ? string.Empty : Application.Name.Trim();
                        if (strApplication.Length == 0)
                        {
                            continue;
                        }
                        if (!FoundMetaData.Applications.Items.Any(A => A.Name == strApplication))
                        {
                            FoundMetaData.Applications.Items.Add(new ApplicationsItem(strApplication));
                        }
                    }
                    eDoc.Close();
                }
            }
        }
    }
}