/// <summary>
/// Loads the XML in <paramref name="stm"/> and records the target of every
/// <c>text:a</c> element whose <c>xlink:href</c> passes <c>IsInterestingLink</c>
/// as a path in the collected metadata.
/// </summary>
/// <param name="stm">Stream containing the XML document to load.</param>
/// <remarks>
/// Best effort: any exception is written to the debug output and swallowed.
/// </remarks>
private void AnalizeFileContent(Stream stm)
{
    try
    {
        XmlDocument doc = new XmlDocument();
        // Disable resolution of external resources while loading.
        doc.XmlResolver = null;
        doc.Load(stm);

        XmlNodeList anchors = doc.GetElementsByTagName("text:a");
        if (anchors == null)
        {
            return;
        }

        // Collect first, add afterwards (same two-phase order as before).
        List<String> links = new List<String>();
        foreach (XmlNode anchor in anchors)
        {
            // An anchor without xlink:href used to raise a NullReferenceException,
            // which the catch below swallowed and thereby discarded every link
            // of the document. Skip only the offending anchor instead.
            XmlNode hrefAttribute = anchor.Attributes == null
                ? null
                : anchor.Attributes.GetNamedItem("xlink:href");
            if (hrefAttribute == null)
            {
                continue;
            }

            String href = hrefAttribute.Value;
            if (!string.IsNullOrEmpty(href) && this.IsInterestingLink(href))
            {
                links.Add(href);
            }
        }

        foreach (String link in links)
        {
            this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(link), true));
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error reading file content.xml ({0}).", e.ToString()));
    }
}
/// <summary>
/// Reads the whole stream as text and lazily yields one path per distinct
/// <c>http://</c> link that passes <c>IsInterestingLink</c> and per distinct
/// <c>file:///</c> link, in order of first appearance.
/// </summary>
/// <param name="stm">Seekable stream; it is rewound to the beginning when enumeration starts.</param>
/// <returns>A deferred sequence of cleaned paths; nothing is read until it is enumerated.</returns>
private IEnumerable<Diagrams.Path> BinarySearchLinks(Stream stm)
{
    stm.Seek(0, SeekOrigin.Begin);

    // The reader is left undisposed, as before: disposing a StreamReader also
    // closes the underlying stream, which belongs to the caller.
    StreamReader reader = new StreamReader(stm);
    String content = reader.ReadToEnd();

    // 'links' keeps first-seen order; 'seen' gives O(1) duplicate detection
    // instead of the previous List.Contains scan per match.
    List<string> links = new List<string>();
    HashSet<string> seen = new HashSet<string>();

    foreach (Match m in Regex.Matches(content, @"http://[^)]*", RegexOptions.IgnoreCase))
    {
        String href = m.Value.Trim();
        if (IsInterestingLink(href) && seen.Add(href))
        {
            links.Add(href);
        }
    }

    // file:/// links are accepted without the IsInterestingLink filter and untrimmed.
    foreach (Match m in Regex.Matches(content, @"file:///[^)]*", RegexOptions.IgnoreCase))
    {
        if (seen.Add(m.Value))
        {
            links.Add(m.Value);
        }
    }

    foreach (String link in links)
    {
        yield return new Diagrams.Path(PathAnalysis.CleanPath(link), true);
    }
}
/// <summary>
/// Adds the cleaned document title to <c>FoundPaths</c> when a title is present.
/// </summary>
private void AnalizarTitulo()
{
    var title = FoundMetaData.Title;
    if (title == null)
    {
        return;
    }

    var cleanedTitle = PathAnalysis.CleanPath(title);
    FoundPaths.AddUniqueItem(cleanedTitle, true);
}
/// <summary>
/// Adds the cleaned template value to <c>FoundPaths</c> when it is present and
/// longer than one character after trimming.
/// </summary>
private void AnalizarPlantilla()
{
    var template = FoundMetaData.Template;
    if (template == null || template.Trim().Length <= 1)
    {
        return;
    }

    var cleanedTemplate = PathAnalysis.CleanPath(template);
    FoundPaths.AddUniqueItem(cleanedTemplate, false);
}
/// <summary>
/// Scans the "PowerPoint Document" stream of an OLE document, decoded as UTF-16,
/// for strings that look like drive or UNC paths and adds each one to <c>FoundPaths</c>.
/// </summary>
/// <param name="doc">OLE document to read the stream from.</param>
/// <remarks>
/// Best effort: failures while reading or matching are logged to the debug output
/// and otherwise ignored.
/// </remarks>
private void GetPathPpt(OleDocument doc)
{
    using (var pptStream = doc.OpenStream("PowerPoint Document"))
    {
        if (pptStream == null)
        {
            return;
        }

        try
        {
            pptStream.Seek(0, SeekOrigin.Begin);

            // Read from the stream that was just null-checked and rewound. The
            // previous code opened the stream a second time and read from that
            // unchecked, un-rewound instance instead.
            using (var reader = new StreamReader(pptStream, Encoding.Unicode))
            {
                string content = reader.ReadToEnd();
                // NOTE(review): "/n/r" inside the character classes looks like it was
                // meant to be "\n\r"; the pattern is kept unchanged - confirm intent.
                foreach (Match m in Regex.Matches(content, @"([a-z]:|\\)\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+", RegexOptions.IgnoreCase))
                {
                    string path = m.Value.Trim();
                    FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(path), true);
                }
            }
        }
        catch (Exception e)
        {
            // Previously an empty catch; keep the best-effort behaviour but leave a trace.
            System.Diagnostics.Debug.WriteLine(String.Format("Error reading PowerPoint Document stream ({0}).", e.ToString()));
        }
    }
}
/// <summary>
/// Runs <c>emailsAndLinksRegex</c> over the text and stores every captured
/// <c>value</c> group either as an e-mail address (when it contains "mailto:")
/// or as a cleaned path, skipping values already present in the metadata.
/// </summary>
/// <param name="fileInString">File content as text.</param>
private void SearchForLinksAndEmails(string fileInString)
{
    const string MailtoPrefix = "mailto:";

    foreach (Match match in emailsAndLinksRegex.Matches(fileInString))
    {
        String valueFound = match.Groups["value"].Value;

        // Locate the actual "mailto:" marker. The previous check was
        // Contains("mailto") followed by Remove(0, 7), which cut the wrong
        // characters when the marker was not at index 0 and threw on values
        // shorter than seven characters.
        int mailtoIndex = valueFound.IndexOf(MailtoPrefix, StringComparison.Ordinal);
        if (mailtoIndex >= 0)
        {
            string email = valueFound.Substring(mailtoIndex + MailtoPrefix.Length);
            // A bare "mailto:" carries no address; do not store an empty e-mail.
            if (email.Length != 0 && !this.foundMetadata.Emails.Any(x => x.Value == email))
            {
                this.foundMetadata.Emails.Add(new Email(email));
            }
        }
        else
        {
            string link = PathAnalysis.CleanPath(valueFound);
            if (!this.foundMetadata.Paths.Any(x => x.Value == link))
            {
                this.foundMetadata.Paths.Add(new Diagrams.Path(link, false));
            }
        }
    }
}
/// <summary>
/// Adds the cleaned document title to the collected metadata as a path when a
/// title is present.
/// </summary>
private void AnalizarTitulo()
{
    var title = this.foundMetadata.Title;
    if (title == null)
    {
        return;
    }

    var titlePath = new Diagrams.Path(PathAnalysis.CleanPath(title), true);
    this.foundMetadata.Add(titlePath);
}
/// <summary>
/// Adds the cleaned template value to the collected metadata as a path when it
/// is present and longer than one character after trimming.
/// </summary>
private void AnalizarPlantilla()
{
    var template = this.foundMetadata.Template;
    if (template == null || template.Trim().Length <= 1)
    {
        return;
    }

    var templatePath = new Diagrams.Path(PathAnalysis.CleanPath(template), false);
    this.foundMetadata.Add(templatePath);
}
/// <summary>
/// Extracts printers, applications, paths and users from the file stream when
/// <c>IsWPD</c> accepts it, by walking every entry point reported by
/// <c>EntryPointString</c> and classifying the string read there.
/// </summary>
/// <returns>
/// The metadata gathered so far; exceptions are logged to the debug output and
/// do not propagate.
/// </returns>
public override FileMetadata AnalyzeFile()
{
    try
    {
        this.foundMetadata = new FileMetadata();

        if (IsWPD(this.fileStream))
        {
            // Substrings (matched against the lower-cased text) that classify a
            // non-path string as a printer or as an application.
            string[] printerKeywords = { "jet", "printer", "hp", "series", "canon", "laser", "epson", "lj", "lexmark", "xerox", "sharp" };
            string[] applicationKeywords = { "acrobat", "adobe", "creator", "writer", "pdf", "converter" };

            MetadataType tipo;
            long entryPoint = 0;
            while ((entryPoint = EntryPointString(this.fileStream, out tipo)) > -1)
            {
                // The string is read starting two bytes after the entry point.
                this.fileStream.Seek(entryPoint + 2, SeekOrigin.Begin);
                var text = ReadBinaryString16(this.fileStream);
                if (!IsPossibleString(text))
                {
                    continue;
                }

                bool isFreeText = tipo == MetadataType.Unknown && !PathAnalysis.IsValidPath(text);
                if (isFreeText)
                {
                    var lowered = text.ToLower();
                    if (Array.Exists(printerKeywords, keyword => lowered.Contains(keyword)))
                    {
                        this.foundMetadata.Add(new Printer(Functions.FilterPrinter(text)));
                    }
                    else if (Array.Exists(applicationKeywords, keyword => lowered.Contains(keyword)))
                    {
                        this.foundMetadata.Add(new Application(Functions.FilterPrinter(Analysis.ApplicationAnalysis.GetApplicationsFromString(text))));
                    }

                    continue;
                }

                var folder = Functions.GetPathFolder(text);
                if (!PathAnalysis.IsValidPath(folder))
                {
                    continue;
                }

                this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(folder), true));

                var userName = PathAnalysis.ExtractUserFromPath(folder);
                if (!string.IsNullOrEmpty(userName))
                {
                    this.foundMetadata.Add(new User(userName, true));
                }
            }
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(e.ToString());
    }

    return this.foundMetadata;
}
/// <summary>
/// Walks the XML in the file stream and records every node value and attribute
/// value accepted by <c>CheckPath</c> as a path, together with the user name
/// extracted from it when there is one.
/// </summary>
/// <returns>
/// The metadata gathered so far; exceptions are logged to the debug output and
/// do not propagate.
/// </returns>
public override FileMetadata AnalyzeFile()
{
    XmlTextReader avgReader = null;
    try
    {
        this.foundMetadata = new FileMetadata();
        avgReader = new XmlTextReader(this.fileStream) { XmlResolver = null };

        // The first node is read and skipped without being inspected.
        avgReader.Read();
        while (avgReader.Read())
        {
            // Node value, e.g. <a>/home/user/file</a>
            this.RecordPathValue(avgReader.Value);

            // Attribute values, e.g. <a atrib="/home/user/file"/>
            while (avgReader.MoveToNextAttribute())
            {
                this.RecordPathValue(avgReader.Value);
            }
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("error: " + ex.Message);
    }
    finally
    {
        avgReader?.Close();
    }

    return this.foundMetadata;
}

/// <summary>
/// Adds <paramref name="value"/> as a path (and its user, if any) when
/// <c>CheckPath</c> accepts it; shared by the node and attribute scans.
/// </summary>
private void RecordPathValue(string value)
{
    if (!CheckPath(value))
    {
        return;
    }

    var cleanPath = PathAnalysis.CleanPath(value);
    var user = PathAnalysis.ExtractUserFromPath(cleanPath);

    // IsNullOrEmpty (rather than a comparison with string.Empty) so that a null
    // result is not stored as a user.
    if (!string.IsNullOrEmpty(user))
    {
        this.foundMetadata.Add(new User(user, true));
    }

    this.foundMetadata.Add(new Diagrams.Path(cleanPath, true));
}
/// <summary>
/// Walks the XML in <c>stm</c> and adds every node value and attribute value
/// accepted by <c>CheckPath</c> to <c>FoundPaths</c>, together with the user name
/// extracted from it (added to <c>FoundUsers</c>) when there is one.
/// </summary>
/// <remarks>Exceptions are logged to the debug output and do not propagate.</remarks>
public override void analyzeFile()
{
    XmlTextReader avgReader = null;
    try
    {
        avgReader = new XmlTextReader(this.stm) { XmlResolver = null };

        // The first node is read and skipped without being inspected.
        avgReader.Read();
        while (avgReader.Read())
        {
            // Node value, e.g. <a>/home/user/file</a>
            this.CollectPathValue(avgReader.Value);

            // Attribute values, e.g. <a atrib="/home/user/file"/>
            while (avgReader.MoveToNextAttribute())
            {
                this.CollectPathValue(avgReader.Value);
            }
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine("error: " + ex.Message);
    }
    finally
    {
        avgReader?.Close();
    }
}

/// <summary>
/// Adds <paramref name="value"/> to <c>FoundPaths</c> (and its user, if any, to
/// <c>FoundUsers</c>) when <c>CheckPath</c> accepts it; shared by the node and
/// attribute scans.
/// </summary>
private void CollectPathValue(string value)
{
    if (!CheckPath(value))
    {
        return;
    }

    var cleanPath = PathAnalysis.CleanPath(value);
    var user = PathAnalysis.ExtractUserFromPath(cleanPath);

    // IsNullOrEmpty (rather than a comparison with string.Empty) so that a null
    // result is not stored as a user.
    if (!string.IsNullOrEmpty(user))
    {
        FoundUsers.AddUniqueItem(user, true);
    }

    FoundPaths.AddUniqueItem(cleanPath, true);
}
/// <summary>
/// Lazily yields one cleaned path per match of <c>pathRegex</c> in the text.
/// Each match is trimmed, cut shortly before its first ')', and has doubled
/// backslashes and backslash-newline sequences collapsed.
/// </summary>
/// <param name="fileInString">File content as text.</param>
/// <returns>A deferred sequence of paths; matching happens during enumeration.</returns>
private IEnumerable<Diagrams.Path> SearchForPaths(String fileInString)
{
    foreach (Match m in pathRegex.Matches(fileInString))
    {
        String path = m.Value.Trim();

        int closingParen = path.IndexOf(')');
        if (closingParen == 0)
        {
            // Nothing precedes the ')': Remove(-1) would throw
            // ArgumentOutOfRangeException and abort the enumeration.
            continue;
        }

        if (closingParen > 0)
        {
            // Drops the ')' and the single character before it, as before.
            // NOTE(review): why the preceding character is dropped too is not
            // evident from this method - confirm against the pattern.
            path = path.Remove(closingParen - 1);
        }

        path = path.Replace(@"\\", @"\");
        path = path.Replace("\\\r", "");
        path = path.Replace("\\\n", "");

        yield return new Diagrams.Path(PathAnalysis.CleanPath(path), true);
    }
}
/// <summary>
/// Reads the stream as ASCII text and adds every http/https URL that passes
/// <c>IsInterestingLink</c> to <c>FoundPaths</c>. A URL containing exactly two
/// slashes and not ending in one gets a trailing slash appended first.
/// </summary>
/// <param name="document">Seekable stream to scan; null is ignored.</param>
private void GetLinksBinaryWorkbook(Stream document)
{
    if (document == null)
    {
        return;
    }

    document.Seek(0, SeekOrigin.Begin);
    using (var reader = new StreamReader(document, Encoding.ASCII))
    {
        string content = reader.ReadToEnd();
        MatchCollection urlMatches = Regex.Matches(content, @"http(s)?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?", RegexOptions.IgnoreCase);

        foreach (Match m in urlMatches)
        {
            string link = m.Value.Trim();
            if (!IsInterestingLink(link) || string.IsNullOrEmpty(link))
            {
                continue;
            }

            // Strip character code 18 from both ends.
            string normalized = link.Trim(new char[] { (char)18 });

            if (!link.EndsWith("/"))
            {
                int slashCount = 0;
                foreach (char c in normalized)
                {
                    if (c == '/')
                    {
                        slashCount++;
                    }
                }

                // Exactly two slashes means there is no slash after "scheme://".
                if (slashCount == 2)
                {
                    normalized += "/";
                }
            }

            normalized = PathAnalysis.CleanPath(normalized);
            FoundPaths.AddUniqueItem(normalized, true);
        }
    }
}
/// <summary>
/// Reads the first entry of a string table referenced from the "WordDocument"
/// stream of an OLE document and adds it, cleaned, to <c>FoundPaths</c>.
/// </summary>
/// <param name="doc">OLE document to read the streams from.</param>
/// <remarks>
/// I/O exceptions (for example a truncated stream) are not caught here.
/// </remarks>
private void GetRelatedDocuments(OleDocument doc)
{
    using (Stream WordDocument = doc.OpenStream("WordDocument"))
    {
        if (WordDocument == null)
        {
            return;
        }

        BinaryReader br = new BinaryReader(WordDocument);

        // NOTE(review): offsets 0xB and 0x19A are presumably fields of the Word
        // file header - confirm against the format specification.
        WordDocument.Seek(0xB, SeekOrigin.Begin);
        Byte tipo = br.ReadByte();

        WordDocument.Seek(0x19A, SeekOrigin.Begin);
        UInt32 dir = br.ReadUInt32(); // offset of the table inside the table stream
        UInt32 tam = br.ReadUInt32(); // size of the table
        if (tam <= 8)
        {
            return;
        }

        // Bit 0x02 of the byte at 0xB selects which table stream holds the data.
        // 'using' guarantees the stream is released even when a read below throws;
        // previously it was closed only on the success path.
        using (Stream table = doc.OpenStream((tipo & 2) == 2 ? "1Table" : "0Table"))
        {
            // OpenStream can return null (see the check above); guard before use.
            if (table == null)
            {
                return;
            }

            BinaryReader br1 = new BinaryReader(table);
            table.Seek(dir, SeekOrigin.Begin);

            bool unicode = br1.ReadUInt16() == 0xFFFF;
            int nro_strings = br1.ReadInt16();
            br1.ReadInt32(); // extra-data length: unused, but must be consumed to advance
            if (nro_strings <= 0)
            {
                return;
            }

            int strSize = br1.ReadInt16();
            if (strSize < 0)
            {
                // A negative length would make ReadBytes throw ArgumentOutOfRangeException.
                return;
            }

            string ruta;
            if (unicode)
            {
                // Length is in characters; two bytes per UTF-16 character.
                Byte[] cadena = br1.ReadBytes(strSize * 2);
                ruta = Encoding.Unicode.GetString(cadena).Replace('\0', ' ');
            }
            else
            {
                Byte[] cadena = br1.ReadBytes(strSize);
                ruta = Encoding.Default.GetString(cadena).Replace('\0', ' ');
            }

            FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(ruta), true);
        }
    }
}
/// <summary>
/// Reads the whole stream as text and lazily yields one cleaned path per string
/// that matches the drive/UNC path pattern written with doubled backslashes.
/// </summary>
/// <param name="stm">Seekable stream; it is rewound to the beginning when enumeration starts.</param>
/// <returns>A deferred sequence of paths; nothing is read until it is enumerated.</returns>
private IEnumerable<Diagrams.Path> BinarySearchPaths(Stream stm)
{
    stm.Seek(0, SeekOrigin.Begin);
    StreamReader reader = new StreamReader(stm);
    String content = reader.ReadToEnd();

    MatchCollection pathMatches = Regex.Matches(content, @"([a-z]:|\\\\)\\\\(([a-z0-9\s\-_\$&()ñÇ/n/r]+)\\\\)*[a-z0-9\s,;.\-_\$%&()=ñ{}Ç/n/r+@]+", RegexOptions.IgnoreCase);
    foreach (Match m in pathMatches)
    {
        String candidate = m.Value.Trim();

        // Cut at the first ')', also dropping the character just before it.
        int closingParen = candidate.IndexOf(')');
        if (closingParen >= 0)
        {
            candidate = candidate.Remove(closingParen - 1);
        }

        // Collapse doubled backslashes, then remove backslash + CR / backslash + LF.
        candidate = candidate.Replace(@"\\", @"\")
                             .Replace("\\\r", "")
                             .Replace("\\\n", "");

        yield return new Diagrams.Path(PathAnalysis.CleanPath(candidate), true);
    }
}
/// <summary>
/// Reads the stream as ASCII text and adds every double-quoted URL with one of
/// the listed schemes that passes <c>IsInterestingLink</c> to <c>FoundPaths</c>.
/// A URL containing exactly two slashes and not ending in one gets a trailing
/// slash appended first.
/// </summary>
/// <param name="document">Seekable stream to scan; null is ignored.</param>
private void GetLinksWordDocument(Stream document)
{
    if (document == null)
    {
        return;
    }

    document.Seek(0, SeekOrigin.Begin);
    using (var reader = new StreamReader(document, Encoding.ASCII))
    {
        string content = reader.ReadToEnd();
        MatchCollection urlMatches = Regex.Matches(content, "\"((ftp|http|https|ldap|mailto|ftp|telnet)://[^\"]*)", RegexOptions.IgnoreCase);

        foreach (Match m in urlMatches)
        {
            // Group 1 is the URL without the opening quote.
            string link = m.Groups[1].Value.Trim();
            if (!IsInterestingLink(link) || string.IsNullOrEmpty(link))
            {
                continue;
            }

            string normalized = link;
            if (!link.EndsWith("/"))
            {
                int slashCount = 0;
                foreach (char c in normalized)
                {
                    if (c == '/')
                    {
                        slashCount++;
                    }
                }

                // Exactly two slashes means there is no slash after "scheme://".
                if (slashCount == 2)
                {
                    normalized += "/";
                }
            }

            normalized = PathAnalysis.CleanPath(normalized);
            FoundPaths.AddUniqueItem(normalized, true);
        }
    }
}
/// <summary>
/// Reads the stream as UTF-16 text and adds every http/https URL that passes
/// <c>IsInterestingLink</c>, cleaned, to <c>FoundPaths</c>.
/// </summary>
/// <param name="document">Seekable stream to scan; null is ignored.</param>
private void GetLinksBinaryPowerPointDocument(Stream document)
{
    if (document == null)
    {
        return;
    }

    document.Seek(0, SeekOrigin.Begin);
    using (var reader = new StreamReader(document, Encoding.Unicode))
    {
        string content = reader.ReadToEnd();
        MatchCollection urlMatches = Regex.Matches(content, @"http(s)?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?", RegexOptions.IgnoreCase);

        foreach (Match m in urlMatches)
        {
            string link = m.Value.Trim();
            if (!IsInterestingLink(link))
            {
                continue;
            }

            string cleaned = PathAnalysis.CleanPath(link);
            FoundPaths.AddUniqueItem(cleaned, true);
        }
    }
}
/// <summary>
/// Extracts metadata by scanning the stream's text for '@'-prefixed paths,
/// "winspool" printer entries and embedded XMP/RDF packets.
/// </summary>
/// <remarks>
/// Exceptions raised while scanning are logged to the debug output. The stream
/// is closed and the field cleared afterwards in every case.
/// </remarks>
public override void analyzeFile()
{
    try
    {
        using (StreamReader reader = new StreamReader(stm))
        {
            String content = reader.ReadToEnd();

            // Paths: every match starts with the '@' required by the pattern.
            MatchCollection pathMatches = Regex.Matches(content, @"@([a-z]:|\\)\\(([a-z0-9\s\-_\$&()ñÇ/n/r]+)\\)*[a-z0-9\s,;.\-_\$%&()=ñ{}Ç/n/r+@]+", RegexOptions.IgnoreCase);
            foreach (Match m in pathMatches)
            {
                // Substring(1) removes that leading '@'.
                String path = m.Value.Trim().Substring(1);
                FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(path), true);
            }

            // Printers: every match starts with "winspool" followed by a NUL.
            MatchCollection printerMatches = Regex.Matches(content, @"winspool\0([a-z]:|\\)\\(([a-z0-9\s\-_\$&()ñÇ/n/r]+)\\)*[a-z0-9\s,;.\-_\$%&()=ñ{}Ç/n/r+@]+", RegexOptions.IgnoreCase);
            foreach (Match m in printerMatches)
            {
                // Substring(9) removes that nine-character prefix.
                String printer = m.Value.Trim().Substring(9);
                FoundPrinters.AddUniqueItem(Functions.FilterPrinter(printer));
            }

            // XMP packets first, then bare RDF blocks (same order as the two
            // separate loops this replaces).
            string[] xmpPatterns = { @"<x:xmpmeta[^\0]*</x:xmpmeta>", @"<rdf:RDF[^\0]*</rdf:RDF>" };
            foreach (string pattern in xmpPatterns)
            {
                foreach (Match m in Regex.Matches(content, pattern, RegexOptions.IgnoreCase))
                {
                    ReadXMPMetadata(m.Value.Trim());
                }
            }
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine(ex.ToString());
    }
    finally
    {
        this.stm.Close();
        this.stm = null;
    }
}
/// <summary>
/// Extracts metadata by scanning the file stream's text for '@'-prefixed paths,
/// "winspool" printer entries and embedded XMP/RDF packets.
/// </summary>
/// <returns>
/// The metadata gathered so far; exceptions are logged to the debug output and
/// do not propagate.
/// </returns>
public override FileMetadata AnalyzeFile()
{
    try
    {
        this.foundMetadata = new FileMetadata();
        using (StreamReader reader = new StreamReader(this.fileStream))
        {
            String content = reader.ReadToEnd();

            // Paths: every match starts with the '@' required by the pattern.
            MatchCollection pathMatches = Regex.Matches(content, @"@([a-z]:|\\)\\(([a-z0-9\s\-_\$&()ñÇ/n/r]+)\\)*[a-z0-9\s,;.\-_\$%&()=ñ{}Ç/n/r+@]+", RegexOptions.IgnoreCase);
            foreach (Match m in pathMatches)
            {
                // Substring(1) removes that leading '@'.
                String path = m.Value.Trim().Substring(1);
                this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(path), true));
            }

            // Printers: every match starts with "winspool" followed by a NUL.
            MatchCollection printerMatches = Regex.Matches(content, @"winspool\0([a-z]:|\\)\\(([a-z0-9\s\-_\$&()ñÇ/n/r]+)\\)*[a-z0-9\s,;.\-_\$%&()=ñ{}Ç/n/r+@]+", RegexOptions.IgnoreCase);
            foreach (Match m in printerMatches)
            {
                // Substring(9) removes that nine-character prefix.
                String printer = m.Value.Trim().Substring(9);
                this.foundMetadata.Add(new Printer(Functions.FilterPrinter(printer)));
            }

            // XMP packets first, then bare RDF blocks (same order as the two
            // separate loops this replaces).
            string[] xmpPatterns = { @"<x:xmpmeta[^\0]*</x:xmpmeta>", @"<rdf:RDF[^\0]*</rdf:RDF>" };
            foreach (string pattern in xmpPatterns)
            {
                foreach (Match m in Regex.Matches(content, pattern, RegexOptions.IgnoreCase))
                {
                    ReadXMPMetadata(m.Value.Trim());
                }
            }
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine(ex.ToString());
    }

    return this.foundMetadata;
}
/// <summary>
/// Parses an XMP/RDF packet and merges what it declares as child elements
/// (creator applications, creation/modification dates, title, authors and the
/// last URL) into the metadata collected so far.
/// </summary>
/// <param name="xmp">XMP/RDF XML text; a null or empty string is ignored.</param>
/// <remarks>
/// XML parsing errors raised by <c>LoadXml</c> are not caught here. Only the
/// first element with each tag name is considered.
/// </remarks>
public void ReadXMPMetadata(string xmp)
{
    if (string.IsNullOrEmpty(xmp))
    {
        return;
    }

    XmlDocument xDoc = new XmlDocument();
    // Disable resolution of external resources while loading.
    xDoc.XmlResolver = null;
    xDoc.LoadXml(xmp);

    // The order below is significant: each date/title rule compares against the
    // state left by the rules before it.
    AddXmpApplication(GetXmpFirstChildValue(xDoc, "pdf:Creator"));

    UpdateXmpCreationDate(GetXmpFirstChildValue(xDoc, "pdf:CreationDate"), true);
    UpdateXmpCreationDate(GetXmpFirstChildValue(xDoc, "xap:CreateDate"), true);
    UpdateXmpCreationDate(GetXmpFirstChildValue(xDoc, "xap:MetadataDate"), false);

    UpdateXmpTitle(GetXmpNestedValue(xDoc, "xap:Title"));

    string author = GetXmpFirstChildValue(xDoc, "xap:Author");
    if (author != null)
    {
        FoundUsers.AddUniqueItem(author, true, "xap:Author");
    }

    UpdateXmpModificationDate(GetXmpFirstChildValue(xDoc, "pdf:ModDate"));
    UpdateXmpModificationDate(GetXmpFirstChildValue(xDoc, "xap:ModifyDate"));

    AddXmpApplication(GetXmpFirstChildValue(xDoc, "xap:CreatorTool"));

    string dcCreator = GetXmpNestedValue(xDoc, "dc:creator");
    if (dcCreator != null)
    {
        FoundUsers.AddUniqueItem(dcCreator, true);
    }

    UpdateXmpTitle(GetXmpNestedValue(xDoc, "dc:title"));

    string lastUrl = GetXmpFirstChildValue(xDoc, "stRef:lastURL");
    if (lastUrl != null)
    {
        FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(lastUrl), true);
    }
}

/// <summary>
/// Returns the value of the first child of the first element named
/// <paramref name="tagName"/>, or null when the element is missing, has no
/// children, or that value is null or empty.
/// </summary>
private static string GetXmpFirstChildValue(XmlDocument xDoc, string tagName)
{
    XmlNodeList nodes = xDoc.GetElementsByTagName(tagName);
    if (nodes == null || nodes.Count == 0 || !nodes[0].HasChildNodes)
    {
        return null;
    }

    string value = nodes[0].FirstChild.Value;
    return string.IsNullOrEmpty(value) ? null : value;
}

/// <summary>
/// Like <c>GetXmpFirstChildValue</c>, but descends through first children until
/// a node with a value is reached; used for values wrapped in container elements.
/// </summary>
private static string GetXmpNestedValue(XmlDocument xDoc, string tagName)
{
    XmlNodeList nodes = xDoc.GetElementsByTagName(tagName);
    if (nodes == null || nodes.Count == 0 || !nodes[0].HasChildNodes)
    {
        return null;
    }

    // Find the first descendant (following first children) that carries a value.
    XmlNode node = nodes[0].FirstChild;
    while (node.Value == null && node.HasChildNodes)
    {
        node = node.FirstChild;
    }

    return string.IsNullOrEmpty(node.Value) ? null : node.Value;
}

/// <summary>
/// Adds the application named by <paramref name="rawValue"/> unless an
/// application with the same name is already recorded.
/// </summary>
private void AddXmpApplication(string rawValue)
{
    if (rawValue == null)
    {
        return;
    }

    string name = Analysis.ApplicationAnalysis.GetApplicationsFromString(rawValue).Trim();
    if (name == string.Empty)
    {
        // No known application was recognised; fall back to the raw text.
        name = rawValue.Trim();
    }

    if (name != string.Empty && !FoundMetaData.Applications.Items.Any(A => A.Name == name))
    {
        FoundMetaData.Applications.Items.Add(new ApplicationsItem(name));
    }
}

/// <summary>
/// Parses an XMP date after replacing its 'T' and 'Z' characters with spaces.
/// </summary>
private static bool TryParseXmpDate(string value, out DateTime date)
{
    return DateTime.TryParse(value.Replace('T', ' ').Replace('Z', ' '), out date);
}

/// <summary>
/// Stores the parsed date as creation date when none is set yet or the stored
/// one is later than it.
/// </summary>
/// <param name="value">Date text, or null to do nothing.</param>
/// <param name="checkModificationDate">
/// When true, the date is additionally accepted only if no modification date is
/// set or the stored modification date is earlier than the candidate.
/// </param>
private void UpdateXmpCreationDate(string value, bool checkModificationDate)
{
    DateTime d;
    if (value == null || !TryParseXmpDate(value, out d))
    {
        return;
    }

    bool isEarlier = !FoundDates.CreationDateSpecified || FoundDates.CreationDate > d;
    if (!isEarlier)
    {
        return;
    }

    // NOTE(review): this accepts a creation date only when it is LATER than the
    // known modification date, which looks inverted; kept exactly as it was -
    // confirm the intended rule.
    if (checkModificationDate && FoundDates.ModificationDateSpecified && !(FoundDates.ModificationDate < d))
    {
        return;
    }

    FoundDates.CreationDateSpecified = true;
    FoundDates.CreationDate = d;
}

/// <summary>
/// Stores the parsed date as modification date when none is set yet or the
/// stored one is earlier than it.
/// </summary>
private void UpdateXmpModificationDate(string value)
{
    DateTime d;
    if (value == null || !TryParseXmpDate(value, out d))
    {
        return;
    }

    if (!FoundDates.ModificationDateSpecified || FoundDates.ModificationDate < d)
    {
        FoundDates.ModificationDateSpecified = true;
        FoundDates.ModificationDate = d;
    }
}

/// <summary>
/// Replaces the stored title when there is none or the candidate is longer.
/// </summary>
private void UpdateXmpTitle(string candidate)
{
    if (candidate == null)
    {
        return;
    }

    if (string.IsNullOrEmpty(FoundMetaData.Title) || FoundMetaData.Title.Length < candidate.Length)
    {
        FoundMetaData.Title = candidate;
    }
}
/// <summary>
/// Loads the XML in <paramref name="stm"/> and records, from its
/// <c>config:config-item</c> elements, the printer name, the current database
/// data source and the various table URLs.
/// </summary>
/// <param name="stm">Stream containing the XML document to load.</param>
/// <remarks>
/// Best effort: any exception is written to the debug output and swallowed.
/// </remarks>
private void analizeFileSettings(Stream stm)
{
    try
    {
        XmlDocument doc = new XmlDocument();
        // Disable resolution of external resources while loading.
        doc.XmlResolver = null;
        doc.Load(stm);

        XmlNodeList items = doc.GetElementsByTagName("config:config-item");
        if (items == null)
        {
            return;
        }

        foreach (XmlNode item in items)
        {
            // An item without config:name used to raise a NullReferenceException
            // that aborted the whole scan; skip just that item.
            XmlNode nameAttribute = item.Attributes == null
                ? null
                : item.Attributes.GetNamedItem("config:name");
            if (nameAttribute == null || !item.HasChildNodes)
            {
                continue;
            }

            string value = item.FirstChild.Value;

            // Previously every branch after "PrinterName" was nested inside the
            // PrinterName block (as the else of its HasChildNodes test), so none
            // of them could ever run. They are now real alternatives.
            switch (nameAttribute.Value)
            {
                case "PrinterName":
                    this.foundMetadata.Add(new Printer(Functions.FilterPrinter(value)));
                    break;

                case "CurrentDatabaseDataSource":
                    this.foundMetadata.DataBase = value;
                    break;

                // These only appear in ODP and ODG files (file:/// URLs).
                case "ColorTableURL":
                case "BitmapTableURL":
                case "DashTableURL":
                case "GradientTableURL":
                case "HatchTableURL":
                case "LineEndTableURL":
                {
                    string cleaned = PathAnalysis.CleanPath(value);
                    // The default per-user configuration folder is not reported.
                    if (cleaned != "$(user)/config/")
                    {
                        this.foundMetadata.Add(new Diagrams.Path(cleaned, true));
                    }

                    break;
                }
            }
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error reading file settings.xml ({0}).", e.ToString()));
    }
}
/// <summary>
/// Parses a meta.xml stream and copies generator, users, dates, descriptive fields,
/// user-defined properties, template paths and document statistics into <c>foundMetadata</c>.
/// </summary>
/// <param name="stm">Stream containing the meta.xml document.</param>
/// <remarks>
/// Any exception is written to the debug output and swallowed. Missing attributes or
/// non-text element content are skipped individually so that one malformed entry does not
/// prevent the remaining fields from being read.
/// </remarks>
private void AnalizeFileMeta(Stream stm)
{
    try
    {
        XmlDocument doc = new XmlDocument();
        // External entities/DTDs must never be resolved for analysed documents.
        doc.XmlResolver = null;
        doc.Load(stm);

        // Value of the first child of the first <tagName> element, or null when the element
        // is absent, empty, or its first child carries no value.
        string TextOf(string tagName)
        {
            XmlNodeList nodes = doc.GetElementsByTagName(tagName);
            return nodes != null && nodes.Count != 0 && nodes[0].HasChildNodes
                ? nodes[0].FirstChild.Value
                : null;
        }

        // Reads a timestamp element; the 'T' separator is replaced by a space before parsing.
        bool TryReadDate(string tagName, out DateTime date)
        {
            date = default(DateTime);
            string raw = TextOf(tagName);
            return raw != null && DateTime.TryParse(raw.Replace('T', ' '), out date);
        }

        // --- Generator (application and, when encoded in the string, operating system) ---
        string generator = TextOf("meta:generator");
        if (generator != null)
        {
            if (strExtlo == ".odt" || strExtlo == ".ods" || strExtlo == ".odg" || strExtlo == ".odp")
            {
                // Expected shape here: "<product>$<os> <rest>".
                int dollar = generator.IndexOf('$');
                int space = generator.IndexOf(' ');
                if (dollar != -1 && space != -1 && space > dollar)
                {
                    string strSoftware = generator.Remove(dollar) + " - " + generator.Substring(space + 1, generator.Length - space - 1);
                    this.foundMetadata.Add(new Application(Analysis.ApplicationAnalysis.GetApplicationsFromString(strSoftware)));
                    // The slice includes the separating space; Trim() removes it.
                    this.foundMetadata.OperatingSystem = generator.Substring(dollar + 1, space - dollar).Trim();
                }
                else
                {
                    this.foundMetadata.Add(new Application(Analysis.ApplicationAnalysis.GetApplicationsFromString(generator)));
                }
            }
            else if (strExtlo == ".sxw")
            {
                // Expected shape here: "<product> (<os>)".
                int open = generator.IndexOf('(');
                int close = generator.IndexOf(')');
                if (close != -1 && open != -1 && close > open)
                {
                    string strSoftware = generator.Remove(open);
                    this.foundMetadata.Add(new Application(Analysis.ApplicationAnalysis.GetApplicationsFromString(strSoftware)));
                    this.foundMetadata.OperatingSystem = generator.Substring(open + 1, close - open - 1);
                }
                else
                {
                    this.foundMetadata.Add(new Application(Analysis.ApplicationAnalysis.GetApplicationsFromString(generator)));
                }
            }
        }

        // --- Users: dc:creator is always flagged true; the others only while no user is known yet ---
        string user = TextOf("dc:creator");
        if (user != null)
        {
            this.foundMetadata.Add(new User(user, true, "dc:creator"));
        }

        foreach (string userTag in new[] { "meta:printed-by", "dc:initial-creator", "meta:initial-creator" })
        {
            user = TextOf(userTag);
            if (user != null)
            {
                this.foundMetadata.Add(new User(user, this.foundMetadata.Users.Count == 0, userTag));
            }
        }

        // --- Dates (dc:date is read after meta:date, so it wins when both are present) ---
        DateTime parsedDate;
        if (TryReadDate("meta:creation-date", out parsedDate))
        {
            this.foundMetadata.Dates.CreationDate = parsedDate;
        }

        if (TryReadDate("meta:date", out parsedDate))
        {
            this.foundMetadata.Dates.ModificationDate = parsedDate;
        }

        if (TryReadDate("dc:date", out parsedDate))
        {
            this.foundMetadata.Dates.ModificationDate = parsedDate;
        }

        if (TryReadDate("meta:print-date", out parsedDate))
        {
            this.foundMetadata.Dates.PrintingDate = parsedDate;
        }

        // --- Descriptive fields ---
        string language = TextOf("dc:language");
        if (language != null)
        {
            this.foundMetadata.Language = language;
        }

        string title = TextOf("dc:title");
        if (title != null)
        {
            this.foundMetadata.Title = title;
            // If the title is a valid path, add it as a path of the computer.
            this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(this.foundMetadata.Title), true));
        }

        string subject = TextOf("dc:subject");
        if (subject != null)
        {
            this.foundMetadata.Subject = subject;
        }

        string description = TextOf("dc:description");
        if (description != null)
        {
            this.foundMetadata.Description = description;
        }

        XmlNodeList xnl = doc.GetElementsByTagName("meta:keyword");
        if (xnl != null && xnl.Count != 0)
        {
            StringBuilder keyWords = new StringBuilder();
            foreach (XmlNode xn in xnl)
            {
                if (xn.HasChildNodes)
                {
                    keyWords.Append(xn.FirstChild.Value).Append(" ");
                }
            }

            this.foundMetadata.Keywords = keyWords.ToString();
        }

        string editingCycles = TextOf("meta:editing-cycles");
        Decimal ediciones;
        if (editingCycles != null && Decimal.TryParse(editingCycles, out ediciones))
        {
            this.foundMetadata.VersionNumber = ediciones;
        }

        string editingDuration = TextOf("meta:editing-duration");
        if (editingDuration != null)
        {
            try
            {
                // The value is an ISO-8601/xsd duration (e.g. "PT1H30M5S"), which the date
                // parser below cannot read; XmlConvert understands that lexical form.
                this.foundMetadata.EditTime = XmlConvert.ToTimeSpan(editingDuration).Ticks;
            }
            catch (FormatException)
            {
                // Not a duration: keep the historical date-style parsing as a fallback.
                DateTime legacy;
                if (DateTime.TryParse(editingDuration.Replace('T', ' ').Replace('P', ' '), out legacy))
                {
                    this.foundMetadata.EditTime = legacy.Ticks;
                }
            }
        }

        // --- User-defined properties, serialised as "name: value|name: value|" ---
        xnl = doc.GetElementsByTagName("meta:user-defined");
        if (xnl != null && xnl.Count != 0)
        {
            StringBuilder info = new StringBuilder();
            foreach (XmlNode xn in xnl)
            {
                if (xn.HasChildNodes)
                {
                    // A missing meta:name yields an empty name instead of aborting the method.
                    string propertyName = xn.Attributes?.GetNamedItem("meta:name")?.Value;
                    info.Append(propertyName).Append(": ").Append(xn.FirstChild.Value).Append("|");
                }
            }

            if (info.Length != 0)
            {
                this.foundMetadata.UserInfo = info.ToString();
            }
        }

        // --- Template locations ---
        xnl = doc.GetElementsByTagName("meta:template");
        if (xnl != null && xnl.Count != 0)
        {
            foreach (XmlNode xn in xnl)
            {
                string href = xn.Attributes?.GetNamedItem("xlink:href")?.Value;
                if (href != null)
                {
                    this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(href), true));
                }
            }
        }

        // --- Document statistics: only the attributes that are present are reported ---
        xnl = doc.GetElementsByTagName("meta:document-statistic");
        if (xnl != null && xnl.Count > 0 && xnl[0].Attributes != null)
        {
            string[,] counters =
            {
                { "meta:table-count", " Tables: " },
                { "meta:cell-count", " Cell: " },
                { "meta:image-count", " Images: " },
                { "meta:object-count", " Objects: " },
                { "meta:page-count", " Pages: " },
                { "meta:paragraph-count", " Paragraph: " },
                { "meta:word-count", " Words: " },
                { "meta:character-count", " Characters: " },
            };

            XmlAttributeCollection attributes = xnl[0].Attributes;
            StringBuilder statisticBuilder = new StringBuilder();
            for (int i = 0; i < counters.GetLength(0); i++)
            {
                XmlNode counter = attributes.GetNamedItem(counters[i, 0]);
                if (counter != null)
                {
                    statisticBuilder.Append(counters[i, 1]).Append(counter.Value);
                }
            }

            if (statisticBuilder.Length > 0)
            {
                this.foundMetadata.Statistic = statisticBuilder.ToString().Trim();
            }
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error reading file meta.xml ({0}).", e.ToString()));
    }
}
/// <summary>
/// Extracts the metadata of the document.
/// </summary>
/// <returns>
/// The metadata gathered from the PDF information dictionary, the XMP packet and the binary
/// path/link scans. Never null: an empty <see cref="FileMetadata"/> is returned on failure.
/// </returns>
public override FileMetadata AnalyzeFile()
{
    try
    {
        this.foundMetadata = new FileMetadata();

        using (PdfDocument pdf = PdfReader.Open(this.fileStream, PdfDocumentOpenMode.InformationOnly))
        {
            ReadXMPMetadata(pdf);

            var info = pdf.Info;

            string rawTitle = info.Title;
            if (rawTitle != string.Empty)
            {
                this.foundMetadata.Title = Functions.ToPlainText(rawTitle);
                if (Uri.IsWellFormedUriString(rawTitle, UriKind.Absolute))
                {
                    this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(rawTitle), true));
                }
            }

            string rawSubject = info.Subject;
            if (rawSubject != string.Empty)
            {
                this.foundMetadata.Subject = Functions.ToPlainText(rawSubject);
            }

            string rawAuthor = info.Author;
            if (rawAuthor != string.Empty)
            {
                this.foundMetadata.Add(new User(Functions.ToPlainText(rawAuthor), true));
            }

            string rawKeywords = info.Keywords;
            if (rawKeywords != string.Empty)
            {
                this.foundMetadata.Keywords = Functions.ToPlainText(rawKeywords);
            }

            string rawCreator = info.Creator;
            if (rawCreator != string.Empty)
            {
                string creatorText = Functions.ToPlainText(rawCreator);
                string knownCreator = ApplicationAnalysis.GetApplicationsFromString(creatorText);
                if (knownCreator.Trim() != string.Empty)
                {
                    this.foundMetadata.Add(new Application(knownCreator));
                }
                else if (!String.IsNullOrWhiteSpace(creatorText))
                {
                    // No known application was recognised; report the raw value anyway.
                    this.foundMetadata.Add(new Application(creatorText.Trim()));
                }
            }

            string rawProducer = info.Producer;
            if (!String.IsNullOrWhiteSpace(rawProducer))
            {
                string producerText = Functions.ToPlainText(rawProducer);
                string knownProducer = ApplicationAnalysis.GetApplicationsFromString(producerText);
                if (!String.IsNullOrWhiteSpace(knownProducer))
                {
                    this.foundMetadata.Add(new Application(knownProducer));
                }
                else if (!String.IsNullOrWhiteSpace(producerText))
                {
                    // No known application was recognised; report the raw value anyway.
                    this.foundMetadata.Add(new Application(producerText.Trim()));
                }
            }

            // Each date is read in its own try block: an InvalidCastException raised while
            // reading one of them is ignored and does not affect the other.
            try
            {
                DateTime created = info.CreationDate;
                if (created != DateTime.MinValue)
                {
                    this.foundMetadata.Dates.CreationDate = created;
                }
            }
            catch (InvalidCastException)
            {
            }

            try
            {
                DateTime modified = info.ModificationDate;
                if (modified != DateTime.MinValue)
                {
                    this.foundMetadata.Dates.ModificationDate = modified;
                }
            }
            catch (InvalidCastException)
            {
            }
        }

        // Binary search for paths and links over the raw stream.
        this.foundMetadata.AddRange(BinarySearchPaths(this.fileStream).ToArray());
        this.foundMetadata.AddRange(BinarySearchLinks(this.fileStream).ToArray());

        // Look for user names embedded in the paths that were found.
        foreach (Diagrams.Path foundPath in this.foundMetadata.Paths)
        {
            string userInPath = PathAnalysis.ExtractUserFromPath(foundPath.Value);
            this.foundMetadata.Add(new User(userInPath, foundPath.IsComputerFolder));
        }

        // For PDFs the title is also searched for software; it is added only when it is known software.
        if (!String.IsNullOrEmpty(foundMetadata.Title))
        {
            string titleSoftware = ApplicationAnalysis.GetApplicationsFromString(foundMetadata.Title);
            if (!String.IsNullOrWhiteSpace(titleSoftware))
            {
                this.foundMetadata.Add(new Application(titleSoftware));
            }
        }
    }
    catch (PdfReaderException)
    {
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine(ex.ToString());
    }
    finally
    {
        if (foundMetadata == null)
        {
            this.foundMetadata = new FileMetadata();
        }
    }

    return this.foundMetadata;
}
/// <summary>
/// Reads the author/path history table of a Word binary document and registers each
/// author in <c>FoundUsers</c>, each entry in <c>FoundHistory</c> and each path in <c>FoundPaths</c>.
/// </summary>
/// <param name="doc">OLE compound document to read the "WordDocument" and table streams from.</param>
/// <remarks>
/// The method returns without doing anything when a required stream is missing or too short
/// for the fixed-offset reads. Read errors inside the table itself still propagate to the caller.
/// </remarks>
private void GetHistory(OleDocument doc)
{
    using (Stream WordDocument = doc.OpenStream("WordDocument"))
    {
        // The last fixed-offset read below covers bytes 0x2D2..0x2D9.
        if (WordDocument == null || WordDocument.Length < 0x2D2 + 8)
        {
            return;
        }

        BinaryReader br = new BinaryReader(WordDocument);

        // Flag byte whose bit 1 selects between the "0Table" and "1Table" streams.
        WordDocument.Seek(0xB, SeekOrigin.Begin);
        Byte tipo = br.ReadByte();

        // Offset and size of the history table inside the table stream.
        // NOTE(review): 0x2D2 is presumably the saved-by string table entry of the FIB — confirm against [MS-DOC].
        WordDocument.Seek(0x2D2, SeekOrigin.Begin);
        UInt32 dir = br.ReadUInt32();
        UInt32 tam = br.ReadUInt32();
        if (tam == 0)
        {
            return;
        }

        using (Stream table = doc.OpenStream((tipo & 2) == 2 ? "1Table" : "0Table"))
        {
            // OpenStream can yield null (see the check above); the table stream was previously
            // dereferenced unconditionally. The 6-byte header must also fit after the offset.
            if (table == null || dir > table.Length - 6)
            {
                return;
            }

            table.Seek(dir, SeekOrigin.Begin);
            BinaryReader tableReader = new BinaryReader(table);

            Boolean unicode = tableReader.ReadUInt16() == 0xFFFF;
            UInt32 nroCadenas = tableReader.ReadUInt16();
            // Third header word: not used, but it must be consumed to reach the first string.
            tableReader.ReadUInt16();

            // Reads one length-prefixed string; the length counts characters, so it is doubled for UTF-16.
            Func<string> readEntry = () =>
            {
                UInt16 strSize = tableReader.ReadUInt16();
                Byte[] cadena = tableReader.ReadBytes(unicode ? strSize * 2 : strSize);
                Encoding encoding = unicode ? Encoding.Unicode : Encoding.Default;
                return encoding.GetString(cadena).Replace('\0', ' ');
            };

            // Strings come in (author, path) pairs, hence the step of two.
            for (int i = 0; i < nroCadenas; i += 2)
            {
                HistoryItem hi = new HistoryItem();

                hi.Author = readEntry();
                FoundUsers.AddUniqueItem(hi.Author, false, "History");

                hi.Path = readEntry();
                FoundHistory.Items.Add(hi);

                // The path inherits the computer-user flag of the matching user; the last match wins.
                bool IsComputerPath = false;
                foreach (UserItem ui in FoundUsers.Items)
                {
                    if (hi.Author.Trim() == ui.Name.Trim())
                    {
                        IsComputerPath = ui.IsComputerUser;
                    }
                }

                FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(hi.Path), IsComputerPath);
            }
        }
    }
}
/// <summary>
/// Scans a Word binary document for embedded picture records, extracts file-system paths
/// stored in them and, for pictures containing a JPEG signature, runs the EXIF analysis and
/// merges its users and applications into this document's metadata.
/// </summary>
/// <param name="doc">OLE compound document to read the "WordDocument" and "Data" streams from.</param>
/// <remarks>
/// Returns without doing anything when either stream is missing. Malformed picture records
/// (offset or length outside the "Data" stream) are skipped instead of raising.
/// </remarks>
private void GetImagesDoc(OleDocument doc)
{
    using (Stream WordDocument = doc.OpenStream("WordDocument"))
    using (Stream stmData = doc.OpenStream("Data"))
    {
        if (WordDocument == null || stmData == null)
        {
            return;
        }

        WordDocument.Seek(0x18, SeekOrigin.Begin);
        BinaryReader br = new BinaryReader(WordDocument);
        br.ReadInt32(); // fcMin: unused, consumed only to reach fcMac
        Int32 fcMac = br.ReadInt32();

        // Round fcMac up to the next 0x200-byte boundary; the scan proceeds in 0x200-byte pages from there.
        Int32 FKPStart = fcMac % 0x200 == 0 ? fcMac : (fcMac - fcMac % 0x200) + 0x200;
        if (FKPStart < 0)
        {
            return;
        }

        WordDocument.Seek(FKPStart, SeekOrigin.Begin);

        BinaryReader brData = new BinaryReader(stmData);
        int ImagesFound = 0;

        while (WordDocument.Position + 0x200 < WordDocument.Length)
        {
            byte[] FKP = br.ReadBytes(0x200);
            // A zero in the last byte of the page ends the scan.
            if (FKP[0x1FF] == 0)
            {
                break;
            }

            // NOTE(review): 0x03 0x6A is presumably the little-endian picture-location opcode — confirm against [MS-DOC].
            foreach (int offset in Functions.SearchBytesInBytes(FKP, new byte[] { 0x03, 0x6A }))
            {
                // The 4-byte operand following the marker must lie inside the page.
                if (offset >= 0x200 - 5)
                {
                    continue;
                }

                // Little-endian 32-bit offset into the "Data" stream. Shifts avoid any
                // checked-arithmetic overflow; a set top bit simply yields a negative value.
                int PICOffset = FKP[offset + 2] | (FKP[offset + 3] << 8) | (FKP[offset + 4] << 16) | (FKP[offset + 5] << 24);
                if (PICOffset < 0 || PICOffset > stmData.Length - 4)
                {
                    continue;
                }

                stmData.Seek(PICOffset, SeekOrigin.Begin);
                UInt32 PICLength = brData.ReadUInt32();

                // The length field counts its own 4 bytes; clamp to what is left in the stream.
                long remaining = stmData.Length - stmData.Position;
                int bufferLen = PICLength < remaining ? (int)PICLength - 4 : (int)remaining;
                // A declared length below 4 would give a negative count and make ReadBytes throw.
                if (bufferLen <= 0)
                {
                    continue;
                }

                byte[] bufferPIC = brData.ReadBytes(bufferLen);
                string strImageName = "Image" + ImagesFound++;

                using (StreamReader sr = new StreamReader(new MemoryStream(bufferPIC), Encoding.Unicode))
                {
                    String sRead = sr.ReadToEnd();
                    foreach (Match m in Regex.Matches(sRead, @"([a-z]:|\\)\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+\\[a-zá-ú0-9\\\s,;.\-_#\$%&()=ñ´'¨{}Ç`/n/r\[\]+^@]+", RegexOptions.IgnoreCase))
                    {
                        String path = m.Value.Trim();
                        FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(path), true);
                        try
                        {
                            strImageName = Path.GetFileName(path);
                        }
                        catch (ArgumentException)
                        {
                            // The pattern admits whitespace/control characters that GetFileName
                            // may reject; keep the previous image name in that case.
                        }
                    }
                }

                List<int> lstJPEG = Functions.SearchBytesInBytes(bufferPIC, new byte[] { 0xFF, 0xD8 });
                if (lstJPEG.Count == 0)
                {
                    continue;
                }

                using (MemoryStream msJPG = new MemoryStream(bufferPIC, lstJPEG[0], bufferPIC.Length - lstJPEG[0]))
                {
                    EXIFDocument eDoc = new EXIFDocument(msJPG, ".jpg");
                    try
                    {
                        eDoc.analyzeFile();

                        // Two pictures may share a file name; Add() would throw on the repeated key,
                        // so later duplicates get a numeric suffix.
                        string pictureKey = strImageName;
                        int duplicate = 1;
                        while (dicPictureEXIF.ContainsKey(pictureKey))
                        {
                            pictureKey = strImageName + " (" + duplicate++ + ")";
                        }

                        dicPictureEXIF.Add(pictureKey, eDoc);

                        foreach (UserItem uiEXIF in eDoc.FoundUsers.Items)
                        {
                            FoundUsers.AddUniqueItem(uiEXIF.Name, false, uiEXIF.Notes);
                        }

                        foreach (ApplicationsItem Application in eDoc.FoundMetaData.Applications.Items)
                        {
                            string strApplication = (Application.Name ?? string.Empty).Trim();
                            if (strApplication.Length != 0 && !FoundMetaData.Applications.Items.Any(A => A.Name == strApplication))
                            {
                                FoundMetaData.Applications.Items.Add(new ApplicationsItem(strApplication));
                            }
                        }
                    }
                    finally
                    {
                        // Always release the EXIF document, even when its analysis fails.
                        eDoc.Close();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Parses a core.xml stream and copies title, subject, description, users, revision,
/// dates, keywords, category and language into the found-metadata containers.
/// </summary>
/// <param name="stm">Stream containing the core.xml document.</param>
/// <remarks>Any exception is written to the debug output and swallowed.</remarks>
private void analizeFileCore(Stream stm)
{
    try
    {
        XmlDocument doc = new XmlDocument();
        doc.XmlResolver = null;
        doc.Load(stm);

        // First child node of the first <tag> element, or null when the element is absent or empty.
        Func<string, XmlNode> firstChildOf = tag =>
        {
            XmlNodeList matches = doc.GetElementsByTagName(tag);
            return matches.Count != 0 && matches[0].HasChildNodes ? matches[0].FirstChild : null;
        };

        // Local-time value of a timestamp element, or null when the element is absent, holds the
        // "1601-01-01T00:00:00Z" sentinel, or cannot be parsed.
        Func<string, DateTime?> readDate = tag =>
        {
            XmlNode dateNode = firstChildOf(tag);
            if (dateNode == null || dateNode.Value == "1601-01-01T00:00:00Z")
            {
                return null;
            }

            DateTime parsed;
            if (!DateTime.TryParse(dateNode.Value.Replace("T", " ").Replace("Z", ""), out parsed))
            {
                return null;
            }

            return parsed.ToLocalTime();
        };

        XmlNode node = firstChildOf("dc:title");
        if (node != null)
        {
            FoundMetaData.Title = node.Value;
            // If the title is a valid path, add it as a path of the computer.
            FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(FoundMetaData.Title), true);
        }

        node = firstChildOf("dc:subject");
        if (node != null)
        {
            FoundMetaData.Subject = node.Value;
        }

        node = firstChildOf("dc:description");
        if (node != null)
        {
            FoundMetaData.Description = node.Value;
        }

        node = firstChildOf("cp:lastModifiedBy");
        if (node != null)
        {
            FoundUsers.AddUniqueItem(node.Value, true, "cp:lastModifiedBy");
        }

        node = firstChildOf("dc:creator");
        if (node != null)
        {
            FoundUsers.AddUniqueItem(node.Value, FoundUsers.Items.Count == 0, "dc:creator");
        }

        node = firstChildOf("cp:revision");
        if (node != null)
        {
            Decimal revision;
            if (Decimal.TryParse(node.Value, out revision))
            {
                FoundMetaData.VersionNumber = revision;
            }
        }

        DateTime? created = readDate("dcterms:created");
        if (created.HasValue)
        {
            FoundDates.CreationDateSpecified = true;
            FoundDates.CreationDate = created.Value;
        }

        DateTime? modified = readDate("dcterms:modified");
        if (modified.HasValue)
        {
            FoundDates.ModificationDateSpecified = true;
            FoundDates.ModificationDate = modified.Value;
        }

        node = firstChildOf("cp:keywords");
        if (node != null)
        {
            FoundMetaData.Keywords = node.Value;
        }

        node = firstChildOf("cp:category");
        if (node != null)
        {
            FoundMetaData.Category = node.Value;
        }

        node = firstChildOf("dc:language");
        if (node != null)
        {
            FoundMetaData.Language = node.Value;
        }

        DateTime? printed = readDate("cp:lastPrinted");
        if (printed.HasValue)
        {
            FoundDates.DatePrintingSpecified = true;
            FoundDates.DatePrinting = printed.Value;
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error reading file core.xml ({0}).", e.ToString()));
    }
}
/// <summary>
/// Extracts the metadata of the document.
/// </summary>
/// <returns>
/// The metadata gathered from embedded images (EXIF), the XMP packet, the PDF information
/// dictionary and the path/link/e-mail scan of the raw stream. Never null: an empty
/// <see cref="FileMetadata"/> is returned on failure.
/// </returns>
/// <remarks>The underlying file stream is disposed before returning.</remarks>
public override FileMetadata AnalyzeFile()
{
    try
    {
        this.foundMetadata = new FileMetadata();
        using (PdfDocument doc = PdfReader.Open(this.fileStream, PdfDocumentOpenMode.InformationOnly))
        {
            int imageNumber = 0;

            // Read embedded images: every stream dictionary whose /Subtype is /Image.
            foreach (PdfDictionary item in doc.Internals.GetAllObjects()
                                              .OfType<PdfDictionary>()
                                              .Where(d => d.Stream != null && "/Image".Equals(d.Elements["/Subtype"]?.ToString())))
            {
                try
                {
                    using (MemoryStream msJPG = new MemoryStream(item.Stream.Value))
                    using (EXIFDocument eDoc = new EXIFDocument(msJPG))
                    {
                        FileMetadata exifMetadata = eDoc.AnalyzeFile();
                        if (exifMetadata == null || !exifMetadata.HasMetadata())
                        {
                            continue;
                        }

                        // Ignore images which only contain 'Adobe JPEG' makernotes. All() is true for an
                        // empty sequence, so Any() is required: an image with metadata but no makernotes
                        // at all must be kept, not discarded.
                        bool onlyAdobeMakernotes = exifMetadata.Makernotes.Any()
                            && exifMetadata.Makernotes.All(p => p.Key == "Adobe JPEG");
                        if (onlyAdobeMakernotes)
                        {
                            continue;
                        }

                        foundMetadata.EmbeddedImages.Add(imageNumber.ToString(), exifMetadata);
                        imageNumber++;
                        this.foundMetadata.AddRange(exifMetadata.Users.ToArray());
                        this.foundMetadata.AddRange(exifMetadata.Applications.ToArray());
                    }
                }
                catch (Exception)
                {
                    // Best effort: an unreadable image must not stop the analysis of the document.
                }
            }

            ReadXMPMetadata(doc);

            if (doc.Info.Title != string.Empty)
            {
                this.foundMetadata.Title = Functions.ToPlainText(doc.Info.Title);
                if (Uri.IsWellFormedUriString(doc.Info.Title, UriKind.Absolute))
                {
                    this.foundMetadata.Add(new Diagrams.Path(PathAnalysis.CleanPath(doc.Info.Title), true));
                }
            }

            if (doc.Info.Subject != string.Empty)
            {
                this.foundMetadata.Subject = Functions.ToPlainText(doc.Info.Subject);
            }

            if (doc.Info.Author != string.Empty)
            {
                this.foundMetadata.Add(new User(Functions.ToPlainText(doc.Info.Author), true));
            }

            if (doc.Info.Keywords != string.Empty)
            {
                this.foundMetadata.Keywords = Functions.ToPlainText(doc.Info.Keywords);
            }

            if (doc.Info.Creator != string.Empty)
            {
                string creatorText = Functions.ToPlainText(doc.Info.Creator);
                string strSoftware = ApplicationAnalysis.GetApplicationsFromString(creatorText);
                if (strSoftware.Trim() != string.Empty)
                {
                    this.foundMetadata.Add(new Application(strSoftware));
                }
                else if (!String.IsNullOrWhiteSpace(creatorText))
                {
                    // No known application was recognised; report the raw value anyway.
                    this.foundMetadata.Add(new Application(creatorText.Trim()));
                }
            }

            if (!String.IsNullOrWhiteSpace(doc.Info.Producer))
            {
                string producerText = Functions.ToPlainText(doc.Info.Producer);
                string strSoftware = ApplicationAnalysis.GetApplicationsFromString(producerText);
                if (!String.IsNullOrWhiteSpace(strSoftware))
                {
                    this.foundMetadata.Add(new Application(strSoftware));
                }
                else if (!String.IsNullOrWhiteSpace(producerText))
                {
                    // No known application was recognised; report the raw value anyway.
                    this.foundMetadata.Add(new Application(producerText.Trim()));
                }
            }

            // Each date is read in its own try block: an InvalidCastException raised while
            // reading one of them is ignored and does not affect the other.
            try
            {
                if (doc.Info.CreationDate != DateTime.MinValue)
                {
                    this.foundMetadata.Dates.CreationDate = doc.Info.CreationDate;
                }
            }
            catch (InvalidCastException)
            {
            }

            try
            {
                if (doc.Info.ModificationDate != DateTime.MinValue)
                {
                    this.foundMetadata.Dates.ModificationDate = doc.Info.ModificationDate;
                }
            }
            catch (InvalidCastException)
            {
            }
        }

        SearchPathsLinksAndEmails(this.fileStream);

        // Find users in paths.
        foreach (Diagrams.Path path in this.foundMetadata.Paths)
        {
            string strUser = PathAnalysis.ExtractUserFromPath(path.Value);
            this.foundMetadata.Add(new User(strUser, path.IsComputerFolder));
        }

        // Also search software in the title (only pdf). It is added only if the software is known.
        if (!String.IsNullOrEmpty(foundMetadata.Title))
        {
            string strSoftware = ApplicationAnalysis.GetApplicationsFromString(foundMetadata.Title);
            if (!String.IsNullOrWhiteSpace(strSoftware))
            {
                this.foundMetadata.Add(new Application(strSoftware));
            }
        }
    }
    catch (PdfReaderException)
    {
        // The file could not be read as a PDF: return whatever was collected so far.
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine(ex.ToString());
    }
    finally
    {
        if (foundMetadata == null)
        {
            this.foundMetadata = new FileMetadata();
        }

        if (fileStream != null)
        {
            this.fileStream.Dispose();
        }
    }

    return this.foundMetadata;
}
/// <summary>
/// Parses a content.xml stream and collects the targets of its <c>text:a</c> hyperlinks:
/// <c>mailto:</c> targets go to <c>FoundEmails</c>, every other target (except in-document
/// <c>#anchor</c> references) goes to <c>FoundPaths</c>.
/// </summary>
/// <param name="stm">Stream containing the content.xml document.</param>
/// <remarks>
/// Any exception is written to the debug output and swallowed. Anchors without an
/// <c>xlink:href</c> attribute are skipped so they cannot abort the whole scan.
/// </remarks>
private void analizeFileContent(Stream stm)
{
    try
    {
        XmlDocument doc = new XmlDocument();
        // External entities/DTDs must never be resolved for analysed documents.
        doc.XmlResolver = null;
        doc.Load(stm);

        XmlNodeList xnl = doc.GetElementsByTagName("text:a");
        if (xnl == null)
        {
            return;
        }

        // 'links' keeps first-seen order; 'seen' makes the duplicate check O(1).
        List<String> links = new List<String>();
        HashSet<String> seen = new HashSet<String>();

        foreach (XmlNode xn in xnl)
        {
            XmlNode hrefAttribute = xn.Attributes == null ? null : xn.Attributes.GetNamedItem("xlink:href");
            String href = hrefAttribute == null ? null : hrefAttribute.Value;
            if (String.IsNullOrEmpty(href))
            {
                continue;
            }

            if (href.StartsWith("mailto:", StringComparison.Ordinal))
            {
                // The address is what lies between "mailto:" and the optional "?query".
                int queryStart = href.IndexOf('?');
                int addressEnd = queryStart >= 0 ? queryStart : href.Length;
                FoundEmails.AddUniqueItem(href.Substring(7, addressEnd - 7));
                continue;
            }

            // Absolute URIs of any scheme (including host names that do not resolve, such as
            // http://privado/) are kept. A value that is not a URI is treated as an internal path,
            // unless it is an in-document reference of the form "#Country".
            Uri parsedUri;
            bool isAbsoluteUri = Uri.TryCreate(href, UriKind.Absolute, out parsedUri);
            if (!isAbsoluteUri && href.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            if (seen.Add(href))
            {
                links.Add(href);
            }
        }

        foreach (String link in links)
        {
            FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(link), true);
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error reading file content.xml ({0}).", e.ToString()));
    }
}
/// <summary>
/// Parses a relationships part and collects the targets of its hyperlink relationships:
/// <c>mailto:</c> targets go to <c>FoundEmails</c>, every other target (except in-document
/// <c>#anchor</c> references) goes to <c>FoundPaths</c>.
/// </summary>
/// <param name="stm">Stream containing the relationships XML document.</param>
/// <remarks>
/// Any exception is written to the debug output and swallowed. Relationships lacking a
/// <c>Type</c> or <c>Target</c> attribute are skipped so they cannot abort the whole scan.
/// </remarks>
private void analizeLinks(Stream stm)
{
    const string hyperlinkRelationship = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";

    try
    {
        XmlDocument doc = new XmlDocument();
        // External entities/DTDs must never be resolved for analysed documents.
        doc.XmlResolver = null;
        doc.Load(stm);

        XmlNodeList xnl = doc.GetElementsByTagName("Relationship");

        // 'links' keeps first-seen order; 'seen' makes the duplicate check O(1).
        List<string> links = new List<string>();
        HashSet<string> seen = new HashSet<string>();

        foreach (XmlNode xn in xnl)
        {
            if (xn.Attributes == null)
            {
                continue;
            }

            XmlAttribute typeAttribute = xn.Attributes["Type"];
            XmlAttribute targetAttribute = xn.Attributes["Target"];
            if (typeAttribute == null || targetAttribute == null || typeAttribute.Value != hyperlinkRelationship)
            {
                continue;
            }

            String href = targetAttribute.Value;
            if (String.IsNullOrEmpty(href))
            {
                continue;
            }

            if (href.StartsWith("mailto:", StringComparison.Ordinal))
            {
                // The address is what lies between "mailto:" and the optional "?query".
                int queryStart = href.IndexOf('?');
                int addressEnd = queryStart >= 0 ? queryStart : href.Length;
                FoundEmails.AddUniqueItem(href.Substring(7, addressEnd - 7));
                continue;
            }

            if (href.StartsWith("http:", StringComparison.Ordinal) && !href.EndsWith("/", StringComparison.Ordinal))
            {
                // If the address has no path at all (only the two slashes of "http://"),
                // a trailing slash is appended so "http://host" and "http://host/" collapse.
                int slashCount = 0;
                foreach (char c in href)
                {
                    if (c == '/')
                    {
                        slashCount++;
                    }
                }

                if (slashCount == 2)
                {
                    href += "/";
                }
            }

            // Absolute URIs of any scheme are kept. A value that is not a URI is treated as an
            // internal path, unless it is an in-document reference of the form "#Country".
            Uri parsedUri;
            bool isAbsoluteUri = Uri.TryCreate(href, UriKind.Absolute, out parsedUri);
            if (!isAbsoluteUri && href.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            if (seen.Add(href))
            {
                links.Add(href);
            }
        }

        foreach (String link in links)
        {
            FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(link), true);
        }
    }
    catch (Exception e)
    {
        System.Diagnostics.Debug.WriteLine(String.Format("Error searching links ({0}).", e.ToString()));
    }
}
/// <summary>
/// Extracts the metadata of the document.
/// </summary>
/// <remarks>
/// Results are stored in the found-metadata containers of this instance. Any exception is
/// written to the debug output and swallowed; the PDF document is always disposed.
/// </remarks>
public override void analyzeFile()
{
    PdfDocument doc = null;
    try
    {
        doc = PdfReader.Open(stm, PdfDocumentOpenMode.InformationOnly);
        ReadXMPMetadata(doc);

        // Registers an application name unless an item with that exact name is already present.
        Action<string> addApplication = name =>
        {
            if (!FoundMetaData.Applications.Items.Any(A => A.Name == name))
            {
                FoundMetaData.Applications.Items.Add(new ApplicationsItem(name));
            }
        };

        string rawTitle = doc.Info.Title;
        if (rawTitle != string.Empty)
        {
            FoundMetaData.Title = Functions.ToPlainText(rawTitle);
            if (Uri.IsWellFormedUriString(rawTitle, UriKind.Absolute))
            {
                FoundPaths.AddUniqueItem(PathAnalysis.CleanPath(rawTitle), true);
            }
        }

        string rawSubject = doc.Info.Subject;
        if (rawSubject != string.Empty)
        {
            FoundMetaData.Subject = Functions.ToPlainText(rawSubject);
        }

        string rawAuthor = doc.Info.Author;
        if (rawAuthor != string.Empty)
        {
            FoundUsers.AddUniqueItem(Functions.ToPlainText(rawAuthor), true);
        }

        string rawKeywords = doc.Info.Keywords;
        if (rawKeywords != string.Empty)
        {
            FoundMetaData.Keywords = Functions.ToPlainText(rawKeywords);
        }

        string rawCreator = doc.Info.Creator;
        if (rawCreator != string.Empty)
        {
            string creatorText = Functions.ToPlainText(rawCreator);
            string knownCreator = Analysis.ApplicationAnalysis.GetApplicationsFromString(creatorText);
            if (knownCreator.Trim() != string.Empty)
            {
                addApplication(knownCreator);
            }
            else
            {
                // No known application was recognised; report the raw value anyway.
                string fallbackCreator = creatorText.Trim();
                if (fallbackCreator != string.Empty)
                {
                    addApplication(fallbackCreator);
                }
            }
        }

        string rawProducer = doc.Info.Producer;
        if (rawProducer != string.Empty)
        {
            string producerText = Functions.ToPlainText(rawProducer);
            string knownProducer = Analysis.ApplicationAnalysis.GetApplicationsFromString(producerText);
            if (knownProducer.Trim() != string.Empty)
            {
                addApplication(knownProducer);
            }
            else
            {
                // No known application was recognised; report the raw value anyway.
                string fallbackProducer = producerText.Trim();
                if (fallbackProducer != string.Empty)
                {
                    addApplication(fallbackProducer);
                }
            }
        }

        // Each date is read in its own try block: an InvalidCastException raised while
        // reading one of them is ignored and does not affect the other.
        try
        {
            DateTime created = doc.Info.CreationDate;
            if (created != DateTime.MinValue)
            {
                FoundDates.CreationDateSpecified = true;
                FoundDates.CreationDate = created;
            }
        }
        catch (InvalidCastException)
        {
        }

        try
        {
            DateTime modified = doc.Info.ModificationDate;
            if (modified != DateTime.MinValue)
            {
                FoundDates.ModificationDateSpecified = true;
                FoundDates.ModificationDate = modified;
            }
        }
        catch (InvalidCastException)
        {
        }

        // Binary search for paths and links over the raw stream.
        BinarySearchPaths(stm);
        BinarySearchLinks(stm);

        // Look for user names embedded in the paths that were found.
        foreach (PathsItem foundPath in FoundPaths.Items)
        {
            string userInPath = PathAnalysis.ExtractUserFromPath(foundPath.Path);
            FoundUsers.AddUniqueItem(userInPath, foundPath.IsComputerFolder);
        }

        // For PDFs the title is also searched for software; it is added only when it is known software.
        if (!String.IsNullOrEmpty(this.FoundMetaData.Title))
        {
            string titleSoftware = Analysis.ApplicationAnalysis.GetApplicationsFromString(this.FoundMetaData.Title);
            if (titleSoftware != string.Empty)
            {
                addApplication(titleSoftware);
            }
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Debug.WriteLine(ex.ToString());
    }
    finally
    {
        if (doc != null)
        {
            doc.Dispose();
        }
    }
}