/// <summary>
/// Extensions (leading dot included) that are passed to the 7-Zip extractor.
/// </summary>
private static readonly string[] ArchiveExtensions =
{
    ".zip", ".7z", ".xz", ".bzip2", ".gzip", ".tar", ".wim", ".arj", ".cab", ".chm",
    ".cpio", ".cramfs", ".deb", ".fat", ".hfs", ".iso", ".lzh", ".lzma", ".mbr", ".msi",
    ".nsis", ".ntfs", ".rar", ".rpm", ".squashfs", ".udf", ".vhd", ".xar", ".z"
};

/// <summary>
/// Chooses a text extractor from the file's extension, runs the native scanner over the
/// extracted text and logs the file when the scanner returns a positive code.
/// Archives are extracted into a temporary folder, which is removed again afterwards.
/// </summary>
/// <param name="fi">The file to inspect.</param>
/// <param name="level">Not used by this method.</param>
/// <remarks>
/// Extension matching is case-sensitive. Any exception raised while parsing or scanning is
/// written to the console and does not propagate to the caller. A parser that yields
/// <c>null</c> is treated as "nothing to scan".
/// </remarks>
public static void Determine_Parser(System.IO.FileInfo fi, uint level)
{
    string ext = fi.Extension;

    try
    {
        string text = null;

        switch (ext)
        {
            case ".txt":
                using (StreamReader textFile = new StreamReader(fi.FullName))
                {
                    text = textFile.ReadToEnd();
                }
                break;

            case ".rtf":
                // The control is only used to convert RTF markup to plain text.
                using (RichTextBox rtb = new RichTextBox())
                {
                    rtb.Rtf = System.IO.File.ReadAllText(fi.FullName);
                    text = rtb.Text;
                }
                break;

            case ".pdf":
                text = PDFParser.Parser.ParsePDFtoString(fi.FullName);
                break;

            case ".doc":
            case ".xls":
            case ".ppt":
                using (TextReader reader = new FilterReader(fi.FullName))
                {
                    text = reader.ReadToEnd();
                }
                break;

            case ".docx":
                text = OfficeParser.Parser.docxParser(fi.FullName);
                break;

            case ".xlsx":
                text = OfficeParser.Parser.xlsxParser(fi.FullName);
                break;

            case ".pptx":
                text = OfficeParser.Parser.pptxParser(fi.FullName);
                break;

            case ".odt":
            case ".ods":
            case ".odp":
                text = OfficeParser.Parser.openOfficeParser(fi.FullName);
                break;

            case ".xml":
                text = XMLParser.Parser.ParseXMLtoString(fi.FullName);
                break;

            case ".pst":
                // Recognised, but no parser is wired up for Outlook stores.
                break;

            default:
                if (Array.IndexOf(ArchiveExtensions, ext) >= 0)
                {
                    ExtractArchiveToTemp(fi.FullName);
                }
                break;
        }

        if (text != null)
        {
            ScanTextAndLog(fi, text);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}

/// <summary>
/// Runs the native ANSI scanner over <paramref name="text"/> and logs the file when the
/// scanner returns a code greater than zero.
/// </summary>
private static void ScanTextAndLog(System.IO.FileInfo fi, string text)
{
    int count = 0;
    int retCode = UnsafeNativeMethod.ScanAnsi(text.ToCharArray(), text.Length, ref count);
    if (retCode > 0)
    {
        WriteToLog(fi.Name, fi.FullName, retCode.ToString(), count);
    }
}

/// <summary>
/// Extracts the archive into "temp_dir" below the system temp folder and deletes that
/// folder again, whether or not extraction succeeded.
/// </summary>
/// <remarks>
/// NOTE(review): the extracted files are not scanned; the original code only contained an
/// empty loop over them. Confirm whether they should be passed back to Determine_Parser.
/// </remarks>
private static void ExtractArchiveToTemp(string archivePath)
{
    string tempFolderPath = Path.GetTempPath() + "temp_dir\\";
    SevenZipExtractor extractor = null;

    Directory.CreateDirectory(tempFolderPath);
    try
    {
        // 7z.dll is resolved relative to the working directory.
        SevenZipExtractor.SetLibraryPath("7z.dll");
        extractor = new SevenZipExtractor(archivePath);
        extractor.ExtractArchive(tempFolderPath);
    }
    finally
    {
        // The extractor is null when its constructor (or SetLibraryPath) threw.
        if (extractor != null)
        {
            extractor.Dispose();
        }

        if (Directory.Exists(tempFolderPath))
        {
            Directory.Delete(tempFolderPath, true);
        }
    }
}
/// <summary>
/// Writes a successfully downloaded response to a temporary file and stores the text that
/// the IFilter reader extracts from it in <c>propertyBag.Text</c>.
/// </summary>
/// <param name="crawler">The crawler that produced the response; not used here.</param>
/// <param name="propertyBag">Carries the response and receives the title and text.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    if (propertyBag.StatusCode == HttpStatusCode.OK)
    {
        string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
        if (!fileExtension.IsNullOrEmpty())
        {
            propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

            using (TempFile scratch = new TempFile())
            {
                // The file name gets the mapped extension appended before it is written.
                scratch.FileName += "." + fileExtension;
                File.WriteAllBytes(scratch.FileName, propertyBag.Response);

                using (FilterReader extractor = new FilterReader(scratch.FileName))
                {
                    propertyBag.Text = extractor.ReadToEnd().Trim();
                }
            }
        }
    }
}
/// <summary>
/// Returns the text that <c>ExtractText</c> produces for the IFilter loaded for the file.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>The result of <c>ExtractText</c> for the loaded filter.</returns>
public static string Parse(string filename)
{
    // NOTE(review): this first read goes through FilterReader (60 s timeout), but its
    // result is discarded and every failure is ignored. Confirm whether it is still needed.
    try
    {
        FileInfo info = new FileInfo(filename);
        using (var probe = new FilterReader(filename, info.Extension, filterReaderTimeout: FilterReaderTimeout.TimeoutWithException, timeout: 60 * 1000))
        {
            probe.ReadToEnd();
        }
    }
    catch (Exception)
    {
    }

    IFilter loaded = loadIFilter(filename);
    try
    {
        return ExtractText(loaded);
    }
    finally
    {
        // Release the COM filter even when extraction throws.
        if (loaded != null)
        {
            Marshal.ReleaseComObject(loaded);
        }
    }
}
/// <summary>
/// Copies the stream to a temporary ".doc" file, extracts its text through the IFilter
/// reader and returns every non-blank line, formatted by <c>FormatParagraphText</c>.
/// </summary>
/// <param name="readStream">Seekable stream holding the document; it is rewound to the start.</param>
/// <returns>The formatted, non-blank lines in document order.</returns>
public static List<string> ExtractDocParagraphs(Stream readStream)
{
    // The reader is given a path ending in ".doc", so the temp file is renamed.
    var tmp = Path.GetTempFileName();
    var fileName = tmp + ".doc";

    try
    {
        File.Move(tmp, fileName);

        using (var fileStream = File.Create(fileName))
        {
            readStream.Seek(0, SeekOrigin.Begin);
            readStream.CopyTo(fileStream);
        }

        var results = new List<string>();
        using (var reader = new FilterReader(fileName))
        using (var stringReader = new StringReader(reader.ReadToEnd()))
        {
            string nextLine;
            while ((nextLine = stringReader.ReadLine()) != null)
            {
                if (!string.IsNullOrWhiteSpace(nextLine))
                {
                    results.Add(FormatParagraphText(nextLine));
                }
            }
        }

        return results;
    }
    finally
    {
        // Clean up on every path; File.Delete does nothing when the file is absent.
        File.Delete(tmp);
        File.Delete(fileName);
    }
}
/// <summary>
/// Extracts the text of a PDF document and writes it to a UTF-8 text file.
/// </summary>
/// <param name="inpufFileName">Path of the PDF file to read.</param>
/// <param name="outputFileName">Path of the text file to create or overwrite.</param>
public void ExtractText(string inpufFileName, string outputFileName)
{
#if false
    IFilterTextReader.FilterReader reader = new FilterReader(inpufFileName);
    var data = reader.ReadToEnd();
    using (var writer = new StreamWriter(outputFileName, false, System.Text.Encoding.UTF8))
    {
        writer.Write(data);
    }
#else
    PDDocument document = null;
    try
    {
        document = PDDocument.load(inpufFileName);
        var textStripper = new PDFTextStripper();

        // The output file is opened before the text is extracted.
        using (var output = new StreamWriter(outputFileName, false, System.Text.Encoding.UTF8))
        {
            var extracted = textStripper.getText(document);
            output.Write(extracted);
        }
    }
    finally
    {
        if (document != null)
        {
            document.close();
        }
    }
#endif
}
/// <summary>
/// Searches the contents of every path in <c>Directories</c> for keywords. Files with an
/// office extension are read through the IFilter reader, all others as plain text.
/// Failures are reported on the console and never stop the scan.
/// </summary>
public void Search()
{
    foreach (String dir in Directories)
    {
        try
        {
            var fileInfo = new FileInfo(dir);

            if (fileInfo.Length < MAX_FILE_SIZE)
            {
                try
                {
                    string fileContents;
                    if (IsOfficeExtension(fileInfo.Extension))
                    {
                        // Dispose the reader so the underlying filter is released per file.
                        using (var reader = new FilterReader(fileInfo.FullName))
                        {
                            fileContents = reader.ReadToEnd();
                        }
                    }
                    else
                    {
                        fileContents = File.ReadAllText(fileInfo.FullName);
                    }

                    CheckForKeywords(fileContents, fileInfo);
                }
                catch (Exception)
                {
                    Console.WriteLine("[-] Could not read contents of {0}", fileInfo.FullName);
                }
            }
            else
            {
                Console.WriteLine("[-] File exceeds 1MB file size {0}", fileInfo.FullName);
            }
        }
        catch (Exception e)
        {
            // E.g. the file vanished or the path is invalid: skip it and keep scanning.
            Console.WriteLine("[-] Some unknown exception {0} occurred while processing {1}. Continuing with the next directory.", e.Message, dir);
        }
    }
}
/// <summary>
/// Returns the text extracted from the document's stored file, or an empty string when
/// no filter exists for its extension or extraction fails.
/// </summary>
/// <returns>The extracted text, or <see cref="String.Empty"/>.</returns>
public string GetContent()
{
    try
    {
        var physicalPath = ClientState.Current.Storage.ResolvePhysicalFilename(".", document.StreamName);
        var extension = Path.GetExtension(document.Filename);
        var filter = FilterReader.GetFilter(physicalPath, extension);

        // A null filter means no filter is available for the given extension.
        if (filter == null)
        {
            Logger.Debug("Unable to find filter for file {0}", LogSource.Search, document.Filename);
            return String.Empty;
        }

        using (FilterReader contentReader = new FilterReader(filter))
        {
            return contentReader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        Logger.Debug("An error occured while trying to find filter for file {0}. Exception = {1}", LogSource.Search, document.Filename, ex);
        return String.Empty;
    }
}
/// <summary>
/// Streams a successfully downloaded response into a temporary file and stores the text
/// that the IFilter reader extracts from it in <c>propertyBag.Text</c>.
/// </summary>
/// <param name="crawler">The crawler that produced the response; not used here.</param>
/// <param name="propertyBag">Carries the response and receives the title and text.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    if (propertyBag.StatusCode == HttpStatusCode.OK)
    {
        string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
        if (!fileExtension.IsNullOrEmpty())
        {
            propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

            using (TempFile scratch = new TempFile())
            {
                scratch.FileName += "." + fileExtension;

                // Copy the response to disk; both streams are closed before the file is read.
                using (FileStream target = new FileStream(scratch.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, 0x1000))
                {
                    using (Stream source = propertyBag.GetResponse())
                    {
                        source.CopyToStream(target);
                    }
                }

                using (FilterReader extractor = new FilterReader(scratch.FileName))
                {
                    propertyBag.Text = extractor.ReadToEnd().Trim();
                }
            }
        }
    }
}
/* Takes an image snapshot of a file's text so it can later serve as a sample for image
 * comparison when looking for copies of the file.
 *
 * The text is read with the NuGet package 'IFilterTextReader' (a TextReader that extracts
 * text from different file formats through the IFilter interface). Tabs and line breaks
 * are removed so that every document is rendered the same way, the text is drawn onto a
 * white bitmap in 8 px Arial, and the bitmap is saved as "<file name>.png" in the
 * 'Samples' folder. Any failure is shown in a message box.
 *
 * DISCLAIMER: 'IFilterTextReader' is under The Code Project Open License (CPOL) 1.02.
 * My usage is in compliance with the license. All credits go to Kees van Spelde.
 * URL: https://github.com/Sicos1977/IFilterTextReader
 */
public void SnapshotFile(String filePath)
{
    try
    {
        string text;
        using (TextReader reader = new FilterReader(filePath))
        {
            text = Regex.Replace(reader.ReadToEnd(), @"\t|\n|\r", "");
        }

        using (Font font = new Font("Arial", 8, FontStyle.Regular, GraphicsUnit.Pixel))
        {
            // A 1x1 scratch surface is enough to measure the rendered text.
            SizeF measured;
            using (Bitmap scratch = new Bitmap(1, 1))
            using (Graphics measurer = Graphics.FromImage(scratch))
            {
                measured = measurer.MeasureString(text, font);
            }

            // Empty text measures as zero, which Bitmap rejects; fall back to one pixel.
            int width = Math.Max(1, (int)measured.Width);
            int height = Math.Max(1, (int)measured.Height);

            using (Bitmap bitmap = new Bitmap(width, height))
            {
                using (Graphics graphics = Graphics.FromImage(bitmap))
                using (SolidBrush brush = new SolidBrush(Color.FromArgb(0, 0, 0)))
                {
                    graphics.Clear(Color.White);
                    graphics.SmoothingMode = SmoothingMode.AntiAlias;
                    graphics.TextRenderingHint = TextRenderingHint.AntiAlias;
                    graphics.DrawString(text, font, brush, 0, 0);
                    graphics.Flush();
                }

                bitmap.Save("C:\\Users\\" + utilities.Username() + "\\AppData\\Roaming\\Data Shield\\Samples\\" + filePath.Split('\\').Last() + ".png");
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Extracts the text of an Excel (.xls) document through the IFilter reader.
/// </summary>
/// <param name="stream">Stream holding the document.</param>
/// <returns>The text returned by the reader.</returns>
public override string Extract(Stream stream)
{
    // Dispose the reader once the text has been read instead of leaving it to the GC.
    using (var target = new FilterReader(GetBytesFromStream(stream), ".xls"))
    {
        target.Init();
        return target.ReadToEnd();
    }
}
// One shared generator: a fresh Random per call can repeat values when calls happen close
// together on runtimes that seed from the clock. Random is not thread-safe, hence the gate.
private static readonly Random s_documentIdGenerator = new Random();
private static readonly object s_documentIdGate = new object();

/// <summary>
/// Builds the index entry for a document: a random non-negative ID, the fixed label
/// "Docs", the file's extension and name, and the full text read from
/// <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Reader positioned on the document text; it is read to the end.</param>
/// <param name="fileName">File name stored in the entry and used to derive the extension.</param>
/// <returns>The populated index entry.</returns>
public static DataForIndex CreateDocumentIndex(FilterReader reader, string fileName)
{
    int documentId;
    lock (s_documentIdGate)
    {
        documentId = s_documentIdGenerator.Next(int.MaxValue);
    }

    var dfi = new DataForIndex();
    dfi.ID = documentId;
    dfi.Label = "Docs";
    dfi.FileExtension = Path.GetExtension(fileName);
    dfi.FileName = fileName;
    dfi.Body = reader.ReadToEnd();
    return dfi;
}
/// <summary>
/// Extracts the text of a PDF document. The IFilter reader is tried first; if it fails
/// for any reason other than running out of memory, the pages are parsed with the older
/// iTextSharp-based extractor instead.
/// </summary>
/// <param name="stream">Stream holding the PDF document.</param>
/// <param name="context">Extraction context; not used by this method.</param>
/// <returns>
/// The extracted text. An empty string when the IFilter path runs out of memory; whatever
/// was collected so far when the fallback path runs out of memory.
/// </returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    try
    {
        // Dispose the reader once the text has been read instead of leaving it to the GC.
        using (var target = new FilterReader(GetBytesFromStream(stream), ".pdf"))
        {
            target.Init();
            return target.ReadToEnd();
        }
    }
    catch (OutOfMemoryException ex)
    {
        Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
            "Pdf text extract failed with out of memory exception. " + ex,
            properties: new Dictionary<string, object> { { "Stream size", stream.Length } });

        return string.Empty;
    }
    catch (Exception ex)
    {
        Logger.WriteWarning(EventId.Indexing.IFilterError, "Pdf IFilter error: " + ex.Message);
    }

    // Fall back to the other mechanism in case the PDF IFilter is missing.
    // NOTE(review): the stream may already have been consumed by GetBytesFromStream above;
    // confirm that PdfReader still sees the whole document here.
    var text = new StringBuilder();

    try
    {
        var pdfReader = new PdfReader(stream);
        for (var page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // Extract text using the old version (4.1.6) of iTextSharp.
            var pageText = ExtractTextFromPdfBytes(pdfReader.GetPageContent(page));
            if (string.IsNullOrEmpty(pageText))
            {
                continue;
            }

            text.Append(pageText);
        }
    }
    catch (OutOfMemoryException ex)
    {
        Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
            "Pdf text extract failed with out of memory exception. " + ex,
            properties: new Dictionary<string, object> { { "Stream size", stream.Length } });
    }

    return text.ToString();
}
/// <summary>
/// Reads the text of a file through the IFilter reader and returns every match of
/// <paramref name="regularExpression"/> found in it.
/// </summary>
/// <param name="fileName">The file to inspect</param>
/// <param name="regularExpression">The regular expression to use</param>
/// <param name="ignoreCase">Set to false to search case sensitive</param>
/// <returns>The matched strings in the order they occur; empty when nothing matches.</returns>
public string[] GetRegexMatchesFromFile(string fileName, string regularExpression, bool ignoreCase = true)
{
    RegexOptions options = RegexOptions.None;
    if (ignoreCase)
    {
        options = RegexOptions.IgnoreCase;
    }

    var pattern = new Regex(regularExpression, options);
    var found = new List<string>();

    using (var reader = new FilterReader(fileName))
    {
        string content = reader.ReadToEnd();
        foreach (Match hit in pattern.Matches(content))
        {
            found.Add(hit.ToString());
        }
    }

    return found.ToArray();
}
/// <summary>
/// Extracts the text of an Outlook message (.msg) through the IFilter reader.
/// </summary>
/// <param name="stream">Stream holding the message.</param>
/// <param name="context">Extraction context; not used by this method.</param>
/// <returns>The extracted text, or an empty string when extraction fails (a warning is logged).</returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    try
    {
        // Dispose the reader once the text has been read instead of leaving it to the GC.
        using (var target = new FilterReader(GetBytesFromStream(stream), ".msg"))
        {
            target.Init();
            return target.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        Logger.WriteWarning(EventId.Indexing.IFilterError, "Msg IFilter error: " + ex.Message);
    }

    return string.Empty;
}
/// <summary>
/// Extracts the text of a Word (.doc) document through the IFilter reader.
/// </summary>
/// <param name="stream">Stream holding the document.</param>
/// <returns>The extracted text, or an empty string when extraction fails (a warning is logged).</returns>
public override string Extract(System.IO.Stream stream)
{
    try
    {
        // Dispose the reader once the text has been read instead of leaving it to the GC.
        using (var target = new FilterReader(GetBytesFromStream(stream), ".doc"))
        {
            target.Init();
            return target.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        Logger.WriteWarning(EventId.Indexing.IFilterError, "Doc IFilter error: " + ex.Message);
    }

    return string.Empty;
}
/// <summary>
/// Lets the user pick a file and shows the text the IFilter reader extracts from it.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data; not used.</param>
private void btnGetText_Click(object sender, EventArgs e)
{
    // The dialog is disposable, so it is released as soon as the handler is done with it.
    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        string dbt_wfile = dlg.FileName;

        // The using block closes the reader; no separate Close() call is needed.
        using (TextReader reader = new FilterReader(dbt_wfile))
        {
            MessageBox.Show(reader.ReadToEnd());
        }
    }
}
/// <summary>
/// Searches the contents of every path in <c>Directories</c> for keywords. Files with an
/// office extension are read through the IFilter reader, all others as plain text.
/// Failures are reported on the console and never stop the scan.
/// </summary>
public void Search()
{
    // The switch does not depend on the file being processed, so it is read once.
    bool usingLegacyPathHandling;
    AppContext.TryGetSwitch("Switch.System.IO.UseLegacyPathHandling", out usingLegacyPathHandling);

    foreach (String dir in Directories)
    {
        try
        {
            // Paths are converted with ConvertToNTPath unless legacy path handling is on.
            var dirToCheck = usingLegacyPathHandling ? dir : ConvertToNTPath(dir);
            var fileInfo = new FileInfo(dirToCheck);

            if (Convert.ToUInt64(fileInfo.Length) < 1024 * this.MAX_FILE_SIZE)
            {
                try
                {
                    string fileContents;
                    if (IsOfficeExtension(fileInfo.Extension))
                    {
                        // Dispose the reader so the underlying filter is released per file.
                        using (var reader = new FilterReader(fileInfo.FullName))
                        {
                            fileContents = reader.ReadToEnd();
                        }
                    }
                    else
                    {
                        fileContents = File.ReadAllText(fileInfo.FullName);
                    }

                    CheckForKeywords(fileContents, fileInfo);
                }
                catch (Exception)
                {
                    Console.WriteLine("[-] Could not read contents of {0}", PrettyPrintNTPath(fileInfo.FullName));
                }
            }
            else
            {
                Console.WriteLine("[-] File exceeds max file size {0}", PrettyPrintNTPath(fileInfo.FullName));
            }
        }
        catch (PathTooLongException)
        {
            Console.WriteLine("[-] Path {0} is too long. Skipping.", dir);
        }
        catch (Exception e)
        {
            Console.WriteLine("[-] Some unknown exception {0} occured while processing {1}. Continuing with the next directory.", e.Message, dir);
        }
    }
}
/// <summary>
/// Lets the user pick a file and shows it in the embedded browser, either directly
/// (<paramref name="asEMBED"/> is true) or as a temporary HTML page built from the text
/// the IFilter reader extracts. The chosen path is then passed to <c>SelectFile</c>.
/// </summary>
/// <param name="asEMBED">True to navigate to the file itself; false to show its extracted text.</param>
/// <remarks>
/// When an unexpected error occurs it is shown in a message box and <c>SelectFile</c> is
/// not invoked. When the file type has no filter, <c>SelectFile</c> receives an empty string.
/// </remarks>
private void ifilt(bool asEMBED)
{
    _selected_file = string.Empty;

    if (openFileDialog.ShowDialog() == DialogResult.OK)
    {
        try
        {
            if (asEMBED)
            {
                _selected_file = this.openFileDialog.FileName;
                webBrowser.Navigate(_selected_file);
            }
            else
            {
                try
                {
                    string extracted;
                    using (TextReader reader = new FilterReader(openFileDialog.FileName))
                    {
                        extracted = reader.ReadToEnd();
                    }

                    // The text comes from an arbitrary document, so it is HTML-encoded before
                    // the line breaks are turned into tags; otherwise markup or script inside
                    // the document would be interpreted by the browser control.
                    string html = System.Net.WebUtility.HtmlEncode(extracted)
                        .Replace(Environment.NewLine, "<br>")
                        .Replace("\n", "<BR>" + Environment.NewLine);

                    // Path.GetTempPath never returns null, unlike reading the TEMP variable,
                    // so a missing variable can no longer be reported as an unsupported file.
                    string tmp_file = Path.Combine(Path.GetTempPath(), DateTime.Now.Ticks.ToString() + ".html");
                    using (StreamWriter sw = new StreamWriter(tmp_file, false, DefaultEncoding))
                    {
                        sw.Write(html);
                    }

                    _selected_file = tmp_file;
                    webBrowser.Navigate(tmp_file);
                }
                catch (ArgumentException)
                {
                    // Raised when no filter is available for the chosen file type.
                    MessageBox.Show("Данный тип файлов не поддерживается");
                }
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message, "Error");
            return;
        }
    }

    SelectFile.Invoke(_selected_file);
}
/// <summary>
/// Searches the contents of every path in <c>Directories</c> for keywords. Files with an
/// office extension are read through the IFilter reader, all others as plain text.
/// Failures are reported on the console and never stop the scan. When <c>ossflag</c> is
/// set, the collected files are zipped and uploaded afterwards.
/// </summary>
public void Search()
{
    foreach (String dir in Directories)
    {
        try
        {
            var fileInfo = new FileInfo(ConvertToNTPath(dir));

            if (Convert.ToUInt64(fileInfo.Length) < 1024 * this.MAX_FILE_SIZE)
            {
                try
                {
                    string fileContents;
                    if (IsOfficeExtension(fileInfo.Extension))
                    {
                        // Dispose the reader so the underlying filter is released per file.
                        using (var reader = new FilterReader(fileInfo.FullName))
                        {
                            fileContents = reader.ReadToEnd();
                        }
                    }
                    else
                    {
                        fileContents = File.ReadAllText(fileInfo.FullName);
                    }

                    CheckForKeywords(fileContents, fileInfo);
                }
                catch (Exception)
                {
                    Console.WriteLine("[-] Could not read contents of {0}", PrettyPrintNTPath(fileInfo.FullName));
                }
            }
            else
            {
                Console.WriteLine("[-] File exceeds max file size {0}", PrettyPrintNTPath(fileInfo.FullName));
            }
        }
        catch (PathTooLongException)
        {
            Console.WriteLine("[-] Path {0} is too long. Skipping.", dir);
        }
        catch (Exception e)
        {
            Console.WriteLine("[-] Some unknown exception {0} occured while processing {1}. Continuing with the next directory.", e.Message, dir);
        }
    }

    if (ossflag)
    {
        Console.WriteLine("\n[*] Now Create Zipfile and Upload Zipfile to aliyunOSS");
        Zipfile(Uploadfiles, this.bucketName, this.accessKeyId, this.accessKeySecret, this.endpoint);
    }
}
/// <summary>
/// Reads the whole text of a PowerPoint file through the IFilter reader.
/// </summary>
/// <param name="path">Path of the presentation.</param>
/// <returns>
/// A list holding the complete extracted text as its single element, or an empty list
/// when the file does not exist (the user is told via a message box).
/// </returns>
private List<string> GetTextFromPPT(string path)
{
    List<string> text = new List<string>();

    if (!File.Exists(path))
    {
        MessageBox.Show("Please give the valid path of the PowerPont file.");

        // Stop here: opening a missing file would only throw after the message.
        return text;
    }

    using (TextReader reader = new FilterReader(path))
    {
        text.Add(reader.ReadToEnd());
    }

    return text;
}
/// <summary>
/// Extracts the text of <paramref name="sourcefile"/> through the IFilter reader, passes
/// it to <c>calcWords</c> and writes it to "&lt;outpath&gt;\&lt;name&gt;.txt" in the system default
/// encoding.
/// </summary>
/// <param name="sourcefile">Path of the file to convert.</param>
/// <returns>
/// 0 on success, 1 when the source file does not exist, 2 when reading it fails,
/// 3 when writing the text file fails.
/// </returns>
private int ConvertFile(string sourcefile)
{
    if (!File.Exists(sourcefile))
    {
        return 1;
    }

    try
    {
        using (TextReader source = new FilterReader(sourcefile))
        {
            string content = source.ReadToEnd();
            calcWords(content);

            // NOTE(review): the pattern is LF followed by CR ("\n\r"), not CRLF — confirm intent.
            content = Regex.Replace(content, "\n\r", " ", RegexOptions.IgnoreCase);

            try
            {
                string baseName = GetFileName(sourcefile);
                string targetPath = (outpath + @"\" + baseName + ".txt").Replace(@"\\", @"\");

                using (StreamWriter output = new StreamWriter(targetPath, false, Encoding.Default))
                {
                    output.Write(content);
                    output.Close();
                }

                source.Close();
                return 0;
            }
            catch (Exception)
            {
                source.Close();
                return 3;
            }
        }
    }
    catch (Exception)
    {
        return 2;
    }
}
/// <summary>
/// Extracts the text of <c>sourcefile</c> through the IFilter reader, writes it to
/// "&lt;outtxtpath&gt;\&lt;fileid&gt;.txt" in the system default encoding and then runs
/// <c>ExecuteRegexTxt</c> on the written file.
/// </summary>
/// <returns>
/// <c>OutStatus.FileLoss</c> when the source file does not exist,
/// <c>OutStatus.ConvertSuccess</c> on success, and <c>OutStatus.TotxtFailed</c> when
/// reading, writing or the follow-up processing fails.
/// </returns>
public static int ConvertFile()
{
    if (!File.Exists(sourcefile))
    {
        return (int)OutStatus.FileLoss;
    }

    try
    {
        using (TextReader source = new FilterReader(sourcefile))
        {
            string content = source.ReadToEnd();

            // NOTE(review): the pattern is LF followed by CR ("\n\r"), not CRLF — confirm intent.
            content = Regex.Replace(content, "\n\r", " ", RegexOptions.IgnoreCase);

            try
            {
                string targetPath = (outtxtpath + @"\" + fileid + ".txt").Replace(@"\\", @"\");

                using (StreamWriter output = new StreamWriter(targetPath, false, Encoding.Default))
                {
                    output.Write(content);
                    output.Close();
                }

                source.Close();
                ExecuteRegexTxt(targetPath);
                return (int)OutStatus.ConvertSuccess;
            }
            catch (Exception writeError)
            {
                Console.WriteLine("保存txt文件发生异常" + writeError);
                return (int)OutStatus.TotxtFailed;
            }
        }
    }
    catch (Exception readError)
    {
        Console.WriteLine("打开文件失败" + readError);
        return (int)OutStatus.TotxtFailed;
    }
}
/// <summary>
/// Looks up the most recently uploaded file (highest ID in FileUpload), extracts its text
/// through the IFilter reader and stores the text in that row's FileData column. Text of
/// 1000 characters or more is cut to its first 999 characters and trimmed.
/// </summary>
/// <remarks>
/// Does nothing when the table is empty or the row has no file name. The connection is
/// closed after the update has been attempted.
/// </remarks>
public void WordDocumentRetrieve()
{
    DataSet ds1 = new DataSet();
    using (SqlDataAdapter adp1 = new SqlDataAdapter("Select Max(ID) From FileUpload", sqlCon))
    {
        adp1.Fill(ds1, "FileUpload");
    }

    // Max(ID) is NULL when the table has no rows; there is nothing to process then.
    object maxId = ds1.Tables["FileUpload"].Rows[0][0];
    if (maxId == null || maxId == DBNull.Value)
    {
        return;
    }

    id = Convert.ToInt32(maxId.ToString());

    DataSet ds = new DataSet();
    using (SqlDataAdapter adp = new SqlDataAdapter("Select UpFile1 From FileUpload Where ID=@id", sqlCon))
    {
        adp.SelectCommand.Parameters.AddWithValue("@id", id);
        adp.Fill(ds, "FileUpload");
    }

    name = ds.Tables["FileUpload"].Rows[0]["UpFile1"].ToString();
    if (name == "")
    {
        return;
    }

    // NOTE(review): the stored name is appended to "~/Docs/" unchecked — confirm that it
    // cannot contain path segments such as "..".
    string temp;
    using (TextReader reader = new FilterReader(Server.MapPath("~/Docs/" + name)))
    {
        temp = reader.ReadToEnd();
    }

    string fileData = temp.Length >= 1000 ? temp.Substring(0, 999).Trim() : temp;

    try
    {
        using (SqlCommand Comm = sqlCon.CreateCommand())
        {
            // The document text is passed as a parameter: it is arbitrary content and must
            // never be spliced into the SQL statement itself.
            Comm.CommandText = "Update FileUpload SET FileData=@fileData Where ID=@id";
            Comm.Parameters.AddWithValue("@fileData", fileData);
            Comm.Parameters.AddWithValue("@id", id);
            Comm.ExecuteNonQuery();
        }
    }
    finally
    {
        sqlCon.Close();
    }
}
/// <summary>
/// Searches the contents of every path in <c>Directories</c> for keywords. Each path is
/// prefixed with <c>\\?\</c> before it is opened. Files with an office extension are read
/// through the IFilter reader, all others as plain text. Failures are reported on the
/// console and never stop the scan.
/// </summary>
public void Search()
{
    foreach (String dir in Directories)
    {
        try
        {
            var NTdir = @"\\?\" + dir;
            var fileInfo = new FileInfo(NTdir);

            if (fileInfo.Length < MAX_FILE_SIZE)
            {
                try
                {
                    string fileContents;
                    if (IsOfficeExtension(fileInfo.Extension))
                    {
                        // Dispose the reader so the underlying filter is released per file.
                        using (var reader = new FilterReader(fileInfo.FullName))
                        {
                            fileContents = reader.ReadToEnd();
                        }
                    }
                    else
                    {
                        fileContents = File.ReadAllText(fileInfo.FullName);
                    }

                    CheckForKeywords(fileContents, fileInfo);
                }
                catch (Exception)
                {
                    Console.WriteLine("[-] Could not read contents of {0}", fileInfo.FullName.Replace(@"\\?\", ""));
                }
            }
            else
            {
                Console.WriteLine("[-] File exceeds 1MB file size {0}", fileInfo.FullName.Replace(@"\\?\", ""));
            }
        }
        catch (PathTooLongException)
        {
            Console.WriteLine("[-] Path {0} is too long. Skipping.", dir);
        }
        catch (Exception e)
        {
            Console.WriteLine("[-] Some unknown exception {0} occured while processing {1}. Continuing with the next directory.", e.Message, dir);
        }
    }
}
/// <summary>
/// Opens the file as a stream and reads it to the end through the IFilter reader.
/// The extracted text is discarded; a failure is written to the console.
/// </summary>
/// <param name="file">The file to read.</param>
private static void TryReadFile(FileInfo file)
{
    // Nested using blocks close the reader first and then the stream, and the stream is
    // closed even if closing the reader fails.
    using (var stream = file.OpenRead())
    {
        try
        {
            FilterReaderOptions filterReaderOptions = new FilterReaderOptions();
            using (var reader = new FilterReader(stream, file.Extension, filterReaderOptions))
            {
                reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
}
/// <summary>
/// Sample entry point: reads a fixed document once by file name and once as a stream,
/// prints any error and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        var fileName = @"C:\Sicos1977.doc";
        var file = new FileInfo(fileName);

        Console.WriteLine($"Reading {file.Name}");

        // Dispose the reader before the same file is opened again as a stream.
        using (var reader = new FilterReader(fileName))
        {
            reader.ReadToEnd();
        }

        // Try and read the stream from the file
        TryReadFile(file);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }

    Console.ReadKey();
}
/// <summary>
/// Returns the plain text of a file, provided its extension is listed in the
/// comma-separated <c>AllowedExtensions</c> setting.
/// </summary>
/// <param name="filePath">The physical path of the file that contains the text to be extracted.</param>
/// <returns>
/// The extracted text; an empty string when the extension is not allowed or the reader
/// raises an <see cref="ArgumentException"/>.
/// </returns>
public string ExtractTextFromFile(string filePath)
{
    string[] permitted = this.AllowedExtensions.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

    if (Array.IndexOf(permitted, Path.GetExtension(filePath)) < 0)
    {
        return String.Empty;
    }

    try
    {
        using (FilterReader source = new FilterReader(filePath))
        {
            return source.ReadToEnd();
        }
    }
    catch (ArgumentException ex)
    {
        // Usually means no IFilter could be found for the file. That is not critical,
        // so it is only logged.
        Logger.Error(string.Format("Unable to extract text for {0}.", filePath), ex);
        return String.Empty;
    }
}
/// <summary>
/// Extracts the text of a single file with the parser that matches its extension, runs the
/// enabled scan engines (social security and/or credit card) over that text and records the hits.
/// </summary>
/// <remarks>
/// Handled extensions (exact, case-sensitive match): .txt, .csv, .rtf, .pdf, .doc, .xls, .ppt,
/// .docx, .xlsx, .pptx, .odt, .ods, .odp, .xml and .pst. Files with any other extension, and the
/// file named "iTunes Music Library.xml", are ignored.
/// NOTE(review): an upper-case extension such as ".TXT" is currently not scanned.
/// Any exception raised while reading or scanning is recorded through
/// <c>Database.AddToTableUnScannable</c> instead of being propagated.
/// </remarks>
/// <param name="fInfo">Incoming FileInfo object to be processed.</param>
/// <param name="parentIsArchive">
/// Whether the file lives inside an archive. When true, hits are returned to the caller in the
/// result arrays; when false, hits are written to the database and the found-items label is updated.
/// </param>
/// <returns>
/// Two arrays that stay zero-filled unless <paramref name="parentIsArchive"/> is true and the scan had hits:
/// results[0] = { Count, RetCode, Priority, Pattern_D9, Pattern_D3D2D4 } of the social security scan;
/// results[1] = { Count, RetCode, Priority, VisaCount, MC_Count, AmexCount, DisCount, DinnCount, JCB_Count }
/// of the credit card scan.
/// </returns>
public static int[][] ProcessNonArchive(Delimon.Win32.IO.FileInfo fInfo, bool parentIsArchive)
{
    int[][] results = { new int[5], new int[9] };
    string ext = Path.GetExtension(fInfo.FullName);

    try
    {
        string text;
        switch (ext)
        {
            case ".txt":
            case ".csv":
                // 'using' guarantees the file handle is released even when ReadToEnd throws.
                using (StreamReader textFile = new StreamReader(fInfo.FullName))
                {
                    text = textFile.ReadToEnd();
                }
                break;

            case ".rtf":
                // The RichTextBox is only used to strip the RTF markup; dispose it afterwards.
                using (RichTextBox rtb = new RichTextBox())
                {
                    rtb.Rtf = System.IO.File.ReadAllText(fInfo.FullName);
                    text = rtb.Text;
                }
                break;

            case ".pdf":
                text = PDFParser.Parser.ParsePDFtoString(fInfo.FullName);
                break;

            case ".doc":
            case ".xls":
            case ".ppt":
                using (TextReader reader = new FilterReader(fInfo.FullName))
                {
                    text = reader.ReadToEnd();
                }
                break;

            case ".docx":
            case ".xlsx":
            case ".pptx":
            case ".odt":
            case ".ods":
            case ".odp":
                text = OfficeParser.Parser.Parse(fInfo.FullName, ext);
                if (text == null)
                {
                    // The office parser produced nothing to scan.
                    return results;
                }
                break;

            case ".xml":
                if (fInfo.Name.Equals("iTunes Music Library.xml"))
                {
                    return results;
                }
                text = XMLParser.Parser.ParseXMLtoString(fInfo.FullName);
                // TODO: a null/empty result should be logged to the Unscannable table.
                break;

            case ".pst":
                com.pff.PSTFile pstFile = new com.pff.PSTFile(fInfo.FullName);
                // NOTE(review): the returned text was never used by the original code; the call is
                // kept in case it has side effects - confirm and remove if it does not.
                pstFile.processFolder(pstFile.getRootFolder());
                com.pff.PSTFolder folder = pstFile.getRootFolder();
                // Process the main folder, once we hit an email we will scan that email.
                processFolder(folder);
                return results;

            default:
                return results;
        }

        ScanTextAndRecord(fInfo, text, parentIsArchive, results);
    }
    catch (Exception e)
    {
        // Covers UnauthorizedAccessException (e.g. encrypted file) as well as parser/engine
        // failures: the file is recorded as unscannable together with the exception details.
        Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
    }

    return results;
}

/// <summary>
/// Runs every enabled scan engine over <paramref name="text"/>. Hits are copied into
/// <paramref name="results"/> when the file is inside an archive, otherwise they are written to
/// the matching database table and the found-items label is updated.
/// </summary>
private static void ScanTextAndRecord(Delimon.Win32.IO.FileInfo fInfo, string text, bool parentIsArchive, int[][] results)
{
    if (MainForm.socialSecurityMode)
    {
        ScanData returnedData = Engine.ScanForSocialSecurity(text);
        if (returnedData.RetCode > 0)
        {
            if (parentIsArchive)
            {
                results[0][0] = returnedData.Count;
                results[0][1] = returnedData.RetCode;
                results[0][2] = (int)returnedData.Priority;
                results[0][3] = returnedData.Pattern_D9;
                results[0][4] = returnedData.Pattern_D3D2D4;
            }
            else
            {
                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                NotifyItemFound();
            }
        }
    }

    // The credit card scan is independent of the social security scan: it runs whenever its own
    // mode is enabled and is judged by its own return code.
    if (MainForm.creditCardMode)
    {
        CreditData ccReturnedData = Engine.ScanForCreditCard(text);
        if (ccReturnedData.RetCode > 0)
        {
            if (parentIsArchive)
            {
                results[1][0] = ccReturnedData.Count;
                results[1][1] = ccReturnedData.RetCode;
                results[1][2] = (int)ccReturnedData.Priority;
                results[1][3] = ccReturnedData.VisaCount;
                results[1][4] = ccReturnedData.MC_Count;
                results[1][5] = ccReturnedData.AmexCount;
                results[1][6] = ccReturnedData.DisCount;
                results[1][7] = ccReturnedData.DinnCount;
                results[1][8] = ccReturnedData.JCB_Count;
            }
            else
            {
                Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                NotifyItemFound();
            }
        }
    }
}

/// <summary>
/// Increments the found-items counter and asks the UI thread to refresh the label.
/// </summary>
private static void NotifyItemFound()
{
    try
    {
        mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ });
    }
    catch (InvalidOperationException)
    {
        // Best effort: the label update is skipped when BeginInvoke rejects the call.
    }
}
/* Creates a File System Watcher for a given drive with the objective of stopping the copying
 * of any active protected file(s) on the system. The watcher listens for newly created files
 * whose extension is in the 'extensions' list; each such file raises the 'OnCreated' handler.
 * The handler copies the file to the Data Shield folder, reads its contents with the
 * 'IFilterTextReader', and renders the text into PNG image(s). Each image is then compared
 * against all the 'Sample' images using the AForge.NET Imaging library. If the similarity is
 * 85% or above, it is treated as a copy attempt: the known editor processes are killed, the
 * event is logged (an administrator is informed) and the new file is deleted.
 * A breakdown of each section, where necessary, is given below.
 *
 * A DISCLAIMER, 'IFilterTextReader' is under the The Code Project Open License (CPOL) 1.02.
 * My usage is in compliance with the license. All credits go to Kees van Spelde.
 * URL Link: https://github.com/Sicos1977/IFilterTextReader
 *
 * A DISCLAIMER, 'AForge.NET' Framework is published under LGPL-3.0-only or LGPL-3.0-or-later license.
 * I have taken the required steps to validate my use of this library. All credits go to AForge.NET
 * Framework. I do not take credit for any of its code or methodology.
 * URL Link: http://www.aforgenet.com/framework/license.html
 *
 * Param: String - root path (drive) to watch, including sub-directories.
 * Return: FileSystemWatcher - the configured watcher, already raising events.
 */
public FileSystemWatcher Watcher(String drive)
{
    // Extensions (without the leading dot) of the file types that are inspected.
    List<String> extensions = new List<String> { "doc", "docx", "docm", "txt", "xlsx", "ppt", "pptx", "pdf" };

    FileSystemWatcher watcher = new FileSystemWatcher();
    watcher.Path = drive;
    watcher.IncludeSubdirectories = true;
    watcher.NotifyFilter = NotifyFilters.LastAccess | NotifyFilters.LastWrite | NotifyFilters.FileName | NotifyFilters.DirectoryName;
    watcher.Filter = "*.*";
    watcher.Created += new FileSystemEventHandler(OnCreated);
    watcher.EnableRaisingEvents = true;

    /* Builds the path of the per-user Data Shield working folder (with trailing backslash).
     *
     * Param: None.
     * Return: String - the folder path.
     */
    String DataFolder()
    {
        return "C:\\Users\\" + Username() + "\\AppData\\Roaming\\Data Shield\\";
    }

    /* Handles a newly created file: inspects it and reacts when it looks like a copy of a
     * protected file. All exceptions are swallowed so a single bad file cannot stop the watcher.
     *
     * Param: object - event source. FileSystemEventArgs - details of the created file.
     * Return: None.
     */
    void OnCreated(object source, FileSystemEventArgs e)
    {
        Thread.Sleep(1000);

        String fileName = e.FullPath.Split('\\').Last();
        String extension = e.FullPath.Split('.').Last();

        // Only inspect files with a listed extension (case-insensitive, so "X.DOCX" cannot slip
        // through) and skip files whose name contains "~$".
        if (!extensions.Contains(extension, StringComparer.OrdinalIgnoreCase) || fileName.Contains("~$"))
        {
            return;
        }

        try
        {
            String tempCopy = DataFolder() + "(ds)" + fileName;
            if (File.Exists(tempCopy))
            {
                return;
            }

            // Makes a copy of the newly created file with '(ds)' at the front. This is to ensure we can read it due to
            // the original file potentially being used by another process. The file contents is then read using 'IFilter'.
            File.Copy(e.FullPath, tempCopy);
            try
            {
                using (TextReader reader = new FilterReader(tempCopy))
                {
                    // Removes any new lines and indentations, allowing for all outputs to be the same text format.
                    String text = Regex.Replace(reader.ReadToEnd(), @"\t|\n|\r", "");

                    // Creates PNG image(s) of the text from the file.
                    MakeImage(text);

                    // Compares the freshly rendered image(s) with all the images stored in the 'Samples' folder,
                    // being all active protected files. On a similarity of 85% or more, it kills the parent
                    // processes, logs the copy attempt and deletes the file.
                    if (CompareImage())
                    {
                        Task.Run(() => MessageBox.Show("An attempt to copy a protected file has been identified. This has been logged and an administrator has been notified."));
                        KillProcesses();
                        LogCopyAttempt();
                        Thread.Sleep(1000);
                        File.Delete(e.FullPath);
                    }
                }
            }
            finally
            {
                // Always remove the temporary copy and clear the 'Temp' folder. If the copy were left
                // behind after a failure, the File.Exists guard above would skip every later file
                // with the same name.
                File.Delete(tempCopy);
                ClearTemp();
            }
        }
        catch (Exception)
        {
            // Deliberately ignored: a failure on one file must not take the watcher down.
        }
    }

    /* Creates an image of the text from the recently created file, one per sample
     * (active protected file) image. Each image uses the size of its sample because only
     * images of the same size can be compared together. Output goes to the 'Temp' folder
     * under the sample's file name.
     *
     * Param: String - text to be drawn to image.
     * Return: None.
     */
    void MakeImage(String text)
    {
        // Get all the PNG sample files from the 'Samples' folder.
        DirectoryInfo di = new DirectoryInfo(DataFolder() + "Samples\\");
        FileInfo[] samples = di.GetFiles("*.png");

        // Font and brush are shared by every rendering and released afterwards (GDI handles).
        using (Font font = new Font("Arial", 8, FontStyle.Regular, GraphicsUnit.Pixel))
        using (SolidBrush brush = new SolidBrush(Color.FromArgb(0, 0, 0)))
        {
            foreach (var sample in samples)
            {
                // Only the sample's dimensions are needed; release the sample image right away.
                int width;
                int height;
                using (Bitmap img = new Bitmap(sample.FullName))
                {
                    width = img.Width;
                    height = img.Height;
                }

                using (Bitmap bitmap = new Bitmap(width, height))
                {
                    using (Graphics graphics = Graphics.FromImage(bitmap))
                    {
                        graphics.Clear(Color.White);
                        graphics.SmoothingMode = SmoothingMode.AntiAlias;
                        graphics.TextRenderingHint = TextRenderingHint.AntiAlias;
                        graphics.DrawString(text, font, brush, 0, 0);
                        graphics.Flush();
                    }

                    bitmap.Save(DataFolder() + "Temp\\" + sample.Name);
                }
            }
        }
    }

    /* Compares the temp images (being the newly created files) against the sample
     * images (active protected files) using AForge.Net Imaging.
     *
     * Param: None.
     * Return: Boolean - true if any temp image had a similarity of 85% or higher to a sample.
     */
    Boolean CompareImage()
    {
        // Get all the sample PNG images from the Samples folder.
        DirectoryInfo di = new DirectoryInfo(DataFolder() + "Samples\\");
        FileInfo[] samples = di.GetFiles("*.png");

        // Cycle through each sample image to be tested against temp image(s).
        foreach (var sample in samples)
        {
            // Get all the temp PNG images from the Temp folder.
            di = new DirectoryInfo(DataFolder() + "Temp\\");
            FileInfo[] temps = di.GetFiles("*.png");

            // Cycle through each temp image to be compared against the sample image.
            foreach (var temp in temps)
            {
                // Every bitmap is wrapped in 'using' so it is released on all paths, including the early return.
                using (Bitmap imageOne = new Bitmap(sample.FullName))
                using (Bitmap imageTwo = new Bitmap(temp.FullName))
                {
                    // Only compare when the sample width is within a factor of 1.75 of the temp width.
                    if (imageOne.Width > imageTwo.Width / 1.75 && imageOne.Width < imageTwo.Width * 1.75)
                    {
                        using (Bitmap copyOne = new Bitmap(imageOne))
                        using (Bitmap copyTwo = new Bitmap(imageTwo))
                        using (Bitmap newBitmap1 = ChangePixelFormat(copyOne, PixelFormat.Format24bppRgb))
                        using (Bitmap newBitmap2 = ChangePixelFormat(copyTwo, PixelFormat.Format24bppRgb))
                        {
                            // Setup the AForge library, with the threshold being at 85% similarity.
                            ExhaustiveTemplateMatching tm = new ExhaustiveTemplateMatching(0.85f);
                            try
                            {
                                // Perform the comparison of the two images. One or more results means the
                                // similarity reached 85%, i.e. a copy of a protected file.
                                var results = tm.ProcessImage(newBitmap1, newBitmap2);
                                if (results.Length > 0)
                                {
                                    return true;
                                }
                            }
                            catch (Exception ex)
                            {
                                // In the case an error occurs.
                                MessageBox.Show(ex.Message);
                            }
                        }
                    }
                }
            }
        }

        return false;
    }

    /* Changes the pixel format of the input image.
     *
     * Param: Bitmap - input image. PixelFormat - new image format.
     * Return: new bitmap image (the caller owns and disposes it).
     */
    Bitmap ChangePixelFormat(Bitmap inputImage, PixelFormat newFormat)
    {
        return inputImage.Clone(new Rectangle(0, 0, inputImage.Width, inputImage.Height), newFormat);
    }

    /* Kills all the known processes used by protected files. A process that cannot be
     * inspected or killed is skipped so the remaining ones are still handled.
     *
     * Param: None.
     * Return: None.
     */
    void KillProcesses()
    {
        foreach (var process in Process.GetProcesses())
        {
            try
            {
                if (process.ProcessName.Equals("WINWORD") || process.ProcessName.Equals("POWERPNT") || process.ProcessName.Equals("EXCEL") || process.ProcessName.Equals("notepad") || process.ProcessName.Equals("MSPUB"))
                {
                    process.Kill();
                }
            }
            catch (InvalidOperationException)
            {
                // The process already exited; nothing left to kill.
            }
            catch (System.ComponentModel.Win32Exception)
            {
                // The process could not be terminated; carry on with the others.
            }
            finally
            {
                process.Dispose();
            }
        }
    }

    /* Logs the copy of protected file attempt to the server configured under the
     * 'logcopyattempt' app setting, sending "file|user|computer".
     *
     * Param: None.
     * Return: None.
     */
    void LogCopyAttempt()
    {
        DirectoryInfo di = new DirectoryInfo(DataFolder() + "Samples\\");

        // NOTE(review): this reports the first file in 'Samples', not necessarily the sample that matched.
        String file = di.GetFiles().First().Name.Replace(".png", "");

        using (WebClient client = new WebClient())
        {
            // SECURITY NOTE(review): this disables TLS certificate validation for the whole process,
            // not just this request. Kept as-is to avoid breaking the existing server setup - replace
            // with proper certificate trust (or pinning) as soon as possible.
            ServicePointManager.ServerCertificateValidationCallback = (senderX, certificate, chain, sslPolicyErrors) => { return true; };
            client.UploadString(ConfigurationManager.AppSettings["logcopyattempt"], file + "|" + Username() + "|" + ComputerName());
        }
    }

    /* Clears the Temp folder. This is done after the newly created file has been analysed.
     *
     * Param: None.
     * Return: None.
     */
    void ClearTemp()
    {
        DirectoryInfo di = new DirectoryInfo(DataFolder() + "Temp\\");
        foreach (FileInfo file in di.GetFiles())
        {
            file.Delete();
        }
    }

    return watcher;
}
/// <summary>
/// Get the textual representation of the Binary Data of the document using IFILTER
/// </summary>
/// <remarks>
/// The bytes are written to a uniquely named temporary file under "~/TemporaryFilesDirectory/",
/// the text is extracted from that file with a <c>FilterReader</c>, and the temporary file is
/// deleted again. Every failure is logged and results in a null return value; no exception is
/// propagated from the steps guarded below.
/// </remarks>
/// <returns>The text of the document or null if we could not parse the document into text</returns>
public virtual string GetTextFromDocumentBinary()
{
    /*
     * The default is to save the binary data to a temporary location and
     * use IFilter to extract the text. This should be a good catch-all for
     * all files that don't have a specific mechanism for extracting the
     * text of the file.
     */

    // If we have no bytes then we can't do anything.
    if (Bytes == null || Bytes.Length == 0)
    {
        log.Error("Tried to extract text from empty bytes for file " + Name);
        return null;
    }

    // Get the original file name without the extension
    string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(Name);
    bool success = false;
    string newFileName = "";
    try
    {
        // One generator for all attempts. Creating a new Random per iteration can yield the same
        // number every time (time-based seed), which would make the 50 retries pointless.
        Random rand = new Random();

        // Now try to generate a new temporary file name that we don't have in the temporary directory
        for (int i = 0; i < 50; i++)
        {
            newFileName = "~/TemporaryFilesDirectory/" + fileNameWithoutExtension + Convert.ToString(rand.Next(100000)) + Extension;
            newFileName = HttpContext.Current.Server.MapPath(newFileName);

            // Try to see if this file exists
            if (!File.Exists(newFileName))
            {
                success = true;
                break;
            }
        }

        if (!success)
        {
            // We failed. Log the problem.
            log.Error("Failed to create a unique file to extract data. Last file tried is " + newFileName);
            return null;
        }
    }
    catch (Exception e)
    {
        // We failed. Log the problem.
        log.Error("Failed to create a unique file to extract data for file " + Name, e);
        return null;
    }

    try
    {
        // Now try to write the bytes to the newly created file; 'using' closes the stream on every path.
        using (FileStream theFileStream = File.Create(newFileName))
        {
            theFileStream.Write(Bytes, 0, Bytes.Length);
        }
    }
    catch (Exception e)
    {
        // We failed to write the file. Log the problem
        log.Error("Failed to write bytes to new file " + newFileName, e);

        // Best effort removal of a partially written file.
        try
        {
            if (File.Exists(newFileName))
            {
                File.Delete(newFileName);
            }
        }
        catch
        {
            // We don't do anything. This is a best effort delete.
        }

        return null;
    }

    string text = null;
    FilterReader myFilterReader = null;

    // Now try to extract the text for the file
    try
    {
        myFilterReader = new FilterReader(newFileName);
        text = myFilterReader.ReadToEnd();
    }
    catch (Exception e)
    {
        log.Error("Failed to parse text for file " + Name + " using IFilter", e);
    }
    finally
    {
        // Close the reader on both the success and the failure path so the temporary file can be deleted.
        try
        {
            if (myFilterReader != null)
            {
                myFilterReader.Close();
            }
        }
        catch
        {
            // We don't do anything. This is a best effort close.
        }
    }

    try
    {
        // Try to delete the temporary file
        if (File.Exists(newFileName))
        {
            File.Delete(newFileName);
        }
    }
    catch (Exception e)
    {
        log.Error("Failed to delete temporary file " + newFileName, e);
    }

    return text;
}
/// <summary>
/// Recursively searches <paramref name="dir"/> for MS Word files whose text or file name
/// matches at least one of the search terms, and adds each matching file once to the result
/// list and its binding source.
/// </summary>
/// <remarks>
/// Matching ignores case. A term containing '+' (for example "invoice+2020") matches only when
/// every '+'-separated part is found in the file's text or in its name. The search stops as soon
/// as the <c>_cancel</c> flag is set. Read errors are logged and the search continues.
/// </remarks>
/// <param name="dir">Directory to search; nothing happens when it does not exist.</param>
/// <param name="words">Search terms.</param>
private void TimKiemThuMuc(DirectoryInfo dir, List <string> words)
{
    if (!dir.Exists)
    {
        return;
    }

    lblTrangThai.ChangeTextAsync($"Tìm thư mục {dir.FullName}", Color.Blue);

    try
    {
        foreach (var file in dir.GetFiles())
        {
            if (_cancel)
            {
                lblTrangThai.ChangeTextAsync("Đã hủy tìm kiếm", Color.Red);
                break;
            }

            if (!file.IsMSWordFile())
            {
                continue;
            }

            Debug.WriteLine($"Tìm File {file.FullName}");
            try
            {
                // Legacy .doc files are read through the IFilter reader, everything else through the Word behavior.
                string allTextLower;
                if (file.Extension.ToLower() == ".doc")
                {
                    using (TextReader reader = new FilterReader(file.FullName))
                    {
                        allTextLower = reader.ReadToEnd().ToLower();
                    }
                }
                else
                {
                    allTextLower = _msWordBehavior.ReadAllText(file.FullName).ToLower();
                }

                string fileNameLower = file.Name.ToLower();
                foreach (var w in words)
                {
                    if (MatchesSearchTerm(w, allTextLower, fileNameLower))
                    {
                        lock (_lstResult)
                        {
                            _lstResult.Add(file);
                            fileInfoBindingSource.Add(file);
                        }

                        // One match is enough; without this a file matching several terms was listed several times.
                        break;
                    }
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine($"Lỗi: {file.FullName}\r\n{ex.Message}");
                Log.WriteLog($"Lỗi đọc tệp {file.FullName}\r\n{ex.Message}");
            }
        }
    }
    catch (Exception ex)
    {
        // Typically an inaccessible directory: record it and continue with the sub-directories.
        Debug.WriteLine($"Lỗi: {dir.FullName}\r\n{ex.Message}");
        Log.WriteLog($"Lỗi đọc thư mục {dir.FullName}\r\n{ex.Message}");
    }

    DirectoryInfo[] subDirectories;
    try
    {
        subDirectories = dir.GetDirectories();
    }
    catch (Exception ex)
    {
        Debug.WriteLine($"Lỗi: {dir.FullName}\r\n{ex.Message}");
        Log.WriteLog($"Lỗi đọc thư mục {dir.FullName}\r\n{ex.Message}");
        return;
    }

    foreach (var d in subDirectories)
    {
        try
        {
            if (_cancel)
            {
                lblTrangThai.ChangeTextAsync("Đã hủy tìm kiếm", Color.Red);
                break;
            }

            TimKiemThuMuc(d, words);
        }
        catch (Exception ex)
        {
            Debug.WriteLine($"Lỗi: {d.FullName}\r\n{ex.Message}");
            Log.WriteLog($"Lỗi đọc thư mục {d.FullName}\r\n{ex.Message}");
        }
    }
}

/// <summary>
/// Tells whether every '+'-separated part of <paramref name="term"/> occurs in the (already
/// lower-cased) file text or file name. A term without '+' is treated as a single part.
/// </summary>
private static bool MatchesSearchTerm(string term, string contentLower, string fileNameLower)
{
    // Lower-case the term once so it is compared consistently against both lower-cased inputs.
    foreach (var part in term.ToLower().Split('+'))
    {
        if (!contentLower.Contains(part) && !fileNameLower.Contains(part))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Reads the complete text content of <paramref name="fileName"/> through a <c>FilterReader</c>.
/// </summary>
/// <param name="fileName">Path of the file whose text is wanted.</param>
/// <returns>Everything the reader produces for the file, as a single string.</returns>
public string GetAllText(string fileName)
{
    using (FilterReader filterReader = new FilterReader(fileName))
    {
        string allText = filterReader.ReadToEnd();
        return allText;
    }
}