/// <summary>
        /// Chooses a text extractor from the file's extension, passes the extracted text to
        /// <c>UnsafeNativeMethod.ScanAnsi</c> and logs the file through <c>WriteToLog</c> when the
        /// scan returns a positive code. Archive files are only unpacked into a temporary
        /// directory (which is removed again); files with other extensions are ignored.
        /// Any exception is reported on the console and swallowed, so one bad file never
        /// aborts the caller.
        /// </summary>
        /// <param name="fi">The file to inspect.</param>
        /// <param name="level">Not read by this method; kept so existing callers keep compiling.</param>
        public static void Determine_Parser(System.IO.FileInfo fi, uint level)
        {
            string ext = fi.Extension;
            try
            {
                if (HasExtension(ext, ".pst"))
                {
                    // Recognised, but no handling is implemented for this format.
                    return;
                }

                if (HasExtension(ext, ArchiveExtensions))
                {
                    ExtractArchiveToTemp(fi);
                    return;
                }

                string text = ReadDocumentText(fi, ext);
                if (text != null)
                {
                    ScanAndLog(fi, text);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }

        // Extensions that are handed to the 7-Zip extractor.
        private static readonly string[] ArchiveExtensions =
        {
            ".zip", ".7z", ".xz", ".bzip2", ".gzip", ".tar", ".wim", ".arj",
            ".cab", ".chm", ".cpio", ".cramfs", ".deb", ".fat", ".hfs", ".iso",
            ".lzh", ".lzma", ".mbr", ".msi", ".nsis", ".ntfs", ".rar", ".rpm",
            ".squashfs", ".udf", ".vhd", ".xar", ".z"
        };

        // Returns true when ext equals one of the candidates, ignoring case (".TXT" matches ".txt").
        private static bool HasExtension(string ext, params string[] candidates)
        {
            foreach (string candidate in candidates)
            {
                if (string.Equals(ext, candidate, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }
            return false;
        }

        // Extracts the text of a supported document type; returns null when the extension is
        // not handled or the parser produced no text.
        private static string ReadDocumentText(System.IO.FileInfo fi, string ext)
        {
            if (HasExtension(ext, ".txt"))
            {
                // using guarantees the file handle is released even when reading fails.
                using (StreamReader textFile = new StreamReader(fi.FullName))
                {
                    return textFile.ReadToEnd();
                }
            }
            if (HasExtension(ext, ".rtf"))
            {
                // RichTextBox is a disposable control; it is used here only to strip RTF markup.
                using (RichTextBox rtb = new RichTextBox())
                {
                    rtb.Rtf = System.IO.File.ReadAllText(fi.FullName);
                    return rtb.Text;
                }
            }
            if (HasExtension(ext, ".pdf"))
            {
                return PDFParser.Parser.ParsePDFtoString(fi.FullName);
            }
            if (HasExtension(ext, ".doc", ".xls", ".ppt"))
            {
                using (TextReader reader = new FilterReader(fi.FullName))
                {
                    return reader.ReadToEnd();
                }
            }
            if (HasExtension(ext, ".docx"))
            {
                return OfficeParser.Parser.docxParser(fi.FullName);
            }
            if (HasExtension(ext, ".xlsx"))
            {
                return OfficeParser.Parser.xlsxParser(fi.FullName);
            }
            if (HasExtension(ext, ".pptx"))
            {
                return OfficeParser.Parser.pptxParser(fi.FullName);
            }
            if (HasExtension(ext, ".odt", ".ods", ".odp"))
            {
                return OfficeParser.Parser.openOfficeParser(fi.FullName);
            }
            if (HasExtension(ext, ".xml"))
            {
                return XMLParser.Parser.ParseXMLtoString(fi.FullName);
            }
            return null;
        }

        // Runs the native scan over the text and logs the file when the scan reports a positive code.
        private static void ScanAndLog(System.IO.FileInfo fi, string text)
        {
            int count = 0;
            int retCode = UnsafeNativeMethod.ScanAnsi(text.ToCharArray(), text.Length, ref count);
            if (retCode > 0)
            {
                WriteToLog(fi.Name, fi.FullName, retCode.ToString(), count);
            }
        }

        // Unpacks the archive into "<temp>\temp_dir\" and removes that directory again, also when
        // extraction fails. NOTE(review): the extracted files are not scanned here (the original
        // only enumerated them without doing anything) - confirm whether scanning was intended.
        private static void ExtractArchiveToTemp(System.IO.FileInfo fi)
        {
            String tempFolderPath = Path.GetTempPath() + "temp_dir\\";
            try
            {
                Directory.CreateDirectory(tempFolderPath);

                // 7z.dll is resolved relative to the working directory.
                SevenZipExtractor.SetLibraryPath("7z.dll");

                // using releases the extractor on every path and never touches a null
                // reference when the constructor itself throws.
                using (SevenZipExtractor extractor = new SevenZipExtractor(fi.FullName))
                {
                    extractor.ExtractArchive(tempFolderPath);
                }
            }
            finally
            {
                if (Directory.Exists(tempFolderPath))
                {
                    Directory.Delete(tempFolderPath, true);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// For a response with status OK and a content type that maps to a file extension,
        /// writes the response bytes to a temporary file and stores the trimmed text read
        /// from it by <c>FilterReader</c> in <c>propertyBag.Text</c>.
        /// </summary>
        /// <param name="crawler">Not read by this method.</param>
        /// <param name="propertyBag">Carries the response; its Title and Text are updated.</param>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            bool succeeded = propertyBag.StatusCode == HttpStatusCode.OK;
            if (!succeeded)
            {
                return;
            }

            string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
            if (fileExtension.IsNullOrEmpty())
            {
                return;
            }

            propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

            using (TempFile scratch = new TempFile())
            {
                // The extension is appended to the temporary file name before the reader opens it.
                scratch.FileName = scratch.FileName + "." + fileExtension;
                File.WriteAllBytes(scratch.FileName, propertyBag.Response);

                using (FilterReader textSource = new FilterReader(scratch.FileName))
                {
                    propertyBag.Text = textSource.ReadToEnd().Trim();
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Loads a filter for the file with <c>loadIFilter</c> and returns what
        /// <c>ExtractText</c> produces for it. The COM filter object is released afterwards.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <returns>The result of <c>ExtractText</c> for the loaded filter.</returns>
        public static string Parse(string filename)
        {
            // NOTE(review): this first pass reads the whole file through FilterReader (with a
            // 60 second timeout), discards the text and ignores every failure. Its purpose is
            // not visible from here - confirm whether it is still needed.
            try
            {
                string extension = new FileInfo(filename).Extension;
                using (var probe = new FilterReader(filename, extension, filterReaderTimeout: FilterReaderTimeout.TimeoutWithException, timeout: 60 * 1000))
                {
                    probe.ReadToEnd();
                }
            }
            catch (Exception)
            {
            }

            IFilter filter = null;
            try
            {
                filter = loadIFilter(filename);
                return ExtractText(filter);
            }
            finally
            {
                if (filter != null)
                {
                    Marshal.ReleaseComObject(filter);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Copies the stream into a temporary ".doc" file, reads its text through
        /// <c>FilterReader</c> and returns every non-blank line after passing it through
        /// <c>FormatParagraphText</c>. The temporary file is removed even when reading fails.
        /// </summary>
        /// <param name="readStream">Source stream; it is rewound to the start, so it must be seekable.</param>
        /// <returns>The formatted non-blank lines, in document order.</returns>
        public static List<string> ExtractDocParagraphs(Stream readStream)
        {
            // GetTempFileName creates an empty file; it is renamed so the path ends in ".doc".
            var tmp      = Path.GetTempFileName();
            var fileName = tmp + ".doc";

            try
            {
                File.Move(tmp, fileName);
                using (var fileStream = File.Create(fileName))
                {
                    readStream.Seek(0, SeekOrigin.Begin);
                    readStream.CopyTo(fileStream);
                }

                var results = new List<string>();

                using (var reader = new FilterReader(fileName))
                using (var stringReader = new StringReader(reader.ReadToEnd()))
                {
                    string nextLine;
                    while ((nextLine = stringReader.ReadLine()) != null)
                    {
                        if (!string.IsNullOrWhiteSpace(nextLine))
                        {
                            results.Add(FormatParagraphText(nextLine));
                        }
                    }
                }

                return results;
            }
            finally
            {
                // File.Delete does not throw for a missing file, so both names can be cleaned
                // up regardless of how far the method got.
                File.Delete(fileName);
                File.Delete(tmp);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Writes the text of the input document to <paramref name="outputFileName"/> as UTF-8,
        /// overwriting an existing file. The active branch loads the input with
        /// <c>PDDocument</c> and extracts text with <c>PDFTextStripper</c>; the FilterReader
        /// variant is compiled out.
        /// </summary>
        /// <param name="inpufFileName">Path of the document to read.</param>
        /// <param name="outputFileName">Path of the text file to write.</param>
        public void ExtractText(string inpufFileName, string outputFileName)
        {
#if false
            IFilterTextReader.FilterReader reader = new FilterReader(inpufFileName);
            var data = reader.ReadToEnd();
            using (var writer = new StreamWriter(outputFileName, false, System.Text.Encoding.UTF8))
            {
                writer.Write(data);
            }
#else
            // Loading happens before the try block: when it throws there is nothing to close.
            PDDocument document = PDDocument.load(inpufFileName);
            try
            {
                var textStripper = new PDFTextStripper();
                using (var output = new StreamWriter(outputFileName, false, System.Text.Encoding.UTF8))
                {
                    string extracted = textStripper.getText(document);
                    output.Write(extracted);
                }
            }
            finally
            {
                if (document != null)
                {
                    document.close();
                }
            }
#endif
        }
Exemplo n.º 6
0
 // Searches the contents of every path in Directories for keywords.
 // Files at or above MAX_FILE_SIZE are skipped with a console message. Office files are
 // read through FilterReader, everything else as plain text. A file whose contents cannot
 // be read is reported on the console and skipped.
 public void Search()
 {
     foreach (String dir in Directories)
     {
         var fileInfo = new FileInfo(dir);
         if (fileInfo.Length < MAX_FILE_SIZE)
         {
             if (IsOfficeExtension(fileInfo.Extension))
             {
                 try
                 {
                     string fileContents;
                     // Dispose the reader so the filter and the file it holds are released
                     // before moving on to the next file.
                     using (var reader = new FilterReader(fileInfo.FullName))
                     {
                         fileContents = reader.ReadToEnd();
                     }
                     CheckForKeywords(fileContents, fileInfo);
                 }
                 catch (Exception)
                 {
                     Console.WriteLine("[-] Could not read contents of {0}", fileInfo.FullName);
                 }
             }
             else
             {
                 //normal file
                 try
                 {
                     CheckForKeywords(File.ReadAllText(fileInfo.FullName), fileInfo);
                 }
                 catch (Exception)
                 {
                     Console.WriteLine("[-] Could not read contents of {0}", fileInfo.FullName);
                 }
             }
         }
         else
         {
             Console.WriteLine("[-] File exceeds 1MB file size {0}", fileInfo.FullName);
         }
     }
 }
        /// <summary>
        /// Returns the text of the current document as read through a filter chosen by the
        /// document's file extension. Returns an empty string when no filter is available or
        /// when anything fails; both cases are written to the debug log.
        /// </summary>
        /// <returns>The extracted text, or <c>String.Empty</c>.</returns>
        public string GetContent()
        {
            try
            {
                var physicalPath = ClientState.Current.Storage.ResolvePhysicalFilename(".", document.StreamName);
                var extension    = Path.GetExtension(document.Filename);
                var filter       = FilterReader.GetFilter(physicalPath, extension);

                // A null filter means there is no filter for the given extension.
                if (filter == null)
                {
                    Logger.Debug("Unable to find filter for file {0}", LogSource.Search, document.Filename);

                    return String.Empty;
                }

                using (FilterReader reader = new FilterReader(filter))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                Logger.Debug("An error occured while trying to find filter for file {0}. Exception = {1}", LogSource.Search, document.Filename, ex);

                return String.Empty;
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// For a response with status OK and a content type that maps to a file extension,
        /// copies the response stream into a temporary file and stores the trimmed text read
        /// from it by <c>FilterReader</c> in <c>propertyBag.Text</c>.
        /// </summary>
        /// <param name="crawler">Not read by this method.</param>
        /// <param name="propertyBag">Carries the response; its Title and Text are updated.</param>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            bool succeeded = propertyBag.StatusCode == HttpStatusCode.OK;
            if (!succeeded)
            {
                return;
            }

            string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
            if (fileExtension.IsNullOrEmpty())
            {
                return;
            }

            propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

            using (TempFile scratch = new TempFile())
            {
                scratch.FileName = scratch.FileName + "." + fileExtension;

                // The file is fully written and closed before the filter reader opens it.
                using (FileStream target = new FileStream(scratch.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, 0x1000))
                {
                    using (Stream source = propertyBag.GetResponse())
                    {
                        source.CopyToStream(target);
                    }
                }

                using (FilterReader textSource = new FilterReader(scratch.FileName))
                {
                    propertyBag.Text = textSource.ReadToEnd().Trim();
                }
            }
        }
Exemplo n.º 9
0
 /* Takes an image snapshot of the file before it is opened, to use as a sample for
  * the A.I. image comparison when attempting to detect any copies of the file. This is done
  * by first reading the contents of the actual file, using the NuGet package 'IFilterTextReader',
  * a C# TextReader that gets text from different file formats through the IFilter interface.
  * The reader obtains all the text within the document; tabs, line feeds and carriage returns
  * are removed so each document is read the same way. The text is then drawn onto a Bitmap
  * sized to the measured text, and the Bitmap is saved as a PNG to the 'Samples' folder
  * for future analysis if needed.
  *
  * Every GDI+ object (bitmaps, graphics, font, brush) is disposed on all paths, including
  * when drawing or saving throws. Any exception is shown in a message box.
  *
  * A DISCLAIMER, 'IFilterTextReader' is under the The Code Project Open License (CPOL) 1.02.
  * My usage is in compliance with the license. All credits go to Kees van Spelde.
  * URL Link: https://github.com/Sicos1977/IFilterTextReader
  */
 public void SnapshotFile(String filePath)
 {
     try
     {
         string text;
         using (TextReader reader = new FilterReader(filePath))
         {
             text = Regex.Replace(reader.ReadToEnd(), @"\t|\n|\r", "");
         }

         using (Font font = new Font("Arial", 8, FontStyle.Regular, GraphicsUnit.Pixel))
         {
             int width;
             int height;

             // A throw-away 1x1 surface is only needed to measure the rendered text.
             using (Bitmap probe = new Bitmap(1, 1))
             using (Graphics probeGraphics = Graphics.FromImage(probe))
             {
                 var measured = probeGraphics.MeasureString(text, font);
                 width  = (int)measured.Width;
                 height = (int)measured.Height;
             }

             // A zero width or height (e.g. empty text) makes the Bitmap constructor throw;
             // that is reported by the catch below, as before.
             using (Bitmap bitmap = new Bitmap(width, height))
             {
                 using (Graphics graphics = Graphics.FromImage(bitmap))
                 using (SolidBrush brush = new SolidBrush(Color.FromArgb(0, 0, 0)))
                 {
                     graphics.Clear(Color.White);
                     graphics.SmoothingMode     = SmoothingMode.AntiAlias;
                     graphics.TextRenderingHint = TextRenderingHint.AntiAlias;
                     graphics.DrawString(text, font, brush, 0, 0);
                     graphics.Flush();
                 }

                 bitmap.Save("C:\\Users\\" + utilities.Username() + "\\AppData\\Roaming\\Data Shield\\Samples\\" + filePath.Split('\\').Last() + ".png");
             }
         }
     }
     catch (Exception ex)
     {
         MessageBox.Show(ex.Message);
     }
 }
Exemplo n.º 10
0
        /// <summary>
        /// Extracts text from the stream by handing its bytes to <c>FilterReader</c> with the
        /// ".xls" extension.
        /// </summary>
        /// <param name="stream">Stream holding the document.</param>
        /// <returns>The text returned by the reader.</returns>
        public override string Extract(Stream stream)
        {
            //IFilter
            // Dispose the reader so whatever it holds is released once the text is read.
            // NOTE(review): assumes this FilterReader derives from TextReader (it exposes
            // ReadToEnd) and is therefore disposable - confirm.
            using (var target = new FilterReader(GetBytesFromStream(stream), ".xls"))
            {
                target.Init();
                return target.ReadToEnd();
            }
        }
Exemplo n.º 11
0
        // One shared generator: creating a new Random per call can hand out identical IDs
        // to calls made in quick succession on runtimes that seed Random from the clock.
        private static readonly Random IdGenerator = new Random();

        /// <summary>
        /// Builds a <c>DataForIndex</c> entry labelled "Docs" for the given file, with a random
        /// non-negative ID and the reader's full text as body.
        /// </summary>
        /// <param name="reader">Reader positioned on the document text; it is read to the end.</param>
        /// <param name="fileName">File name stored on the entry; its extension is stored separately.</param>
        /// <returns>The populated entry.</returns>
        public static DataForIndex CreateDocumentIndex(FilterReader reader, string fileName)
        {
            int id;
            // Random is not thread-safe, so access to the shared instance is serialised.
            lock (IdGenerator)
            {
                id = IdGenerator.Next(int.MaxValue);
            }

            var dfi = new DataForIndex();

            dfi.ID            = id;
            dfi.Label         = "Docs";
            dfi.FileExtension = Path.GetExtension(fileName);
            dfi.FileName      = fileName;
            dfi.Body          = reader.ReadToEnd();
            return dfi;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Extracts text from a PDF stream. The IFilter-based <c>FilterReader</c> is tried
        /// first; when it fails with anything other than an out-of-memory condition, the
        /// page contents are read with <c>PdfReader</c> instead.
        /// </summary>
        /// <param name="stream">Stream holding the PDF.</param>
        /// <param name="context">Not read by this method.</param>
        /// <returns>
        /// The extracted text. An empty string when the first attempt runs out of memory;
        /// whatever was collected so far when the fallback runs out of memory.
        /// </returns>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            try
            {
                //extract text using IFilter
                // Dispose the reader so whatever it holds is released once the text is read.
                // NOTE(review): assumes this FilterReader derives from TextReader (it exposes
                // ReadToEnd) and is therefore disposable - confirm.
                using (var target = new FilterReader(GetBytesFromStream(stream), ".pdf"))
                {
                    target.Init();
                    return target.ReadToEnd();
                }
            }
            catch (OutOfMemoryException ex)
            {
                Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
                                    "Pdf text extract failed with out of memory exception. " + ex,
                                    properties: new Dictionary <string, object> {
                    { "Stream size", stream.Length }
                });

                return string.Empty;
            }
            catch (Exception ex)
            {
                Logger.WriteWarning(EventId.Indexing.IFilterError, "Pdf IFilter error: " + ex.Message);
            }

            //fallback to the other mechanism in case the pdf IFilter is missing
            // NOTE(review): the stream may already have been consumed by GetBytesFromStream
            // above; verify that PdfReader still sees the document from its start.
            var text = new StringBuilder();

            try
            {
                var pdfReader = new PdfReader(stream);
                for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    // extract text using the old version (4.1.6) of iTextSharp
                    var pageText = ExtractTextFromPdfBytes(pdfReader.GetPageContent(page));
                    if (string.IsNullOrEmpty(pageText))
                    {
                        continue;
                    }

                    text.Append(pageText);
                }
            }
            catch (OutOfMemoryException ex)
            {
                Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
                                    "Pdf text extract failed with out of memory exception. " + ex,
                                    properties: new Dictionary <string, object> {
                    { "Stream size", stream.Length }
                });
            }

            return text.ToString();
        }
Exemplo n.º 13
0
        /// <summary>
        /// Reads the text of a file through <c>FilterReader</c> and returns every match of
        /// <paramref name="regularExpression"/> found in it.
        /// </summary>
        /// <param name="fileName">The file to inspect</param>
        /// <param name="regularExpression">The regular expression to use</param>
        /// <param name="ignoreCase">Set to false to search case sensitive</param>
        /// <returns>The matched strings in the order they occur; empty when nothing matches</returns>
        public string[] GetRegexMatchesFromFile(string fileName, string regularExpression, bool ignoreCase = true)
        {
            RegexOptions options = RegexOptions.None;
            if (ignoreCase)
            {
                options = RegexOptions.IgnoreCase;
            }

            var pattern = new Regex(regularExpression, options);
            var found   = new List<string>();

            using (var reader = new FilterReader(fileName))
            {
                string contents = reader.ReadToEnd();
                foreach (Match hit in pattern.Matches(contents))
                {
                    found.Add(hit.ToString());
                }
            }

            return found.ToArray();
        }
Exemplo n.º 14
0
        /// <summary>
        /// Extracts text from the stream by handing its bytes to <c>FilterReader</c> with the
        /// ".msg" extension. Failures are logged as a warning and yield an empty string.
        /// </summary>
        /// <param name="stream">Stream holding the message file.</param>
        /// <param name="context">Not read by this method.</param>
        /// <returns>The extracted text, or <c>string.Empty</c> when extraction fails.</returns>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            try
            {
                //IFilter
                // Dispose the reader so whatever it holds is released once the text is read.
                // NOTE(review): assumes this FilterReader derives from TextReader (it exposes
                // ReadToEnd) and is therefore disposable - confirm.
                using (var target = new FilterReader(GetBytesFromStream(stream), ".msg"))
                {
                    target.Init();
                    return target.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                Logger.WriteWarning(EventId.Indexing.IFilterError, "Msg IFilter error: " + ex.Message);
            }

            return string.Empty;
        }
Exemplo n.º 15
0
        /// <summary>
        /// Extracts text from the stream by handing its bytes to <c>FilterReader</c> with the
        /// ".doc" extension. Failures are logged as a warning and yield an empty string.
        /// </summary>
        /// <param name="stream">Stream holding the document.</param>
        /// <returns>The extracted text, or <c>string.Empty</c> when extraction fails.</returns>
        public override string Extract(System.IO.Stream stream)
        {
            try
            {
                //IFilter
                // Dispose the reader so whatever it holds is released once the text is read.
                // NOTE(review): assumes this FilterReader derives from TextReader (it exposes
                // ReadToEnd) and is therefore disposable - confirm.
                using (var target = new FilterReader(GetBytesFromStream(stream), ".doc"))
                {
                    target.Init();
                    return target.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                Logger.WriteWarning(EventId.Indexing.IFilterError, "Doc IFilter error: " + ex.Message);
            }

            return string.Empty;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Lets the user pick a file and shows the text that <c>FilterReader</c> extracts
        /// from it in a message box. Nothing happens when the dialog is cancelled.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void btnGetText_Click(object sender, EventArgs e)
        {
            // The dialog is a disposable component; release it once the choice is made.
            using (OpenFileDialog dlg = new OpenFileDialog())
            {
                if (dlg.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // using already closes the reader, so no separate Close() call is needed.
                using (TextReader reader = new FilterReader(dlg.FileName))
                {
                    MessageBox.Show(reader.ReadToEnd());
                }
            }
        }
Exemplo n.º 17
0
        // Searches the contents of every path in Directories for keywords.
        // Unless the "Switch.System.IO.UseLegacyPathHandling" switch is set, each path is
        // first converted with ConvertToNTPath. Files at or above 1024 * MAX_FILE_SIZE bytes
        // are skipped. Office files are read through FilterReader, everything else as plain
        // text. Every failure is reported on the console and the loop continues.
        public void Search()
        {
            foreach (String dir in Directories)
            {
                try
                {
                    bool usingLegacyPathHandling = false;
                    AppContext.TryGetSwitch("Switch.System.IO.UseLegacyPathHandling", out usingLegacyPathHandling);
                    var dirToCheck = dir;
                    if (!usingLegacyPathHandling)
                    {
                        dirToCheck = ConvertToNTPath(dir);
                    }
                    var fileInfo = new FileInfo(dirToCheck);

                    if (Convert.ToUInt64(fileInfo.Length) < 1024 * this.MAX_FILE_SIZE)
                    {
                        if (IsOfficeExtension(fileInfo.Extension))
                        {
                            try
                            {
                                string fileContents;
                                // Dispose the reader so the filter and the file it holds are
                                // released before moving on to the next file.
                                using (var reader = new FilterReader(fileInfo.FullName))
                                {
                                    fileContents = reader.ReadToEnd();
                                }
                                CheckForKeywords(fileContents, fileInfo);
                            }
                            catch (Exception)
                            {
                                Console.WriteLine("[-] Could not read contents of {0}", PrettyPrintNTPath(fileInfo.FullName));
                            }
                        }
                        else
                        {
                            //normal file
                            try
                            {
                                CheckForKeywords(File.ReadAllText(fileInfo.FullName), fileInfo);
                            }
                            catch (Exception)
                            {
                                Console.WriteLine("[-] Could not read contents of {0}", PrettyPrintNTPath(fileInfo.FullName));
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine("[-] File exceeds max file size {0}", PrettyPrintNTPath(fileInfo.FullName));
                    }
                }
                catch (PathTooLongException)
                {
                    Console.WriteLine("[-] Path {0} is too long. Skipping.", dir);
                }
                catch (Exception e)
                {
                    Console.WriteLine("[-] Some unknown exception {0} occured while processing {1}. Continuing with the next directory.", e.Message, dir);
                }
            }
        }
Exemplo n.º 18
0
 // Lets the user choose a file and displays it in the web browser control.
 // asEMBED == true : the chosen file itself is navigated to.
 // asEMBED == false: the file's text is read through FilterReader, line breaks are turned
 //                   into HTML breaks, the result is written to a new .html file in the
 //                   TEMP directory and that file is navigated to.
 // SelectFile is invoked with the displayed path, or with an empty string when the dialog
 // was cancelled or the file type was rejected. It is not invoked after any other error.
 private void ifilt(bool asEMBED)
 {
     _selected_file = string.Empty;
     if (openFileDialog.ShowDialog() == DialogResult.OK)
     {
         try
         {
             if (!asEMBED)
             {
                 try
                 {
                     using (TextReader reader = new FilterReader(openFileDialog.FileName))
                     {
                         string htmlName = DateTime.Now.Ticks.ToString() + ".html";
                         string htmlPath = Path.Combine(Environment.GetEnvironmentVariable("TEMP"), htmlName);
                         using (StreamWriter sw = new StreamWriter(htmlPath, false, DefaultEncoding))
                         {
                             // Platform newlines become "<br>", any remaining bare line feed
                             // becomes "<BR>" followed by a newline.
                             string withBreaks = reader.ReadToEnd().Replace(Environment.NewLine, "<br>");
                             string html       = Regex.Replace(withBreaks, "\n", "<BR>" + Environment.NewLine, RegexOptions.Singleline);
                             sw.Write(html);
                         }
                         _selected_file = htmlPath;
                         webBrowser.Navigate(htmlPath);
                     }
                 }
                 catch (ArgumentException)
                 {
                     MessageBox.Show("Данный тип файлов не поддерживается");
                 }
             }
             else
             {
                 _selected_file = this.openFileDialog.FileName;
                 webBrowser.Navigate(_selected_file);
             }
         }
         catch (Exception ex)
         {
             MessageBox.Show(ex.Message, "Error");
             return;
         }
     }
     SelectFile.Invoke(_selected_file);
 }
Exemplo n.º 19
0
        // Searches the contents of filtered files. Does not care about exceptions.
        public void Search()
        {
            foreach (String dir in Directories)
            {
                try {
                    var fileInfo = new FileInfo(ConvertToNTPath(dir));

                    string fileContents;
                    if (Convert.ToUInt64(fileInfo.Length) < 1024 * this.MAX_FILE_SIZE)
                    {
                        if (IsOfficeExtension(fileInfo.Extension))
                        {
                            try {
                                var reader = new FilterReader(fileInfo.FullName);
                                fileContents = reader.ReadToEnd();
                                CheckForKeywords(fileContents, fileInfo);
                            } catch (Exception e) { Console.WriteLine("[-] Could not read contents of {0}", PrettyPrintNTPath(fileInfo.FullName)); }
                        }
                        else
                        {
                            //normal file
                            try {
                                CheckForKeywords(File.ReadAllText(fileInfo.FullName), fileInfo);
                            } catch (Exception e) {
                                Console.WriteLine("[-] Could not read contents of {0}", PrettyPrintNTPath(fileInfo.FullName));
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine("[-] File exceeds max file size {0}", PrettyPrintNTPath(fileInfo.FullName));
                    }
                } catch (PathTooLongException ex) {
                    Console.WriteLine("[-] Path {0} is too long. Skipping.", dir);
                    continue;
                } catch (Exception e) {
                    Console.WriteLine("[-] Some unknown exception {0} occured while processing {1}. Continuing with the next directory.", e.Message, dir);
                }
            }
            if (ossflag)
            {
                Console.WriteLine("\n[*] Now Create Zipfile and Upload Zipfile to aliyunOSS");
                Zipfile(Uploadfiles, this.bucketName, this.accessKeyId, this.accessKeySecret, this.endpoint);
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Reads the whole text of the file at <paramref name="path"/> through
        /// <c>FilterReader</c> and returns it as the single element of a list.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <returns>
        /// A list holding the extracted text, or an empty list when the file does not exist
        /// (the user is told so in a message box).
        /// </returns>
        private List<string> GetTextFromPPT(string path)
        {
            List<string> text = new List<string>();

            if (!File.Exists(path))
            {
                MessageBox.Show("Please give the valid path of the PowerPont file.");
                // Stop here: opening a reader on a missing file would only throw.
                return text;
            }

            using (TextReader reader = new FilterReader(path))
            {
                text.Add(reader.ReadToEnd());
            }
            return text;
        }
Exemplo n.º 21
0
        /// <summary>
        /// Extracts the text of <paramref name="sourcefile"/> with a <c>FilterReader</c>, passes it to
        /// <c>calcWords</c>, and writes it to a ".txt" file under <c>outpath</c>.
        /// </summary>
        /// <param name="sourcefile">Path of the file to convert.</param>
        /// <returns>
        /// 0 on success; 1 when the source file does not exist; 2 when reading or pre-processing
        /// the source throws; 3 when building the target name or writing the text file throws.
        /// </returns>
        private int ConvertFile(string sourcefile)
        {
            if (!File.Exists(sourcefile))
            {
                return(1);
            }
            try
            {
                // The using block owns the reader, so no explicit Close() calls are needed.
                using (TextReader reader = new FilterReader(sourcefile))
                {
                    string context = reader.ReadToEnd();

                    calcWords(context);

                    // NOTE(review): the pattern is LF followed by CR, whereas Windows line
                    // endings are CR LF - confirm which sequence is intended.
                    context = Regex.Replace(context, "\n\r", " ", RegexOptions.IgnoreCase);

                    try
                    {
                        string name = GetFileName(sourcefile);
                        using (StreamWriter writer = new StreamWriter((outpath + @"\" + name + ".txt").Replace(@"\\", @"\"), false, Encoding.Default))
                        {
                            // Leaving the using block flushes and closes the writer.
                            writer.Write(context);
                        }

                        return(0);
                    }
                    catch (Exception)
                    {
                        return(3);
                    }
                }
            }
            catch (Exception)
            {
                return(2);
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Extracts the text of <c>sourcefile</c> with a <c>FilterReader</c>, writes it to
        /// "&lt;fileid&gt;.txt" under <c>outtxtpath</c>, and then runs <c>ExecuteRegexTxt</c> on that file.
        /// </summary>
        /// <returns>
        /// <c>OutStatus.FileLoss</c> when the source file does not exist,
        /// <c>OutStatus.ConvertSuccess</c> when the text file was written and processed, and
        /// <c>OutStatus.TotxtFailed</c> when any step throws (the exception is printed to the console).
        /// </returns>
        public static int ConvertFile()
        {
            bool sourceExists = File.Exists(sourcefile);
            if (!sourceExists)
            {
                return((int)OutStatus.FileLoss);
            }

            try
            {
                using (TextReader source = new FilterReader(sourcefile))
                {
                    string extracted = source.ReadToEnd();
                    extracted = Regex.Replace(extracted, "\n\r", " ", RegexOptions.IgnoreCase);

                    try
                    {
                        string target = (outtxtpath + @"\" + fileid + ".txt").Replace(@"\\", @"\");
                        using (StreamWriter output = new StreamWriter(target, false, Encoding.Default))
                        {
                            output.Write(extracted);
                            output.Close();
                        }

                        // The source reader is closed before the text file is post-processed.
                        source.Close();
                        ExecuteRegexTxt(target);
                        return((int)OutStatus.ConvertSuccess);
                    }
                    catch (Exception writeFailure)
                    {
                        Console.WriteLine("保存txt文件发生异常" + writeFailure);
                        return((int)OutStatus.TotxtFailed);
                    }
                }
            }
            catch (Exception readFailure)
            {
                Console.WriteLine("打开文件失败" + readFailure);
                return((int)OutStatus.TotxtFailed);
            }
        }
    /// <summary>
    /// Looks up the most recent row of the FileUpload table (highest ID), extracts the text of
    /// the document named in its UpFile1 column from "~/Docs/", and stores that text in the
    /// row's FileData column. Text of 1000 characters or more is cut to its first 999
    /// characters and trimmed before it is stored.
    /// </summary>
    /// <remarks>
    /// All values are passed as SQL parameters: the extracted document text is arbitrary
    /// content and must never be concatenated into the statement.
    /// </remarks>
    public void   WordDocumentRetrieve()
    {
        SqlDataAdapter adp1 = new SqlDataAdapter("Select Max(ID) From FileUpload", sqlCon);
        DataSet        ds1  = new DataSet();

        adp1.Fill(ds1, "FileUpload");
        id = Convert.ToInt32(ds1.Tables["FileUpload"].Rows[0][0].ToString());

        SqlDataAdapter adp = new SqlDataAdapter("Select UpFile1 From FileUpload Where ID=@id", sqlCon);
        adp.SelectCommand.Parameters.AddWithValue("@id", id);
        DataSet        ds  = new DataSet();

        adp.Fill(ds, "FileUpload");
        name = ds.Tables["FileUpload"].Rows[0]["UpFile1"].ToString();
        if (name != "")
        {
            string temp;
            using (TextReader reader = new FilterReader(Server.MapPath("~/Docs/" + name)))
            {
                temp = reader.ReadToEnd();
            }

            // Long documents are stored as a trimmed 999-character excerpt; short ones verbatim.
            string fileData = temp.Length >= 1000 ? temp.Substring(0, 999).Trim() : temp;

            try
            {
                using (SqlCommand Comm = sqlCon.CreateCommand())
                {
                    Comm.CommandText = "Update FileUpload SET FileData=@fileData Where ID=@id";
                    Comm.Parameters.AddWithValue("@fileData", fileData);
                    Comm.Parameters.AddWithValue("@id", id);
                    Comm.ExecuteNonQuery();
                }
            }
            finally
            {
                // Close the connection even when the update fails.
                sqlCon.Close();
            }
        }
    }
Exemplo n.º 24
0
        // Searches the contents of filtered files. Does not care about exceptions:
        // every failure is reported on the console and the loop moves on to the next entry.
        // Each entry of Directories is opened as a file through its "\\?\"-prefixed path.
        public void Search()
        {
            foreach (String dir in Directories)
            {
                try {
                    var NTdir    = @"\\?\" + dir;
                    var fileInfo = new FileInfo(NTdir);

                    if (fileInfo.Length < MAX_FILE_SIZE)
                    {
                        if (IsOfficeExtension(fileInfo.Extension))
                        {
                            try {
                                string fileContents;
                                // Dispose the reader so the file is released as soon as it has been read.
                                using (var reader = new FilterReader(fileInfo.FullName))
                                {
                                    fileContents = reader.ReadToEnd();
                                }
                                CheckForKeywords(fileContents, fileInfo);
                            } catch (Exception) { Console.WriteLine("[-] Could not read contents of {0}", fileInfo.FullName.Replace(@"\\?\", "")); }
                        }
                        else
                        {
                            //normal file
                            try {
                                CheckForKeywords(File.ReadAllText(fileInfo.FullName), fileInfo);
                            } catch (Exception) {
                                Console.WriteLine("[-] Could not read contents of {0}", fileInfo.FullName.Replace(@"\\?\", ""));
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine("[-] File exceeds 1MB file size {0}", fileInfo.FullName.Replace(@"\\?\", ""));
                    }
                } catch (PathTooLongException) {
                    Console.WriteLine("[-] Path {0} is too long. Skipping.", dir);
                } catch (Exception e) {
                    Console.WriteLine("[-] Some unknown exception {0} occured while processing {1}. Continuing with the next directory.", e.Message, dir);
                }
            }
        }
Exemplo n.º 25
0
        /// <summary>
        /// Opens <paramref name="file"/> for reading and reads the stream to the end through a
        /// <c>FilterReader</c> built with default <c>FilterReaderOptions</c>. The extracted text
        /// is discarded; a failure is reported by writing the exception message to the console.
        /// </summary>
        /// <param name="file">The file to read.</param>
        private static void TryReadFile(FileInfo file)
        {
            FilterReader filterReader = null;
            var          input        = file.OpenRead();

            try
            {
                var options = new FilterReaderOptions();
                filterReader = new FilterReader(input, file.Extension, options);
                filterReader.ReadToEnd();
            }
            catch (Exception failure)
            {
                Console.WriteLine(failure.Message);
            }
            finally
            {
                // Release the reader first, then the underlying stream.
                if (filterReader != null)
                {
                    filterReader.Close();
                }
                if (input != null)
                {
                    input.Close();
                }
            }
        }
Exemplo n.º 26
0
        /// <summary>
        /// Demo entry point: reads a fixed sample document once by path and once through a
        /// stream (via <c>TryReadFile</c>), prints any exception message, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                var fileName = @"C:\Sicos1977.doc";
                var file     = new FileInfo(fileName);

                Console.WriteLine($"Reading {file.Name}");
                // Dispose the path-based reader before the same file is opened again below.
                using (var reader = new FilterReader(fileName))
                {
                    reader.ReadToEnd();
                }

                // Try and read the stream from the file
                TryReadFile(file);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }

            Console.ReadKey();
        }
Exemplo n.º 27
0
        /// <summary>
        /// Reads the given file through a <c>FilterReader</c> and returns its plain-text content.
        /// </summary>
        /// <param name="filePath">The physical path of the file whose text should be extracted.</param>
        /// <returns>
        /// The extracted text, or an empty string when the file's extension (as returned by
        /// <c>Path.GetExtension</c>) is not one of the comma-separated <c>AllowedExtensions</c>
        /// or when the reader throws an <c>ArgumentException</c>.
        /// </returns>
        public string ExtractTextFromFile(string filePath)
        {
            string content = String.Empty;

            string[] permitted = this.AllowedExtensions.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            string   extension = Path.GetExtension(filePath);
            if (!permitted.Contains(extension))
            {
                return(content);
            }

            try
            {
                using (FilterReader source = new FilterReader(filePath))
                {
                    content = source.ReadToEnd();
                }
            }
            catch (ArgumentException failure)
            {
                // Usually means no IFilter could be found for the file; this is treated as
                // non-critical, so it is only logged.
                Logger.Error(string.Format("Unable to extract text for {0}.", filePath), failure);
            }

            return(content);
        }
Exemplo n.º 28
0
        /*
         * Checks the file extension and calls the corresponding parser.
         * Gets the results returned from the parser and passes it to the engine.
         * Passes the results to the Database
         *
         * fInfo: Incoming FileInfo object to be processed
         * parentIsArchive: Whether or not the file is in an archive
         * returns: an int[] with the results from the call to the engine  (results[0] = Count, results[1] = RetCode)
         */
        public static int[][] ProcessNonArchive(Delimon.Win32.IO.FileInfo fInfo, bool parentIsArchive)
        {
            int[][] results = { new int[5], new int[9] };

            string ext = Path.GetExtension(fInfo.FullName);

            ScanData returnedData;
            CreditData ccReturnedData;

            if (ext.CompareTo(".txt") == 0 || ext.CompareTo(".csv") == 0)
            {
                try
                {
                    StreamReader textFile = new StreamReader(fInfo.FullName);
                    string text = textFile.ReadToEnd();
                    textFile.Close();

                    if (MainForm.socialSecurityMode)
                    {
                        returnedData = Engine.ScanForSocialSecurity(text);

                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[0][0] = returnedData.Count;
                                results[0][1]= returnedData.RetCode;
                                results[0][2] = (int)returnedData.Priority;
                                results[0][3]= returnedData.Pattern_D9;
                                results[0][4]= returnedData.Pattern_D3D2D4;
                            }
                            else
                            {
                                //Database entry goes here.
                                //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }

                            }
                        }
                    }
                    if (MainForm.creditCardMode)
                    {
                        ccReturnedData = Engine.ScanForCreditCard(text);

                        if (ccReturnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[1][0] = ccReturnedData.Count;
                                results[1][1] = ccReturnedData.RetCode;
                                results[1][2] = (int)ccReturnedData.Priority;
                                results[1][3] = ccReturnedData.VisaCount;
                                results[1][4] = ccReturnedData.MC_Count;
                                results[1][5] = ccReturnedData.AmexCount;
                                results[1][6] = ccReturnedData.DisCount;
                                results[1][7] = ccReturnedData.DinnCount;
                                results[1][8] = ccReturnedData.JCB_Count;

                            }
                            else
                            {
                                //Database entry goes here.
                                //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " +ccReturnedData.Priority);
                                Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                    }
                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e) { Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString()); }
            }
            else if (ext.CompareTo(".rtf") == 0)
            {
                try
                {
                    RichTextBox rtb = new RichTextBox();
                    rtb.Rtf = System.IO.File.ReadAllText(fInfo.FullName);
                    string text = rtb.Text;

                    if (MainForm.socialSecurityMode)
                    {
                        returnedData = Engine.ScanForSocialSecurity(text);

                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[0][0] = returnedData.Count;
                                results[0][1] = returnedData.RetCode;
                                results[0][2] = (int)returnedData.Priority;
                                results[0][3] = returnedData.Pattern_D9;
                                results[0][4] = returnedData.Pattern_D3D2D4;
                            }
                            else
                            {
                                //Database entry
                                //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                        if (MainForm.creditCardMode)
                        {
                            ccReturnedData = Engine.ScanForCreditCard(text);

                            if (returnedData.RetCode > 0)
                            {
                                if (parentIsArchive)
                                {
                                    results[1][0] = ccReturnedData.Count;
                                    results[1][1] = ccReturnedData.RetCode;
                                    results[1][2] = (int)ccReturnedData.Priority;
                                    results[1][3] = ccReturnedData.VisaCount;
                                    results[1][4] = ccReturnedData.MC_Count;
                                    results[1][5] = ccReturnedData.AmexCount;
                                    results[1][6] = ccReturnedData.DisCount;
                                    results[1][7] = ccReturnedData.DinnCount;
                                    results[1][8] = ccReturnedData.JCB_Count;
                                }
                                else
                                {
                                    //Database entry
                                    //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                                    Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                                    try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                    catch (InvalidOperationException) { }
                                }
                            }
                        }
                    }
                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e)
                {
                    //Console.WriteLine(e.Message);
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
                }
            }
            else if (ext.CompareTo(".pdf") == 0)
            {
                try
                {

                    string text = PDFParser.Parser.ParsePDFtoString(fInfo.FullName);

                    if (MainForm.socialSecurityMode)
                    {
                        returnedData = Engine.ScanForSocialSecurity(text);
                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[0][0] = returnedData.Count;
                                results[0][1] = returnedData.RetCode;
                                results[0][2] = (int)returnedData.Priority;
                                results[0][3] = returnedData.Pattern_D9;
                                results[0][4] = returnedData.Pattern_D3D2D4;
                            }
                            else
                            {
                                //Database entry
                                //WriteToLog(fi.Name, fi.FullName, retCode.ToString(), count);
                                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                            }
                    }
                    if (MainForm.creditCardMode)
                    {
                        ccReturnedData = Engine.ScanForCreditCard(text);

                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[1][0] = ccReturnedData.Count;
                                results[1][1] = ccReturnedData.RetCode;
                                results[1][2] = (int)ccReturnedData.Priority;
                                results[1][3] = ccReturnedData.VisaCount;
                                results[1][4] = ccReturnedData.MC_Count;
                                results[1][5] = ccReturnedData.AmexCount;
                                results[1][6] = ccReturnedData.DisCount;
                                results[1][7] = ccReturnedData.DinnCount;
                                results[1][8] = ccReturnedData.JCB_Count;
                            }
                            else
                            {
                                //Database entry
                                //WriteToLog(fi.Name, fi.FullName, retCode.ToString(), count);
                                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                     }
                    }

                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e)
                {
                    //Console.WriteLine(e.Message);
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
                }
            }
            else if (ext.CompareTo(".doc") == 0 || ext.CompareTo(".xls") == 0 || ext.CompareTo(".ppt") == 0)
            {
                try
                {
                    TextReader reader = new FilterReader(fInfo.FullName);
                    String text = "";
                    using (reader) { text = reader.ReadToEnd(); }

                    if (MainForm.socialSecurityMode)
                    {
                        returnedData = Engine.ScanForSocialSecurity(text);

                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[0][0] = returnedData.Count;
                                results[0][1] = returnedData.RetCode;
                                results[0][2] = (int)returnedData.Priority;
                                results[0][3] = returnedData.Pattern_D9;
                                results[0][4] = returnedData.Pattern_D3D2D4;
                            }
                            else
                            {
                                //Database entry
                                //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                    }
                    if (MainForm.creditCardMode)
                    {
                        ccReturnedData = Engine.ScanForCreditCard(text);

                        if (ccReturnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[1][0] = ccReturnedData.Count;
                                results[1][1] = ccReturnedData.RetCode;
                                results[1][2] = (int)ccReturnedData.Priority;
                                results[1][3] = ccReturnedData.VisaCount;
                                results[1][4] = ccReturnedData.MC_Count;
                                results[1][5] = ccReturnedData.AmexCount;
                                results[1][6] = ccReturnedData.DisCount;
                                results[1][7] = ccReturnedData.DinnCount;
                                results[1][8] = ccReturnedData.JCB_Count;
                            }
                            else
                            {
                                //Database entry
                                Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                    }
                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e)
                {
                    //Console.WriteLine(e.Message);
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
                }
            }
            else if (ext.CompareTo(".docx") == 0 || ext.CompareTo(".xlsx") == 0 || ext.CompareTo(".pptx") == 0 || ext.CompareTo(".odt") == 0 || ext.CompareTo(".ods") == 0 || ext.CompareTo(".odp") == 0)
            {
                try
                {
                    String text = OfficeParser.Parser.Parse(fInfo.FullName, ext);
                    if (text != null)
                    {

                        if (MainForm.socialSecurityMode)
                        {
                            returnedData = Engine.ScanForSocialSecurity(text);

                            if (returnedData.RetCode > 0)
                            {
                                if (parentIsArchive)
                                {
                                    results[0][0] = returnedData.Count;
                                    results[0][1] = returnedData.RetCode;
                                    results[0][2] = (int)returnedData.Priority;
                                    results[0][3] = returnedData.Pattern_D9;
                                    results[0][4] = returnedData.Pattern_D3D2D4;
                                }
                                else
                                {
                                    //Database entry
                                    //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                                    Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                                    try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                    catch (InvalidOperationException) { }
                                }
                            }
                        }
                        if (MainForm.creditCardMode)
                        {
                            ccReturnedData = Engine.ScanForCreditCard(text);

                            if (ccReturnedData.RetCode > 0)
                            {
                                if (parentIsArchive)
                                {
                                    results[1][0] = ccReturnedData.Count;
                                    results[1][1] = ccReturnedData.RetCode;
                                    results[1][2] = (int)ccReturnedData.Priority;
                                    results[1][3] = ccReturnedData.VisaCount;
                                    results[1][4] = ccReturnedData.MC_Count;
                                    results[1][5] = ccReturnedData.AmexCount;
                                    results[1][6] = ccReturnedData.DisCount;
                                    results[1][7] = ccReturnedData.DinnCount;
                                    results[1][8] = ccReturnedData.JCB_Count;
                                }
                                else
                                {
                                    //Database entry
                                    Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                                    try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                    catch (InvalidOperationException) { }
                                }
                            }
                        }
                    }
                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e)
                {
                    //Console.WriteLine(e.Message);
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
                }
            }
            else if (ext.CompareTo(".xml") == 0)
            {
                if(fInfo.Name.Equals("iTunes Music Library.xml"))
                    return results;
                try
                {
                    String text = XMLParser.Parser.ParseXMLtoString(fInfo.FullName);
                    if (String.IsNullOrEmpty(text))
                    {
                        //Log to Unscannable table.
                    }

                    if (MainForm.socialSecurityMode)
                    {
                        returnedData = Engine.ScanForSocialSecurity(text);

                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[0][0] = returnedData.Count;
                                results[0][1] = returnedData.RetCode;
                                results[0][2] = (int)returnedData.Priority;
                                results[0][3] = returnedData.Pattern_D9;
                                results[0][4] = returnedData.Pattern_D3D2D4;
                            }
                            else
                            {
                                //Database entry
                                //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                                Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                    }
                    if (MainForm.creditCardMode)
                    {
                        ccReturnedData = Engine.ScanForCreditCard(text);

                        if (ccReturnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[1][0] = ccReturnedData.Count;
                                results[1][1] = ccReturnedData.RetCode;
                                results[1][2] = (int)ccReturnedData.Priority;
                                results[1][3] = ccReturnedData.VisaCount;
                                results[1][4] = ccReturnedData.MC_Count;
                                results[1][5] = ccReturnedData.AmexCount;
                                results[1][6] = ccReturnedData.DisCount;
                                results[1][7] = ccReturnedData.DinnCount;
                                results[1][8] = ccReturnedData.JCB_Count;
                            }
                            else
                            {
                                //Database entry
                                Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                                try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                                catch (InvalidOperationException) { }
                            }
                        }
                    }
                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e)
                {
                    //Console.WriteLine(e.Message);
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
                }
            }
            else if (ext.CompareTo(".pst") == 0)
            {
                try
                {
                    com.pff.PSTFile pstFile = new com.pff.PSTFile(fInfo.FullName);
                    String text = pstFile.processFolder(pstFile.getRootFolder());
                    com.pff.PSTFolder folder = pstFile.getRootFolder();
                    processFolder(folder); // Process the main folder, once we hit an email we will scan that email

                    /*
                    if (MainForm.socialSecurityMode)
                    {
                        returnedData = Engine.ScanForSocialSecurity(text);

                        if (returnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[0][0] = returnedData.Count;
                                results[0][1] = returnedData.RetCode;
                                results[0][2] = (int)returnedData.Priority;
                                results[0][3] = returnedData.Pattern_D9;
                                results[0][4] = returnedData.Pattern_D3D2D4;
                            }
                            //Database entry
                            //WriteToLogFile("Detected: " + fInfo.FullName + " Priority: " + returnedData.Priority);
                            Database.AddToTableScanned(fInfo.Name, fInfo.FullName, returnedData);
                            try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                            catch (InvalidOperationException) { }
                        }
                    }
                    if (MainForm.creditCardMode)
                    {
                        ccReturnedData = Engine.ScanForCreditCard(text);

                        if (ccReturnedData.RetCode > 0)
                        {
                            if (parentIsArchive)
                            {
                                results[1][0] = ccReturnedData.Count;
                                results[1][1] = ccReturnedData.RetCode;
                                results[1][2] = (int)ccReturnedData.Priority;
                                results[1][3] = ccReturnedData.VisaCount;
                                results[1][4] = ccReturnedData.MC_Count;
                                results[1][5] = ccReturnedData.AmexCount;
                                results[1][6] = ccReturnedData.DisCount;
                                results[1][7] = ccReturnedData.DinnCount;
                                results[1][8] = ccReturnedData.JCB_Count;
                            }
                            //Database entry
                            Database.AddToTableCreditCard(fInfo.Name, fInfo.FullName, ccReturnedData);
                            try { mainUIForm.lblItemsFound.BeginInvoke(new MainForm.InvokeDelegateFound(mainUIForm.UpdateLblItemsFound), new object[] { numFound++ }); }
                            catch (InvalidOperationException) { }
                        }
                    }
                    */
                }
                catch (UnauthorizedAccessException u)
                {
                    //File is encrypted: Add entry to Uncsannable table with reason: encrypted.
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, u.ToString());
                }
                catch (Exception e)
                {
                    //Console.WriteLine(e.Message);
                    Database.AddToTableUnScannable(fInfo.Name, fInfo.FullName, Environment.UserName, e.ToString());
                }

            }

            return results;
        }
Exemplo n.º 29
0
        /* A method that creates a File System Watcher for a given drive with the objective
         * of stopping the copying of any active protected file(s) on the system. This is done by
         * first watching for any new files being created that have a specific file extension, defined
         * in the extensions List. When created, it will raise an event and trigger the 'OnCreated'
         * method. From here, the file is copied to a temporary folder, the contents is read and a
         * PNG image is made containing the text using the 'IFilterTextReader'. The image is then
         * compared against all the 'Sample' images using the AForge.NET Imaging library, an
         * Artificial Intelligence library with image processing routines and filters. If the similarity
         * is 85% (0.85) or above, it is considered a copy attempt: the file is closed and deleted,
         * and the event is logged with an administrator being informed. This works for most file formats.
         * A breakdown of each section, where necessary, is given below.
         *
         * A DISCLAIMER, 'IFilterTextReader' is licensed under The Code Project Open License (CPOL) 1.02.
         * My usage is in compliance with the license. All credits go to Kees van Spelde.
         * URL Link: https://github.com/Sicos1977/IFilterTextReader
         *
         * A DISCLAIMER, 'AForge.NET' Framework is published under LGPL-3.0-only or LGPL-3.0-or-later license.
         * I have taken the required steps to validate my use of this library. All credits go to AForge.NET
         * Framework. I do not take credit for any of its code or methodology.
         * URL Link: http://www.aforgenet.com/framework/license.html
         */
        public FileSystemWatcher Watcher(String drive)
        {
            // Extensions (without the dot) of the file types that are inspected. The set is
            // case-insensitive so that e.g. "REPORT.PDF" cannot bypass the check.
            HashSet<String> extensions = new HashSet<String>(StringComparer.OrdinalIgnoreCase) {
                "doc", "docx", "docm", "txt", "xlsx", "ppt", "pptx", "pdf"
            };
            FileSystemWatcher watcher = new FileSystemWatcher();

            watcher.Path = drive;
            watcher.IncludeSubdirectories = true;
            watcher.NotifyFilter          = NotifyFilters.LastAccess | NotifyFilters.LastWrite
                                            | NotifyFilters.FileName | NotifyFilters.DirectoryName;
            watcher.Filter              = "*.*";
            watcher.Created            += new FileSystemEventHandler(OnCreated);
            watcher.EnableRaisingEvents = true;

            /* Builds a path below the per-user 'Data Shield' application data folder.
             *
             * Param: String - path relative to the 'Data Shield' folder.
             * Return: String - the absolute path.
             */
            String DataShieldPath(String relative)
            {
                return "C:\\Users\\" + Username() + "\\AppData\\Roaming\\Data Shield\\" + relative;
            }

            /* Handles the watcher's 'Created' event: inspects the new file and, when it looks like a
             * copy of a protected file, kills the known editor processes, logs the attempt and deletes
             * the new file.
             *
             * Param: object - event source. FileSystemEventArgs - describes the created file.
             * Return: None.
             */
            void OnCreated(object source, FileSystemEventArgs e)
            {
                // NOTE(review): presumably gives the writing process time to finish the file - confirm.
                Thread.Sleep(1000);

                String fileName = e.FullPath.Split('\\').Last();

                // Only files with a watched extension are inspected; names containing "~$" are skipped.
                if (!extensions.Contains(e.FullPath.Split('.').Last()) || fileName.Contains("~$"))
                {
                    return;
                }

                try
                {
                    String tempCopy = DataShieldPath("(ds)" + fileName);

                    if (File.Exists(tempCopy))
                    {
                        return;
                    }

                    // Makes a copy of the newly created file with '(ds)' at the front. This is to ensure we can read it due to
                    // the original file potentially being used by another process.
                    File.Copy(e.FullPath, tempCopy);

                    try
                    {
                        // The file contents is read using 'IFilter'.
                        using (TextReader reader = new FilterReader(tempCopy))
                        {
                            // Removes any new lines and indentations, allowing for all outputs to be the same text format.
                            String text = Regex.Replace(reader.ReadToEnd(), @"\t|\n|\r", "");

                            // Creates a PNG image of the text from the file.
                            MakeImage(text);

                            // Compares the generated image(s) with the images of all active protected files. On a
                            // match the editor processes are killed, the attempt is logged and the file is deleted.
                            String matchedSample;
                            if (CompareImage(out matchedSample))
                            {
                                Task.Run(() => MessageBox.Show("An attempt to copy a protected file has been identified. This has been logged and an administrator has been notified."));
                                KillProcesses();
                                LogCopyAttempt(matchedSample);
                                Thread.Sleep(1000);
                                File.Delete(e.FullPath);
                            }
                        }
                    }
                    finally
                    {
                        // Always remove the temporary copy and the generated images. A left-over '(ds)' copy
                        // would make the File.Exists guard above skip every later file with the same name.
                        File.Delete(tempCopy);
                        ClearTemp();
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failure while inspecting one file must not stop the watcher,
                    // but it should at least be visible while debugging.
                    Debug.WriteLine("Data Shield watcher failed for " + e.FullPath + ": " + ex);
                }
            }

            /* Creates an image of the text from the recently created file, one per sample (active
             * protected file) image. Each image uses the dimensions of its sample, as only images of
             * the same size can be compared together, and is saved in the 'Temp' folder under the
             * sample's file name.
             *
             * Param: String - text to be drawn to image.
             * Return: None.
             */
            void MakeImage(String text)
            {
                // Get all the PNG samples files from the 'Sample' folder.
                DirectoryInfo di      = new DirectoryInfo(DataShieldPath("Samples\\"));
                FileInfo[]    samples = di.GetFiles("*.png");

                if (samples.Length == 0)
                {
                    return;
                }

                // The font and brush are shared by all images and disposed once at the end.
                using (Font font = new Font("Arial", 8, FontStyle.Regular, GraphicsUnit.Pixel))
                using (SolidBrush brush = new SolidBrush(Color.FromArgb(0, 0, 0)))
                {
                    foreach (FileInfo sample in samples)
                    {
                        // Only the sample's dimensions are needed, so it is released immediately.
                        Size size;
                        using (Bitmap img = new Bitmap(sample.FullName))
                        {
                            size = new Size(img.Width, img.Height);
                        }

                        using (Bitmap bitmap = new Bitmap(size.Width, size.Height))
                        {
                            using (Graphics graphics = Graphics.FromImage(bitmap))
                            {
                                graphics.Clear(Color.White);
                                graphics.SmoothingMode     = SmoothingMode.AntiAlias;
                                graphics.TextRenderingHint = TextRenderingHint.AntiAlias;
                                graphics.DrawString(text, font, brush, 0, 0);
                                graphics.Flush();
                            }

                            bitmap.Save(DataShieldPath("Temp\\" + sample.Name));
                        }
                    }
                }
            }

            /* Compares the temp images (being the newly created files) against the sample
             * images (active protected files) using AForge.Net Imaging.
             *
             * Param: out String - file name of the sample image that matched, or null if none did.
             * Return: Boolean - if a temp image had a similarity rate of 85% or higher.
             */
            Boolean CompareImage(out String matchedSample)
            {
                matchedSample = null;

                // Get all the sample PNG images from the Sample folder.
                FileInfo[] samples = new DirectoryInfo(DataShieldPath("Samples\\")).GetFiles("*.png");

                if (samples.Length == 0)
                {
                    return false;
                }

                // Get all the temp PNG images from the Temp folder (the folder does not change while comparing).
                FileInfo[] temps = new DirectoryInfo(DataShieldPath("Temp\\")).GetFiles("*.png");

                // Setup the AForge library, with the threshold being at 85% similarity.
                ExhaustiveTemplateMatching tm = new ExhaustiveTemplateMatching(0.85f);

                foreach (FileInfo sample in samples)
                {
                    foreach (FileInfo temp in temps)
                    {
                        // Loads both the sample and temp images as bitmap images.
                        using (Bitmap imageOne = new Bitmap(sample.FullName))
                        using (Bitmap imageTwo = new Bitmap(temp.FullName))
                        {
                            // Allows for an almost double width size for either less than or more compared to the sample dimensions.
                            if (!(imageOne.Width > imageTwo.Width / 1.75 && imageOne.Width < imageTwo.Width * 1.75))
                            {
                                continue;
                            }

                            // Every intermediate bitmap is disposed; GDI+ handles are not reclaimed promptly otherwise.
                            using (Bitmap copyOne = new Bitmap(imageOne))
                            using (Bitmap copyTwo = new Bitmap(imageTwo))
                            using (Bitmap newBitmap1 = ChangePixelFormat(copyOne, PixelFormat.Format24bppRgb))
                            using (Bitmap newBitmap2 = ChangePixelFormat(copyTwo, PixelFormat.Format24bppRgb))
                            {
                                try
                                {
                                    // Perform the comparison of the two images.
                                    var results = tm.ProcessImage(newBitmap1, newBitmap2);

                                    // One or more results means the similarity was 85% or more, so the new
                                    // file is treated as a copy of this protected file.
                                    if (results.Length > 0)
                                    {
                                        matchedSample = sample.Name;
                                        return true;
                                    }
                                }
                                catch (Exception ex)
                                {
                                    // In the case an error occurs.
                                    MessageBox.Show(ex.Message);
                                }
                            }
                        }
                    }
                }
                return false;
            }

            /* Changes the pixel format of the input image.
             *
             * Param: Bitmap - input image. PixelFormat - new image format.
             * Return: new bitmap image (the caller owns and disposes it).
             */
            Bitmap ChangePixelFormat(Bitmap inputImage, PixelFormat newFormat)
            {
                return inputImage.Clone(new Rectangle(0, 0, inputImage.Width, inputImage.Height), newFormat);
            }

            /* Kills all the known processes used by protected files.
             *
             * Param: None.
             * Return: None.
             */
            void KillProcesses()
            {
                // Process names are matched exactly (case-sensitive).
                HashSet<String> targets = new HashSet<String>(StringComparer.Ordinal) {
                    "WINWORD", "POWERPNT", "EXCEL", "notepad", "MSPUB"
                };

                foreach (Process process in Process.GetProcesses())
                {
                    using (process)
                    {
                        try
                        {
                            if (targets.Contains(process.ProcessName))
                            {
                                process.Kill();
                            }
                        }
                        catch (InvalidOperationException ex)
                        {
                            // The process exited in the meantime; nothing left to kill.
                            Debug.WriteLine("Process already exited: " + ex.Message);
                        }
                        catch (System.ComponentModel.Win32Exception ex)
                        {
                            // The process could not be terminated. Carry on so that the
                            // copy attempt is still logged and the copied file is still deleted.
                            Debug.WriteLine("Could not kill process: " + ex.Message);
                        }
                    }
                }
            }

            /* Logs the copy of protected file attempt to the server by uploading
             * "<file>|<user>|<computer>" to the configured 'logcopyattempt' address.
             *
             * Param: String - file name of the sample image that matched the copied file.
             * Return: None.
             */
            void LogCopyAttempt(String matchedSample)
            {
                // Report the protected file that actually matched, not an arbitrary sample.
                String file = matchedSample.Replace(".png", "");

                using (WebClient client = new WebClient())
                {
                    // NOTE(review): this disables TLS certificate validation for the WHOLE process, not
                    // just this request, and allows man-in-the-middle attacks on the log upload. Kept
                    // to preserve behavior; replace with proper certificate trust or pinning.
                    ServicePointManager.ServerCertificateValidationCallback = (senderX, certificate, chain, sslPolicyErrors) => { return true; };
                    client.UploadString(ConfigurationManager.AppSettings["logcopyattempt"], file + "|" + Username() + "|" + ComputerName());
                }
            }

            /* Clears the Temp folder. This is done after the newly created file has been analysed.
             *
             * Param: None.
             * Return: None.
             */
            void ClearTemp()
            {
                DirectoryInfo di = new DirectoryInfo(DataShieldPath("Temp\\"));

                foreach (FileInfo file in di.GetFiles())
                {
                    file.Delete();
                }
            }

            return watcher;
        }
Exemplo n.º 30
0
        /// <summary>
        /// Get the textual representation of the Binary Data of the document using IFILTER
        /// </summary>
        /// <returns>The text of the document or null if we could not parse the document into text</returns>
        public virtual string GetTextFromDocumentBinary()
        {
            /*
             * The default is to save the binary data to a temporary location and
             * use IFilter to extract the text.  This should be a good catch-all for
             * all files that don't have a specific mechanism for extracting the
             * text of the file.
             */

            // If we have no bytes then we can't do anything.
            if (Bytes == null || Bytes.Length == 0)
            {
                // Log the problem.
                log.Error("Tried to extract text from empty bytes for file " + Name);
                return null;
            }

            // Get the original file name without the extension
            string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(Name);

            bool   success     = false;
            string newFileName = "";

            try
            {
                // A single generator is shared by all attempts. Creating a new Random on every
                // iteration can yield the same "random" number each time when the default seed is
                // derived from the clock, which would make all 50 attempts test the same name.
                Random rand = new Random();

                // Now try to generate a new temporary file name that we don't have in the temporary directory
                for (int i = 0; i < 50; i++)
                {
                    newFileName = "~/TemporaryFilesDirectory/" + fileNameWithoutExtension +
                                  Convert.ToString(rand.Next(100000)) + Extension;
                    newFileName = HttpContext.Current.Server.MapPath(newFileName);

                    // Try to see if this file exists
                    if (!File.Exists(newFileName))
                    {
                        success = true;
                        break;
                    }
                }

                if (!success)
                {
                    // We failed.  Log the problem.
                    log.Error("Failed to create a unique file to extract data. Last file tried is " + newFileName);
                    return null;
                }
            }
            catch (Exception e)
            {
                // We failed.  Log the problem.
                log.Error("Failed to create a unique file to extract data for file " + Name, e);
                return null;
            }

            try
            {
                // Now try to write the bytes to the newly created file. The using block
                // closes the stream on both the success and the failure path.
                using (FileStream theFileStream = File.Create(newFileName))
                {
                    theFileStream.Write(Bytes, 0, Bytes.Length);
                }
            }
            catch (Exception e)
            {
                // We failed to write the file.  Log the problem
                log.Error("Failed to write bytes to new file " + newFileName, e);

                // Try to delete the (possibly partial) file
                try
                {
                    if (File.Exists(newFileName))
                    {
                        File.Delete(newFileName);
                    }
                }
                catch
                {
                    // We don't do anything.  This is a best effort delete
                }

                return null;
            }

            string       text           = null;
            FilterReader myFilterReader = null;

            // Now try to extract the text for the file
            try
            {
                myFilterReader = new FilterReader(newFileName);
                text           = myFilterReader.ReadToEnd();
            }
            catch (Exception e)
            {
                log.Error("Failed to parse text for file " + Name + " using IFilter", e);
            }
            finally
            {
                // Close the IFilter whether or not the extraction succeeded
                try
                {
                    if (myFilterReader != null)
                    {
                        myFilterReader.Close();
                    }
                }
                catch
                {
                    // We don't do anything.  This is a best effort close.
                }
            }

            try
            {
                // Try to delete the temporary file
                if (File.Exists(newFileName))
                {
                    File.Delete(newFileName);
                }
            }
            catch (Exception e)
            {
                log.Error("Failed to delete temporary file " + newFileName, e);
            }

            // null when the text could not be extracted (the failure has been logged above)
            return text;
        }
Exemplo n.º 31
0
        /// <summary>
        /// Recursively searches <paramref name="dir"/> for MS Word files whose text or file name
        /// matches at least one of the search terms, and adds each matching file once to
        /// <c>_lstResult</c> and <c>fileInfoBindingSource</c>.
        /// </summary>
        /// <param name="dir">The directory to search; nothing happens if it does not exist.</param>
        /// <param name="words">
        /// The search terms. A term containing '+' matches only when every '+'-separated part is found.
        /// </param>
        private void TimKiemThuMuc(DirectoryInfo dir, List <string> words)
        {
            if (!dir.Exists)
            {
                return;
            }

            lblTrangThai.ChangeTextAsync($"Tìm thư mục {dir.FullName}", Color.Blue);
            try
            {
                foreach (var file in dir.GetFiles())
                {
                    if (_cancel)
                    {
                        lblTrangThai.ChangeTextAsync("Đã hủy tìm kiếm", Color.Red);
                        return;
                    }

                    if (!file.IsMSWordFile())
                    {
                        continue;
                    }

                    Debug.WriteLine($"Tìm File {file.FullName}");
                    try
                    {
                        string allTextLower;

                        // ".doc" files are read through the IFilter reader, everything else
                        // through the MS Word behavior.
                        if (file.Extension.ToLower() == ".doc")
                        {
                            using (TextReader reader = new FilterReader(file.FullName))
                            {
                                allTextLower = reader.ReadToEnd().ToLower();
                            }
                        }
                        else
                        {
                            allTextLower = _msWordBehavior.ReadAllText(file.FullName).ToLower();
                        }

                        string fileNameLower = file.Name.ToLower();

                        foreach (var w in words)
                        {
                            if (MatchesSearchTerm(w, allTextLower, fileNameLower))
                            {
                                lock (_lstResult)
                                {
                                    _lstResult.Add(file);
                                    fileInfoBindingSource.Add(file);
                                }

                                // One hit is enough: without this the same file would be
                                // listed once for every term it matches.
                                break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Debug.WriteLine($"Lỗi: {file.FullName}\r\n{ex.Message}");
                        Log.WriteLog($"Lỗi đọc tệp {file.FullName}\r\n{ex.Message}");
                    }
                }
            }
            catch (Exception ex)
            {
                // Listing the files failed (e.g. access denied); sub-directories are still tried below.
                Debug.WriteLine($"Lỗi: {dir.FullName}\r\n{ex.Message}");
            }

            DirectoryInfo[] subDirectories;
            try
            {
                subDirectories = dir.GetDirectories();
            }
            catch (Exception ex)
            {
                // The directory cannot be enumerated; skip it rather than abort the whole search.
                Debug.WriteLine($"Lỗi: {dir.FullName}\r\n{ex.Message}");
                return;
            }

            foreach (var d in subDirectories)
            {
                try
                {
                    if (_cancel)
                    {
                        lblTrangThai.ChangeTextAsync("Đã hủy tìm kiếm", Color.Red);
                        break;
                    }
                    TimKiemThuMuc(d, words);
                }
                catch (Exception ex)
                {
                    // A failing sub-directory must not stop the search of its siblings.
                    Debug.WriteLine($"Lỗi: {d.FullName}\r\n{ex.Message}");
                }
            }
        }

        /// <summary>
        /// Checks whether every '+'-separated part of <paramref name="term"/> occurs in the file's
        /// text or in its name. Parts are lower-cased before both comparisons, so the match is
        /// case-insensitive for the text and for the file name alike.
        /// </summary>
        /// <param name="term">The search term, optionally made of several parts joined by '+'.</param>
        /// <param name="contentLower">The lower-cased text of the file.</param>
        /// <param name="fileNameLower">The lower-cased file name.</param>
        /// <returns><c>true</c> if all parts were found; otherwise <c>false</c>.</returns>
        private static bool MatchesSearchTerm(string term, string contentLower, string fileNameLower)
        {
            foreach (var part in term.Split('+'))
            {
                string partLower = part.ToLower();
                if (!contentLower.Contains(partLower) && !fileNameLower.Contains(partLower))
                {
                    return false;
                }
            }

            return true;
        }
Exemplo n.º 32
0
 /// <summary>
 /// Returns all the text that is inside the <paramref name="fileName"/>
 /// </summary>
 /// <param name="fileName">The file to read</param>
 /// <returns>All text read from the file</returns>
 public string GetAllText(string fileName)
 {
     string allText;

     // The reader is disposed as soon as the whole file has been read.
     using (FilterReader filterReader = new FilterReader(fileName))
     {
         allText = filterReader.ReadToEnd();
     }

     return allText;
 }