/// <summary>
/// Searches a PDF file by searching the text file produced by <c>ExtractText</c>.
/// </summary>
/// <param name="file">Path of the PDF file to search.</param>
/// <param name="searchPattern">Pattern passed through to the engine that searches the extracted text.</param>
/// <param name="searchType">Search type passed through to that engine.</param>
/// <param name="searchOptions">Search options passed through to that engine.</param>
/// <param name="encoding">
/// Encoding used for the extracted text. When this is <see cref="Encoding.Default"/> the encoding
/// is detected from the extracted text file instead.
/// </param>
/// <returns>
/// The results, reported against <paramref name="file"/> and marked read-only, or an empty list
/// when anything fails (the failure is logged, not rethrown).
/// </returns>
public List<GrepSearchResult> Search(string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    string tempFolder = Path.Combine(Utils.GetTempFolder(), "dnGREP-PDF");

    try
    {
        // Extract text
        string tempFile = ExtractText(file);
        if (!File.Exists(tempFile))
        {
            throw new ApplicationException("pdftotext failed to create text file.");
        }

        // GrepCore cannot check encoding of the original pdf file. If the encoding parameter is not default
        // then it is the user-specified code page. If the encoding parameter *is* the default,
        // then it has most likely not been set, so get the encoding of the extracted text file:
        if (encoding == Encoding.Default)
        {
            encoding = Utils.GetFileEncoding(tempFile);
        }

        IGrepEngine engine = GrepEngineFactory.GetSearchEngine(tempFile, initParams, FileFilter);
        try
        {
            List<GrepSearchResult> results = engine.Search(tempFile, searchPattern, searchType, searchOptions, encoding);

            if (results.Count > 0)
            {
                using (FileStream reader = File.Open(tempFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (StreamReader streamReader = new StreamReader(reader, encoding))
                {
                    foreach (var result in results)
                    {
                        result.SearchResults = Utils.GetLinesEx(streamReader, result.Matches, initParams.LinesBefore, initParams.LinesAfter);
                    }
                }

                // Only strip the temp folder when the path really begins with it; a match further
                // inside the path would make the Substring offset meaningless. The value does not
                // depend on the individual result, so it is computed once.
                bool isUnderTempFolder = file.Length > tempFolder.Length
                    && file.StartsWith(tempFolder, StringComparison.Ordinal);
                string displayName = isUnderTempFolder
                    ? file.Substring(tempFolder.Length + 1)
                    : file;

                foreach (GrepSearchResult result in results)
                {
                    result.ReadOnly = true;
                    result.FileNameDisplayed = displayName;
                    result.FileNameReal = file;
                }
            }

            return results;
        }
        finally
        {
            // Hand the engine back even when the search or the line reading throws,
            // otherwise the engine is lost to the pool.
            if (engine != null)
            {
                GrepEngineFactory.ReturnToPool(tempFile, engine);
            }
        }
    }
    catch (Exception ex)
    {
        logger.Error(ex, $"Failed to search inside PDF file: {ex.Message}");
        return new List<GrepSearchResult>();
    }
}
/// <summary>
/// Searches a PDF file by searching the text file produced by <c>extractText</c>.
/// </summary>
/// <param name="file">Path of the PDF file to search.</param>
/// <param name="searchPattern">Pattern passed through to the engine that searches the extracted text.</param>
/// <param name="searchType">Search type passed through to that engine.</param>
/// <param name="searchOptions">Search options passed through to that engine.</param>
/// <param name="encoding">
/// Encoding used for the extracted text. When this is <see cref="Encoding.Default"/> the encoding
/// is detected from the extracted text file instead.
/// </param>
/// <returns>
/// The results, reported against <paramref name="file"/> and marked read-only, or an empty list
/// when anything fails (the failure is logged, not rethrown).
/// </returns>
public List<GrepSearchResult> Search(string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    string tempFolder = Path.Combine(Utils.GetTempFolder(), "dnGREP-PDF");

    try
    {
        // Extract text
        string tempFile = extractText(file);
        if (!File.Exists(tempFile))
        {
            throw new ApplicationException("pdftotext failed to create text file.");
        }

        // The incoming encoding was chosen for the PDF itself, not for the extracted text.
        // A non-default value is treated as an explicit choice and kept; the default is
        // replaced by whatever the extracted text file actually uses, so that the search and
        // the context-line reader below decode the file the same way.
        if (encoding == Encoding.Default)
        {
            encoding = Utils.GetFileEncoding(tempFile);
        }

        IGrepEngine engine = GrepEngineFactory.GetSearchEngine(tempFile, initParams, FileFilter);
        try
        {
            List<GrepSearchResult> results = engine.Search(tempFile, searchPattern, searchType, searchOptions, encoding);

            if (results.Count > 0)
            {
                using (FileStream reader = File.Open(tempFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (StreamReader streamReader = new StreamReader(reader, encoding))
                {
                    foreach (var result in results)
                    {
                        result.SearchResults = Utils.GetLinesEx(streamReader, result.Matches, initParams.LinesBefore, initParams.LinesAfter);
                    }
                }

                // Only strip the temp folder when the path really begins with it; a match further
                // inside the path would make the Substring offset meaningless.
                bool isUnderTempFolder = file.Length > tempFolder.Length
                    && file.StartsWith(tempFolder, StringComparison.Ordinal);
                string displayName = isUnderTempFolder
                    ? file.Substring(tempFolder.Length + 1)
                    : file;

                foreach (GrepSearchResult result in results)
                {
                    result.ReadOnly = true;
                    result.FileNameDisplayed = displayName;
                    result.FileNameReal = file;
                }
            }

            return results;
        }
        finally
        {
            // Hand the engine back even when the search or the line reading throws.
            if (engine != null)
            {
                GrepEngineFactory.ReturnToPool(tempFile, engine);
            }
        }
    }
    catch (Exception ex)
    {
        logger.Log<Exception>(LogLevel.Error, "Failed to search inside Pdf file", ex);
        return new List<GrepSearchResult>();
    }
}
/// <summary>
/// Searches inside an archive: the archive is extracted into a temp folder derived from the
/// hash of <paramref name="file"/>, and every extracted file is searched with the engine
/// chosen for it.
/// </summary>
/// <param name="file">Path of the archive to search.</param>
/// <param name="searchPattern">Pattern passed through to the inner engines.</param>
/// <param name="searchType">Search type passed through to the inner engines.</param>
/// <param name="searchOptions">Search options passed through to the inner engines.</param>
/// <param name="encoding">Encoding passed through to the inner engines.</param>
/// <returns>
/// The results collected so far; an exception during extraction or search is logged and
/// ends the search without being rethrown.
/// </returns>
public List<GrepSearchResult> Search(string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    List<GrepSearchResult> searchResults = new List<GrepSearchResult>();

    // The extractor is disposable; release it when the search is finished.
    using (SevenZipExtractor extractor = new SevenZipExtractor(file))
    {
        string tempFolder = Utils.FixFolderName(Utils.GetTempFolder()) + "dnGREP-Archive\\" + Utils.GetHash(file) + "\\";

        // Start from an empty folder so files from an earlier extraction are not searched.
        if (Directory.Exists(tempFolder))
        {
            Utils.DeleteFolder(tempFolder);
        }
        Directory.CreateDirectory(tempFolder);

        try
        {
            extractor.ExtractArchive(tempFolder);

            foreach (string archiveFileName in Directory.GetFiles(tempFolder, "*.*", SearchOption.AllDirectories))
            {
                IGrepEngine engine = GrepEngineFactory.GetSearchEngine(archiveFileName, new GrepEngineInitParams(showLinesInContext, linesBefore, linesAfter, fuzzyMatchThreshold));
                var innerFileResults = engine.Search(archiveFileName, searchPattern, searchType, searchOptions, encoding);

                using (FileStream reader = File.Open(archiveFileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                using (StreamReader streamReader = new StreamReader(reader))
                {
                    foreach (var result in innerFileResults)
                    {
                        // Only fill in the lines when the inner engine did not already supply them.
                        if (!result.HasSearchResults)
                        {
                            result.SearchResults = Utils.GetLinesEx(streamReader, result.Matches, linesBefore, linesAfter);
                        }
                    }
                }

                searchResults.AddRange(innerFileResults);
            }

            // Show results as "<archive>\<path inside archive>" instead of the temp location.
            foreach (GrepSearchResult result in searchResults)
            {
                result.FileNameDisplayed = file + "\\" + result.FileNameDisplayed.Substring(tempFolder.Length);
                result.FileNameReal = file;
                result.ReadOnly = true;
            }
        }
        catch (Exception ex)
        {
            logger.LogException(LogLevel.Error, "Failed to search inside archive.", ex);
        }
    }

    return searchResults;
}
/// <summary>
/// Searches a PDF file by running a search over the text file returned by <c>extractText</c>.
/// </summary>
/// <param name="file">Path of the PDF file to search.</param>
/// <param name="searchPattern">Pattern handed to the engine that searches the extracted text.</param>
/// <param name="searchType">Search type handed to that engine.</param>
/// <param name="searchOptions">Search options handed to that engine.</param>
/// <param name="encoding">Encoding handed to that engine.</param>
/// <returns>
/// The results, reported against <paramref name="file"/> and marked read-only, or an empty list
/// when anything fails (the failure is logged, not rethrown).
/// </returns>
public List<GrepSearchResult> Search(string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    try
    {
        // Extract text
        string textFile = extractText(file);
        bool textFileCreated = File.Exists(textFile);
        if (!textFileCreated)
        {
            throw new ApplicationException("pdftotext failed to create text file.");
        }

        GrepEngineInitParams engineParams = new GrepEngineInitParams(showLinesInContext, linesBefore, linesAfter, fuzzyMatchThreshold);
        IGrepEngine textEngine = GrepEngineFactory.GetSearchEngine(textFile, engineParams);
        List<GrepSearchResult> found = textEngine.Search(textFile, searchPattern, searchType, searchOptions, encoding);

        // Attach the matching lines (with context) read back from the extracted text.
        using (FileStream textStream = File.Open(textFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            using (StreamReader lineReader = new StreamReader(textStream))
            {
                foreach (GrepSearchResult item in found)
                {
                    item.SearchResults = Utils.GetLinesEx(lineReader, item.Matches, linesBefore, linesAfter);
                }
            }
        }

        // Report against the PDF rather than the extracted text file.
        foreach (GrepSearchResult item in found)
        {
            item.ReadOnly = true;
            item.FileNameDisplayed = file;
            item.FileNameReal = file;
        }

        return found;
    }
    catch (Exception ex)
    {
        logger.LogException(LogLevel.Error, "Failed to search inside Pdf file", ex);
        return new List<GrepSearchResult>();
    }
}
/// <summary>
/// Searches the content of each supplied file for the given pattern.
/// </summary>
/// <param name="files">Files to search in. A null sequence yields an empty result list.</param>
/// <param name="searchType">Search type handed to the per-file engine.</param>
/// <param name="searchPattern">
/// Pattern to look for. When it is null or blank, files are added to the results without
/// being searched.
/// </param>
/// <param name="searchOptions">
/// Search option flags. With <c>StopAfterFirstMatch</c> set, the loop ends as soon as the
/// result list is non-empty.
/// </param>
/// <param name="codePage">Code page used to read the files, or -1 to detect the encoding per file.</param>
/// <returns>List of results. If nothing is found returns empty list.</returns>
public List<GrepSearchResult> Search(IEnumerable<string> files, SearchType searchType, string searchPattern, GrepSearchOption searchOptions, int codePage)
{
    List<GrepSearchResult> allResults = new List<GrepSearchResult>();

    if (files == null)
    {
        return allResults;
    }

    Utils.CancelSearch = false;

    bool stopAfterFirstMatch =
        (searchOptions & GrepSearchOption.StopAfterFirstMatch) == GrepSearchOption.StopAfterFirstMatch;
    bool patternIsBlank = searchPattern == null || searchPattern.Trim().Length == 0;

    if (patternIsBlank)
    {
        // Nothing to match against: list the files themselves.
        foreach (string path in files)
        {
            allResults.Add(new GrepSearchResult(path, searchPattern, null));
            if (stopAfterFirstMatch)
            {
                break;
            }
        }

        if (ProcessedFile != null)
        {
            ProcessedFile(this, new ProgressStatus(allResults.Count, allResults));
        }

        return allResults;
    }

    int fileCounter = 0;

    try
    {
        foreach (string path in files)
        {
            try
            {
                IGrepEngine engine = GrepEngineFactory.GetSearchEngine(path, SearchParams);
                fileCounter++;

                Encoding encoding = codePage == -1
                    ? Utils.GetFileEncoding(path)
                    : Encoding.GetEncoding(codePage);

                if (Utils.CancelSearch)
                {
                    return allResults;
                }

                List<GrepSearchResult> hits = engine.Search(path, searchPattern, searchType, searchOptions, encoding);

                bool hasHits = hits != null && hits.Count > 0;
                if (hasHits)
                {
                    allResults.AddRange(hits);
                }

                if (ProcessedFile != null)
                {
                    ProcessedFile(this, new ProgressStatus(fileCounter, hits));
                }
            }
            catch (Exception ex)
            {
                // A failing file is reported as an unsuccessful result; the loop carries on.
                logger.LogException(LogLevel.Error, ex.Message, ex);
                allResults.Add(new GrepSearchResult(path, searchPattern, ex.Message, false));

                if (ProcessedFile != null)
                {
                    List<GrepSearchResult> failure = new List<GrepSearchResult>
                    {
                        new GrepSearchResult(path, searchPattern, ex.Message, false)
                    };
                    ProcessedFile(this, new ProgressStatus(fileCounter, failure));
                }
            }

            if (stopAfterFirstMatch && allResults.Count > 0)
            {
                break;
            }
        }
    }
    finally
    {
        GrepEngineFactory.UnloadEngines();
    }

    return allResults;
}
/// <summary>
/// Searches a single file, adds its results to the shared result collection and raises
/// <c>ProcessedFile</c> before and after the search.
/// </summary>
/// <param name="file">Path of the file to search.</param>
/// <param name="searchType">Search type handed to the engine.</param>
/// <param name="searchPattern">Pattern handed to the engine.</param>
/// <param name="searchOptions">
/// Search option flags. With <c>StopAfterFirstMatch</c> set, the cancellation token source is
/// cancelled once any result exists.
/// </param>
/// <param name="codePage">
/// Code page used to read the file. A negative value means: detect the encoding for files that
/// are neither binary nor PDF, and use <see cref="Encoding.Default"/> otherwise.
/// </param>
/// <remarks>
/// Exceptions are not rethrown: they are logged and reported as an unsuccessful result.
/// </remarks>
private void Search(string file, SearchType searchType, string searchPattern, GrepSearchOption searchOptions, int codePage)
{
    try
    {
        // The event may have no subscribers; guard it here the same way the catch block does.
        if (ProcessedFile != null)
        {
            ProcessedFile(this, new ProgressStatus(true, processedFilesCount, foundfilesCount, null, file));
        }

        IGrepEngine engine = GrepEngineFactory.GetSearchEngine(file, SearchParams, FileFilter);
        try
        {
            Interlocked.Increment(ref processedFilesCount);

            Encoding encoding = Encoding.Default;
            if (codePage > -1)
            {
                encoding = Encoding.GetEncoding(codePage);
            }
            else if (!Utils.IsBinary(file) && !Utils.IsPdfFile(file))
            {
                encoding = Utils.GetFileEncoding(file);
            }

            if (Utils.CancelSearch)
            {
                if (cancellationTokenSource != null)
                {
                    cancellationTokenSource.Cancel();
                }
                return;
            }

            List<GrepSearchResult> fileSearchResults = engine.Search(file, searchPattern, searchType, searchOptions, encoding);

            // A null result list counts as "no hits" instead of failing on the count below.
            int hits = 0;
            if (fileSearchResults != null && fileSearchResults.Count > 0)
            {
                AddSearchResults(fileSearchResults);
                hits = fileSearchResults.Count(r => r.IsSuccess);
            }
            Interlocked.Add(ref foundfilesCount, hits);

            if (ProcessedFile != null)
            {
                ProcessedFile(this, new ProgressStatus(false, processedFilesCount, foundfilesCount, fileSearchResults, file));
            }
        }
        finally
        {
            // Return the engine on every path (success, cancel, exception) so it is not lost
            // to the pool.
            if (engine != null)
            {
                GrepEngineFactory.ReturnToPool(file, engine);
            }
        }
    }
    catch (Exception ex)
    {
        logger.Log<Exception>(LogLevel.Error, ex.Message, ex);
        AddSearchResult(new GrepSearchResult(file, searchPattern, ex.Message, false));

        if (ProcessedFile != null)
        {
            List<GrepSearchResult> _results = new List<GrepSearchResult>();
            _results.Add(new GrepSearchResult(file, searchPattern, ex.Message, false));
            ProcessedFile(this, new ProgressStatus(false, processedFilesCount, foundfilesCount, _results, file));
        }
    }
    finally
    {
        if ((searchOptions & GrepSearchOption.StopAfterFirstMatch) == GrepSearchOption.StopAfterFirstMatch && searchResults.Count > 0)
        {
            if (cancellationTokenSource != null)
            {
                cancellationTokenSource.Cancel();
            }
        }
    }
}
/// <summary>
/// Searches the files inside an archive supplied as a stream. Each entry that passes the file
/// filter is extracted into memory and searched with the engine chosen for its name.
/// </summary>
/// <param name="input">Stream containing the archive.</param>
/// <param name="file">Name of the archive; used as prefix of the displayed names and as the real file name.</param>
/// <param name="searchPattern">Pattern handed to the inner engines.</param>
/// <param name="searchType">Search type handed to the inner engines.</param>
/// <param name="searchOptions">Search options handed to the inner engines.</param>
/// <param name="encoding">
/// Encoding for the inner files. When this is <see cref="Encoding.Default"/>, the encoding of
/// every non-binary inner file is detected individually.
/// </param>
/// <returns>
/// The results collected so far; an exception is logged and ends the search without being rethrown.
/// </returns>
public List<GrepSearchResult> Search(Stream input, string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    List<GrepSearchResult> searchResults = new List<GrepSearchResult>();

    var filter = FileFilter.ToStandardFilter();

    var includeRegexPatterns = new List<Regex>();
    var excludeRegexPatterns = new List<Regex>();
    Utils.PrepareFilters(filter, includeRegexPatterns, excludeRegexPatterns);

    List<string> hiddenDirectories = new List<string>();

    try
    {
        using (SevenZipExtractor extractor = new SevenZipExtractor(input))
        {
            foreach (var fileInfo in extractor.ArchiveFileData)
            {
                var attr = (FileAttributes)fileInfo.Attributes;
                string innerFileName = fileInfo.FileName;

                if (fileInfo.IsDirectory)
                {
                    // Remember hidden directories so the files below them can be filtered out.
                    if (!filter.IncludeHidden && attr.HasFlag(FileAttributes.Hidden) && !hiddenDirectories.Contains(innerFileName))
                    {
                        hiddenDirectories.Add(innerFileName);
                    }
                    continue;
                }

                if (CheckHidden(filter, attr) &&
                    CheckHidden(filter, innerFileName, hiddenDirectories) &&
                    CheckSize(filter, fileInfo.Size) &&
                    CheckDate(filter, fileInfo) &&
                    IsPatternMatch(innerFileName, includeRegexPatterns) &&
                    !IsPatternMatch(innerFileName, excludeRegexPatterns))
                {
                    using (Stream stream = new MemoryStream())
                    {
                        extractor.ExtractFile(innerFileName, stream);
                        stream.Seek(0, SeekOrigin.Begin);

                        if (CheckBinary(filter, stream))
                        {
                            // Need to check the encoding of each file in the archive. If the encoding parameter is not default
                            // then it is the user-specified code page. If the encoding parameter *is* the default,
                            // then it has most likely not been set, so get the encoding of the extracted text file.
                            // The detected value is kept in a per-file local: overwriting the parameter would make
                            // every later file in the archive reuse the first file's encoding.
                            Encoding fileEncoding = encoding;
                            if (encoding == Encoding.Default && !Utils.IsBinary(stream))
                            {
                                stream.Seek(0, SeekOrigin.Begin);
                                fileEncoding = Utils.GetFileEncoding(stream);
                            }

                            // The probes above may have advanced the stream; search from the start.
                            stream.Seek(0, SeekOrigin.Begin);

                            IGrepEngine engine = GrepEngineFactory.GetSearchEngine(innerFileName, initParams, filter);
                            try
                            {
                                var innerFileResults = engine.Search(stream, innerFileName, searchPattern, searchType, searchOptions, fileEncoding);

                                if (innerFileResults.Count > 0)
                                {
                                    // Extract a second copy to read the matching lines from.
                                    using (Stream readStream = new MemoryStream())
                                    {
                                        extractor.ExtractFile(innerFileName, readStream);
                                        readStream.Seek(0, SeekOrigin.Begin);

                                        using (StreamReader streamReader = new StreamReader(readStream, fileEncoding))
                                        {
                                            foreach (var result in innerFileResults)
                                            {
                                                if (Utils.CancelSearch)
                                                {
                                                    break;
                                                }

                                                if (!result.HasSearchResults)
                                                {
                                                    result.SearchResults = Utils.GetLinesEx(streamReader, result.Matches, initParams.LinesBefore, initParams.LinesAfter);
                                                }
                                            }
                                        }

                                        searchResults.AddRange(innerFileResults);
                                    }
                                }
                            }
                            finally
                            {
                                // Return the engine even when searching this entry throws.
                                if (engine != null)
                                {
                                    GrepEngineFactory.ReturnToPool(innerFileName, engine);
                                }
                            }
                        }

                        if (Utils.CancelSearch)
                        {
                            break;
                        }
                    }
                }
            }
        }

        // Show results as "<archive>\<path inside archive>".
        foreach (GrepSearchResult result in searchResults)
        {
            result.FileNameDisplayed = file + "\\" + result.FileNameDisplayed;
            result.FileNameReal = file;
            result.ReadOnly = true;
        }
    }
    catch (Exception ex)
    {
        logger.Log<Exception>(LogLevel.Error, string.Format("Failed to search inside archive '{0}'", file), ex);
    }

    return searchResults;
}
/// <summary>
/// Searches inside an archive: the archive is extracted into a temp folder derived from the
/// hash of <paramref name="file"/>, and the extracted files selected by the file filter are
/// searched with the engine chosen for each of them.
/// </summary>
/// <param name="file">Path of the archive to search.</param>
/// <param name="searchPattern">Pattern handed to the inner engines.</param>
/// <param name="searchType">Search type handed to the inner engines.</param>
/// <param name="searchOptions">Search options handed to the inner engines.</param>
/// <param name="encoding">Encoding handed to the inner engines.</param>
/// <returns>
/// The results collected so far; an exception during extraction or search is logged and
/// ends the search without being rethrown.
/// </returns>
public List<GrepSearchResult> Search(string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    List<GrepSearchResult> searchResults = new List<GrepSearchResult>();

    // The extractor is disposable; release it when the search is finished.
    using (SevenZipExtractor extractor = new SevenZipExtractor(file))
    {
        string tempFolder = Utils.FixFolderName(Utils.GetTempFolder()) + "dnGREP-Archive\\" + Utils.GetHash(file) + "\\";
        FileFilter filter = FileFilter.ChangePath(tempFolder);

        // if the search pattern(s) only match archive files, need to include an 'any' file type to search inside the archive.
        // otherwise, keep the original pattern set so the user can specify what types of files to search inside the archive.
        var patterns = Utils.SplitPath(FileFilter.NamePatternToInclude).ToList();
        bool hasNonArchivePattern = patterns.Any(p => !Utils.IsArchiveExtension(Path.GetExtension(p)));
        if (!hasNonArchivePattern)
        {
            patterns.Add(FileFilter.IsRegex ? ".*" : "*.*");
            filter = filter.ChangeIncludePattern(string.Join(";", patterns.ToArray()));
        }

        // Start from an empty folder so files from an earlier extraction are not searched.
        if (Directory.Exists(tempFolder))
        {
            Utils.DeleteFolder(tempFolder);
        }
        Directory.CreateDirectory(tempFolder);

        try
        {
            extractor.ExtractArchive(tempFolder);

            foreach (var innerFileName in Utils.GetFileListEx(filter))
            {
                IGrepEngine engine = GrepEngineFactory.GetSearchEngine(innerFileName, initParams, FileFilter);
                var innerFileResults = engine.Search(innerFileName, searchPattern, searchType, searchOptions, encoding);

                if (innerFileResults.Count > 0)
                {
                    using (FileStream reader = File.Open(innerFileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                    using (StreamReader streamReader = new StreamReader(reader))
                    {
                        foreach (var result in innerFileResults)
                        {
                            if (Utils.CancelSearch)
                            {
                                break;
                            }

                            // Only fill in the lines when the inner engine did not already supply them.
                            if (!result.HasSearchResults)
                            {
                                result.SearchResults = Utils.GetLinesEx(streamReader, result.Matches, initParams.LinesBefore, initParams.LinesAfter);
                            }
                        }
                    }

                    searchResults.AddRange(innerFileResults);
                }

                if (Utils.CancelSearch)
                {
                    break;
                }
            }

            // Show results as "<archive>\<path inside archive>" instead of the temp location.
            foreach (GrepSearchResult result in searchResults)
            {
                result.FileNameDisplayed = file + "\\" + result.FileNameDisplayed.Substring(tempFolder.Length);
                result.FileNameReal = file;
                result.ReadOnly = true;
            }
        }
        catch (Exception ex)
        {
            logger.Log<Exception>(LogLevel.Error, string.Format("Failed to search inside archive '{0}'", file), ex);
        }
    }

    return searchResults;
}
/// <summary>
/// Searches a single file (or, for an archive handled by <c>ArchiveEngine</c>, each result set
/// the archive engine yields), adds the results to the shared result collection and raises
/// <c>ProcessedFile</c> before and after the search.
/// </summary>
/// <param name="file">Path of the file to search.</param>
/// <param name="searchType">Search type handed to the engine.</param>
/// <param name="searchPattern">Pattern handed to the engine.</param>
/// <param name="searchOptions">
/// Search option flags. With <c>StopAfterFirstMatch</c> set, the cancellation token source is
/// cancelled once any result exists.
/// </param>
/// <param name="codePage">
/// Code page used to read the file. A negative value means: detect the encoding for files that
/// are not archives, not binary and not PDF, and use <see cref="Encoding.Default"/> otherwise.
/// </param>
/// <remarks>
/// Exceptions are not rethrown: they are logged and reported as an unsuccessful result.
/// </remarks>
private void Search(string file, SearchType searchType, string searchPattern, GrepSearchOption searchOptions, int codePage)
{
    try
    {
        // The event may have no subscribers; guard it here the same way the catch block does.
        ProcessedFile?.Invoke(this, new ProgressStatus(true, processedFilesCount, foundfilesCount, null, file));

        bool isArchive = Utils.IsArchive(file);

        Encoding encoding = Encoding.Default;
        if (codePage > -1)
        {
            encoding = Encoding.GetEncoding(codePage);
        }
        else if (!isArchive && !Utils.IsBinary(file) && !Utils.IsPdfFile(file))
        {
            encoding = Utils.GetFileEncoding(file);
        }

        if (Utils.CancelSearch)
        {
            if (cancellationTokenSource != null)
            {
                cancellationTokenSource.Cancel();
            }
            return;
        }

        IGrepEngine engine = GrepEngineFactory.GetSearchEngine(file, SearchParams, FileFilter, searchType);
        try
        {
            if (isArchive && engine is ArchiveEngine archiveEngine)
            {
                archiveEngine.SetSearchOptions(FileFilter, SearchParams);
                archiveEngine.StartingFileSearch += ArchiveEngine_StartingFileSearch;
                try
                {
                    foreach (var fileSearchResults in archiveEngine.Search(file, searchPattern, searchType, searchOptions, encoding))
                    {
                        // A null result set counts as "no hits" instead of failing on the count below.
                        int hits = 0;
                        if (fileSearchResults != null && fileSearchResults.Count > 0)
                        {
                            AddSearchResults(fileSearchResults);
                            hits = fileSearchResults.Where(r => r.IsSuccess).Count();
                        }
                        Interlocked.Add(ref foundfilesCount, hits);

                        ProcessedFile?.Invoke(this, new ProgressStatus(false, processedFilesCount, foundfilesCount, fileSearchResults, file));
                    }
                }
                finally
                {
                    // Always detach: the engine goes back to the pool below and must not keep
                    // a stale subscription when the enumeration throws.
                    archiveEngine.StartingFileSearch -= ArchiveEngine_StartingFileSearch;
                }
            }
            else
            {
                Interlocked.Increment(ref processedFilesCount);

                var fileSearchResults = engine.Search(file, searchPattern, searchType, searchOptions, encoding).ToList();

                if (fileSearchResults.Count > 0)
                {
                    AddSearchResults(fileSearchResults);
                }
                int hits = fileSearchResults.Where(r => r.IsSuccess).Count();
                Interlocked.Add(ref foundfilesCount, hits);

                ProcessedFile?.Invoke(this, new ProgressStatus(false, processedFilesCount, foundfilesCount, fileSearchResults, file));
            }
        }
        finally
        {
            // Return the engine on every path so an exception does not lose it to the pool.
            if (engine != null)
            {
                GrepEngineFactory.ReturnToPool(file, engine);
            }
        }
    }
    catch (Exception ex)
    {
        logger.Error(ex, "Failed in Search");
        AddSearchResult(new GrepSearchResult(file, searchPattern, ex.Message, false));

        if (ProcessedFile != null)
        {
            List<GrepSearchResult> _results = new List<GrepSearchResult>
            {
                new GrepSearchResult(file, searchPattern, ex.Message, false)
            };
            ProcessedFile(this, new ProgressStatus(false, processedFilesCount, foundfilesCount, _results, file));
        }
    }
    finally
    {
        if (searchOptions.HasFlag(GrepSearchOption.StopAfterFirstMatch) && searchResults.Count > 0)
        {
            if (cancellationTokenSource != null)
            {
                cancellationTokenSource.Cancel();
            }
        }
    }
}
/// <summary>
/// Searches the files inside an archive supplied as a stream. Each entry that passes the file
/// filter is extracted into memory and searched with the engine chosen for its name.
/// </summary>
/// <param name="input">Stream containing the archive.</param>
/// <param name="file">Name of the archive; used as prefix of the displayed names and as the real file name.</param>
/// <param name="searchPattern">Pattern handed to the inner engines.</param>
/// <param name="searchType">Search type handed to the inner engines.</param>
/// <param name="searchOptions">Search options handed to the inner engines.</param>
/// <param name="encoding">Encoding handed to the inner engines.</param>
/// <returns>
/// The results collected so far; an exception is logged and ends the search without being rethrown.
/// </returns>
public List<GrepSearchResult> Search(Stream input, string file, string searchPattern, SearchType searchType, GrepSearchOption searchOptions, Encoding encoding)
{
    List<GrepSearchResult> collected = new List<GrepSearchResult>();

    var includePatterns = new List<Regex>();
    var excludePatterns = new List<Regex>();
    Utils.PrepareFilters(FileFilter, includePatterns, excludePatterns);

    List<string> hiddenFolders = new List<string>();

    try
    {
        using (SevenZipExtractor archive = new SevenZipExtractor(input))
        {
            foreach (var entry in archive.ArchiveFileData)
            {
                var entryAttributes = (FileAttributes)entry.Attributes;
                string entryName = entry.FileName;

                if (entry.IsDirectory)
                {
                    // Remember hidden directories so the files below them can be filtered out.
                    bool trackAsHidden = !FileFilter.IncludeHidden
                        && entryAttributes.HasFlag(FileAttributes.Hidden)
                        && !hiddenFolders.Contains(entryName);
                    if (trackAsHidden)
                    {
                        hiddenFolders.Add(entryName);
                    }
                    continue;
                }

                bool passesFilter = CheckHidden(FileFilter, entryAttributes)
                    && CheckHidden(FileFilter, entryName, hiddenFolders)
                    && CheckSize(FileFilter, entry.Size)
                    && CheckDate(FileFilter, entry)
                    && IsPatternMatch(entryName, includePatterns)
                    && !IsPatternMatch(entryName, excludePatterns);
                if (!passesFilter)
                {
                    continue;
                }

                using (Stream content = new MemoryStream())
                {
                    archive.ExtractFile(entryName, content);
                    content.Seek(0, SeekOrigin.Begin);

                    if (CheckBinary(FileFilter, content))
                    {
                        IGrepEngine innerEngine = GrepEngineFactory.GetSearchEngine(entryName, initParams, FileFilter);
                        var entryResults = innerEngine.Search(content, entryName, searchPattern, searchType, searchOptions, encoding);

                        if (entryResults.Count > 0)
                        {
                            // Extract a second copy to read the matching lines from.
                            using (Stream lineSource = new MemoryStream())
                            {
                                archive.ExtractFile(entryName, lineSource);
                                lineSource.Seek(0, SeekOrigin.Begin);

                                using (StreamReader lineReader = new StreamReader(lineSource))
                                {
                                    foreach (var item in entryResults)
                                    {
                                        if (Utils.CancelSearch)
                                        {
                                            break;
                                        }

                                        if (item.HasSearchResults)
                                        {
                                            continue;
                                        }

                                        item.SearchResults = Utils.GetLinesEx(lineReader, item.Matches, initParams.LinesBefore, initParams.LinesAfter);
                                    }
                                }

                                collected.AddRange(entryResults);
                            }
                        }
                    }

                    if (Utils.CancelSearch)
                    {
                        break;
                    }
                }
            }
        }

        // Show results as "<archive>\<path inside archive>".
        foreach (GrepSearchResult item in collected)
        {
            item.FileNameDisplayed = file + "\\" + item.FileNameDisplayed;
            item.FileNameReal = file;
            item.ReadOnly = true;
        }
    }
    catch (Exception ex)
    {
        logger.Log<Exception>(LogLevel.Error, string.Format("Failed to search inside archive '{0}'", file), ex);
    }

    return collected;
}