/// <summary>
/// Decompress a GZIP file to disk and search the decompressed contents.
/// </summary>
/// <remarks>
/// The decompressed copy is created at the source file's full path with its extension
/// removed, i.e. next to the source file. This method does not delete that copy.
/// </remarks>
/// <param name="fileName">The GZIP file name.</param>
/// <param name="searchTerms">The terms to search for.</param>
/// <param name="matcher">The matcher object to determine search criteria.</param>
/// <returns>List of matched lines for GZIP file contents.</returns>
private List<MatchedLine> DecompressGZipStream(string fileName, IEnumerable<string> searchTerms, Matcher matcher)
{
    var results = new List<MatchedLine>();
    string outputPath = string.Empty;
    var gzipFile = new FileInfo(fileName);

    using (FileStream compressed = gzipFile.OpenRead())
    {
        // Strip the trailing extension (e.g. ".gz") from the full path to get the output file name.
        string sourcePath = gzipFile.FullName;
        int extensionStart = sourcePath.Length - gzipFile.Extension.Length;
        outputPath = sourcePath.Remove(extensionStart);

        using (FileStream output = File.Create(outputPath))
        using (var gzip = new GZipStream(compressed, CompressionMode.Decompress))
        {
            gzip.CopyTo(output);
        }
    }

    if (string.IsNullOrWhiteSpace(outputPath))
    {
        return results;
    }

    // Hand the decompressed file to the search factory.
    results.AddRange(FileSearchHandlerFactory.Search(outputPath, searchTerms, matcher));
    return results;
}
/// <summary>
/// Searches the file at <c>filePath</c> for the terms "the" and "quick" using
/// <see cref="RegexOptions.None"/> and expects exactly two matched lines.
/// </summary>
/// <remarks>
/// NOTE(review): the method name says CaseInsensitive, but <see cref="RegexOptions.None"/>
/// is what is passed to the matcher - confirm this is intended.
/// </remarks>
public void SearchText_TwoWords_CaseInsensitive_MatchesTwo()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "the", "quick" };
    var matcher = new Matcher { RegularExpressionOptions = RegexOptions.None };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Equal(2, results.Count);
}
/// <summary>
/// Searches the file at <c>filePath</c> with the pattern "Th.*qu" using
/// <see cref="RegexOptions.Singleline"/> and expects a single matched line.
/// </summary>
public void SearchText_Regex_CaseSensitive_MatchesOne()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "Th.*qu" };
    var matcher = new Matcher { RegularExpressionOptions = RegexOptions.Singleline };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Single(results);
}
/// <summary>
/// Searches the file at <c>filePath</c> with a pattern that can span line breaks, using
/// <see cref="RegexOptions.Multiline"/> combined with <see cref="RegexOptions.IgnoreCase"/>,
/// and expects a single matched line.
/// </summary>
/// <remarks>
/// NOTE(review): the method name says MatchesThree, but the assertion expects exactly one
/// match - confirm which is intended.
/// </remarks>
public void SearchText_Regex_CaseInsensitive_Multiline_MatchesThree()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "e(.|\n)*?o" };
    var matcher = new Matcher
    {
        RegularExpressionOptions = RegexOptions.Multiline | RegexOptions.IgnoreCase
    };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Single(results);
}
/// <summary>
/// Searches the file at <c>filePath</c> for "the" using
/// <see cref="RegexOptions.IgnoreCase"/> and expects exactly three matched lines.
/// </summary>
public void SearchText_CaseInsensitive_MatchesThree()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "the" };
    var matcher = new Matcher { RegularExpressionOptions = RegexOptions.IgnoreCase };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Equal(3, results.Count);
}
/// <summary>
/// Searches the file at <c>filePath</c> for "the" and "quicker" with
/// <c>AllMatchesInFile</c> set to true and <see cref="RegexOptions.IgnoreCase"/>,
/// and expects no matched lines.
/// </summary>
public void SearchText_Exclude_Result_ALL_NotFound()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "the", "quicker" };
    var matcher = new Matcher
    {
        RegularExpressionOptions = RegexOptions.IgnoreCase,
        AllMatchesInFile = true
    };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Empty(results);
}
/// <summary>
/// Searches the given file for "The" using <see cref="RegexOptions.None"/> and expects
/// exactly thirteen matched lines.
/// </summary>
/// <param name="filePath">The path of the file to search.</param>
public void SearchText_CaseSensitive_Matches13(string filePath)
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "The" };
    var matcher = new Matcher { RegularExpressionOptions = RegexOptions.None };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Equal(13, results.Count);
}
/// <summary>
/// Searches the file at <c>filePath</c> with the pattern "The [ fox" (an unclosed
/// character class) and expects the search to throw <see cref="Exception"/>.
/// </summary>
public void SearchText_Regex_CaseInsensitive_Multiline_RegexFailure()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    Action searchWithBrokenPattern = () =>
    {
        string[] terms = { @"The [ fox" };
        var matcher = new Matcher { RegularExpressionOptions = RegexOptions.IgnoreCase };
        FileSearchHandlerFactory.Search(filePath, terms, matcher);
    };

    Assert.Throws<Exception>(searchWithBrokenPattern);
}
/// <summary>
/// Searches the file at <c>filePath</c> with a pattern that can span line breaks, using
/// <see cref="RegexOptions.Multiline"/> combined with <see cref="RegexOptions.IgnoreCase"/>.
/// Expects three matches whose content starts with "Page 1", "Page 2" and "Page 3", in that order.
/// </summary>
public void SearchText_Regex_CaseInsensitive_Multiline_MatchesThree()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "e(.|\n)*?o" };
    var matcher = new Matcher
    {
        RegularExpressionOptions = RegexOptions.Multiline | RegexOptions.IgnoreCase
    };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    // The count is asserted first so the indexed checks below never run out of range.
    Assert.Equal(3, results.Count);

    string[] expectedPrefixes = { "Page 1", "Page 2", "Page 3" };
    for (int i = 0; i < expectedPrefixes.Length; i++)
    {
        Assert.StartsWith(expectedPrefixes[i], results[i].Content);
    }
}
/// <summary>
/// Searches the file at <c>filePath</c> for "the" with a token source that has already
/// been cancelled and expects no matched lines.
/// </summary>
public void SearchText_WithCancellation()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    // Cancel before the search starts.
    var cancelledSource = new CancellationTokenSource();
    cancelledSource.Cancel();

    string[] terms = { "the" };
    var matcher = new Matcher { CancellationTokenSource = cancelledSource };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Empty(results);
}
/// <summary>
/// Searches the file at <c>filePath</c> for "aa" using <see cref="RegexOptions.IgnoreCase"/>
/// with a token source that has already been cancelled, and expects no matched lines.
/// </summary>
public void SearchText_Cancellation_Succeeds()
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    // Cancel before the search starts.
    var cancelledSource = new CancellationTokenSource();
    cancelledSource.Cancel();

    string[] terms = { "aa" };
    var matcher = new Matcher
    {
        RegularExpressionOptions = RegexOptions.IgnoreCase,
        CancellationTokenSource = cancelledSource
    };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    Assert.Empty(results);
}
/// <summary>
/// Searches the given file with a pattern that can span line breaks, using
/// <see cref="RegexOptions.Multiline"/> combined with <see cref="RegexOptions.IgnoreCase"/>,
/// and expects the supplied number of matches.
/// </summary>
/// <param name="filePath">The path of the file to search.</param>
/// <param name="matchCount">The expected number of matched lines.</param>
public void SearchText_Regex_CaseInsensitive_Multiline_Matches11(string filePath, int matchCount)
{
    // The contents are not used afterwards; the file is simply read before searching.
    var fileContents = File.ReadAllText(filePath);

    string[] terms = { "e(.|\n)*?o" };
    var matcher = new Matcher
    {
        RegularExpressionOptions = RegexOptions.Multiline | RegexOptions.IgnoreCase
    };

    var results = FileSearchHandlerFactory.Search(filePath, terms, matcher);

    try
    {
        Assert.Equal(matchCount, results.Count);
    }
    catch (Xunit.Sdk.EqualException countMismatch)
    {
        // Re-throw with the file path so a failing data row can be identified.
        throw new Xunit.Sdk.XunitException($"Failed for {filePath}. {countMismatch}");
    }
}
/// <summary>
/// Search for matches in zipped archive files.
/// </summary>
/// <remarks>
/// Every non-directory entry that is not a link and whose key contains none of the
/// characters in <c>DisallowedCharactersByOperatingSystem</c> is extracted into
/// <paramref name="tempDirPath"/> and searched. Matches found in an extracted file have
/// their <c>FileName</c> rewritten to the extracted path with <c>TempExtractDirectoryName</c>
/// removed. If cancellation has been requested once all entries are processed, the
/// accumulated results are cleared.
/// </remarks>
/// <param name="fileName">The name of the file.</param>
/// <param name="searchTerms">The terms to search.</param>
/// <param name="tempDirPath">The temporary extract directory.</param>
/// <param name="archive">The archive to be searched.</param>
/// <param name="matcher">The matcher object to determine search criteria.</param>
/// <returns>The matched lines containing the search terms.</returns>
/// <exception cref="PathTooLongException">An entry could not be extracted or searched because its path is too long.</exception>
/// <exception cref="NotSupportedException">The archive raised the null-string error that this method reports as an encrypted file.</exception>
private List<MatchedLine> GetMatchedLinesInZipArchive(string fileName, IEnumerable<string> searchTerms, string tempDirPath, SharpCompress.Archives.IArchive archive, Matcher matcher)
{
    List<MatchedLine> matchedLines = new List<MatchedLine>();

    try
    {
        // The reader is disposable; release it as soon as the entries have been walked
        // (or an exception escapes the loop).
        using (IReader reader = archive.ExtractAllEntries())
        {
            while (reader.MoveToNextEntry())
            {
                if (reader.Entry.IsDirectory)
                {
                    continue;
                }

                // Ignore symbolic links as these are captured by the original target.
                // NOTE(review): do not add a null-guard on Entry.Key without reviewing the catch
                // block below. LINQ's Any throws ArgumentNullException for a null source, which
                // appears to be how the .gz fallback is reached - confirm before changing.
                if (!string.IsNullOrWhiteSpace(reader.Entry.LinkTarget) ||
                    reader.Entry.Key.Any(c => DisallowedCharactersByOperatingSystem.Any(dc => dc == c)))
                {
                    continue;
                }

                try
                {
                    reader.WriteEntryToDirectory(tempDirPath, new ExtractionOptions() { ExtractFullPath = true, Overwrite = true });

                    // Archive keys use '/' as separator; convert to the platform separator
                    // (a backslash on Windows, matching the previous hard-coded value).
                    string fullFilePath = Path.Combine(tempDirPath, reader.Entry.Key.Replace('/', Path.DirectorySeparatorChar));

                    var entryMatches = FileSearchHandlerFactory.Search(fullFilePath, searchTerms, matcher);
                    matchedLines.AddRange(entryMatches);

                    if (entryMatches.Count > 0)
                    {
                        // Want the exact path of the file - without the .extract part.
                        // Only this entry's matches are touched; earlier entries were fixed up
                        // when they were processed, so re-scanning the whole list is unnecessary.
                        string dirNameToDisplay = fullFilePath.Replace(TempExtractDirectoryName, string.Empty);
                        foreach (MatchedLine ml in entryMatches)
                        {
                            if (string.IsNullOrEmpty(ml.FileName) || ml.FileName.Contains(TempExtractDirectoryName))
                            {
                                ml.FileName = dirNameToDisplay;
                            }
                        }
                    }
                }
                catch (PathTooLongException ptlex)
                {
                    // Add the entry and archive names to the message, keeping the original as the inner exception.
                    throw new PathTooLongException(
                        string.Format("{0} {1} {2} {3} - {4}", Resources.Strings.ErrorAccessingEntry, reader.Entry.Key, Resources.Strings.InArchive, fileName, ptlex.Message),
                        ptlex);
                }
            }
        }

        if (matcher.CancellationTokenSource.Token.IsCancellationRequested)
        {
            matchedLines.Clear();
        }
    }
    catch (ArgumentNullException ane)
    {
        if (ane.Message.Contains("Value cannot be null") && fileName.EndsWith(".gz", StringComparison.OrdinalIgnoreCase))
        {
            // Fall back to plain GZIP decompression for .gz files.
            matchedLines = this.DecompressGZipStream(fileName, searchTerms, matcher);
        }
        else if (ane.Message.Contains("String reference not set to an instance of a String."))
        {
            // Reported to the caller as an encrypted file.
            throw new NotSupportedException(string.Format("{0} {1}. {2}", Resources.Strings.ErrorAccessingFile, fileName, Resources.Strings.FileEncrypted));
        }
        else
        {
            throw;
        }
    }

    return matchedLines;
}