/// <summary>
/// Stores an EncodingCacheItem under the given key and marks that key as the most recently used.
/// </summary>
/// <param name="key">Unique key</param>
/// <param name="item">EncodingCacheItem to add</param>
/// <remarks>
/// When adding a new key pushes the cache past its capacity, the entry at the front of the
/// LRU list (the least recently used one) is evicted.
/// </remarks>
/// <history>
/// [Curtis_Beard]	   05/28/2015	FIX: 69, Created for speed improvements for encoding detection
/// </history>
public void SetItem(string key, EncodingCacheItem item)
{
    if (!cache.ContainsKey(key))
    {
        // Brand new entry: store it first, then evict if that put us over capacity.
        cache.Add(key, item);

        if (cache.Count > capacity)
        {
            // The front of the LRU list holds the least recently used key.
            var oldestKey = lruList.First.Value;
            cache.Remove(oldestKey);
            lruList.RemoveFirst();
        }

        // The tail of the LRU list is the most recently used position.
        lruList.AddLast(key);
        return;
    }

    // Existing entry: replace the stored value in case it changed...
    cache[key] = item;

    // ...and move the key to the tail so it counts as most recently used.
    lruList.Remove(key);
    lruList.AddLast(key);
}
/// <summary>
/// Search a given file for the searchText.
/// </summary>
/// <param name="file">FileInfo object for file to search for searchText</param>
/// <remarks>
/// The first enabled, available plugin that supports the file handles it exclusively; otherwise the
/// file is opened read-only, its encoding is resolved (user specified, detected/cached, or system
/// default) and it is scanned line by line. Results are appended to MatchResults and reported
/// through the OnFileHit / OnLineHit / OnFileFiltered / OnSearchError events.
/// Exceptions that are not handled here (e.g. from opening the file or from an invalid regular
/// expression) propagate to the caller after the reader and stream have been closed.
/// </remarks>
/// <history>
/// [Curtis_Beard]	   09/08/2005	Created
/// [Curtis_Beard]	   11/21/2005	ADD: update hit count when actual line added
/// [Curtis_Beard]	   12/02/2005	CHG: use SearchingFile instead of StatusMessage
/// [Curtis_Beard]	   04/21/2006	CHG: use a regular expression match collection to get
///											correct count of hits in a line when using RegEx
/// [Curtis_Beard]	   07/03/2006	FIX: 1500174, use a FileStream to open the files readonly
/// [Curtis_Beard]	   07/07/2006	FIX: 1512029, RegEx use Case Sensitivity and WholeWords,
///											also use different whole word matching regex
/// [Curtis_Beard]	   07/26/2006	ADD: 1512026, column position
/// [Curtis_Beard]	   07/26/2006	FIX: 1530023, retrieve file with correct encoding
/// [Curtis_Beard]	   09/12/2006	CHG: Converted to C#
/// [Curtis_Beard]	   09/28/2006	FIX: check for any plugins before looping through them
/// [Curtis_Beard]	   05/18/2006	FIX: 1723815, use correct whole word matching regex
/// [Curtis_Beard]	   06/26/2007	FIX: correctly detect plugin extension support
/// [Curtis_Beard]	   06/26/2007	FIX: 1779270, increase array size holding context lines
/// [Curtis_Beard]	   10/09/2012	FIX: don't overwrite position when getting context lines
/// [Curtis_Beard]	   10/12/2012	FIX: get correct position when using whole word option
/// [Curtis_Beard]	   10/12/2012	CHG: 32, implement a hit count filter
/// [Curtis_Beard]	   10/31/2012	CHG: renamed to SearchFileContents, remove parameter searchText
/// [Curtis_Beard]	   08/19/2014	FIX: 57, escape search text when whole word is enabled but not regular expressions
/// [Curtis_Beard]	   10/27/2014	CHG: 85, remove leading white space, remove use of newline so each line is in hit object
/// [Curtis_Beard]	   02/09/2015	CHG: 92, support for specific file encodings
/// [Curtis_Beard]	   03/05/2015	FIX: 64/35, if whole word doesn't pass our check but does pass regex, make it fail.  Code cleanup.
/// [Curtis_Beard]	   04/02/2015	CHG: remove line number logic and always include line number in MatchResultLine.
/// [Curtis_Beard]	   05/18/2015	FIX: 72, don't grab file sample when detect encoding option is turned off.
/// [Curtis_Beard]	   05/18/2015	FIX: 69, use same stream to detect encoding and grep contents
/// [Curtis_Beard]	   05/26/2015	FIX: 69, add performance setting for file detection
/// [Curtis_Beard]	   06/02/2015	FIX: 75, use sample size from performance setting
/// [theblackbunny]	   06/25/2015	FIX: 39, remove context lines that intersect with each other in different MatchResults
/// </history>
private void SearchFileContents(FileInfo file)
{
    // Smallest context buffer ever allocated (the historical fixed size).
    const int MinContextBufferSize = 11;

    // Raise SearchFile Event
    OnSearchingFile(file);

    FileStream _stream = null;
    StreamReader _reader = null;
    int _lineNumber = 0;
    MatchResult match = null;

    // Regular expressions are created lazily, once per file, instead of once per line.
    Regex _searchRegex = null;       // used when SearchSpec.UseRegularExpressions is set
    Regex _wholeWordRegex = null;    // used for plain-text whole word matching
    MatchCollection _regularExpCol = null;

    bool _hitOccurred = false;
    bool _fileNameDisplayed = false;
    int _maxContextLines = 0;
    int _contextIndex = -1;          // ring buffer position of the most recently stored line
    int _lastHit = 0;                // > 0: trailing context lines still to show; <= 0: lines since last shown line (negated)
    int userFilterCount = 0;         // NOTE(review): never assigned another value, so the filter item below always reports "0" - confirm intent

    try
    {
        #region Plugin Processing
        if (Plugins != null)
        {
            for (int i = 0; i < Plugins.Count; i++)
            {
                // find a valid plugin for this file type
                if (Plugins[i].Enabled && Plugins[i].Plugin.IsAvailable)
                {
                    // detect if plugin supports extension
                    bool isFound = Plugins[i].Plugin.IsFileSupported(file);

                    // if extension not supported try another plugin
                    if (!isFound)
                        continue;

                    Exception pluginEx = null;

                    // load plugin and perform grep
                    bool pluginLoaded = Plugins[i].Plugin.Load();
                    if (pluginLoaded)
                    {
                        OnSearchingFileByPlugin(Plugins[i].Plugin.Name);
                        match = Plugins[i].Plugin.Grep(file, SearchSpec, ref pluginEx);
                    }
                    else
                    {
                        OnSearchError(file, new Exception(string.Format("Plugin {0} failed to load.", Plugins[i].Plugin.Name)));
                    }

                    Plugins[i].Plugin.Unload();

                    if (!pluginLoaded)
                    {
                        // The file was never searched, so it must not be treated as "no hit"
                        // (which, with negation enabled, would wrongly report it as a result).
                        return;
                    }

                    // if the plugin processed successfully
                    if (pluginEx == null)
                    {
                        // check for a hit
                        if (match != null)
                        {
                            match.FromPlugin = true;

                            // only report the hit when not using negation
                            if (!SearchSpec.UseNegation)
                            {
                                if (DoesPassHitCountCheck(match))
                                {
                                    match.Index = MatchResults.Count;
                                    MatchResults.Add(match);
                                    OnFileHit(file, match.Index);

                                    if (SearchSpec.ReturnOnlyFileNames)
                                        match.SetHitCount();

                                    OnLineHit(match, match.Index);
                                }
                            }
                        }
                        else if (SearchSpec.UseNegation)
                        {
                            // no hit but using negation so create one
                            match = new MatchResult(file) { Index = MatchResults.Count, FromPlugin = true };
                            MatchResults.Add(match);
                            OnFileHit(file, match.Index);
                        }
                    }
                    else
                    {
                        // the plugin had an error
                        OnSearchError(file, pluginEx);
                    }

                    // a plugin handled (or tried to handle) this file, so skip the built-in search
                    return;
                }
            }
        }
        #endregion

        // open stream to file to use in encoding detection if enabled and in grep logic
        _stream = file.Open(FileMode.Open, FileAccess.Read, FileShare.ReadWrite);

        #region Encoding Detection

        string usedEncoder = string.Empty;
        System.Text.Encoding encoding = null;

        //
        // User specified file encoding
        //
        FileEncoding fileEncoding = SearchSpec.FileEncodings != null && SearchSpec.FileEncodings.Count > 0
            ? (from f in SearchSpec.FileEncodings where f.FilePath.Equals(file.FullName, StringComparison.InvariantCultureIgnoreCase) && f.Enabled select f).ToList().FirstOrDefault()
            : null;
        if (fileEncoding != null)
        {
            usedEncoder = "User";
            encoding = fileEncoding.Encoding;
        }
        else
        {
            //
            // Detect file encoding if enabled
            //
            if (SearchSpec.EncodingDetectionOptions.DetectFileEncoding)
            {
                // encoding cache check
                var key = file.FullName;
                if (SearchSpec.EncodingDetectionOptions.UseEncodingCache && EncodingCache.Instance.ContainsKey(key))
                {
                    var value = EncodingCache.Instance.GetItem(key);
                    usedEncoder = value.DetectorName;
                    encoding = System.Text.Encoding.GetEncoding(value.CodePage);
                }
                else
                {
                    byte[] sampleBytes;

                    // Check if we can read the file first
                    try
                    {
                        int sampleSize = EncodingOptions.GetSampleSizeByPerformance(SearchSpec.EncodingDetectionOptions != null ? SearchSpec.EncodingDetectionOptions.PerformanceSetting : EncodingOptions.Performance.Default);
                        sampleBytes = EncodingTools.ReadFileContentSample(_stream, sampleSize);
                    }
                    catch (Exception ex)
                    {
                        // can't read file for sample bytes
                        OnSearchError(file, ex);
                        return;
                    }

                    // detect encoding based on user set performance level that determines what detectors are used
                    encoding = EncodingDetector.Detect(sampleBytes, out usedEncoder, EncodingOptions.GetEncodingDetectorOptionsByPerformance(SearchSpec.EncodingDetectionOptions != null ? SearchSpec.EncodingDetectionOptions.PerformanceSetting : EncodingOptions.Performance.Default), System.Text.Encoding.Default);

                    // add to cache if enabled
                    if (encoding != null && SearchSpec.EncodingDetectionOptions.UseEncodingCache)
                    {
                        var value = new EncodingCacheItem() { CodePage = encoding.CodePage, DetectorName = usedEncoder };
                        EncodingCache.Instance.SetItem(key, value);
                    }
                }
            }
            else
            {
                // Use the original encoding method from before the detect encoding option was available
                usedEncoder = "Default";
                encoding = System.Text.Encoding.Default;
            }
        }

        if (encoding == null)
        {
            // Could not detect file encoding
            OnSearchError(file, new Exception("Could not detect file encoding."));
            return;
        }

        OnFileEncodingDetected(file, encoding, usedEncoder);

        // process all encoding detectors and display results to output window
        //var values = EncodingDetector.DetectAll(sampleBytes);
        //if (values.Count > 0)
        //{
        //   System.Diagnostics.Debug.WriteLine(string.Format("File: {0}", file.FullName));
        //   foreach (var value in values)
        //   {
        //      System.Diagnostics.Debug.WriteLine(string.Format("Encoding: {0} ({1})", value.Encoding != null ? value.Encoding.EncodingName : "None", value.Option.ToString()));
        //   }
        //   System.Diagnostics.Debug.WriteLine(Environment.NewLine);
        //}

        #endregion

        // could have read some data for the encoding check, seek back to start of file
        if (_stream.CanSeek)
        {
            _stream.Seek(0, SeekOrigin.Begin);
        }

        _reader = new StreamReader(_stream, encoding);

        _maxContextLines = SearchSpec.ContextLines + 1;

        // Ring buffer of the most recent lines. It is indexed from 0 to _maxContextLines - 1, so it
        // must hold at least _maxContextLines entries; a fixed size would overflow for large
        // ContextLines values.
        string[] _context = new string[Math.Max(MinContextBufferSize, _maxContextLines)];

        do
        {
            string textLine = _reader.ReadLine();

            if (textLine == null)
                break;
            else
            {
                _lineNumber += 1;

                int _posInStr = -1;
                if (SearchSpec.UseRegularExpressions)
                {
                    // empty lines are never tested against the expression
                    if (textLine.Length > 0)
                    {
                        if (_searchRegex == null)
                        {
                            string pattern = string.Format("{0}{1}{0}", SearchSpec.UseWholeWordMatching ? "\\b" : string.Empty, SearchSpec.SearchText);
                            RegexOptions options = SearchSpec.UseCaseSensitivity ? RegexOptions.None : RegexOptions.IgnoreCase;
                            _searchRegex = new Regex(pattern, options);
                        }

                        _regularExpCol = _searchRegex.Matches(textLine);

                        if (_regularExpCol.Count > 0)
                        {
                            if (SearchSpec.UseNegation)
                            {
                                _hitOccurred = true;
                            }

                            // placeholder marking "hit found"; the real position is taken from the
                            // first regex match when the hit line is built below
                            _posInStr = 1;
                        }
                    }
                }
                else
                {
                    // If we are looking for whole words only, perform the check.
                    if (SearchSpec.UseWholeWordMatching)
                    {
                        if (_wholeWordRegex == null)
                        {
                            _wholeWordRegex = new Regex("\\b" + Regex.Escape(SearchSpec.SearchText) + "\\b", SearchSpec.UseCaseSensitivity ? RegexOptions.None : RegexOptions.IgnoreCase);
                        }

                        // if match is found, also check against our internal line hit count method to be sure they are in sync
                        Match mtc = _wholeWordRegex.Match(textLine);
                        if (mtc != null && mtc.Success && RetrieveLineMatches(textLine, SearchSpec).Count > 0)
                        {
                            if (SearchSpec.UseNegation)
                            {
                                _hitOccurred = true;
                            }

                            _posInStr = mtc.Index;
                        }
                    }
                    else
                    {
                        _posInStr = textLine.IndexOf(SearchSpec.SearchText, SearchSpec.UseCaseSensitivity ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase);

                        if (SearchSpec.UseNegation && _posInStr > -1)
                        {
                            _hitOccurred = true;
                        }
                    }
                }

                //*******************************************
                // We found an occurrence of our search text.
                //*******************************************
                if (_posInStr > -1)
                {
                    // since we have a hit, check to see if negation is checked;
                    // one hit is enough to exclude the file, so stop reading
                    if (SearchSpec.UseNegation)
                        break;

                    // create new hit and add to collection
                    if (match == null)
                    {
                        match = new MatchResult(file) { Index = MatchResults.Count, DetectedEncoding = encoding };
                        MatchResults.Add(match);
                    }

                    // don't show until passes count check
                    if (!_fileNameDisplayed && DoesPassHitCountCheck(match))
                    {
                        OnFileHit(file, match.Index);
                        _fileNameDisplayed = true;
                    }

                    // If we are only showing filenames, go to the next file.
                    if (SearchSpec.ReturnOnlyFileNames)
                    {
                        if (!_fileNameDisplayed)
                        {
                            OnFileHit(file, match.Index);
                            _fileNameDisplayed = true;
                        }

                        // notify that at least 1 hit is in file
                        match.SetHitCount();
                        OnLineHit(match, match.Index);

                        break;
                    }

                    // Display context lines if applicable.
                    if (SearchSpec.ContextLines > 0 && _lastHit <= 0)
                    {
                        if (match.Matches.Count > 0 && _lastHit < -_maxContextLines)
                        {
                            // Insert a blank space before the context lines.
                            var matchLine = new MatchResultLine() { Line = string.Empty, LineNumber = -1 };
                            match.Matches.Add(matchLine);
                            int _pos = match.Matches.Count - 1;

                            if (DoesPassHitCountCheck(match))
                            {
                                OnLineHit(match, _pos);
                            }
                        }

                        // Display preceding n context lines before the hit.
                        int tempContextLines = SearchSpec.ContextLines;

                        // But only output the context lines which are not part of the previous context
                        if (_lastHit >= -_maxContextLines)
                        {
                            tempContextLines = -_lastHit;
                        }

                        // Roll back the context index to get the first context line that needs to be displayed
                        _contextIndex = _contextIndex - tempContextLines;
                        if (_contextIndex < 0)
                        {
                            _contextIndex += _maxContextLines;
                        }

                        for (int tempPosInStr = tempContextLines; tempPosInStr >= 1; tempPosInStr--)
                        {
                            _contextIndex = _contextIndex + 1;
                            if (_contextIndex >= _maxContextLines)
                                _contextIndex = 0;

                            // If there is a match in the first one or two lines,
                            // the entire preceding context may not be available.
                            if (_lineNumber > tempPosInStr)
                            {
                                // Add the context line.
                                var matchLine = new MatchResultLine() { Line = _context[_contextIndex], LineNumber = _lineNumber - tempPosInStr };
                                match.Matches.Add(matchLine);
                                int _pos = match.Matches.Count - 1;

                                if (DoesPassHitCountCheck(match))
                                {
                                    OnLineHit(match, _pos);
                                }
                            }
                        }
                    }

                    // the next ContextLines non-hit lines are trailing context for this hit
                    _lastHit = SearchSpec.ContextLines;

                    //
                    // Add the actual "hit".
                    //
                    var matchLineFound = new MatchResultLine() { Line = textLine, LineNumber = _lineNumber, HasMatch = true };

                    if (SearchSpec.UseRegularExpressions)
                    {
                        _posInStr = _regularExpCol[0].Index;
                        match.SetHitCount(_regularExpCol.Count);

                        foreach (Match regExMatch in _regularExpCol)
                        {
                            matchLineFound.Matches.Add(new MatchResultLineMatch(regExMatch.Index, regExMatch.Length));
                        }
                    }
                    else
                    {
                        var lineMatches = RetrieveLineMatches(textLine, SearchSpec);
                        match.SetHitCount(lineMatches.Count);
                        matchLineFound.Matches = lineMatches;
                    }

                    // column numbers are 1-based
                    matchLineFound.ColumnNumber = _posInStr + 1;
                    match.Matches.Add(matchLineFound);
                    int _index = match.Matches.Count - 1;

                    if (DoesPassHitCountCheck(match))
                    {
                        OnLineHit(match, _index);
                    }
                }
                else if (SearchSpec.ContextLines > 0)
                {
                    if (_lastHit > 0)
                    {
                        //***************************************************
                        // We didn't find a hit, but since lastHit is > 0, we
                        // need to display this context line.
                        //***************************************************
                        var matchLine = new MatchResultLine() { Line = textLine, LineNumber = _lineNumber };
                        match.Matches.Add(matchLine);
                        int _index = match.Matches.Count - 1;

                        if (DoesPassHitCountCheck(match))
                        {
                            OnLineHit(match, _index);
                        }
                    }

                    if (_lastHit >= -_maxContextLines)
                    {
                        //*****************************************************
                        // We continue keeping track of the number of potential
                        // context lines since the last displayed context line
                        // until we pass (-_maxContextLines).
                        //*****************************************************
                        _lastHit -= 1;
                    }
                }

                // Found a hit or not.
                // If we are showing context lines, keep the last n+1 lines.
                if (SearchSpec.ContextLines > 0)
                {
                    _contextIndex += 1;
                    if (_contextIndex >= _maxContextLines)
                        _contextIndex = 0;

                    _context[_contextIndex] = textLine;
                }
            }
        }
        while (true);

        // send event file/line hit if we haven't yet but it should be
        if (!_fileNameDisplayed && match != null && DoesPassHitCountCheck(match))
        {
            // need to display it
            // NOTE(review): OnLineHit receives the file index here, whereas the loop above passes a
            // line index into match.Matches - confirm this is what listeners expect.
            OnFileHit(file, match.Index);
            OnLineHit(match, match.Index);
        }

        // send event for file filtered if it fails the file hit count filter
        if (!SearchSpec.UseNegation && !SearchSpec.ReturnOnlyFileNames && match != null && !DoesPassHitCountCheck(match))
        {
            // remove from grep collection only if
            // not negation
            // not filenames only
            // actually have a hit
            // doesn't pass the hit count filter
            MatchResults.RemoveAt(MatchResults.Count - 1);

            string filterValue = match.HitCount.ToString();
            FilterItem filterItem = new FilterItem(new FilterType(FilterType.Categories.File, FilterType.SubCategories.MinimumHitCount), userFilterCount.ToString(), FilterType.ValueOptions.None, false, true);
            OnFileFiltered(file, filterItem, filterValue);
        }

        //
        // Check for no hits through out the file
        //
        if (SearchSpec.UseNegation && _hitOccurred == false)
        {
            // add the file to the hit list
            if (!_fileNameDisplayed)
            {
                match = new MatchResult(file) { Index = MatchResults.Count, DetectedEncoding = encoding };
                MatchResults.Add(match);
                OnFileHit(file, match.Index);
            }
        }
    }
    finally
    {
        // release the reader and the underlying stream on every exit path
        if (_reader != null)
            _reader.Close();

        if (_stream != null)
            _stream.Close();
    }
}