예제 #1
0
        /// <summary>
        /// Stores an EncodingCacheItem under the given key and moves that key to the
        /// end of the usage list.
        /// </summary>
        /// <remarks>
        /// A key that is already cached has its value replaced. A new key is added, and
        /// if that pushes the cache past its capacity, the entry whose key sits at the
        /// front of the usage list is removed.
        /// </remarks>
        /// <param name="key">Unique key</param>
        /// <param name="item">EncodingCacheItem to store</param>
        /// <history>
        /// [Curtis_Beard]		05/28/2015	FIX: 69, Created for speed improvements for encoding detection
        /// </history>
        public void SetItem(string key, EncodingCacheItem item)
        {
            if (!cache.ContainsKey(key))
            {
                // Brand new entry.
                cache.Add(key, item);

                // The add may have pushed the cache over capacity: drop the entry
                // whose key is at the front of the usage list.
                if (cache.Count > capacity)
                {
                    var oldestKey = lruList.First.Value;
                    cache.Remove(oldestKey);
                    lruList.RemoveFirst();
                }

                lruList.AddLast(key);
                return;
            }

            // Existing entry: replace the value in case it changed ...
            cache[key] = item;

            // ... and move the key from its current position to the end of the usage list.
            lruList.Remove(key);
            lruList.AddLast(key);
        }
예제 #2
0
파일: Grep.cs 프로젝트: joshball/astrogrep
        /// <summary>
        /// Search a given file for the searchText.
        /// </summary>
        /// <param name="file">FileInfo object for file to search for searchText</param>
        /// <history>
        /// [Curtis_Beard]		09/08/2005	Created
        /// [Curtis_Beard]		11/21/2005	ADD: update hit count when actual line added
        /// [Curtis_Beard]		12/02/2005	CHG: use SearchingFile instead of StatusMessage
        /// [Curtis_Beard]		04/21/2006	CHG: use a regular expression match collection to get
        ///											correct count of hits in a line when using RegEx
        /// [Curtis_Beard]		07/03/2006	FIX: 1500174, use a FileStream to open the files readonly
        /// [Curtis_Beard]		07/07/2006	FIX: 1512029, RegEx use Case Sensitivity and WholeWords,
        ///											also use different whole word matching regex
        /// [Curtis_Beard]		07/26/2006	ADD: 1512026, column position
        /// [Curtis_Beard]		07/26/2006	FIX: 1530023, retrieve file with correct encoding
        /// [Curtis_Beard]		09/12/2006	CHG: Converted to C#
        /// [Curtis_Beard]		09/28/2006	FIX: check for any plugins before looping through them
        /// [Curtis_Beard]		05/18/2006	FIX: 1723815, use correct whole word matching regex
        /// [Curtis_Beard]		06/26/2007	FIX: correctly detect plugin extension support
        /// [Curtis_Beard]		06/26/2007	FIX: 1779270, increase array size holding context lines
        /// [Curtis_Beard]		10/09/2012	FIX: don't overwrite position when getting context lines
        /// [Curtis_Beard]		10/12/2012	FIX: get correct position when using whole word option
        /// [Curtis_Beard]		10/12/2012	CHG: 32, implement a hit count filter
        /// [Curtis_Beard]		10/31/2012	CHG: renamed to SearchFileContents, remove parameter searchText
        /// [Curtis_Beard]		08/19/2014	FIX: 57, escape search text when whole word is enabled but not regular expressions
        /// [Curtis_Beard]      10/27/2014	CHG: 85, remove leading white space, remove use of newline so each line is in hit object
        /// [Curtis_Beard]      02/09/2015	CHG: 92, support for specific file encodings
        /// [Curtis_Beard]		03/05/2015	FIX: 64/35, if whole word doesn't pass our check but does pass regex, make it fail.  Code cleanup.
        /// [Curtis_Beard]		04/02/2015	CHG: remove line number logic and always include line number in MatchResultLine.
        /// [Curtis_Beard]		05/18/2015	FIX: 72, don't grab file sample when detect encoding option is turned off.
        /// [Curtis_Beard]		05/18/2015	FIX: 69, use same stream to detect encoding and grep contents
        /// [Curtis_Beard]	   05/26/2015	FIX: 69, add performance setting for file detection
        /// [Curtis_Beard]		06/02/2015	FIX: 75, use sample size from performance setting
        /// [theblackbunny]		06/25/2015	FIX: 39, remove context lines that intersect with each other in different MatchResults
        /// </history>
        private void SearchFileContents(FileInfo file)
        {
            // Overview of the steps performed below:
            //   1. Raise the SearchingFile event.
            //   2. If an enabled, available plugin supports the file, let that plugin
            //      perform the whole search and return.
            //   3. Otherwise open the file read-only and resolve its encoding: a user
            //      supplied encoding for this path, else (when detection is enabled) the
            //      encoding cache or a detection run on a sample, else the system default.
            //   4. Read the file line by line, testing each line with a regular expression,
            //      a whole word check or a plain IndexOf, and record hits plus any
            //      surrounding context lines in a MatchResult.
            //   5. After the scan, raise any pending file/line events, drop the result if
            //      it fails the hit count check, and handle the negation case.
            // The stream and reader are always closed in the finally block.

            // Raise SearchFile Event
            OnSearchingFile(file);

            FileStream _stream = null;
            StreamReader _reader = null;
            int _lineNumber = 0;
            MatchResult match = null;

            // Built lazily on the first line that needs it and reused for every later line.
            // Assumes SearchSpec does not change while a single file is being searched.
            Regex _regularExp = null;
            MatchCollection _regularExpCol = null;
            bool _hitOccurred = false;
            bool _fileNameDisplayed = false;
            int _maxContextLines = 0;

            // Rolling buffer of the most recent lines; allocated only when context lines are requested.
            string[] _context = null;
            int _contextIndex = -1;

            // > 0  : number of trailing context lines still to output after the last hit
            // <= 0 : negated count of lines seen since the last displayed line
            int _lastHit = 0;

            // NOTE(review): never assigned in this method, so the FilterItem created below
            // always carries "0" as its value - confirm whether a configured minimum hit
            // count was meant to be used here.
            int userFilterCount = 0;

            try
            {
                #region Plugin Processing

                if (Plugins != null)
                {
                    for (int i = 0; i < Plugins.Count; i++)
                    {
                        // find a valid plugin for this file type
                        if (Plugins[i].Enabled && Plugins[i].Plugin.IsAvailable)
                        {
                            // detect if plugin supports extension
                            bool isFound = Plugins[i].Plugin.IsFileSupported(file);

                            // if extension not supported try another plugin
                            if (!isFound)
                            {
                                continue;
                            }

                            Exception pluginEx = null;

                            // load plugin and perform grep
                            bool pluginLoaded = Plugins[i].Plugin.Load();
                            if (pluginLoaded)
                            {
                                OnSearchingFileByPlugin(Plugins[i].Plugin.Name);
                                match = Plugins[i].Plugin.Grep(file, SearchSpec, ref pluginEx);
                            }
                            else
                            {
                                OnSearchError(file, new Exception(string.Format("Plugin {0} failed to load.", Plugins[i].Plugin.Name)));
                            }

                            Plugins[i].Plugin.Unload();

                            if (!pluginLoaded)
                            {
                                // The load failure was reported above. The file was never
                                // searched, so it must not be treated as "no hit" and added
                                // as a negation result.
                                return;
                            }

                            // if the plugin processed successfully
                            if (pluginEx == null)
                            {
                                // check for a hit
                                if (match != null)
                                {
                                    match.FromPlugin = true;

                                    // only perform if not using negation
                                    if (!SearchSpec.UseNegation)
                                    {
                                        if (DoesPassHitCountCheck(match))
                                        {
                                            match.Index = MatchResults.Count;
                                            MatchResults.Add(match);
                                            OnFileHit(file, match.Index);

                                            if (SearchSpec.ReturnOnlyFileNames)
                                            {
                                                match.SetHitCount();
                                            }

                                            OnLineHit(match, match.Index);
                                        }
                                    }
                                }
                                else if (SearchSpec.UseNegation)
                                {
                                    // no hit but using negation so create one
                                    match = new MatchResult(file) { Index = MatchResults.Count, FromPlugin = true };
                                    MatchResults.Add(match);
                                    OnFileHit(file, match.Index);
                                }
                            }
                            else
                            {
                                // the plugin had an error
                                OnSearchError(file, pluginEx);
                            }

                            // the first supporting plugin handles the file; nothing more to do
                            return;
                        }
                    }
                }
                #endregion

                // open stream to file to use in encoding detection if enabled and in grep logic
                _stream = file.Open(FileMode.Open, FileAccess.Read, FileShare.ReadWrite);

                #region Encoding Detection

                string usedEncoder = string.Empty;
                System.Text.Encoding encoding = null;

                //
                // User specified file encoding
                //
                FileEncoding fileEncoding = SearchSpec.FileEncodings != null && SearchSpec.FileEncodings.Count > 0 ?
                    (from f in SearchSpec.FileEncodings where f.FilePath.Equals(file.FullName, StringComparison.InvariantCultureIgnoreCase) && f.Enabled select f).ToList().FirstOrDefault()
                    : null;
                if (fileEncoding != null)
                {
                    usedEncoder = "User";
                    encoding = fileEncoding.Encoding;
                }
                else
                {
                    //
                    // Detect file encoding if enabled
                    //
                    if (SearchSpec.EncodingDetectionOptions.DetectFileEncoding)
                    {
                        // encoding cache check
                        var key = file.FullName;
                        if (SearchSpec.EncodingDetectionOptions.UseEncodingCache &&
                            EncodingCache.Instance.ContainsKey(key))
                        {
                            var value = EncodingCache.Instance.GetItem(key);

                            usedEncoder = value.DetectorName;
                            encoding = System.Text.Encoding.GetEncoding(value.CodePage);
                        }
                        else
                        {
                            byte[] sampleBytes;

                            // Check if can read first
                            try
                            {
                                int sampleSize = EncodingOptions.GetSampleSizeByPerformance(SearchSpec.EncodingDetectionOptions != null ? SearchSpec.EncodingDetectionOptions.PerformanceSetting : EncodingOptions.Performance.Default);
                                sampleBytes = EncodingTools.ReadFileContentSample(_stream, sampleSize);
                            }
                            catch (Exception ex)
                            {
                                // can't read file for sample bytes
                                OnSearchError(file, ex);
                                return;
                            }

                            // detect encoding based on user set performance level that determines what detectors are used
                            encoding = EncodingDetector.Detect(sampleBytes,
                                out usedEncoder,
                                EncodingOptions.GetEncodingDetectorOptionsByPerformance(SearchSpec.EncodingDetectionOptions != null ? SearchSpec.EncodingDetectionOptions.PerformanceSetting : EncodingOptions.Performance.Default),
                                System.Text.Encoding.Default);

                            // add to cache if enabled
                            if (encoding != null && SearchSpec.EncodingDetectionOptions.UseEncodingCache)
                            {
                                var value = new EncodingCacheItem() { CodePage = encoding.CodePage, DetectorName = usedEncoder };
                                EncodingCache.Instance.SetItem(key, value);
                            }
                        }
                    }
                    else
                    {
                        // Use original encoding method from before the detect encoding option was available
                        usedEncoder = "Default";
                        encoding = System.Text.Encoding.Default;
                    }
                }

                if (encoding == null)
                {
                    // Could not detect file encoding
                    OnSearchError(file, new Exception("Could not detect file encoding."));
                    return;
                }

                OnFileEncodingDetected(file, encoding, usedEncoder);

                // process all encoding detectors and display results to output window
                //var values = EncodingDetector.DetectAll(sampleBytes);
                //if (values.Count > 0)
                //{
                //   System.Diagnostics.Debug.WriteLine(string.Format("File: {0}", file.FullName));
                //   foreach (var value in values)
                //   {
                //      System.Diagnostics.Debug.WriteLine(string.Format("Encoding: {0} ({1})", value.Encoding != null ? value.Encoding.EncodingName : "None", value.Option.ToString()));
                //   }
                //   System.Diagnostics.Debug.WriteLine(Environment.NewLine);
                //}

                #endregion

                // could have read some data for the encoding check, seek back to start of file
                if (_stream.CanSeek)
                {
                    _stream.Seek(0, SeekOrigin.Begin);
                }
                _reader = new StreamReader(_stream, encoding);

                _maxContextLines = SearchSpec.ContextLines + 1;

                // Size the rolling context buffer from the configured context line count.
                // Indices used below run from 0 to _maxContextLines - 1, so a fixed-size
                // buffer would be overrun once ContextLines + 1 exceeded its length.
                // Every access to _context is guarded by SearchSpec.ContextLines > 0.
                if (SearchSpec.ContextLines > 0)
                {
                    _context = new string[_maxContextLines];
                }

                string textLine;
                while ((textLine = _reader.ReadLine()) != null)
                {
                    _lineNumber += 1;

                    int _posInStr = -1;
                    if (SearchSpec.UseRegularExpressions)
                    {
                        if (textLine.Length > 0)
                        {
                            // Create the expression on first use only; it is the same for every line.
                            if (_regularExp == null)
                            {
                                string pattern = string.Format("{0}{1}{0}", SearchSpec.UseWholeWordMatching ? "\\b" : string.Empty, SearchSpec.SearchText);
                                RegexOptions options = SearchSpec.UseCaseSensitivity ? RegexOptions.None : RegexOptions.IgnoreCase;
                                _regularExp = new Regex(pattern, options);
                            }

                            _regularExpCol = _regularExp.Matches(textLine);

                            if (_regularExpCol.Count > 0)
                            {
                                if (SearchSpec.UseNegation)
                                {
                                    _hitOccurred = true;
                                }

                                // placeholder marking a hit; the real position is taken
                                // from the first regex match when the hit line is built
                                _posInStr = 1;
                            }
                        }
                    }
                    else
                    {
                        // If we are looking for whole words only, perform the check.
                        if (SearchSpec.UseWholeWordMatching)
                        {
                            // Create the expression on first use only; it is the same for every line.
                            if (_regularExp == null)
                            {
                                _regularExp = new Regex("\\b" + Regex.Escape(SearchSpec.SearchText) + "\\b", SearchSpec.UseCaseSensitivity ? RegexOptions.None : RegexOptions.IgnoreCase);
                            }

                            // if match is found, also check against our internal line hit count method to be sure they are in sync
                            Match mtc = _regularExp.Match(textLine);
                            if (mtc != null && mtc.Success && RetrieveLineMatches(textLine, SearchSpec).Count > 0)
                            {
                                if (SearchSpec.UseNegation)
                                {
                                    _hitOccurred = true;
                                }

                                _posInStr = mtc.Index;
                            }
                        }
                        else
                        {
                            _posInStr = textLine.IndexOf(SearchSpec.SearchText, SearchSpec.UseCaseSensitivity ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase);

                            if (SearchSpec.UseNegation && _posInStr > -1)
                            {
                                _hitOccurred = true;
                            }
                        }
                    }

                    //*******************************************
                    // We found an occurrence of our search text.
                    //*******************************************
                    if (_posInStr > -1)
                    {
                        // since we have a hit, check to see if negation is checked
                        if (SearchSpec.UseNegation)
                        {
                            break;
                        }

                        // create new hit and add to collection
                        if (match == null)
                        {
                            match = new MatchResult(file) { Index = MatchResults.Count, DetectedEncoding = encoding };
                            MatchResults.Add(match);
                        }

                        // don't show until passes count check
                        if (!_fileNameDisplayed && DoesPassHitCountCheck(match))
                        {
                            OnFileHit(file, match.Index);

                            _fileNameDisplayed = true;
                        }

                        // If we are only showing filenames, go to the next file.
                        if (SearchSpec.ReturnOnlyFileNames)
                        {
                            if (!_fileNameDisplayed)
                            {
                                OnFileHit(file, match.Index);

                                _fileNameDisplayed = true;
                            }

                            // notify that at least 1 hit is in file
                            match.SetHitCount();
                            OnLineHit(match, match.Index);

                            break;
                        }

                        // Display context lines if applicable.
                        if (SearchSpec.ContextLines > 0 && _lastHit <= 0)
                        {
                            if (match.Matches.Count > 0 && _lastHit < -_maxContextLines)
                            {
                                // Insert a blank space before the context lines.
                                var matchLine = new MatchResultLine() { Line = string.Empty, LineNumber = -1 };
                                match.Matches.Add(matchLine);
                                int _pos = match.Matches.Count - 1;

                                if (DoesPassHitCountCheck(match))
                                {
                                    OnLineHit(match, _pos);
                                }
                            }

                            // Display preceding n context lines before the hit.
                            int tempContextLines = SearchSpec.ContextLines;

                            // But only output the context lines which are not part of the previous context
                            if (_lastHit >= -_maxContextLines)
                            {
                                tempContextLines = -_lastHit;
                            }

                            // Roll back the context index to get the first context line that needs to be displayed
                            _contextIndex = _contextIndex - tempContextLines;
                            if (_contextIndex < 0)
                            {
                                _contextIndex += _maxContextLines;
                            }

                            for (int tempPosInStr = tempContextLines; tempPosInStr >= 1; tempPosInStr--)
                            {
                                _contextIndex = _contextIndex + 1;
                                if (_contextIndex >= _maxContextLines)
                                {
                                    _contextIndex = 0;
                                }

                                // If there is a match in the first one or two lines,
                                // the entire preceding context may not be available.
                                if (_lineNumber > tempPosInStr)
                                {
                                    // Add the context line.
                                    var matchLine = new MatchResultLine() { Line = _context[_contextIndex], LineNumber = _lineNumber - tempPosInStr };
                                    match.Matches.Add(matchLine);
                                    int _pos = match.Matches.Count - 1;

                                    if (DoesPassHitCountCheck(match))
                                    {
                                        OnLineHit(match, _pos);
                                    }
                                }
                            }
                        }

                        _lastHit = SearchSpec.ContextLines;

                        //
                        // Add the actual "hit".
                        //
                        var matchLineFound = new MatchResultLine() { Line = textLine, LineNumber = _lineNumber, HasMatch = true };

                        if (SearchSpec.UseRegularExpressions)
                        {
                            _posInStr = _regularExpCol[0].Index;
                            match.SetHitCount(_regularExpCol.Count);

                            foreach (Match regExMatch in _regularExpCol)
                            {
                                matchLineFound.Matches.Add(new MatchResultLineMatch(regExMatch.Index, regExMatch.Length));
                            }
                        }
                        else
                        {
                            var lineMatches = RetrieveLineMatches(textLine, SearchSpec);
                            match.SetHitCount(lineMatches.Count);
                            matchLineFound.Matches = lineMatches;
                        }
                        matchLineFound.ColumnNumber = _posInStr + 1;
                        match.Matches.Add(matchLineFound);
                        int _index = match.Matches.Count - 1;

                        if (DoesPassHitCountCheck(match))
                        {
                            OnLineHit(match, _index);
                        }
                    }
                    else if (SearchSpec.ContextLines > 0)
                    {
                        if (_lastHit > 0)
                        {
                            //***************************************************
                            // We didn't find a hit, but since lastHit is > 0, we
                            // need to display this context line.
                            //***************************************************
                            var matchLine = new MatchResultLine() { Line = textLine, LineNumber = _lineNumber };
                            match.Matches.Add(matchLine);
                            int _index = match.Matches.Count - 1;

                            if (DoesPassHitCountCheck(match))
                            {
                                OnLineHit(match, _index);
                            }
                        }

                        if (_lastHit >= -_maxContextLines)
                        {
                            //*****************************************************
                            // We continue keeping track of the number of potential
                            // context lines since the last displayed context line
                            // until we pass (-_maxContextLines).
                            //*****************************************************
                            _lastHit -= 1;
                        }
                    } // Found a hit or not.

                    // If we are showing context lines, keep the last n+1 lines.
                    if (SearchSpec.ContextLines > 0)
                    {
                        _contextIndex += 1;
                        if (_contextIndex >= _maxContextLines)
                        {
                            _contextIndex = 0;
                        }

                        _context[_contextIndex] = textLine;
                    }
                }

                // send event file/line hit if we haven't yet but it should be
                if (!_fileNameDisplayed && match != null && DoesPassHitCountCheck(match))
                {
                    // need to display it
                    OnFileHit(file, match.Index);
                    OnLineHit(match, match.Index);
                }

                // send event for file filtered if it fails the file hit count filter
                if (!SearchSpec.UseNegation && !SearchSpec.ReturnOnlyFileNames && match != null && !DoesPassHitCountCheck(match))
                {
                    // remove from grep collection only if
                    // not negation
                    // not filenames only
                    // actually have a hit
                    // doesn't pass the hit count filter
                    MatchResults.RemoveAt(MatchResults.Count - 1);

                    string filterValue = match.HitCount.ToString();
                    FilterItem filterItem = new FilterItem(new FilterType(FilterType.Categories.File, FilterType.SubCategories.MinimumHitCount), userFilterCount.ToString(), FilterType.ValueOptions.None, false, true);
                    OnFileFiltered(file, filterItem, filterValue);
                }

                //
                // Check for no hits through out the file
                //
                if (SearchSpec.UseNegation && _hitOccurred == false)
                {
                    // add the file to the hit list
                    if (!_fileNameDisplayed)
                    {
                        match = new MatchResult(file) { Index = MatchResults.Count, DetectedEncoding = encoding };
                        MatchResults.Add(match);
                        OnFileHit(file, match.Index);
                    }
                }
            }
            finally
            {
                if (_reader != null)
                {
                    _reader.Close();
                }

                if (_stream != null)
                {
                    _stream.Close();
                }
            }
        }