/// <summary>
/// Detects the encoding of <paramref name="bytes"/> by running the strategies enabled in
/// <paramref name="opts"/> in a fixed order: KlerkSoft BOM, KlerkSoft heuristics, then MLang.
/// The first strategy that yields a non-null encoding wins.
/// </summary>
/// <param name="bytes">Raw bytes handed to each enabled detection strategy.</param>
/// <param name="opts">Flags selecting which strategies run; defaults to BOM plus MLang.</param>
/// <param name="defaultEncoding">Value returned when no enabled strategy produced an encoding (may be null).</param>
/// <returns>The detected encoding, or <paramref name="defaultEncoding"/> when nothing was detected.</returns>
public static Encoding Detect(byte[] bytes, EncodingDetector.Options opts = Options.KlerkSoftBom | Options.MLang, Encoding defaultEncoding = null)
{
    const string bomTimer = "DetectEncoding: UsingKlerksSoftBom";
    const string heuristicsTimer = "DetectEncoding: UsingKlerksSoftHeuristics";
    const string mlangTimer = "DetectEncoding: UsingMLang";

    bool bomEnabled = (opts & Options.KlerkSoftBom) == Options.KlerkSoftBom;
    bool heuristicsEnabled = (opts & Options.KlerkSoftHeuristics) == Options.KlerkSoftHeuristics;
    bool mlangEnabled = (opts & Options.MLang) == Options.MLang;

    // Strategy 1: byte order mark.
    if (bomEnabled)
    {
        StopWatch.Start(bomTimer);
        Encoding fromBom = DetectEncodingUsingKlerksSoftBom(bytes);
        StopWatch.Stop(bomTimer);

        if (fromBom != null)
        {
            return fromBom;
        }
    }

    // Strategy 2: heuristics, only reached when the BOM step found nothing.
    if (heuristicsEnabled)
    {
        StopWatch.Start(heuristicsTimer);
        Encoding fromHeuristics = DetectEncodingUsingKlerksSoftHeuristics(bytes);
        StopWatch.Stop(heuristicsTimer);

        if (fromHeuristics != null)
        {
            return fromHeuristics;
        }
    }

    // Strategy 3: MLang, with the caller-supplied default as the final fallback.
    Encoding fromMLang = null;
    if (mlangEnabled)
    {
        StopWatch.Start(mlangTimer);
        fromMLang = DetectEncodingUsingMLang(bytes);
        StopWatch.Stop(mlangTimer);
    }

    return fromMLang ?? defaultEncoding;
}
/// <summary>
/// Searches a single file for <c>FindText</c> and describes the outcome in a <c>FindResultItem</c>.
/// </summary>
/// <param name="filePath">
/// Path of the file to search. The relative path is derived by dropping the first
/// <c>Dir.Length</c> characters, so the path is expected to start with <c>Dir</c>.
/// </param>
/// <returns>
/// An item with <c>IsSuccess == true</c> and <c>Matches</c>/<c>NumMatches</c> filled in, or an item
/// with <c>IsSuccess == false</c> and either <c>IsBinaryFile</c> or <c>FailedToOpen</c> (plus
/// <c>ErrorMessage</c>) set. Read failures are reported through the item rather than thrown.
/// </returns>
private FindResultItem FindInFile(string filePath)
{
    var resultItem = new FindResultItem
    {
        IsSuccess = true,
        IncludeFilesWithoutMatches = IncludeFilesWithoutMatches,
        FileName = Path.GetFileName(filePath),
        FilePath = filePath,
        FileRelativePath = "." + filePath.Substring(Dir.Length)
    };

    // Read a sample first: it doubles as the "can this file be read at all" probe and
    // feeds the binary check and the encoding detection below.
    byte[] sampleBytes;
    StopWatch.Start("ReadSampleFileContent");
    try
    {
        sampleBytes = Utils.ReadFileContentSample(filePath);
    }
    catch (Exception exception)
    {
        resultItem.IsSuccess = false;
        resultItem.FailedToOpen = true;
        resultItem.ErrorMessage = exception.Message;
        return(resultItem);
    }
    finally
    {
        StopWatch.Stop("ReadSampleFileContent");
    }

    if (!SkipBinaryFileDetection)
    {
        bool isBinary;
        StopWatch.Start("IsBinaryFile");
        try
        {
            // Original note: "check for /0/0/0/0" - presumably a run of NUL bytes; see Utils.IsBinaryFile.
            isBinary = Utils.IsBinaryFile(sampleBytes);
        }
        finally
        {
            StopWatch.Stop("IsBinaryFile");
        }

        if (isBinary)
        {
            resultItem.IsSuccess = false;
            resultItem.IsBinaryFile = true;
            return(resultItem);
        }
    }

    Encoding encoding = DetectEncoding(sampleBytes);
    if (encoding == null)
    {
        resultItem.IsSuccess = false;
        resultItem.FailedToOpen = true;
        resultItem.ErrorMessage = "Could not detect file encoding.";
        return(resultItem);
    }

    resultItem.FileEncoding = encoding;

    string fileContent;
    StopWatch.Start("ReadFullFileContent");
    try
    {
        using (var reader = new StreamReader(filePath, encoding))
        {
            fileContent = reader.ReadToEnd();
        }
    }
    catch (Exception exception)
    {
        // The file can be deleted, locked or lose permissions after the sample read succeeded.
        // Report that the same way as a failed sample read instead of aborting the whole search.
        resultItem.IsSuccess = false;
        resultItem.FailedToOpen = true;
        resultItem.ErrorMessage = exception.Message;
        return(resultItem);
    }
    finally
    {
        StopWatch.Stop("ReadFullFileContent");
    }

    StopWatch.Start("FindMatches");
    try
    {
        RegexOptions regexOptions = Utils.GetRegExOptions(IsCaseSensitive);
        resultItem.Matches = Utils.FindMatches(fileContent, FindText, FindTextHasRegEx, UseEscapeChars, regexOptions);
    }
    finally
    {
        // Errors from the matcher still propagate; the timer just is not left running.
        StopWatch.Stop("FindMatches");
    }

    resultItem.NumMatches = resultItem.Matches.Count;

    return(resultItem);
}
/// <summary>
/// Runs the search over every file returned by <c>Utils.GetFilesInDirectory</c> for the configured
/// directory, masks and exclusions, raising the file-processed event once per file.
/// </summary>
/// <returns>
/// A <c>FindResult</c> holding the items whose <c>IncludeInResultsList</c> is set, together with
/// the accumulated statistics.
/// </returns>
public FindResult Find()
{
    Verify.Argument.IsNotEmpty(Dir, "Dir");
    Verify.Argument.IsNotEmpty(FileMask, "FileMask");
    Verify.Argument.IsNotEmpty(FindText, "FindText");

    Status currentStatus = Status.Processing;

    StopWatch.Start("GetFilesInDirectory");

    DateTime searchStarted = DateTime.Now;
    string[] paths = Utils.GetFilesInDirectory(Dir, FileMask, IncludeSubDirectories, ExcludeFileMask, ExcludeDir);

    List<FindResultItem> includedItems = new List<FindResultItem>();
    Stats stats = new Stats();
    stats.Files.Total = paths.Length;

    StopWatch.Stop("GetFilesInDirectory");

    DateTime processingStarted = DateTime.Now;

    // Nothing to search: listeners still get exactly one "completed" notification.
    if (paths.Length == 0)
    {
        currentStatus = Status.Completed;
        OnFileProcessed(new FinderEventArgs(new FindResultItem(), stats, currentStatus, IsSilent));

        return(new FindResult { Items = includedItems, Stats = stats });
    }

    for (int index = 0; index < paths.Length; index++)
    {
        stats.Files.Processed++;

        FindResultItem item = FindInFile(paths[index]);

        if (!item.IsSuccess)
        {
            // The two failure flags are counted independently of each other.
            if (item.FailedToOpen)
            {
                stats.Files.FailedToRead++;
            }

            if (item.IsBinaryFile)
            {
                stats.Files.Binary++;
            }
        }
        else
        {
            var matchCount = item.Matches.Count;
            stats.Matches.Found += matchCount;

            if (matchCount > 0)
            {
                stats.Files.WithMatches++;
            }
            else
            {
                stats.Files.WithoutMatches++;
            }
        }

        stats.UpdateTime(searchStarted, processingStarted);

        // Completion takes precedence over a cancel request that arrives on the last file.
        bool cancelRequested = IsCancelRequested;
        if (stats.Files.Total == stats.Files.Processed)
        {
            currentStatus = Status.Completed;
        }
        else if (cancelRequested)
        {
            currentStatus = Status.Cancelled;
        }

        // Only items flagged for inclusion end up in the result list.
        if (item.IncludeInResultsList)
        {
            includedItems.Add(item);
        }

        // The event is raised for every processed file, listed or not.
        OnFileProcessed(new FinderEventArgs(item, stats, currentStatus, IsSilent));

        if (currentStatus == Status.Cancelled)
        {
            break;
        }
    }

    return(new FindResult { Items = includedItems, Stats = stats });
}
/// <summary>
/// Returns up to <paramref name="maxEncodings"/> code pages that MLang reports for the given raw data.
/// </summary>
/// <param name="input">Array containing the raw data.</param>
/// <param name="maxEncodings">Maximum number of encodings to detect; must be at least 1.</param>
/// <returns>
/// An array with the assumed encodings, empty when MLang reported none. Empty input
/// always yields a single-element array containing <see cref="Encoding.ASCII"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
{
    // Validate before any timer is started so a rejected call leaves no stopwatch running.
    if (maxEncodings < 1)
    {
        // Constructor order is (paramName, message).
        throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returned");
    }

    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    string totalTimer = "DetectInputCodepages_" + Thread.CurrentThread.ManagedThreadId;
    StopWatch.Start(totalTimer);
    try
    {
        // empty strings can always be encoded as ASCII
        if (input.Length == 0)
        {
            return new Encoding[] { Encoding.ASCII };
        }

        // Expand short input to exactly 256 bytes by repeating it (the last copy is truncated).
        // NOTE(review): presumably MLang needs a minimum sample size to detect reliably - confirm.
        if (input.Length < 256)
        {
            byte[] padded = new byte[256];
            for (int offset = 0; offset < padded.Length; offset += input.Length)
            {
                Array.Copy(input, 0, padded, offset, Math.Min(input.Length, padded.Length - offset));
            }

            input = padded;
        }

        List<Encoding> result = new List<Encoding>();

        // get the IMultiLanguage2 interface
        IMultiLanguage2 multilang2 = new CMultiLanguageClass();
        if (multilang2 == null)
        {
            // Defensive check kept from the original implementation.
            throw new System.Runtime.InteropServices.COMException("Failed to get IMultilang2");
        }

        try
        {
            DetectEncodingInfo[] detectedEncodings = new DetectEncodingInfo[maxEncodings];

            // Passed by reference: holds the array capacity going in and is read back
            // afterwards as the number of entries to use.
            int scores = detectedEncodings.Length;
            int srcLen = input.Length;

            // setup options (none)
            MLDETECTCP options = MLDETECTCP.MLDETECTCP_NONE;

            string comTimer = "multilang2.DetectInputCodepage_" + Thread.CurrentThread.ManagedThreadId;
            StopWatch.Start(comTimer);
            try
            {
                multilang2.DetectInputCodepage(options, 0, ref input[0], ref srcLen, ref detectedEncodings[0], ref scores);
            }
            finally
            {
                StopWatch.Stop(comTimer);
            }

            // Translate every reported code page into an Encoding instance.
            for (int i = 0; i < scores; i++)
            {
                result.Add(Encoding.GetEncoding((int)detectedEncodings[i].nCodePage));
            }
        }
        finally
        {
            Marshal.FinalReleaseComObject(multilang2);
        }

        return(result.ToArray());
    }
    finally
    {
        // Runs on every exit path, including the empty-input return and exceptions.
        StopWatch.Stop(totalTimer);
    }
}
/// <summary>
/// Enumerates the files under <c>DirPath</c> for each mask in <c>FileMasks</c>, skips those for
/// which <c>IsMatchWithExcludeFileMasks</c> returns true, and hands the rest to
/// <c>FileCollection</c> or <c>FileQueue</c> depending on <c>UseBlockingCollection</c>.
/// Stops early when <c>IsCancelRequested</c> becomes true and records the outcome in
/// <c>IsCancelled</c> / <c>IsFileCountFinal</c>.
/// </summary>
private void Run()
{
    const string runTimer = "FileGetter.Run";
    const string enumerateTimer = "FileGetter.Run Directory.EnumerateFiles";
    const string excludeTimer = "FileGetter.Run IsMatchWithExcludeFileMasks";
    const string enqueueTimer = "FileGetter.Run FileQueue.Enqueue";

    IsCancelled = false;
    _fileCount = 0;

    StopWatch.Start(runTimer);

    foreach (var mask in FileMasks)
    {
        StopWatch.Start(enumerateTimer);
        var candidates = Directory.EnumerateFiles(DirPath, mask, SearchOption);
        StopWatch.Stop(enumerateTimer);

        foreach (string candidate in candidates)
        {
            StopWatch.Start(excludeTimer);
            bool excluded = IsMatchWithExcludeFileMasks(candidate);
            StopWatch.Stop(excludeTimer);

            if (!excluded)
            {
                _fileCount++;

                if (!UseBlockingCollection)
                {
                    StopWatch.Start(enqueueTimer);
                    FileQueue.Enqueue(candidate);
                    StopWatch.Stop(enqueueTimer);
                }
                else
                {
                    FileCollection.Add(candidate);
                }
            }

            // Cancellation is checked after every file, excluded or not.
            if (IsCancelRequested)
            {
                break;
            }
        }

        // Re-checked here so a cancel request also ends the loop over the masks.
        if (IsCancelRequested)
        {
            break;
        }
    }

    // The file count is only declared final when the enumeration was not cancelled.
    if (!IsCancelRequested)
    {
        IsFileCountFinal = true;
    }
    else
    {
        IsCancelled = true;
    }

    if (UseBlockingCollection)
    {
        FileCollection.CompleteAdding();
    }

    StopWatch.Stop(runTimer);
    StopWatch.PrintCollection(StopWatch.Collection["FileGetter.Run"].Milliseconds);
}