コード例 #1
0
        /// <summary>
        /// Determines the encoding of <paramref name="bytes"/> by running the detectors enabled in
        /// <paramref name="opts"/> in a fixed order: KlerkSoft BOM, KlerkSoft heuristics, then MLang.
        /// The first detector that yields a non-null encoding wins.
        /// </summary>
        /// <param name="bytes">Raw bytes handed to each enabled detector.</param>
        /// <param name="opts">Flags selecting which detectors are allowed to run.</param>
        /// <param name="defaultEncoding">Value returned when no enabled detector produces a result; may be null.</param>
        /// <returns>The detected encoding, or <paramref name="defaultEncoding"/> when nothing was detected.</returns>
        public static Encoding Detect(byte[] bytes, EncodingDetector.Options opts = Options.KlerkSoftBom | Options.MLang, Encoding defaultEncoding = null)
        {
            Encoding detected = null;

            // 1) Byte-order-mark based detection.
            if ((opts & Options.KlerkSoftBom) == Options.KlerkSoftBom)
            {
                const string bomTimer = "DetectEncoding: UsingKlerksSoftBom";

                StopWatch.Start(bomTimer);
                detected = DetectEncodingUsingKlerksSoftBom(bytes);
                StopWatch.Stop(bomTimer);

                if (detected != null)
                {
                    return detected;
                }
            }

            // 2) Heuristic detection.
            if ((opts & Options.KlerkSoftHeuristics) == Options.KlerkSoftHeuristics)
            {
                const string heuristicsTimer = "DetectEncoding: UsingKlerksSoftHeuristics";

                StopWatch.Start(heuristicsTimer);
                detected = DetectEncodingUsingKlerksSoftHeuristics(bytes);
                StopWatch.Stop(heuristicsTimer);

                if (detected != null)
                {
                    return detected;
                }
            }

            // 3) MLang detection; its result may still be null.
            if ((opts & Options.MLang) == Options.MLang)
            {
                const string mlangTimer = "DetectEncoding: UsingMLang";

                StopWatch.Start(mlangTimer);
                detected = DetectEncodingUsingMLang(bytes);
                StopWatch.Stop(mlangTimer);
            }

            // Fall back to the caller-supplied default when every enabled detector came up empty.
            return detected ?? defaultEncoding;
        }
コード例 #2
0
ファイル: Finder.cs プロジェクト: GirusVirus/Extended-Toolkit
        /// <summary>
        /// Searches a single file for <c>FindText</c> and reports the outcome.
        /// A sample of the file is read first; the file is rejected (IsSuccess = false) when the sample
        /// cannot be read, when it is classified as binary (unless SkipBinaryFileDetection is set),
        /// or when no encoding can be detected. Otherwise the whole file is read and matched.
        /// </summary>
        /// <param name="filePath">Path of the file to search; expected to start with <c>Dir</c>.</param>
        /// <returns>A result item describing either the matches found or the reason the file was skipped.</returns>
        private FindResultItem FindInFile(string filePath)
        {
            var item = new FindResultItem
            {
                IsSuccess                  = true,
                IncludeFilesWithoutMatches = IncludeFilesWithoutMatches,
                FileName                   = Path.GetFileName(filePath),
                FilePath                   = filePath,
                // Path relative to the search root, prefixed with "."
                FileRelativePath           = "." + filePath.Substring(Dir.Length)
            };

            const string sampleTimer = "ReadSampleFileContent";
            byte[] sample;

            StopWatch.Start(sampleTimer);

            // Reading the sample doubles as the "can this file be opened at all" probe.
            try
            {
                sample = Utils.ReadFileContentSample(filePath);
            }
            catch (Exception readError)
            {
                StopWatch.Stop(sampleTimer);

                item.IsSuccess    = false;
                item.FailedToOpen = true;
                item.ErrorMessage = readError.Message;
                return item;
            }

            StopWatch.Stop(sampleTimer);

            if (!SkipBinaryFileDetection)
            {
                const string binaryTimer = "IsBinaryFile";

                StopWatch.Start(binaryTimer);
                // check for /0/0/0/0
                bool looksBinary = item.IsSuccess && Utils.IsBinaryFile(sample);
                StopWatch.Stop(binaryTimer);

                if (looksBinary)
                {
                    item.IsSuccess    = false;
                    item.IsBinaryFile = true;
                    return item;
                }
            }

            Encoding detectedEncoding = DetectEncoding(sample);

            if (detectedEncoding == null)
            {
                item.IsSuccess    = false;
                item.FailedToOpen = true;
                item.ErrorMessage = "Could not detect file encoding.";
                return item;
            }

            item.FileEncoding = detectedEncoding;

            const string fullReadTimer = "ReadFullFileContent";
            string content;

            StopWatch.Start(fullReadTimer);

            using (var reader = new StreamReader(filePath, detectedEncoding))
            {
                content = reader.ReadToEnd();
            }

            StopWatch.Stop(fullReadTimer);

            const string matchTimer = "FindMatches";

            StopWatch.Start(matchTimer);

            RegexOptions regexOptions = Utils.GetRegExOptions(IsCaseSensitive);
            item.Matches = Utils.FindMatches(content, FindText, FindTextHasRegEx, UseEscapeChars, regexOptions);

            StopWatch.Stop(matchTimer);

            item.NumMatches = item.Matches.Count;
            return item;
        }
コード例 #3
0
ファイル: Finder.cs プロジェクト: GirusVirus/Extended-Toolkit
        /// <summary>
        /// Runs the search over every file returned by <c>Utils.GetFilesInDirectory</c> for the configured
        /// directory and masks, raising <c>OnFileProcessed</c> after each file with the running statistics.
        /// Stops early once a cancel request is observed (unless the file just processed was the last one).
        /// </summary>
        /// <returns>The result items selected for the results list together with the accumulated statistics.</returns>
        public FindResult Find()
        {
            Verify.Argument.IsNotEmpty(Dir, "Dir");
            Verify.Argument.IsNotEmpty(FileMask, "FileMask");
            Verify.Argument.IsNotEmpty(FindText, "FindText");

            Status status = Status.Processing;

            StopWatch.Start("GetFilesInDirectory");

            // Timestamp taken before the directory listing; passed to stats.UpdateTime below.
            DateTime searchStarted = DateTime.Now;

            string[] filesInDirectory = Utils.GetFilesInDirectory(Dir, FileMask, IncludeSubDirectories, ExcludeFileMask, ExcludeDir);

            var resultItems = new List<FindResultItem>();
            var stats       = new Stats();

            stats.Files.Total = filesInDirectory.Length;

            StopWatch.Stop("GetFilesInDirectory");

            // Timestamp taken once the file list is known; also passed to stats.UpdateTime.
            DateTime processingStarted = DateTime.Now;

            for (int index = 0; index < filesInDirectory.Length; index++)
            {
                stats.Files.Processed++;

                FindResultItem resultItem = FindInFile(filesInDirectory[index]);

                if (!resultItem.IsSuccess)
                {
                    if (resultItem.FailedToOpen)
                    {
                        stats.Files.FailedToRead++;
                    }

                    if (resultItem.IsBinaryFile)
                    {
                        stats.Files.Binary++;
                    }
                }
                else
                {
                    var matchCount = resultItem.Matches.Count;

                    stats.Matches.Found += matchCount;

                    if (matchCount > 0)
                    {
                        stats.Files.WithMatches++;
                    }
                    else
                    {
                        stats.Files.WithoutMatches++;
                    }
                }

                stats.UpdateTime(searchStarted, processingStarted);

                // Update status: a cancel request is recorded first, but finishing the
                // last file takes precedence and reports the run as completed.
                if (IsCancelRequested)
                {
                    status = Status.Cancelled;
                }

                if (stats.Files.Total == stats.Files.Processed)
                {
                    status = Status.Completed;
                }

                // Only items flagged for inclusion end up in the returned list
                if (resultItem.IncludeInResultsList)
                {
                    resultItems.Add(resultItem);
                }

                // Notify listeners for every processed file, included in the list or not
                OnFileProcessed(new FinderEventArgs(resultItem, stats, status, IsSilent));

                if (status == Status.Cancelled)
                {
                    break;
                }
            }

            // No files at all: still raise one event so listeners see the completed state.
            if (filesInDirectory.Length == 0)
            {
                status = Status.Completed;
                OnFileProcessed(new FinderEventArgs(new FindResultItem(), stats, status, IsSilent));
            }

            var findResult = new FindResult
            {
                Items = resultItems,
                Stats = stats
            };

            return findResult;
        }
コード例 #4
0
        /// <summary>
        /// Returns up to maxEncodings codepages that are assumed to be appropriate
        /// </summary>
        /// <param name="input">array containing the raw data</param>
        /// <param name="maxEncodings">maximum number of encodings to detect</param>
        /// <returns>
        /// an array of Encoding with assumed encodings; a single ASCII entry when
        /// <paramref name="input"/> is empty, and an empty array when nothing was detected
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxEncodings"/> is less than 1</exception>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null</exception>
        public static Encoding[] DetectInputCodepages(byte[] input, int maxEncodings)
        {
            // Validate before starting the timer so a rejected call does not leave it running.
            if (maxEncodings < 1)
            {
                // The two-string constructor takes (paramName, message), in that order.
                throw new ArgumentOutOfRangeException("maxEncodings", "at least one encoding must be returend");
            }

            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // Timer names carry the managed thread id, as in the original calls.
            string totalTimer = "DetectInputCodepages_" + Thread.CurrentThread.ManagedThreadId;

            StopWatch.Start(totalTimer);

            // empty strings can always be encoded as ASCII
            if (input.Length == 0)
            {
                // Stop the timer on this early exit as well, so Start/Stop stay paired.
                StopWatch.Stop(totalTimer);
                return new Encoding[] { Encoding.ASCII };
            }

            // expand the input to be at least 256 bytes by repeating it; the last copy is truncated to fit
            const int minSampleLength = 256;

            if (input.Length < minSampleLength)
            {
                byte[] padded = new byte[minSampleLength];

                for (int offset = 0; offset < minSampleLength; offset += input.Length)
                {
                    Array.Copy(input, 0, padded, offset, Math.Min(input.Length, minSampleLength - offset));
                }

                input = padded;
            }

            List<Encoding> result = new List<Encoding>();

            // get the IMultiLanguage2 interface
            IMultiLanguage2 multilang2 = new CMultiLanguageClass();

            if (multilang2 == null)
            {
                throw new System.Runtime.InteropServices.COMException("Failed to get IMultilang2");
            }

            try
            {
                DetectEncodingInfo[] detectedEncodings = new DetectEncodingInfo[maxEncodings];

                // Both values are passed by ref: scores goes in as the capacity of the result
                // array and is read back below as the number of entries to consume.
                int scores = detectedEncodings.Length;
                int srcLen = input.Length;

                // setup options (none)
                MLDETECTCP options = MLDETECTCP.MLDETECTCP_NONE;

                string callTimer = "multilang2.DetectInputCodepage_" + Thread.CurrentThread.ManagedThreadId;

                StopWatch.Start(callTimer);

                // finally... call to DetectInputCodepage
                multilang2.DetectInputCodepage(options, 0,
                                               ref input[0], ref srcLen, ref detectedEncodings[0], ref scores);

                StopWatch.Stop(callTimer);

                // collect the results; the loop simply does not run when scores is zero or negative
                for (int i = 0; i < scores; i++)
                {
                    result.Add(Encoding.GetEncoding((int)detectedEncodings[i].nCodePage));
                }
            }
            finally
            {
                // Release the COM object whether or not detection succeeded.
                Marshal.FinalReleaseComObject(multilang2);
            }

            StopWatch.Stop(totalTimer);

            // empty array when nothing was found
            return result.ToArray();
        }
コード例 #5
0
        /// <summary>
        /// Producer loop: enumerates the files under <c>DirPath</c> for every mask in <c>FileMasks</c>,
        /// drops those matched by <c>IsMatchWithExcludeFileMasks</c>, and hands the rest to either
        /// <c>FileCollection</c> or <c>FileQueue</c> depending on <c>UseBlockingCollection</c>.
        /// Enumeration stops as soon as <c>IsCancelRequested</c> is observed after a file.
        /// </summary>
        private void Run()
        {
            const string runTimer       = "FileGetter.Run";
            const string enumerateTimer = "FileGetter.Run Directory.EnumerateFiles";
            const string excludeTimer   = "FileGetter.Run IsMatchWithExcludeFileMasks";
            const string enqueueTimer   = "FileGetter.Run FileQueue.Enqueue";

            IsCancelled = false;
            _fileCount  = 0;

            StopWatch.Start(runTimer);

            foreach (var mask in FileMasks)
            {
                // NOTE(review): Directory.EnumerateFiles returns a lazily evaluated sequence, so this
                // timer presumably covers only the creation of the enumerable, not the directory walk.
                StopWatch.Start(enumerateTimer);
                var matchingFiles = Directory.EnumerateFiles(DirPath, mask, SearchOption);
                StopWatch.Stop(enumerateTimer);

                foreach (string candidate in matchingFiles)
                {
                    StopWatch.Start(excludeTimer);
                    bool excluded = IsMatchWithExcludeFileMasks(candidate);
                    StopWatch.Stop(excludeTimer);

                    if (!excluded)
                    {
                        _fileCount++;

                        if (!UseBlockingCollection)
                        {
                            StopWatch.Start(enqueueTimer);
                            FileQueue.Enqueue(candidate);
                            StopWatch.Stop(enqueueTimer);
                        }
                        else
                        {
                            // Not timed, unlike the queue path.
                            FileCollection.Add(candidate);
                        }
                    }

                    // Leave the inner loop on cancellation...
                    if (IsCancelRequested)
                    {
                        break;
                    }
                }

                // ...and the outer loop too, so no further masks are enumerated.
                if (IsCancelRequested)
                {
                    break;
                }
            }

            // Publish the final state before signalling completion to the collection.
            if (IsCancelRequested)
            {
                IsCancelled = true;
            }
            else
            {
                IsFileCountFinal = true;
            }

            if (UseBlockingCollection)
            {
                FileCollection.CompleteAdding();
            }

            StopWatch.Stop(runTimer);

            StopWatch.PrintCollection(StopWatch.Collection["FileGetter.Run"].Milliseconds);
        }