Ejemplo n.º 1
0
 /// <summary>
 /// Checks that the CUE sheet at <c>FullPath</c> is detected as UTF-8 and starts with the
 /// UTF-8 byte order mark, recording the outcome in <c>State</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the file is detected as UTF-8 and begins with EF BB BF (even if the
 /// CUE match check then sets <c>State</c> to <c>InValidCue</c>); otherwise <c>false</c>.
 /// </returns>
 private bool CheckCUE()
 {
     State = FileState.ValidFile;

     Encode = EncodingDetector.GetEncoding(FullPath, out _confidence);
     if (Encode != "UTF-8")
     {
         State = FileState.InValidEncode;
         return false;
     }

     using (var input = File.OpenRead(FullPath))
     {
         // A file shorter than three bytes leaves the unread slots at zero,
         // so the BOM comparison below simply fails for it.
         var head = new byte[3];
         input.Read(head, 0, head.Length);

         bool startsWithBom = head[0] == 0xEF
                              && head[1] == 0xBB
                              && head[2] == 0xBF;
         if (startsWithBom)
         {
             // The match check runs while the file is still open for reading.
             bool cueMatches = CueCurer.CueMatchCheck(this);
             if (!cueMatches)
             {
                 State = FileState.InValidCue;
             }
             return true;
         }
     }

     // Detected as UTF-8 but without the byte order mark.
     State = FileState.NonUTF8WBOM;
     return false;
 }
        /// <summary>
        /// Detects the encoding of the configured data file, parses and analyses it, then
        /// writes the result to <c>report.txt</c> and <c>report.xlsx</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read directly by this method.</param>
        static void Main(string[] args)
        {
            var parser = new CsvParser();
            var config = BuildConfiguration();

            // Registered before detection so code-page encodings can be resolved.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            var dataFilePath = config["data-file-path"];
            var encoding     = EncodingDetector.GetEncoding(dataFilePath);
            var data         = parser.ParseFile(dataFilePath, encoding);

            var parseError = parser.ErrorMessage;
            if (parseError != null)
            {
                Console.WriteLine("Parsing failed");
                Console.WriteLine(parseError);
            }

            // NOTE(review): execution continues with `data` even after a parse error was
            // reported — confirm this is intended before relying on the generated reports.
            var minNumberForAdvStat = Convert.ToUInt32(config["min-number-for-adv-stat"]);
            var report              = DataAnalyser.Analyze(data, minNumberForAdvStat);

            using (var textOutput = new FileStream("report.txt", FileMode.Create))
            {
                TextReporter.ToStream(textOutput, report);
            }

            var excelReporter = new ExcelReport.ExcelReporter();
            using (var excelOutput = new FileStream("report.xlsx", FileMode.Create))
            {
                excelReporter.ToStream(excelOutput, report);
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Prints, for each file in the encode sample directory, whether it is detected as
 /// UTF-8 together with the detection confidence.
 /// </summary>
 public void IsUTF8Test()
 {
     string[] samplePaths = Directory.GetFiles(@"..\..\[Encode Sample]");

     foreach (string samplePath in samplePaths)
     {
         string detected = EncodingDetector.GetEncoding(samplePath, out float confidence);
         bool   isUtf8   = detected == "UTF-8";
         string fileName = Path.GetFileName(samplePath);

         Console.WriteLine($"{fileName}: {isUtf8} confidence: {confidence:F3}");
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Encodes the shared test text with the given code page and verifies that the
        /// detector reports an encoding with that same code page.
        /// </summary>
        /// <param name="codepage">Code page used to encode the input and expected back from detection.</param>
        public void TestDetectingCodepages(int codepage)
        {
            // Arrange
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            var sourceEncoding = Encoding.GetEncoding(codepage);
            var input          = Common.CreateStreamFromText(Common.TestResultsDataString, sourceEncoding);

            // Act
            var detected = EncodingDetector.GetEncoding(input);

            // Assert
            Assert.Equal(codepage, detected.CodePage);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Prints the detected encoding and confidence for every file in the two sample
        /// directories, processing the directories in the order listed.
        /// </summary>
        public void EncodeTest()
        {
            string[] sampleDirectories =
            {
                @"..\..\[Encode Sample]",
                @"..\..\[Encoding All Star]",
            };

            foreach (string directory in sampleDirectories)
            {
                foreach (string samplePath in Directory.GetFiles(directory))
                {
                    string fileName = Path.GetFileName(samplePath);
                    var    detected = EncodingDetector.GetEncoding(samplePath, out float confidence);

                    Console.WriteLine($"{fileName}: {detected} ({confidence:F3})");
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs the extension-specific inspection for this file and records the outcome in
        /// <c>State</c>, <c>Suffix</c> and <c>Encode</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is inspected when <c>BaseValidation()</c> returns <c>true</c>. The FLAC,
        /// CUE, log and file-header checks each run only when their inspection option is
        /// enabled; the PNG check is not guarded by an option.
        /// </remarks>
        private void FileValidation()
        {
            if (BaseValidation())
            {
                return;
            }

            switch (Extension)
            {
                case ".flac":
                    if (GlobalConfiguration.Instance().InspectionOptions.FLACCompressRate)
                    {
                        Flac    = FlacData.GetMetadataFromFlac(FullPath);
                        Suffix += $"[{Flac.CompressRate * 100:00.00}%]";
                        if (Flac.IsHiRes)
                        {
                            Suffix += "[HR]";
                        }
                        if (Flac.HasCover)
                        {
                            Suffix += "[图]";
                        }
                        Encode = Flac.Encoder;

                        // A ratio above 0.9 may indicate an uncompressed file.
                        if (Flac.CompressRate > 0.9)
                        {
                            State = FileState.InValidFlacLevel;
                        }
                    }
                    break;

                case ".cue":
                    if (GlobalConfiguration.Instance().InspectionOptions.CUEEncoding)
                    {
                        CheckCUE();
                    }
                    break;

                case ".log":
                {
                    if (!GlobalConfiguration.Instance().InspectionOptions.LogValidation)
                    {
                        break;
                    }

                    Logger.Log(Logger.Level.Info, $"Log check for '{FullPath}'");
                    Encode = EncodingDetector.GetEncoding(FullPath, out var detectionConfidence);

                    // Skip signature verification when the encoding guess is below 0.9 confidence.
                    if (detectionConfidence < 0.9)
                    {
                        break;
                    }

                    var logEncoding = System.Text.Encoding.GetEncoding(Encode);
                    var logText     = File.ReadAllText(FullPath, logEncoding);
                    var entryNumber = 1;

                    foreach (var (_, recordedSignature, computedSignature) in LogChecker.Core.eac_verify(logText))
                    {
                        if (recordedSignature == "")
                        {
                            Logger.Log(Logger.Level.Debug, $"No signature found, it could be '{computedSignature}'");
                        }
                        else if (recordedSignature != computedSignature)
                        {
                            Logger.Log(Logger.Level.Debug, $"Expect signature '{computedSignature}', but get '{recordedSignature}'");
                            State = FileState.TamperedLog;
                        }
                        else
                        {
                            Logger.Log(Logger.Level.Fine, $"{entryNumber}. Log entry is fine!");
                            entryNumber++;
                        }
                    }
                    break;
                }

                case ".png":
                {
                    Logger.Log(Logger.Level.Info, $"Png check for '{FullPath}'");
                    var png = PngData.GetMetadataFrom(FullPath);
                    Suffix += $"[{png.CompressRate * 100:00.00}%]";

                    // A ratio above 0.9 may indicate an uncompressed file.
                    // NOTE(review): this reuses the FLAC-named state for PNG files — confirm
                    // that is intended and not a copy/paste leftover.
                    if (png.CompressRate > 0.9)
                    {
                        State = FileState.InValidFlacLevel;
                    }
                    break;
                }

                default:
                    if (GlobalConfiguration.Instance().InspectionOptions.FileHeader
                        && !FileHeader.Check(FullPath))
                    {
                        State = FileState.InValidFileSignature;
                    }
                    break;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Verifies that data starting with a byte order mark is reported as having a BOM
        /// and as the encoding that mark stands for.
        /// </summary>
        /// <param name="bom">Byte order mark written over the start of the sample data.</param>
        /// <param name="expectedEncoding">Encoding the detector is expected to report.</param>
        public void GetEncoding_WhenDataHasBom(byte[] bom, EncodingType expectedEncoding)
        {
            // Arrange: overwrite the beginning of the sample payload with the BOM under test.
            byte[] payload = GetData();
            bom.CopyTo(payload, 0);
            CreateSut();

            // Act
            (var detectedEncoding, bool bomDetected) = sut.GetEncoding(payload, payload.Length);

            // Assert
            Assert.True(bomDetected);
            Assert.Equal(expectedEncoding, detectedEncoding);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Determines the charset name for a file by combining an HTML <c>&lt;meta&gt;</c>
        /// charset declaration (if one is found near the top of the file) with the result
        /// of <c>EncodingDetector.GetEncoding</c>.
        /// </summary>
        /// <param name="filePath">Path of the file to inspect.</param>
        /// <param name="defaultEncoding">
        /// Charset name used when neither a declaration nor automatic detection yields one.
        /// </param>
        /// <returns>The chosen charset name, upper-cased with invariant-culture rules.</returns>
        /// <remarks>
        /// The automatically detected charset replaces a declared one when it is "UTF-8".
        /// Casing is done culture-invariantly because tag and charset names are
        /// machine-readable identifiers; culture-sensitive casing (for example under a
        /// Turkish culture) would turn "ISO" into "ıso" / "İSO" and break matching.
        /// </remarks>
        private string DetectEncoding(string filePath, string defaultEncoding)
        {
            string declaredCharset = null;

            int  nonEmptyLines = 0;
            bool isHtml        = false;

            foreach (var line in File.ReadLines(filePath))
            {
                var lineContent = line.ToLowerInvariant().Trim();

                // Empty lines are skipped and do not count towards the limit below.
                if (lineContent.Length == 0)
                {
                    continue;
                }

                // Give up on reaching the 12th non-empty line without having seen <html.
                if (nonEmptyLines++ > 10 && !isHtml)
                {
                    break;
                }

                if (lineContent.Contains("<html"))
                {
                    isHtml = true;
                }

                // Once <body> starts, no charset declaration is looked for any more.
                if (lineContent.Contains("<body"))
                {
                    break;
                }

                if (!isHtml)
                {
                    continue;
                }

                // Inside an HTML document: try the HTML4-style declaration first, then HTML5.
                var match = Html4CharsetRegex.Match(lineContent);
                if (match.Success && match.Groups.Count == 2)
                {
                    declaredCharset = match.Groups[1].Value;
                    break;
                }

                match = Html5CharsetRegex.Match(lineContent);
                if (match.Success && match.Groups.Count == 2)
                {
                    declaredCharset = match.Groups[1].Value;
                    break;
                }
            }

            var autoDetectedCharset = EncodingDetector.GetEncoding(filePath);

            // Prefer automatic detection when nothing was declared or when it says UTF-8;
            // fall back to the caller's default if detection produced nothing.
            if (declaredCharset == null || autoDetectedCharset == "UTF-8")
            {
                declaredCharset = autoDetectedCharset ?? defaultEncoding;
            }

            return declaredCharset.ToUpperInvariant();
        }