Exemplo n.º 1
0
        /// <summary>
        /// Fills the first seven slots of <c>probers</c> with fresh prober instances
        /// and then calls <c>Reset()</c>.
        /// </summary>
        public MBCSGroupProber()
        {
            // A running index keeps the slot order explicit:
            // UTF8, SJIS, EUCJP, GB18030, EUCKR, Big5, EUCTW occupy slots 0..6.
            var slot = 0;

            probers[slot++] = new UTF8Prober();
            probers[slot++] = new SJISProber();
            probers[slot++] = new EUCJPProber();
            probers[slot++] = new GB18030Prober();
            probers[slot++] = new EUCKRProber();
            probers[slot++] = new Big5Prober();
            probers[slot++] = new EUCTWProber();

            Reset();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Rewrites <paramref name="file"/> in place as UTF-8 (written through
        /// <c>Encoding.UTF8</c>, which emits a byte order mark) when its bytes are rejected
        /// by <c>UTF8Prober</c>; the existing content is then decoded as code page 1251.
        /// </summary>
        /// <remarks>
        /// The file is left untouched when it is empty or pure ASCII, when it already starts
        /// with the UTF-8 preamble, or when the prober returns anything other than
        /// <c>ProbingState.NotMe</c>.
        /// </remarks>
        /// <param name="file">Path of the file to inspect and, if needed, convert.</param>
        private static void ConvertToUtf8WithBOM(string file)
        {
            // Code page assumed for any content that is not UTF-8.
            // NOTE(review): on .NET Core / .NET 5+ this code page is only available once
            // CodePagesEncodingProvider has been registered - confirm for the target runtime.
            const int fallbackCodePage = 1251;

            var bytes = File.ReadAllBytes(file);

            // Pure ASCII (which includes an empty file) needs no conversion.
            if (bytes.All(b => b <= 127))
            {
                return;
            }

            // Already UTF-8 with a BOM: nothing to do.
            var preamble = Encoding.UTF8.GetPreamble();
            if (bytes.Take(preamble.Length).SequenceEqual(preamble))
            {
                return;
            }

            // NOTE(review): content the prober does not reject is skipped, so BOM-less
            // UTF-8 files never receive a BOM despite the method name - confirm intended.
            var prober = new UTF8Prober();
            if (prober.HandleData(bytes, 0, bytes.Length) != ProbingState.NotMe)
            {
                return;
            }

            var encoding = Encoding.GetEncoding(fallbackCodePage);
            Console.WriteLine("Converting {0}. {1}", file, encoding.EncodingName);

            // Decode the bytes that were just probed instead of reading the file a second
            // time, so detection and conversion always see the same content. BOM detection
            // stays enabled, matching what File.ReadAllText(path, encoding) does.
            string content;
            using (var reader = new StreamReader(new MemoryStream(bytes), encoding, true))
            {
                content = reader.ReadToEnd();
            }

            // Locate the first non-ASCII character to print a short context preview.
            var firstNonAscii = -1;
            for (var i = 0; i < content.Length; i++)
            {
                if (content[i] > 127)
                {
                    firstNonAscii = i;
                    break;
                }
            }

            if (firstNonAscii >= 0)
            {
                // Show up to 35 characters, starting at most 5 before the hit.
                var index = Math.Max(0, firstNonAscii - 5);
                var len   = Math.Min(content.Length - index, 35);
                Console.WriteLine("  non ascii text {0}", content.Substring(index, len).Replace('\r', ' ').Replace('\n', ' '));
            }

            File.WriteAllText(file, content, Encoding.UTF8);
        }