예제 #1
0
		/// <summary>
		/// Rewrites <paramref name="file"/> in place using <c>Encoding.UTF8</c> when its
		/// bytes are judged not to be UTF-8.
		/// </summary>
		/// <remarks>
		/// The file is left untouched when every byte is 7-bit ASCII, when it already
		/// starts with the UTF-8 preamble, or when <c>UTF8Prober.HandleData</c> returns
		/// anything other than <c>ProbingState.NotMe</c>. In the remaining case the file
		/// is decoded as code page 1251, a short preview around the first non-ASCII
		/// character is printed, and the text is written back.
		/// </remarks>
		/// <param name="file">Path of the file to inspect and, if needed, rewrite.</param>
		private static void ConvertToUtf8WithBOM(string file)
		{
			var raw = File.ReadAllBytes(file);

			// Pure 7-bit content needs no conversion (an empty file also exits here).
			if (!raw.Any(b => b > 127))
			{
				return;
			}

			// Already carries the UTF-8 byte order mark.
			var bom = Encoding.UTF8.GetPreamble();
			if (bom.SequenceEqual(raw.Take(bom.Length)))
			{
				return;
			}

			// Only files the prober explicitly rejects as UTF-8 are converted.
			var prober = new UTF8Prober();
			if (prober.HandleData(raw, 0, raw.Length) != ProbingState.NotMe)
			{
				return;
			}

			var sourceEncoding = Encoding.GetEncoding(1251);
			Console.WriteLine("Converting {0}. {1}", file, sourceEncoding.EncodingName);
			var text = File.ReadAllText(file, sourceEncoding);

			// Locate the first character outside the ASCII range, if any.
			var firstNonAsciiAt = -1;
			for (var i = 0; i < text.Length; i++)
			{
				if (text[i] > 127)
				{
					firstNonAsciiAt = i;
					break;
				}
			}

			if (firstNonAsciiAt >= 0)
			{
				// Show up to 35 characters, starting at most 5 before the hit, on one line.
				var start = Math.Max(0, firstNonAsciiAt - 5);
				var length = Math.Min(text.Length - start, 35);
				var preview = text.Substring(start, length).Replace('\r', ' ').Replace('\n', ' ');
				Console.WriteLine("  non ascii text {0}", preview);
			}

			File.WriteAllText(file, text, Encoding.UTF8);
		}
예제 #2
0
 /// <summary>
 /// Fills the first seven entries of <c>probers</c> with one prober instance each,
 /// in a fixed order, and then calls <c>Reset()</c>.
 /// </summary>
 public MBCSGroupProber()
 {
     // A running slot index keeps the registration order explicit:
     // UTF8, SJIS, EUCJP, GB18030, EUCKR, Big5, EUCTW occupy slots 0 through 6.
     var slot = 0;
     probers[slot++] = new UTF8Prober();
     probers[slot++] = new SJISProber();
     probers[slot++] = new EUCJPProber();
     probers[slot++] = new GB18030Prober();
     probers[slot++] = new EUCKRProber();
     probers[slot++] = new Big5Prober();
     probers[slot++] = new EUCTWProber();

     Reset();
 }
예제 #3
0
 /// <summary>
 /// Fills the first seven entries of <c>probers</c> with one prober instance each,
 /// in a fixed order, and then calls <c>Reset()</c>.
 /// </summary>
 public MBCSGroupProber()
 {
     // A running slot index keeps the registration order explicit:
     // UTF8, SJIS, EUCJP, GB18030, EUCKR, Big5, EUCTW occupy slots 0 through 6.
     var slot = 0;
     probers[slot++] = new UTF8Prober();
     probers[slot++] = new SJISProber();
     probers[slot++] = new EUCJPProber();
     probers[slot++] = new GB18030Prober();
     probers[slot++] = new EUCKRProber();
     probers[slot++] = new Big5Prober();
     probers[slot++] = new EUCTWProber();

     Reset();
 }