/// <summary>
/// Re-encodes <paramref name="file"/> in place using <see cref="Encoding.UTF8"/>
/// when its bytes are rejected by the UTF-8 prober.
/// </summary>
/// <remarks>
/// The file is left untouched when any of the following holds:
/// every byte is in the 0..127 range; the file already starts with the
/// UTF-8 preamble; or <c>UTF8Prober.HandleData</c> returns anything other
/// than <c>ProbingState.NotMe</c>. Otherwise the file is read as code page
/// 1251 and written back.
/// </remarks>
/// <param name="file">Path of the file to inspect and possibly rewrite.</param>
private static void ConvertToUtf8WithBOM(string file)
{
    byte[] raw = File.ReadAllBytes(file);

    // Nothing to do for pure 7-bit content.
    if (!raw.Any(b => b > 127))
    {
        return;
    }

    // Already carries the UTF-8 preamble.
    byte[] utf8Preamble = Encoding.UTF8.GetPreamble();
    if (raw.Take(utf8Preamble.Length).SequenceEqual(utf8Preamble))
    {
        return;
    }

    // Only data the prober explicitly rejects as UTF-8 is converted.
    var prober = new UTF8Prober();
    if (prober.HandleData(raw, 0, raw.Length) != ProbingState.NotMe)
    {
        return;
    }

    Encoding sourceEncoding = Encoding.GetEncoding(1251);
    Console.WriteLine("Converting {0}. {1}", file, sourceEncoding.EncodingName);

    string text = File.ReadAllText(file, sourceEncoding);

    // Locate the first decoded character above 127 so a short excerpt can be logged.
    int firstHit = -1;
    for (int i = 0; i < text.Length; i++)
    {
        if (text[i] > 127)
        {
            firstHit = i;
            break;
        }
    }

    if (firstHit >= 0)
    {
        // Show up to 35 characters, starting at most 5 characters before the hit.
        int start = Math.Max(0, firstHit - 5);
        int count = Math.Min(text.Length - start, 35);
        string excerpt = text.Substring(start, count)
                             .Replace('\r', ' ')
                             .Replace('\n', ' ');
        Console.WriteLine(" non ascii text {0}", excerpt);
    }

    File.WriteAllText(file, text, Encoding.UTF8);
}
/// <summary>
/// Fills the first seven slots of <c>probers</c> with one prober per
/// multi-byte charset, in a fixed order, and then calls <c>Reset()</c>.
/// </summary>
public MBCSGroupProber()
{
    // Slot order: UTF-8, Shift_JIS, EUC-JP, GB18030, EUC-KR, Big5, EUC-TW.
    var slot = 0;
    probers[slot++] = new UTF8Prober();
    probers[slot++] = new SJISProber();
    probers[slot++] = new EUCJPProber();
    probers[slot++] = new GB18030Prober();
    probers[slot++] = new EUCKRProber();
    probers[slot++] = new Big5Prober();
    probers[slot++] = new EUCTWProber();

    Reset();
}