Example #1
        /// <summary>
        /// Examines one UnicodeData.txt record and, when its character name denotes a variant of
        /// <paramref name="cleanLetter"/>, registers that code point as mapping to the plain letter.
        /// </summary>
        /// <param name="cleanLetter">Plain letter whose byte value is stored as the mapping target.</param>
        /// <param name="filterText">
        /// Name prefix placed before the upper-cased letter, e.g. <c>"LATIN SMALL LETTER "</c>.
        /// </param>
        /// <param name="lx">
        /// Fields of one record: <c>lx[0]</c> is the hexadecimal code point, <c>lx[1]</c> the character name.
        /// Records with fewer than two fields are ignored.
        /// </param>
        static void ProcessLine(char cleanLetter, string filterText, string[] lx)
        {
            // A blank or truncated record has no name field to inspect.
            if (lx == null || lx.Length < 2)
            {
                return;
            }

            // Character names are ASCII identifiers, so casing and suffix matching must not depend on
            // the current culture (culture-sensitive upper-casing turns 'i' into U+0130 under tr-TR,
            // which would make every "... LETTER I" record fail to match).
            string baseName = filterText + Char.ToUpperInvariant(cleanLetter);
            string charName = lx[1];

            bool isDecoratedLetter = charName.Contains(baseName + " WITH");
            bool isBareLetter      = charName.EndsWith(baseName, StringComparison.Ordinal);
            if (!isDecoratedLetter && !isBareLetter)
            {
                return;
            }

            // Skip records whose code point field is not valid hexadecimal rather than
            // carrying on with the default value 0.
            if (!Int32.TryParse(lx[0], NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out int point))
            {
                return;
            }

            string utf16Char  = Char.ConvertFromUtf32(point);
            byte[] utf16Bytes = Encoding.Unicode.GetBytes(utf16Char);
            byte[] cp1252Byte = Encoding.Convert(Encoding.Unicode, cp1252, utf16Bytes);

            // Only code points that convert to zero bytes get a fallback entry.
            // NOTE(review): this presumably relies on cp1252 being created with an empty replacement
            // fallback so unmappable characters produce no output - confirm where cp1252 is declared.
            if (cp1252Byte.Length == 0)
            {
                GenMap1252.Map1252Add(point, (byte)cleanLetter, lx[1]);
            }
        }
Example #2
        static void Main(string[] args)
            // See comment at top for usage.
            using (var db = new StreamReader("UnicodeData.txt"))
                while (!db.EndOfStream)

            for (byte[] b1252 = new byte[] { 0 }; ; ++b1252[0])
                string utf16 = cp1252.GetString(b1252);
                int    p32   = Char.ConvertToUtf32(utf16, 0);
                var    row   = udb.First(xx => Int32.Parse(xx[0], NumberStyles.AllowHexSpecifier) == p32);
                if (p32 != b1252[0])
                    GenMap1252.Map1252Add(p32, b1252[0], row[1] + "**");

                if (b1252[0] == 0xFF)
            var totalExactMaps = GenMap1252.map1252.Count;

            foreach (var lx in udb)
                if (lx[1].Contains("LATIN"))
                    for (var ch = 'A'; ch <= 'Z'; ++ch)
                        ProcessLine(ch, "LATIN CAPITAL LETTER ", lx);
                    for (var ch = 'a'; ch <= 'z'; ++ch)
                        ProcessLine(ch, "LATIN SMALL LETTER ", lx);

            // There are many more potential custom remaps like these:
            Map1252Add(Char.ConvertToUtf32("⁓", 0), (byte)'~');
            Map1252Add(Char.ConvertToUtf32("‒", 0), (byte)'-');
            Map1252Add(Char.ConvertToUtf32("―", 0), (byte)'-');

            Console.WriteLine("// Generated from v8.0 of www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt");
            Console.WriteLine("// Total = " + map1252.Count + ", Scrubbed = " + (map1252.Count - totalExactMaps));
            var b1        = new byte[1];
            var countdown = map1252.Count;

            foreach (var kv in map1252)
                char delim = --countdown == 0? ' ': ',';
                b1[0] = kv.Value.Octet;
                char[] char1252 = cp1252.GetChars(b1);
                Console.WriteLine("0x" + kv.Key.ToString("X6") + kv.Value.Octet.ToString("X2") + delim + " // " + kv.Value.Desc);

            /* Output:
             * 0x00010041, // LATIN CAPITAL LETTER A WITH MACRON
             * 0x00010161, // LATIN SMALL LETTER A WITH MACRON
             * .
             * .
             * .
             * 0x0E007979, // TAG LATIN SMALL LETTER Y
             * 0x0E007A7A  // TAG LATIN SMALL LETTER Z