public static long FromUCS(int cp)
        {
            // Maps the UCS code point `cp` to a GB18030 value by walking
            // `ranges` in array order:
            //  - cp inside [UStart, UEnd] of a range is translated linearly
            //    relative to that range's GStart;
            //  - cp below a range's UStart is looked up through ToGbxRaw,
            //    using an index that counts only the code points lying in the
            //    gaps before ranges whose GStart is non-zero.
            // Throws SystemException when cp lies beyond every range.
            long gapEntries = 0;     // code points in the gaps already passed
            long gapStart   = 0x80;  // first code point of the current gap

            foreach (GB18030Map range in ranges)
            {
                if (cp < range.UStart)
                {
                    long rawIndex = cp - gapStart + gapEntries;
                    return ToGbxRaw((int)rawIndex);
                }
                if (cp <= range.UEnd)
                {
                    return cp - range.UStart + range.GStart;
                }
                if (range.GStart == 0)
                {
                    // Ranges with GStart == 0 do not close the current gap.
                    continue;
                }
                gapEntries += range.UStart - gapStart;
                gapStart    = range.UEnd + 1;
            }

            throw new SystemException(String.Format("GB18030 INTERNAL ERROR (should not happen): UCS {0:x06}", cp));
        }
Example #2
0
        // Maps the UCS code point `cp` to a GB18030 value, or returns -1 when
        // cp lies beyond every entry of `ranges`.
        //
        // A code point inside a range is translated linearly from that range's
        // GStart. A code point that precedes a range is resolved through
        // ToGbxRaw with an index counting only the code points in the gaps
        // before ranges whose GStart is non-zero.
        public static long FromUCS(int cp)
        {
            long passedEntries = 0;     // gap code points accumulated so far
            long gapBegin      = 0x80;  // where the current gap starts

            foreach (GB18030Map entry in ranges)
            {
                if (cp < entry.UStart)
                {
                    return ToGbxRaw((int)(cp - gapBegin + passedEntries));
                }
                else if (cp <= entry.UEnd)
                {
                    return cp - entry.UStart + entry.GStart;
                }

                // cp is past this entry; only entries with a non-zero GStart
                // close the current gap and open the next one.
                bool closesGap = entry.GStart != 0;
                if (closesGap)
                {
                    passedEntries += entry.UStart - gapBegin;
                    gapBegin       = entry.UEnd + 1;
                }
            }

            // No range matched: report the code point as invalid.
            return -1;
        }
        // Sizes the two mapping tables from `ranges`, fills them from the
        // assignments in "gb-18030-2000.xml" (via AddMap), and writes the
        // result to "gb18030.table".
        //
        // Output layout: a 4-byte big-endian length of gbx2uniMap, followed by
        // gbx2uniMap, followed by uni2gbxMap.
        void Run()
        {
            int  nextUcs      = 0x80;   // first UCS code point after the previous range
            long nextGbx      = 0;      // first GBX value after the previous mapped range
            int  ucsTotal     = 0;
            long gbxTotal     = 0;
            bool prevUnmapped = false;  // true when the previous range had GStart == 0

            foreach (GB18030Map range in ranges)
            {
                bool mapped = range.GStart != 0;

                // The UCS gap ahead of this range is counted unless the
                // previous range had GStart == 0.
                if (!prevUnmapped)
                {
                    ucsTotal += range.UStart - nextUcs;
                }
                prevUnmapped = !mapped;
                nextUcs      = range.UEnd + 1;

                // GBX gaps are only measured between ranges with a non-zero GStart.
                if (mapped)
                {
                    gbxTotal += range.GStart - nextGbx;
                    nextGbx   = range.GEnd + 1;
                }
            }

            Console.Error.WriteLine("Total UCS codepoints: {0} ({1:X04})", ucsTotal, ucsTotal);
            Console.Error.WriteLine("Total GBX codepoints: {0} ({1:X04})", gbxTotal, gbxTotal);

            // Two bytes are reserved per counted code point in each table.
            uni2gbxMap = new byte [ucsTotal * 2];
            gbx2uniMap = new byte [gbxTotal * 2];

            XmlDocument mapping = new XmlDocument();

            // External resources referenced by the document are not resolved.
            mapping.XmlResolver = null;
            mapping.Load("gb-18030-2000.xml");

            XmlNodeList assignments = mapping.SelectNodes("/characterMapping/assignments/a");
            foreach (XmlElement assignment in assignments)
            {
                AddMap(assignment);
            }

            using (FileStream output = File.Create("gb18030.table"))
            {
                // Encode gbx2uniMap.Length as 4 bytes, most significant first.
                byte [] header    = new byte [4];
                int     remaining = gbx2uniMap.Length;
                for (int pos = 3; pos >= 0; pos--)
                {
                    header [pos] = (byte)(remaining & 0xFF);
                    remaining  >>= 8;
                }

                output.Write(header, 0, 4);
                output.Write(gbx2uniMap, 0, gbx2uniMap.Length);
                output.Write(uni2gbxMap, 0, uni2gbxMap.Length);
            }

            Console.WriteLine("done.");
        }
Example #4
0
        // Decodes the four bytes at bytes[start .. start + 3].
        // Returns a non-negative value on success, or a negative value on
        // failure: -1, -2, -3 or -4 name the first byte (1st..4th) that is
        // outside its allowed interval, and -4 is also returned when all four
        // bytes pass validation but no entry of `ranges` matches.
        public static long FromGBX(byte [] bytes, int start)
        {
            // All four bytes are fetched before any validation takes place.
            byte first  = bytes [start];
            byte second = bytes [start + 1];
            byte third  = bytes [start + 2];
            byte fourth = bytes [start + 3];

            // 1st and 3rd bytes: 0x81..0xFE; 2nd and 4th bytes: 0x30..0x39.
            if (!(first >= 0x81 && first != 0xFF))
            {
                return -1;
            }
            if (!(second >= 0x30 && second <= 0x39))
            {
                return -2;
            }
            if (!(third >= 0x81 && third != 0xFF))
            {
                return -3;
            }
            if (!(fourth >= 0x30 && fourth <= 0x39))
            {
                return -4;
            }

            // A first byte of 0x90 or above bypasses the range walk entirely.
            if (first >= 0x90)
            {
                return FromGBXRaw(first, second, third, fourth, true);
            }

            long linear     = FromGBXRaw(first, second, third, fourth, false);
            long gapEntries = 0;  // GBX values in the gaps already passed
            long gapStart   = 0;  // first GBX value of the current gap

            foreach (GB18030Map range in ranges)
            {
                if (linear < range.GStart)
                {
                    long rawIndex = linear - gapStart + gapEntries;
                    return ToUcsRaw((int)rawIndex);
                }
                if (linear <= range.GEnd)
                {
                    return linear - gbxBase - range.GStart
                           + range.UStart;
                }
                if (range.GStart == 0)
                {
                    // Ranges with GStart == 0 do not close the current gap.
                    continue;
                }
                gapEntries += range.GStart - gapStart;
                gapStart    = range.GEnd + 1;
            }

            // Every byte looked valid, yet no range claimed the value:
            // report it as invalid.
            return -4;
        }
        // Decodes the four bytes at bytes[start .. start + 3].
        // Returns a non-negative value on success; -1, -2, -3 or -4 when the
        // 1st, 2nd, 3rd or 4th byte respectively is outside its allowed
        // interval. Throws SystemException when the bytes pass validation but
        // no entry of `ranges` matches.
        public static long FromGBX(byte [] bytes, int start)
        {
            // All four bytes are fetched before any validation takes place.
            byte lead1  = bytes [start];
            byte digit1 = bytes [start + 1];
            byte lead2  = bytes [start + 2];
            byte digit2 = bytes [start + 3];

            // Lead bytes: 0x81..0xFE; digit bytes: 0x30..0x39.
            if (!(lead1 >= 0x81 && lead1 != 0xFF))
            {
                return -1;
            }
            if (!(digit1 >= 0x30 && digit1 <= 0x39))
            {
                return -2;
            }
            if (!(lead2 >= 0x81 && lead2 != 0xFF))
            {
                return -3;
            }
            if (!(digit2 >= 0x30 && digit2 <= 0x39))
            {
                return -4;
            }

            // A first byte of 0x90 or above bypasses the range walk entirely.
            if (lead1 >= 0x90)
            {
                return FromGBXRaw(lead1, digit1, lead2, digit2, true);
            }

            long linear        = FromGBXRaw(lead1, digit1, lead2, digit2, false);
            long passedEntries = 0;  // GBX values in the gaps already passed
            long gapBegin      = 0;  // first GBX value of the current gap

            foreach (GB18030Map entry in ranges)
            {
                if (linear < entry.GStart)
                {
                    return ToUcsRaw((int)(linear - gapBegin + passedEntries));
                }
                else if (linear <= entry.GEnd)
                {
                    return linear - gbxBase - entry.GStart
                           + entry.UStart;
                }

                // Only entries with a non-zero GStart close the current gap.
                bool closesGap = entry.GStart != 0;
                if (closesGap)
                {
                    passedEntries += entry.GStart - gapBegin;
                    gapBegin       = entry.GEnd + 1;
                }
            }

            throw new SystemException(String.Format("GB18030 INTERNAL ERROR (should not happen): GBX {0:x02} {1:x02} {2:x02} {3:x02}", lead1, digit1, lead2, digit2));
        }
        // Returns the zero-based position of `gbx` among the GBX values that
        // fall in the gaps between ranges with a non-zero GStart, or -1 when
        // gbx does not precede any such range.
        // NOTE(review): presumably an entry index into the GBX-to-UCS table;
        // confirm against the callers.
        static int IndexForGbx(int gbx)
        {
            long gapStart   = 0;  // first GBX value of the current gap
            long gapEntries = 0;  // GBX values in the gaps already passed

            foreach (GB18030Map range in ranges)
            {
                // Ranges with GStart == 0 are ignored altogether.
                if (range.GStart != 0)
                {
                    if (gbx < range.GStart)
                    {
                        return (int)(gapEntries + gbx - gapStart);
                    }
                    gapEntries += range.GStart - gapStart;
                    gapStart    = range.GEnd + 1;
                }
            }

            return -1;
        }
        // Returns the zero-based position of `ucs` among the UCS code points
        // (starting at 0x80) that fall in the counted gaps between ranges, or
        // -1 when ucs does not precede any range that is checked.
        // The gap in front of a range is neither checked nor counted when the
        // previous range had GStart == 0.
        // NOTE(review): presumably an entry index into the UCS-to-GBX table;
        // confirm against the callers.
        static int IndexForUcs(int ucs)
        {
            int  gapStart      = 0x80;   // first code point of the current gap
            int  gapEntries    = 0;      // code points in the gaps already counted
            bool afterUnmapped = false;  // previous range had GStart == 0

            foreach (GB18030Map range in ranges)
            {
                bool gapCounts = !afterUnmapped;

                if (gapCounts && ucs < range.UStart)
                {
                    return gapEntries + ucs - gapStart;
                }
                if (gapCounts)
                {
                    gapEntries += range.UStart - gapStart;
                }

                afterUnmapped = range.GStart == 0;
                gapStart      = range.UEnd + 1;
            }

            return -1;
        }