Example #1
0
        /// <summary>
        /// Encodes <paramref name="text"/> into bytes using the configured character sets.
        /// </summary>
        /// <remarks>
        /// When exactly one character set is configured the text is encoded with it directly.
        /// Otherwise the text is encoded one character at a time:
        /// delimiter bytes returned by <c>GetTokens(vr)</c> are written verbatim and reset the
        /// active set to the first configured one; when the active set encodes a character as a
        /// lone '?', the set returned by <c>FindCharacterSet</c> becomes active and is announced
        /// with ESC (0x1b) followed by that set's <c>G1</c> bytes.
        /// </remarks>
        /// <param name="text">The text to encode.</param>
        /// <param name="vr">Value representation, passed to <c>GetTokens</c> to obtain the delimiter bytes.</param>
        /// <returns>The encoded bytes.</returns>
        public byte[] GetBytes(string text, String vr)
        {
            if (encodings.Count == 1)
            {
                return encodings[0].Encoding.GetBytes(text);
            }

            CharacterSet set = encodings[0];
            List<byte> result = new List<byte>();
            List<byte> tokens = GetTokens(vr);

            foreach (char c in text)
            {
                // Only characters that fit in a single byte can be delimiters. Without the range
                // check the narrowing cast would make e.g. U+015C look like '\' (0x5C).
                if (c <= 0xFF && tokens.Contains((byte)c))
                {
                    result.Add((byte)c);
                    set = encodings[0];
                    continue;
                }

                string single = new String(c, 1);
                byte[] fragment = set.Encoding.GetBytes(single);

                // A lone '?' is taken as the sign that the active set could not represent the
                // character (presumably the encoder's replacement output). A genuine '?' in the
                // text produces the same byte and must not trigger a character set switch.
                if (c != '?' && fragment.Length == 1 && fragment[0] == (byte)'?')
                {
                    set = FindCharacterSet(c);

                    // Announce the newly selected set: ESC followed by its G1 bytes.
                    // NOTE(review): assumes FindCharacterSet returns a set with a non-null G1.
                    result.Add(0x1b);
                    result.AddRange(set.G1);

                    fragment = set.Encoding.GetBytes(single);
                }

                result.AddRange(fragment);
            }

            return result.ToArray();
        }
Example #2
0
        /// <summary>
        /// Rebuilds the <c>encodings</c> list from a character set specification.
        /// </summary>
        /// <remarks>
        /// The list is reset before the argument is validated. A null value or an empty array
        /// selects the default character set (the empty code). Null entries inside a
        /// multi-valued array are treated like empty entries.
        /// </remarks>
        /// <param name="value">
        /// Either null, a single character set code, or an array of character set codes.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="value"/> is neither null, a string, nor a string[].
        /// </exception>
        protected void parse(object value)
        {
            encodings = new List<CharacterSet>();

            if (!(value is string[]) && !(value is string) && value != null)
            {
                throw new ArgumentException("Expecting a null, string, or string[].", "value");
            }

            string[] sets = value as string[];
            if (value == null || (sets != null && sets.Length == 0))
            {
                // Nothing specified. An empty array is handled like null so that at least one
                // encoding is always registered; otherwise the list would stay empty.
                sets = new string[] { String.Empty };
            }
            else if (value is string)
            {
                sets = new string[] { value as String };
            }
            // treat "ISO 2022 IR 6\ISO 2022 IR 87" and "\ISO 2022 IR 87" as just "ISO 2022 IR 87"
            else if (sets.Length == 2 && (sets[0] == "ISO_IR 6" || sets[0] == "ISO 2022 IR 6" || sets[0] == String.Empty) && sets[1] == "ISO 2022 IR 87")
            {
                sets = new string[] { "ISO 2022 IR 87" };
            }
            // treat "ISO 2022 IR 149" as "ISO 2022 IR 6\ISO 2022 IR 149"
            else if (sets.Length == 1 && (sets[0] == "ISO_IR 149" || sets[0] == "ISO 2022 IR 149"))
            {
                sets = new string[] { "ISO 2022 IR 6", "ISO 2022 IR 149" };
            }
            // currently multiple character sets are not fully supported: unless the first entry
            // is ASCII (or empty) fall back to the default character set
            else if (sets.Length > 1 && (sets[0] != "ISO_IR 6" && sets[0] != "ISO 2022 IR 6" && sets[0] != String.Empty))
            {
                sets = new string[] { "" };
            }

            if (sets.Length == 1)
            {
                encodings.Add(CharacterSet.Create(sets[0]));
                return;
            }

            foreach (string set in sets)
            {
                // TODO there are certain rules here for what is legal when vm > 1
                // 6.1.2.5.4 Levels of Implementation and Initial Designation, part 5
                if (String.IsNullOrEmpty(set))
                {
                    // A missing entry stands for ASCII; null is accepted as well so that the
                    // Replace call below cannot fail on it.
                    encodings.Add(CharacterSet.Create("ISO 2022 IR 6"));
                }
                else
                {
                    // coerce character sets to ISO 2022
                    encodings.Add(CharacterSet.Create(set.Replace("_IR", " 2022 IR")));
                }
            }
        }
Example #3
0
        /// <summary>
        /// Decodes <paramref name="bytes"/> into text using the configured character sets.
        /// </summary>
        /// <remarks>
        /// When exactly one character set is configured the whole buffer is decoded with it.
        /// Otherwise the buffer is split at delimiter bytes (from <c>GetTokens(vr)</c>) and at
        /// ESC (0x1b) bytes. Each run in between is decoded with the active encoding. A delimiter
        /// is appended as a character and resets the active encoding to the first configured set;
        /// an escape sequence selects the encoding returned by <c>FindCharacterSet</c> for its key.
        /// An escape sequence that is cut off at the end of the buffer ends decoding.
        /// </remarks>
        /// <param name="bytes">The encoded bytes.</param>
        /// <param name="vr">Value representation, passed to <c>GetTokens</c> to obtain the delimiter bytes.</param>
        /// <returns>The decoded text.</returns>
        public string GetString(byte[] bytes, String vr)
        {
            if (encodings.Count == 1)
            {
                return encodings[0].Encoding.GetString(bytes);
            }

            StringBuilder text = new StringBuilder();

            Encoding encoding = encodings[0].Encoding;
            List<byte> tokens = GetTokens(vr);
            byte[] fragment;
            int n, start;
            for (n = 0, start = 0; n < bytes.Length; n++)
            {
                byte b = bytes[n];
                if (!tokens.Contains(b) && b != 0x1b)
                {
                    continue;
                }

                // Flush the run collected since the previous delimiter / escape sequence.
                if (n - start > 0)
                {
                    fragment = new byte[n - start];
                    Buffer.BlockCopy(bytes, start, fragment, 0, n - start);
                    text.Append(encoding.GetString(fragment));
                }

                if (b == 0x1b)
                {
                    // Number of bytes that follow the ESC byte.
                    int remaining = bytes.Length - 1 - n;
                    if (remaining < 3)
                    {
                        // Too short to hold an escape sequence plus any data: stop here and
                        // discard the tail.
                        return text.ToString();
                    }

                    byte[] key;
                    if (bytes[n + 1] == 0x24 && bytes[n + 2] == 0x29 && bytes[n + 3] == 0x43)
                    {
                        // Three-byte sequence "$ ) C". It is recognised even when it is the very
                        // last thing in the buffer; previously that case was misread as a
                        // two-byte key and the trailing 'C' was decoded as text.
                        key = new byte[] { bytes[n + 1], bytes[n + 2], bytes[n + 3] };
                        n += 3;
                    }
                    else
                    {
                        // Every other sequence is treated as two bytes long.
                        key = new byte[] { bytes[n + 1], bytes[n + 2] };
                        n += 2;
                    }

                    string temp = CharacterSet.GetKey(key);
                    encoding = FindCharacterSet(temp).Encoding;
                }
                else
                {
                    // TODO if we are using Japanese maybe '\' must be '? as per spec
                    text.Append((char)b);
                    encoding = encodings[0].Encoding;
                }

                start = n + 1;
            }

            // Flush whatever follows the last delimiter / escape sequence.
            if (n - start > 0)
            {
                fragment = new byte[n - start];
                Buffer.BlockCopy(bytes, start, fragment, 0, n - start);
                text.Append(encoding.GetString(fragment));
            }

            return text.ToString();
        }
Example #4
0
        /*
         *
         * JIS X 0201
         * ISO-IR 13 katakana
         * ISO-IR 14 7-bit romaji
         * JIS X 0208
         * ISO-IR 87 kanji (ideographic), hiragana (phonetic), and katakana
         * JIS X 0212
         * ISO-IR 159 kanji (ideographic)
         *
         *    ISO 2022 IR 6\ISO 2022 IR 87\ISO 2022 IR 100\ISO 2022 IR 159\ISO 2022 IR 13
         *    ASCII\JIS X 0208\Latin-1\JIS X 0212\JIS X 0201
         *
         *
         * ISO-2022-JP
         * ESC ( B     ASCII
         * ESC ( J     JIS X 0201-1976 (ISO/IEC 646:JP) Roman set
         * ESC $ @     JIS X 0208-1978
         * ESC $ B     JIS X 0208-1983
         * ISO-2022-JP-1 The same as ISO-2022-JP with one additional escape sequence
         * ESC $ ( D   JIS X 0212-1990
         * ISO-2022-JP-2 The same as ISO-2022-JP-1 with the following additional escape sequences
         * ESC $ A     GB 2312-1980
         * ESC $ ( C   KS X 1001-1992 (2 bytes per character)
         * ESC - A     ISO/IEC 8859-1 high part, Extended Latin 1 set
         * ESC - F     ISO/IEC 8859-7 high part, Basic Greek set
         * ISO-2022-JP-3 The same as ISO-2022-JP with three additional escape sequences
         * ESC ( I     JIS X 0201-1976 Kana set
         * ESC $ ( O   JIS X 0213-2000 Plane 1
         * ESC $ ( P   JIS X 0213-2000 Plane 2
         * ISO-2022-JP-2004 The same as ISO-2022-JP-3 with one additional escape sequence
         * ESC $ ( Q   JIS X 0213-2004 Plane 1
         *
         *
         *
         * ISO-2022-KR
         * ESC $ ) C   KS X 1001-1992 previously named KS C 5601-1987
         * ISO-2022-CN
         * ESC $ ( A   GB 2312-1980
         * ESC $ ( G   CNS 11643-1992 Plane 1
         * ESC $ ( H   CNS 11643-1992 Plane 2
         * ISO-2022-CN-EXT The same as ISO-2022-CN
         * ESC $ ( E   ISO-IR-165
         * ESC $ ( I   CNS 11643-1992 Plane 3
         * ESC $ ( J   CNS 11643-1992 Plane 4
         * ESC $ ( K   CNS 11643-1992 Plane 5
         * ESC $ ( L   CNS 11643-1992 Plane 6
         * ESC $ ( M   CNS 11643-1992 Plane 7
         *
         */
        /// <summary>
        /// Builds the <see cref="CharacterSet"/> that corresponds to a character set code.
        /// </summary>
        /// <param name="code">
        /// The character set code, e.g. "ISO_IR 100" or "ISO 2022 IR 100". The empty string and
        /// any code that is not listed below yield a set backed by <c>DefaultEncoding</c>.
        /// </param>
        /// <returns>A new <see cref="CharacterSet"/> for <paramref name="code"/>.</returns>
        public static CharacterSet Create(string code)
        {
            switch (code)
            {
            // ---- Chinese ----
            case "GB18030":
                return new CharacterSet(code, Encoding.GetEncoding("GB18030"));

            // ---- Japanese ----
            // Disabled mappings, kept for reference:
            //case "ISO_IR 13":
            //    return new CharacterSet(code, Encoding.GetEncoding("iso-2022-jp"));
            //case "ISO 2022 IR 13":
            //    return new CharacterSet(code, Encoding.GetEncoding("iso-2022-jp"), new byte[] {0x28, 0x4a}, new byte[] {0x29, 0x49});
            case "ISO 2022 IR 87":
                return new CharacterSet(code, Encoding.GetEncoding("iso-2022-jp"), new byte[] { 0x24, 0x42 }, new byte[] { 0x24, 0x42 });

            case "ISO 2022 IR 159":
                return new CharacterSet(code, Encoding.GetEncoding("EUC-JP"), null, null);

            // ---- Latin-1 ----
            case "ISO_IR 100":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-1"));

            case "ISO 2022 IR 100":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-1"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x41 });

            // ---- Latin-2 ----
            case "ISO_IR 101":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-2"));

            case "ISO 2022 IR 101":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-2"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x42 });

            // ---- Latin-3 ----
            case "ISO_IR 109":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-3"));

            case "ISO 2022 IR 109":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-3"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x43 });

            // ---- Latin-4 ----
            case "ISO_IR 110":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-4"));

            case "ISO 2022 IR 110":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-4"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x44 });

            // ---- Greek ----
            case "ISO_IR 126":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-7"));

            case "ISO 2022 IR 126":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-7"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x46 });

            // ---- Arabic ----
            case "ISO_IR 127":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-6"));

            case "ISO 2022 IR 127":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-6"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x47 });

            // ---- Hebrew ----
            case "ISO_IR 138":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-8"));

            case "ISO 2022 IR 138":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-8"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x48 });

            // ---- Cyrillic ----
            case "ISO_IR 144":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-5"));

            case "ISO 2022 IR 144":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-5"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x4c });

            // ---- Latin-5 ----
            case "ISO_IR 148":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-9"));

            case "ISO 2022 IR 148":
                return new CharacterSet(code, Encoding.GetEncoding("iso-8859-9"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x4d });

            // ---- Korean ----
            case "ISO_IR 149":
                return new CharacterSet(code, Encoding.GetEncoding("ks_c_5601-1987"));

            case "ISO 2022 IR 149":
                // Disabled alternative, kept for reference:
                //return new CharacterSet(code, Encoding.GetEncoding("ks_c_5601-1987"), null, new byte[] { 0x24, 0x29, 0x43 });
                return new CharacterSet(code, Encoding.GetEncoding("EUC-KR"), null, new byte[] { 0x24, 0x29, 0x43 });

            // ---- Thai ----
            case "ISO_IR 166":
                return new CharacterSet(code, Encoding.GetEncoding("windows-874"));

            // Disabled mapping, kept for reference:
            //case "ISO 2022 IR 166":
            //    return new CharacterSet(code, Encoding.GetEncoding("windows-874"), new byte[] { 0x28, 0x42 }, new byte[] { 0x2d, 0x54 });

            // ---- UTF-8 ----
            case "ISO_IR 192":
                return new CharacterSet(code, Encoding.GetEncoding("UTF-8"));

            // ---- ASCII ----
            case "ISO_IR 6":
                return new CharacterSet(code, Encoding.GetEncoding("us-ascii"));

            case "ISO 2022 IR 6":
                return new CharacterSet(code, Encoding.GetEncoding("us-ascii"), new byte[] { 0x28, 0x42 }, null);

            // ---- empty or unrecognised code ----
            case "":
            default:
                return new CharacterSet(code, new DefaultEncoding());
            }
        }