Exemplo n.º 1
0
        public void RegexEncoding2()
        {
            var SJIS = RubyEncoding.KCodeSJIS.StrictEncoding;

            // 1.9 encodings:
            var invalidUtf8 = MutableString.CreateBinary(new byte[] { 0x80 }, RubyEncoding.UTF8);

            AssertExceptionThrown <ArgumentException>(() => new RubyRegex(invalidUtf8, RubyRegexOptions.NONE));

            // LastMatch

            MatchData m;
            var       u = MutableString.CreateBinary(SJIS.GetBytes("あああ"), RubyEncoding.KCodeSJIS);
            var       p = MutableString.CreateBinary(SJIS.GetBytes("あ{2}"), RubyEncoding.KCodeSJIS);

            var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

            // /あ{2}/ matches "あああ", the resulting index is in bytes:
            m = rs.LastMatch(null, u);
            Assert(m != null && m.Index == 2);

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("あ")), RubyRegexOptions.SJIS);

            // "start at" in the middle of a character:
            m = rs.LastMatch(null, u, 0);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 1);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 2);
            Assert(m != null && m.Index == 2);

            m = rs.LastMatch(null, u, 3);
            Assert(m != null && m.Index == 2);

            // Split

            u  = MutableString.CreateBinary(SJIS.GetBytes("あちあちあ"), RubyEncoding.UTF8);
            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち")), RubyRegexOptions.SJIS);
            var parts = rs.Split(null, u);

            Assert(parts.Length == 3);
            foreach (var part in parts)
            {
                Assert(part.Encoding == RubyEncoding.KCodeSJIS);
                Assert(part.ToString() == "あ");
            }

            // groups

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち(a(あ+)(b+))+あ")), RubyRegexOptions.SJIS);
            u  = MutableString.CreateBinary(SJIS.GetBytes("ちaああbaあbbbあ"));

            m = rs.Match(null, u);
            Assert(m.GroupCount == 4);

            int s, l;

            Assert(m.GetGroupStart(0) == (s = 0));
            Assert(m.GetGroupLength(0) == (l = u.GetByteCount()));
            Assert(m.GetGroupEnd(0) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(1) == (s = SJIS.GetByteCount("ちaああb")));
            Assert(m.GetGroupLength(1) == (l = SJIS.GetByteCount("aあbbb")));
            Assert(m.GetGroupEnd(1) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(2) == (s = SJIS.GetByteCount("ちaああba")));
            Assert(m.GetGroupLength(2) == (l = SJIS.GetByteCount("あ")));
            Assert(m.GetGroupEnd(2) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(3) == (s = SJIS.GetByteCount("ちaああbaあ")));
            Assert(m.GetGroupLength(3) == (l = SJIS.GetByteCount("bbb")));
            Assert(m.GetGroupEnd(3) == s + l);
        }
Exemplo n.º 2
0
        public void RegexEncoding2() {
            var SJIS = RubyEncoding.KCodeSJIS.StrictEncoding;

            // 1.9 encodings:
            var invalidUtf8 = MutableString.CreateBinary(new byte[] { 0x80 }, RubyEncoding.UTF8);
            AssertExceptionThrown<ArgumentException>(() => new RubyRegex(invalidUtf8, RubyRegexOptions.NONE));

            // LastMatch

            MatchData m;
            var u = MutableString.CreateBinary(SJIS.GetBytes("あああ"), RubyEncoding.KCodeSJIS);
            var p = MutableString.CreateBinary(SJIS.GetBytes("あ{2}"), RubyEncoding.KCodeSJIS);

            var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

            // /あ{2}/ matches "あああ", the resulting index is in bytes:
            m = rs.LastMatch(null, u);
            Assert(m != null && m.Index == 2);

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("あ")), RubyRegexOptions.SJIS);

            // "start at" in the middle of a character:
            m = rs.LastMatch(null, u, 0);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 1);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 2);
            Assert(m != null && m.Index == 2);

            m = rs.LastMatch(null, u, 3);
            Assert(m != null && m.Index == 2);

            // Split
            
            u = MutableString.CreateBinary(SJIS.GetBytes("あちあちあ"), RubyEncoding.UTF8);
            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち")), RubyRegexOptions.SJIS);
            var parts = rs.Split(null, u);
            Assert(parts.Length == 3);
            foreach (var part in parts) {
                Assert(part.Encoding == RubyEncoding.KCodeSJIS);
                Assert(part.ToString() == "あ");
            }

            // groups

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち(a(あ+)(b+))+あ")), RubyRegexOptions.SJIS);
            u = MutableString.CreateBinary(SJIS.GetBytes("ちaああbaあbbbあ"));

            m = rs.Match(null, u);
            Assert(m.GroupCount == 4);

            int s, l;
            Assert(m.GetGroupStart(0) == (s = 0));
            Assert(m.GetGroupLength(0) == (l = u.GetByteCount()));
            Assert(m.GetGroupEnd(0) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(1) == (s = SJIS.GetByteCount("ちaああb")));
            Assert(m.GetGroupLength(1) == (l = SJIS.GetByteCount("aあbbb")));
            Assert(m.GetGroupEnd(1) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(2) == (s = SJIS.GetByteCount("ちaああba")));
            Assert(m.GetGroupLength(2) == (l = SJIS.GetByteCount("あ")));
            Assert(m.GetGroupEnd(2) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(3) == (s = SJIS.GetByteCount("ちaああbaあ")));
            Assert(m.GetGroupLength(3) == (l = SJIS.GetByteCount("bbb")));
            Assert(m.GetGroupEnd(3) == s + l);
        }
Exemplo n.º 3
0
        public void RegexEncoding1()
        {
            MatchData m;

            // the k-coding of the pattern string is irrelevant:
            foreach (var pe in new[] { RubyEncoding.Binary })
            {
                var p = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, (byte)'{', (byte)'2', (byte)'}' }, pe);

                var r  = new RubyRegex(p, RubyRegexOptions.NONE);
                var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

                // the k-coding of the string is irrelevant:
                foreach (var se in new[] { RubyEncoding.Binary })
                {
                    var s = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, 0xa0 }, se);
                    var t = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, 0xa0, 0x82, 0xa0, 0xa0, 0xff }, se);
                    var u = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, 0x82, 0xa0, 0x82, 0xa0 }, se);

                    // /あ{2}/ does not match "あ\xa0"
                    m = r.Match(RubyEncoding.KCodeSJIS, s);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "[ \x82\xa0\xa0 ] \x82\xa0\xa0\xff"
                    m = r.Match(null, s);
                    Assert(m != null && m.Index == 0);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff" starting from byte #1:
                    m = r.Match(null, t, 1, false);
                    Assert(m != null && m.Index == 3 && m.Length == 3);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(null, s);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, s);
                    Assert(m == null);

                    // /あ{2}/s matches "ああ\xff", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, u, 2, false);
                    Assert(m != null && m.Index == 2 && m.Length == 4);


                    // /あ{2}/ does not match "あ\xa0あ\xa0"
                    m = r.LastMatch(RubyEncoding.KCodeSJIS, t);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff"
                    m = r.LastMatch(null, t);
                    Assert(m != null && m.Index == 3);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(null, t);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(RubyEncoding.KCodeUTF8, t);
                    Assert(m == null);
                }
            }
        }
Exemplo n.º 4
0
        public void RegexEncoding1() {
            MatchData m;
            // the k-coding of the pattern string is irrelevant:
            foreach (var pe in new[] { RubyEncoding.KCodeSJIS, RubyEncoding.KCodeUTF8, RubyEncoding.Binary }) {
                var p = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, (byte)'{', (byte)'2', (byte)'}' }, pe);

                var r = new RubyRegex(p, RubyRegexOptions.NONE);
                var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

                // the k-coding of the string is irrelevant:
                foreach (var se in new[] { RubyEncoding.KCodeSJIS, RubyEncoding.KCodeUTF8, RubyEncoding.Binary }) {
                    var s = MutableString.CreateBinary(new byte[] { 0x82, 0xa0,  0xa0 }, se);
                    var t = MutableString.CreateBinary(new byte[] { 0x82, 0xa0,  0xa0,  0x82, 0xa0,  0xa0, 0xff }, se);
                    var u = MutableString.CreateBinary(new byte[] { 0x82, 0xa0,  0x82, 0xa0,  0x82, 0xa0 }, se);

                    // /あ{2}/ does not match "あ\xa0"
                    m = r.Match(RubyEncoding.KCodeSJIS, s);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "[ \x82\xa0\xa0 ] \x82\xa0\xa0\xff"
                    m = r.Match(null, s);
                    Assert(m != null && m.Index == 0);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff" starting from byte #1:
                    m = r.Match(null, t, 1, false);
                    Assert(m != null && m.Index == 3 && m.Length == 3);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(null, s);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, s);
                    Assert(m == null);

                    // /あ{2}/s matches "ああ\xff", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, u, 2, false);
                    Assert(m != null && m.Index == 2 && m.Length == 4);


                    // /あ{2}/ does not match "あ\xa0あ\xa0"
                    m = r.LastMatch(RubyEncoding.KCodeSJIS, t);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff"
                    m = r.LastMatch(null, t);
                    Assert(m != null && m.Index == 3);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(null, t);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(RubyEncoding.KCodeUTF8, t);
                    Assert(m == null);
                }
            }
        }