예제 #1
0
        private bool Match(RubyRegex /*!*/ pattern, bool currentPositionOnly, bool advancePosition)
        {
            // TODO: repeated calls on the same ScanString can be optimized:
            MatchData match = pattern.Match(null, _scanString, _currentPosition, false);

            _lastMatch          = null;
            _lastMatchingGroups = null;
            _foundPosition      = 0;
            if (match == null)
            {
                return(false);
            }
            if (currentPositionOnly && match.Index != _currentPosition)
            {
                return(false);
            }
            int length = (match.Index - _currentPosition) + match.Length;

            _foundPosition      = match.Index;
            _previousPosition   = _currentPosition;
            _lastMatch          = _scanString.GetSlice(_foundPosition, match.Length);
            _lastMatchingGroups = match;
            if (advancePosition)
            {
                _currentPosition += length;
            }
            return(true);
        }
예제 #2
0
        private bool Match(RubyRegex /*!*/ pattern, bool currentPositionOnly, bool advancePosition)
        {
            Match match = pattern.Match(_scanString, _currentPosition);

            _lastMatch          = null;
            _lastMatchingGroups = null;
            _foundPosition      = 0;
            if (!match.Success)
            {
                return(false);
            }
            if (currentPositionOnly && match.Index != _currentPosition)
            {
                return(false);
            }
            int length = (match.Index - _currentPosition) + match.Length;

            _foundPosition      = match.Index;
            _previousPosition   = _currentPosition;
            _lastMatch          = _scanString.GetSlice(_foundPosition, match.Length);
            _lastMatchingGroups = match.Groups;
            if (advancePosition)
            {
                _currentPosition += length;
            }
            return(true);
        }
예제 #3
0
 private bool Match(RubyRegex/*!*/ pattern, bool currentPositionOnly, bool advancePosition)
 {
     // TODO: repeated calls on the same ScanString can be optimized:
     MatchData match = pattern.Match(_scanString, _currentPosition, false);
     _lastMatch = null;
     _lastMatchingGroups = null;
     _foundPosition = 0;
     if (match == null) {
         return false;
     }
     if (currentPositionOnly && match.Index != _currentPosition) {
         return false;
     }
     int length = (match.Index - _currentPosition) + match.Length;
     _foundPosition = match.Index;
     _previousPosition = _currentPosition;
     _lastMatch = _scanString.GetSlice(_foundPosition, match.Length);
     _lastMatchingGroups = match;
     if (advancePosition) {
         _currentPosition += length;
     }
     return true;
 }
예제 #4
0
        public void RegexEncoding2()
        {
            var SJIS = RubyEncoding.KCodeSJIS.StrictEncoding;

            // 1.9 encodings:
            var invalidUtf8 = MutableString.CreateBinary(new byte[] { 0x80 }, RubyEncoding.UTF8);

            AssertExceptionThrown <ArgumentException>(() => new RubyRegex(invalidUtf8, RubyRegexOptions.NONE));

            // LastMatch

            MatchData m;
            var       u = MutableString.CreateBinary(SJIS.GetBytes("あああ"), RubyEncoding.KCodeSJIS);
            var       p = MutableString.CreateBinary(SJIS.GetBytes("あ{2}"), RubyEncoding.KCodeSJIS);

            var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

            // /あ{2}/ matches "あああ", the resulting index is in bytes:
            m = rs.LastMatch(null, u);
            Assert(m != null && m.Index == 2);

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("あ")), RubyRegexOptions.SJIS);

            // "start at" in the middle of a character:
            m = rs.LastMatch(null, u, 0);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 1);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 2);
            Assert(m != null && m.Index == 2);

            m = rs.LastMatch(null, u, 3);
            Assert(m != null && m.Index == 2);

            // Split

            u  = MutableString.CreateBinary(SJIS.GetBytes("あちあちあ"), RubyEncoding.UTF8);
            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち")), RubyRegexOptions.SJIS);
            var parts = rs.Split(null, u);

            Assert(parts.Length == 3);
            foreach (var part in parts)
            {
                Assert(part.Encoding == RubyEncoding.KCodeSJIS);
                Assert(part.ToString() == "あ");
            }

            // groups

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち(a(あ+)(b+))+あ")), RubyRegexOptions.SJIS);
            u  = MutableString.CreateBinary(SJIS.GetBytes("ちaああbaあbbbあ"));

            m = rs.Match(null, u);
            Assert(m.GroupCount == 4);

            int s, l;

            Assert(m.GetGroupStart(0) == (s = 0));
            Assert(m.GetGroupLength(0) == (l = u.GetByteCount()));
            Assert(m.GetGroupEnd(0) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(1) == (s = SJIS.GetByteCount("ちaああb")));
            Assert(m.GetGroupLength(1) == (l = SJIS.GetByteCount("aあbbb")));
            Assert(m.GetGroupEnd(1) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(2) == (s = SJIS.GetByteCount("ちaああba")));
            Assert(m.GetGroupLength(2) == (l = SJIS.GetByteCount("あ")));
            Assert(m.GetGroupEnd(2) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(3) == (s = SJIS.GetByteCount("ちaああbaあ")));
            Assert(m.GetGroupLength(3) == (l = SJIS.GetByteCount("bbb")));
            Assert(m.GetGroupEnd(3) == s + l);
        }
예제 #5
0
        public void RegexEncoding1()
        {
            MatchData m;

            // the k-coding of the pattern string is irrelevant:
            foreach (var pe in new[] { RubyEncoding.Binary })
            {
                var p = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, (byte)'{', (byte)'2', (byte)'}' }, pe);

                var r  = new RubyRegex(p, RubyRegexOptions.NONE);
                var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

                // the k-coding of the string is irrelevant:
                foreach (var se in new[] { RubyEncoding.Binary })
                {
                    var s = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, 0xa0 }, se);
                    var t = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, 0xa0, 0x82, 0xa0, 0xa0, 0xff }, se);
                    var u = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, 0x82, 0xa0, 0x82, 0xa0 }, se);

                    // /あ{2}/ does not match "あ\xa0"
                    m = r.Match(RubyEncoding.KCodeSJIS, s);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "[ \x82\xa0\xa0 ] \x82\xa0\xa0\xff"
                    m = r.Match(null, s);
                    Assert(m != null && m.Index == 0);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff" starting from byte #1:
                    m = r.Match(null, t, 1, false);
                    Assert(m != null && m.Index == 3 && m.Length == 3);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(null, s);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, s);
                    Assert(m == null);

                    // /あ{2}/s matches "ああ\xff", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, u, 2, false);
                    Assert(m != null && m.Index == 2 && m.Length == 4);


                    // /あ{2}/ does not match "あ\xa0あ\xa0"
                    m = r.LastMatch(RubyEncoding.KCodeSJIS, t);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff"
                    m = r.LastMatch(null, t);
                    Assert(m != null && m.Index == 3);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(null, t);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(RubyEncoding.KCodeUTF8, t);
                    Assert(m == null);
                }
            }
        }
예제 #6
0
 private bool Match(RubyRegex/*!*/ pattern, bool currentPositionOnly, bool advancePosition) {
     Match match = pattern.Match(_scanString, _currentPosition);
     _lastMatch = null;
     _lastMatchingGroups = null;
     _foundPosition = 0;
     if (!match.Success) {
         return false;
     }
     if (currentPositionOnly && match.Index != _currentPosition) {
         return false;
     }
     int length = (match.Index - _currentPosition) + match.Length;
     _foundPosition = match.Index;
     _previousPosition = _currentPosition;
     _lastMatch = _scanString.GetSlice(_foundPosition, match.Length);
     _lastMatchingGroups = match.Groups;
     if (advancePosition) {
         _currentPosition += length;
     }
     return true;
 }
예제 #7
0
        public void RegexEncoding2() {
            var SJIS = RubyEncoding.KCodeSJIS.StrictEncoding;

            // 1.9 encodings:
            var invalidUtf8 = MutableString.CreateBinary(new byte[] { 0x80 }, RubyEncoding.UTF8);
            AssertExceptionThrown<ArgumentException>(() => new RubyRegex(invalidUtf8, RubyRegexOptions.NONE));

            // LastMatch

            MatchData m;
            var u = MutableString.CreateBinary(SJIS.GetBytes("あああ"), RubyEncoding.KCodeSJIS);
            var p = MutableString.CreateBinary(SJIS.GetBytes("あ{2}"), RubyEncoding.KCodeSJIS);

            var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

            // /あ{2}/ matches "あああ", the resulting index is in bytes:
            m = rs.LastMatch(null, u);
            Assert(m != null && m.Index == 2);

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("あ")), RubyRegexOptions.SJIS);

            // "start at" in the middle of a character:
            m = rs.LastMatch(null, u, 0);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 1);
            Assert(m != null && m.Index == 0);

            m = rs.LastMatch(null, u, 2);
            Assert(m != null && m.Index == 2);

            m = rs.LastMatch(null, u, 3);
            Assert(m != null && m.Index == 2);

            // Split
            
            u = MutableString.CreateBinary(SJIS.GetBytes("あちあちあ"), RubyEncoding.UTF8);
            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち")), RubyRegexOptions.SJIS);
            var parts = rs.Split(null, u);
            Assert(parts.Length == 3);
            foreach (var part in parts) {
                Assert(part.Encoding == RubyEncoding.KCodeSJIS);
                Assert(part.ToString() == "あ");
            }

            // groups

            rs = new RubyRegex(MutableString.CreateBinary(SJIS.GetBytes("ち(a(あ+)(b+))+あ")), RubyRegexOptions.SJIS);
            u = MutableString.CreateBinary(SJIS.GetBytes("ちaああbaあbbbあ"));

            m = rs.Match(null, u);
            Assert(m.GroupCount == 4);

            int s, l;
            Assert(m.GetGroupStart(0) == (s = 0));
            Assert(m.GetGroupLength(0) == (l = u.GetByteCount()));
            Assert(m.GetGroupEnd(0) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(1) == (s = SJIS.GetByteCount("ちaああb")));
            Assert(m.GetGroupLength(1) == (l = SJIS.GetByteCount("aあbbb")));
            Assert(m.GetGroupEnd(1) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(2) == (s = SJIS.GetByteCount("ちaああba")));
            Assert(m.GetGroupLength(2) == (l = SJIS.GetByteCount("あ")));
            Assert(m.GetGroupEnd(2) == s + l);

            // the group has 2 captures, the last one is its value:
            Assert(m.GetGroupStart(3) == (s = SJIS.GetByteCount("ちaああbaあ")));
            Assert(m.GetGroupLength(3) == (l = SJIS.GetByteCount("bbb")));
            Assert(m.GetGroupEnd(3) == s + l);
        }
예제 #8
0
        public void RegexEncoding1() {
            MatchData m;
            // the k-coding of the pattern string is irrelevant:
            foreach (var pe in new[] { RubyEncoding.KCodeSJIS, RubyEncoding.KCodeUTF8, RubyEncoding.Binary }) {
                var p = MutableString.CreateBinary(new byte[] { 0x82, 0xa0, (byte)'{', (byte)'2', (byte)'}' }, pe);

                var r = new RubyRegex(p, RubyRegexOptions.NONE);
                var rs = new RubyRegex(p, RubyRegexOptions.SJIS);

                // the k-coding of the string is irrelevant:
                foreach (var se in new[] { RubyEncoding.KCodeSJIS, RubyEncoding.KCodeUTF8, RubyEncoding.Binary }) {
                    var s = MutableString.CreateBinary(new byte[] { 0x82, 0xa0,  0xa0 }, se);
                    var t = MutableString.CreateBinary(new byte[] { 0x82, 0xa0,  0xa0,  0x82, 0xa0,  0xa0, 0xff }, se);
                    var u = MutableString.CreateBinary(new byte[] { 0x82, 0xa0,  0x82, 0xa0,  0x82, 0xa0 }, se);

                    // /あ{2}/ does not match "あ\xa0"
                    m = r.Match(RubyEncoding.KCodeSJIS, s);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "[ \x82\xa0\xa0 ] \x82\xa0\xa0\xff"
                    m = r.Match(null, s);
                    Assert(m != null && m.Index == 0);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff" starting from byte #1:
                    m = r.Match(null, t, 1, false);
                    Assert(m != null && m.Index == 3 && m.Length == 3);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(null, s);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, s);
                    Assert(m == null);

                    // /あ{2}/s matches "ああ\xff", current KCODE is ignored
                    m = rs.Match(RubyEncoding.KCodeUTF8, u, 2, false);
                    Assert(m != null && m.Index == 2 && m.Length == 4);


                    // /あ{2}/ does not match "あ\xa0あ\xa0"
                    m = r.LastMatch(RubyEncoding.KCodeSJIS, t);
                    Assert(m == null);

                    // /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff"
                    m = r.LastMatch(null, t);
                    Assert(m != null && m.Index == 3);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(null, t);
                    Assert(m == null);

                    // /あ{2}/s does not match "あ\xa0あ\xa0", current KCODE is ignored
                    m = rs.LastMatch(RubyEncoding.KCodeUTF8, t);
                    Assert(m == null);
                }
            }
        }