/// <summary>
/// Searches the scanned string for <paramref name="pattern"/>, starting at the current position,
/// and records the outcome in the scanner's match-state fields.
/// </summary>
/// <param name="pattern">Regular expression to search for.</param>
/// <param name="currentPositionOnly">
/// When true, a match that begins anywhere other than the current position is treated as a failure.
/// </param>
/// <param name="advancePosition">
/// When true, a successful match moves the current position to the end of the matched text.
/// </param>
/// <returns>True if an acceptable match was found; otherwise false.</returns>
private bool Match(RubyRegex /*!*/ pattern, bool currentPositionOnly, bool advancePosition) {
    // TODO: repeated calls on the same ScanString can be optimized:
    MatchData found = pattern.Match(null, _scanString, _currentPosition, false);

    // The previous result is discarded up front, so a failed attempt leaves no stale match behind.
    _foundPosition = 0;
    _lastMatchingGroups = null;
    _lastMatch = null;

    bool acceptable = found != null && (!currentPositionOnly || found.Index == _currentPosition);
    if (!acceptable) {
        return false;
    }

    int start = found.Index;
    int matchedLength = found.Length;
    // Distance from the current position to the match, plus the match itself.
    int consumed = (start - _currentPosition) + matchedLength;

    _previousPosition = _currentPosition;
    _foundPosition = start;
    _lastMatch = _scanString.GetSlice(start, matchedLength);
    _lastMatchingGroups = found;

    if (advancePosition) {
        _currentPosition += consumed;
    }
    return true;
}
/// <summary>
/// Runs <paramref name="pattern"/> against the scanned string from the current position and
/// stores the result in the scanner's match-state fields.
/// </summary>
/// <param name="pattern">Regular expression to search for.</param>
/// <param name="currentPositionOnly">
/// When true, only a match that starts exactly at the current position is accepted.
/// </param>
/// <param name="advancePosition">
/// When true, the current position is moved past the matched text on success.
/// </param>
/// <returns>True when an acceptable match was found; false otherwise.</returns>
private bool Match(RubyRegex /*!*/ pattern, bool currentPositionOnly, bool advancePosition) {
    Match result = pattern.Match(_scanString, _currentPosition);

    // Forget whatever the previous call recorded before deciding on this one.
    _foundPosition = 0;
    _lastMatchingGroups = null;
    _lastMatch = null;

    bool rejected = !result.Success || (currentPositionOnly && result.Index != _currentPosition);
    if (rejected) {
        return false;
    }

    int start = result.Index;
    int matchedLength = result.Length;
    // Gap between the current position and the match, plus the match itself.
    int consumed = (start - _currentPosition) + matchedLength;

    _previousPosition = _currentPosition;
    _foundPosition = start;
    _lastMatch = _scanString.GetSlice(start, matchedLength);
    _lastMatchingGroups = result.Groups;

    if (advancePosition) {
        _currentPosition += consumed;
    }
    return true;
}
/// <summary>
/// Looks for <paramref name="pattern"/> in the scanned string, beginning at the current position,
/// and updates the scanner's match-state fields accordingly.
/// </summary>
/// <param name="pattern">Regular expression to search for.</param>
/// <param name="currentPositionOnly">
/// When true, the match must begin exactly at the current position to be accepted.
/// </param>
/// <param name="advancePosition">
/// When true, a successful match advances the current position to the end of the match.
/// </param>
/// <returns>True if an acceptable match was found; otherwise false.</returns>
private bool Match(RubyRegex/*!*/ pattern, bool currentPositionOnly, bool advancePosition) {
    // TODO: repeated calls on the same ScanString can be optimized:
    MatchData candidate = pattern.Match(_scanString, _currentPosition, false);

    // State from the previous call is cleared whether or not this attempt succeeds.
    _foundPosition = 0;
    _lastMatchingGroups = null;
    _lastMatch = null;

    if (candidate == null || (currentPositionOnly && candidate.Index != _currentPosition)) {
        return false;
    }

    int start = candidate.Index;
    int matchedLength = candidate.Length;
    // Offset of the match from the current position, plus the length of the match.
    int consumed = (start - _currentPosition) + matchedLength;

    _previousPosition = _currentPosition;
    _foundPosition = start;
    _lastMatch = _scanString.GetSlice(start, matchedLength);
    _lastMatchingGroups = candidate;

    if (advancePosition) {
        _currentPosition += consumed;
    }
    return true;
}
/// <summary>
/// Checks RubyRegex behavior on Shift-JIS encoded byte strings: rejection of a malformed UTF-8
/// pattern, LastMatch with and without a start offset, Split, and capture-group offsets.
/// </summary>
public void RegexEncoding2() {
    var sjis = RubyEncoding.KCodeSJIS.StrictEncoding;

    // 1.9 encodings: a pattern consisting of the single byte 0x80 tagged as UTF-8 is expected to throw.
    var malformedPattern = MutableString.CreateBinary(new byte[] { 0x80 }, RubyEncoding.UTF8);
    AssertExceptionThrown<ArgumentException>(() => new RubyRegex(malformedPattern, RubyRegexOptions.NONE));

    // --- LastMatch ---
    MatchData match;
    var tripleA = MutableString.CreateBinary(sjis.GetBytes("あああ"), RubyEncoding.KCodeSJIS);
    var doubleAPattern = MutableString.CreateBinary(sjis.GetBytes("あ{2}"), RubyEncoding.KCodeSJIS);
    var regex = new RubyRegex(doubleAPattern, RubyRegexOptions.SJIS);

    // /あ{2}/ matches "あああ"; the reported index is a byte offset.
    match = regex.LastMatch(null, tripleA);
    Assert(match != null && match.Index == 2);

    regex = new RubyRegex(MutableString.CreateBinary(sjis.GetBytes("あ")), RubyRegexOptions.SJIS);

    // The "start at" offset may point into the middle of a character;
    // expectedIndexByStart[i] is the match index expected for start offset i.
    int[] expectedIndexByStart = { 0, 0, 2, 2 };
    for (int startAt = 0; startAt < expectedIndexByStart.Length; startAt++) {
        match = regex.LastMatch(null, tripleA, startAt);
        Assert(match != null && match.Index == expectedIndexByStart[startAt]);
    }

    // --- Split ---
    var splitInput = MutableString.CreateBinary(sjis.GetBytes("あちあちあ"), RubyEncoding.UTF8);
    regex = new RubyRegex(MutableString.CreateBinary(sjis.GetBytes("ち")), RubyRegexOptions.SJIS);
    var pieces = regex.Split(null, splitInput);
    Assert(pieces.Length == 3);
    foreach (var piece in pieces) {
        Assert(piece.Encoding == RubyEncoding.KCodeSJIS);
        Assert(piece.ToString() == "あ");
    }

    // --- Groups ---
    regex = new RubyRegex(MutableString.CreateBinary(sjis.GetBytes("ち(a(あ+)(b+))+あ")), RubyRegexOptions.SJIS);
    var groupInput = MutableString.CreateBinary(sjis.GetBytes("ちaああbaあbbbあ"));
    match = regex.Match(null, groupInput);
    Assert(match.GroupCount == 4);

    int expectedStart, expectedLength;

    // Group 0 spans the entire input.
    expectedStart = 0;
    Assert(match.GetGroupStart(0) == expectedStart);
    // The group length is read before the input's byte count, as in the original assertion order.
    var wholeMatchLength = match.GetGroupLength(0);
    expectedLength = groupInput.GetByteCount();
    Assert(wholeMatchLength == expectedLength);
    Assert(match.GetGroupEnd(0) == expectedStart + expectedLength);

    // Group 1 has 2 captures; its value is the last one.
    expectedStart = sjis.GetByteCount("ちaああb");
    expectedLength = sjis.GetByteCount("aあbbb");
    Assert(match.GetGroupStart(1) == expectedStart);
    Assert(match.GetGroupLength(1) == expectedLength);
    Assert(match.GetGroupEnd(1) == expectedStart + expectedLength);

    // Group 2 has 2 captures; its value is the last one.
    expectedStart = sjis.GetByteCount("ちaああba");
    expectedLength = sjis.GetByteCount("あ");
    Assert(match.GetGroupStart(2) == expectedStart);
    Assert(match.GetGroupLength(2) == expectedLength);
    Assert(match.GetGroupEnd(2) == expectedStart + expectedLength);

    // Group 3 has 2 captures; its value is the last one.
    expectedStart = sjis.GetByteCount("ちaああbaあ");
    expectedLength = sjis.GetByteCount("bbb");
    Assert(match.GetGroupStart(3) == expectedStart);
    Assert(match.GetGroupLength(3) == expectedLength);
    Assert(match.GetGroupEnd(3) == expectedStart + expectedLength);
}
/// <summary>
/// Checks how RubyRegex Match and LastMatch treat raw byte strings when the regex is built with
/// and without the SJIS option, and with different values passed as the first (KCODE) argument.
/// </summary>
public void RegexEncoding1() {
    MatchData result;

    // The k-coding of the pattern string is irrelevant:
    var patternEncodings = new[] { RubyEncoding.Binary };
    // The k-coding of the subject string is irrelevant:
    var subjectEncodings = new[] { RubyEncoding.Binary };

    foreach (var patternEncoding in patternEncodings) {
        // Bytes of "あ{2}" in Shift-JIS (0x82 0xa0 is "あ").
        var patternBytes = MutableString.CreateBinary(
            new byte[] { 0x82, 0xa0, (byte)'{', (byte)'2', (byte)'}' },
            patternEncoding
        );
        var plainRegex = new RubyRegex(patternBytes, RubyRegexOptions.NONE);
        var sjisRegex = new RubyRegex(patternBytes, RubyRegexOptions.SJIS);

        foreach (var subjectEncoding in subjectEncodings) {
            // "あ\xa0"
            var single = MutableString.CreateBinary(
                new byte[] { 0x82, 0xa0, 0xa0 },
                subjectEncoding
            );
            // "あ\xa0あ\xa0\xff"
            var doubled = MutableString.CreateBinary(
                new byte[] { 0x82, 0xa0, 0xa0, 0x82, 0xa0, 0xa0, 0xff },
                subjectEncoding
            );
            // "あああ"
            var tripleA = MutableString.CreateBinary(
                new byte[] { 0x82, 0xa0, 0x82, 0xa0, 0x82, 0xa0 },
                subjectEncoding
            );

            // ----- Match -----

            // With SJIS KCODE, /あ{2}/ does not match "あ\xa0".
            result = plainRegex.Match(RubyEncoding.KCodeSJIS, single);
            Assert(result == null);

            // Without a KCODE, /\x82\xa0{2}/ matches "\x82\xa0\xa0" at offset 0.
            result = plainRegex.Match(null, single);
            Assert(result != null && result.Index == 0);

            // Starting from byte #1, /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff".
            result = plainRegex.Match(null, doubled, 1, false);
            Assert(result != null && result.Index == 3 && result.Length == 3);

            // /あ{2}/s does not match "あ\xa0"; the current KCODE is ignored.
            result = sjisRegex.Match(null, single);
            Assert(result == null);

            // /あ{2}/s does not match "あ\xa0"; the current KCODE is ignored.
            result = sjisRegex.Match(RubyEncoding.KCodeUTF8, single);
            Assert(result == null);

            // Starting from byte #2, /あ{2}/s matches the last two characters of "あああ";
            // the current KCODE is ignored.
            result = sjisRegex.Match(RubyEncoding.KCodeUTF8, tripleA, 2, false);
            Assert(result != null && result.Index == 2 && result.Length == 4);

            // ----- LastMatch -----

            // With SJIS KCODE, /あ{2}/ does not match "あ\xa0あ\xa0\xff".
            result = plainRegex.LastMatch(RubyEncoding.KCodeSJIS, doubled);
            Assert(result == null);

            // Without a KCODE, /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff".
            result = plainRegex.LastMatch(null, doubled);
            Assert(result != null && result.Index == 3);

            // /あ{2}/s does not match "あ\xa0あ\xa0\xff"; the current KCODE is ignored.
            result = sjisRegex.LastMatch(null, doubled);
            Assert(result == null);

            // /あ{2}/s does not match "あ\xa0あ\xa0\xff"; the current KCODE is ignored.
            result = sjisRegex.LastMatch(RubyEncoding.KCodeUTF8, doubled);
            Assert(result == null);
        }
    }
}
/// <summary>
/// Applies <paramref name="pattern"/> to the scanned string from the current position and
/// records the result in the scanner's match-state fields.
/// </summary>
/// <param name="pattern">Regular expression to search for.</param>
/// <param name="currentPositionOnly">
/// When true, a match is accepted only if it starts exactly at the current position.
/// </param>
/// <param name="advancePosition">
/// When true, the current position is moved to the end of the matched text on success.
/// </param>
/// <returns>True when an acceptable match was found; false otherwise.</returns>
private bool Match(RubyRegex/*!*/ pattern, bool currentPositionOnly, bool advancePosition) {
    Match hit = pattern.Match(_scanString, _currentPosition);

    // Clear the previous call's results first; a failure must not leave them visible.
    _foundPosition = 0;
    _lastMatchingGroups = null;
    _lastMatch = null;

    if (!hit.Success) {
        return false;
    }

    int start = hit.Index;
    if (currentPositionOnly && start != _currentPosition) {
        return false;
    }

    int matchedLength = hit.Length;
    // Gap between the current position and the match, plus the match itself.
    int consumed = (start - _currentPosition) + matchedLength;

    _previousPosition = _currentPosition;
    _foundPosition = start;
    _lastMatch = _scanString.GetSlice(start, matchedLength);
    _lastMatchingGroups = hit.Groups;

    if (advancePosition) {
        _currentPosition += consumed;
    }
    return true;
}
/// <summary>
/// Verifies RubyRegex on Shift-JIS encoded byte strings: a malformed UTF-8 pattern is rejected,
/// LastMatch honors start offsets, Split yields SJIS parts, and group offsets are as expected.
/// </summary>
public void RegexEncoding2() {
    var sjis = RubyEncoding.KCodeSJIS.StrictEncoding;

    // 1.9 encodings: constructing a regex from the lone byte 0x80 tagged as UTF-8 is expected to throw.
    var badUtf8Pattern = MutableString.CreateBinary(new byte[] { 0x80 }, RubyEncoding.UTF8);
    AssertExceptionThrown<ArgumentException>(() => new RubyRegex(badUtf8Pattern, RubyRegexOptions.NONE));

    // --- LastMatch ---
    MatchData found;
    var subject = MutableString.CreateBinary(sjis.GetBytes("あああ"), RubyEncoding.KCodeSJIS);
    var pattern = MutableString.CreateBinary(sjis.GetBytes("あ{2}"), RubyEncoding.KCodeSJIS);
    var sjisRegex = new RubyRegex(pattern, RubyRegexOptions.SJIS);

    // /あ{2}/ matches "あああ"; the resulting index is expressed in bytes.
    found = sjisRegex.LastMatch(null, subject);
    Assert(found != null && found.Index == 2);

    sjisRegex = new RubyRegex(MutableString.CreateBinary(sjis.GetBytes("あ")), RubyRegexOptions.SJIS);

    // "start at" may fall in the middle of a character; each row is { start offset, expected index }.
    int[][] startCases = {
        new[] { 0, 0 },
        new[] { 1, 0 },
        new[] { 2, 2 },
        new[] { 3, 2 },
    };
    foreach (int[] startCase in startCases) {
        found = sjisRegex.LastMatch(null, subject, startCase[0]);
        Assert(found != null && found.Index == startCase[1]);
    }

    // --- Split ---
    subject = MutableString.CreateBinary(sjis.GetBytes("あちあちあ"), RubyEncoding.UTF8);
    sjisRegex = new RubyRegex(MutableString.CreateBinary(sjis.GetBytes("ち")), RubyRegexOptions.SJIS);
    var fragments = sjisRegex.Split(null, subject);
    Assert(fragments.Length == 3);
    foreach (var fragment in fragments) {
        Assert(fragment.Encoding == RubyEncoding.KCodeSJIS);
        Assert(fragment.ToString() == "あ");
    }

    // --- Groups ---
    sjisRegex = new RubyRegex(MutableString.CreateBinary(sjis.GetBytes("ち(a(あ+)(b+))+あ")), RubyRegexOptions.SJIS);
    subject = MutableString.CreateBinary(sjis.GetBytes("ちaああbaあbbbあ"));
    found = sjisRegex.Match(null, subject);
    Assert(found.GroupCount == 4);

    int offset, size;

    // The whole match (group 0) covers the entire subject.
    offset = 0;
    Assert(found.GetGroupStart(0) == offset);
    // Read the group length before asking the subject for its byte count, as the original does.
    var wholeLength = found.GetGroupLength(0);
    size = subject.GetByteCount();
    Assert(wholeLength == size);
    Assert(found.GetGroupEnd(0) == offset + size);

    // Group 1 has 2 captures; the last one is its value.
    offset = sjis.GetByteCount("ちaああb");
    size = sjis.GetByteCount("aあbbb");
    Assert(found.GetGroupStart(1) == offset);
    Assert(found.GetGroupLength(1) == size);
    Assert(found.GetGroupEnd(1) == offset + size);

    // Group 2 has 2 captures; the last one is its value.
    offset = sjis.GetByteCount("ちaああba");
    size = sjis.GetByteCount("あ");
    Assert(found.GetGroupStart(2) == offset);
    Assert(found.GetGroupLength(2) == size);
    Assert(found.GetGroupEnd(2) == offset + size);

    // Group 3 has 2 captures; the last one is its value.
    offset = sjis.GetByteCount("ちaああbaあ");
    size = sjis.GetByteCount("bbb");
    Assert(found.GetGroupStart(3) == offset);
    Assert(found.GetGroupLength(3) == size);
    Assert(found.GetGroupEnd(3) == offset + size);
}
/// <summary>
/// Verifies that RubyRegex Match and LastMatch give the same results for raw byte strings
/// regardless of the k-coding attached to the pattern or to the subject string, both with and
/// without the SJIS regex option.
/// </summary>
public void RegexEncoding1() {
    MatchData outcome;

    // The k-coding of the pattern string is irrelevant:
    var patternCodings = new[] { RubyEncoding.KCodeSJIS, RubyEncoding.KCodeUTF8, RubyEncoding.Binary };
    // The k-coding of the subject string is irrelevant:
    var subjectCodings = new[] { RubyEncoding.KCodeSJIS, RubyEncoding.KCodeUTF8, RubyEncoding.Binary };

    foreach (var patternCoding in patternCodings) {
        // Bytes of "あ{2}" in Shift-JIS (0x82 0xa0 is "あ").
        var patternBytes = MutableString.CreateBinary(
            new byte[] { 0x82, 0xa0, (byte)'{', (byte)'2', (byte)'}' },
            patternCoding
        );
        var defaultRegex = new RubyRegex(patternBytes, RubyRegexOptions.NONE);
        var sjisRegex = new RubyRegex(patternBytes, RubyRegexOptions.SJIS);

        foreach (var subjectCoding in subjectCodings) {
            // "あ\xa0"
            var shortText = MutableString.CreateBinary(
                new byte[] { 0x82, 0xa0, 0xa0 },
                subjectCoding
            );
            // "あ\xa0あ\xa0\xff"
            var longText = MutableString.CreateBinary(
                new byte[] { 0x82, 0xa0, 0xa0, 0x82, 0xa0, 0xa0, 0xff },
                subjectCoding
            );
            // "あああ"
            var kanaText = MutableString.CreateBinary(
                new byte[] { 0x82, 0xa0, 0x82, 0xa0, 0x82, 0xa0 },
                subjectCoding
            );

            // ----- Match -----

            // With SJIS KCODE, /あ{2}/ does not match "あ\xa0".
            outcome = defaultRegex.Match(RubyEncoding.KCodeSJIS, shortText);
            Assert(outcome == null);

            // Without a KCODE, /\x82\xa0{2}/ matches "\x82\xa0\xa0" at offset 0.
            outcome = defaultRegex.Match(null, shortText);
            Assert(outcome != null && outcome.Index == 0);

            // Starting from byte #1, /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff".
            outcome = defaultRegex.Match(null, longText, 1, false);
            Assert(outcome != null && outcome.Index == 3 && outcome.Length == 3);

            // /あ{2}/s does not match "あ\xa0"; the current KCODE is ignored.
            outcome = sjisRegex.Match(null, shortText);
            Assert(outcome == null);

            // /あ{2}/s does not match "あ\xa0"; the current KCODE is ignored.
            outcome = sjisRegex.Match(RubyEncoding.KCodeUTF8, shortText);
            Assert(outcome == null);

            // Starting from byte #2, /あ{2}/s matches the last two characters of "あああ";
            // the current KCODE is ignored.
            outcome = sjisRegex.Match(RubyEncoding.KCodeUTF8, kanaText, 2, false);
            Assert(outcome != null && outcome.Index == 2 && outcome.Length == 4);

            // ----- LastMatch -----

            // With SJIS KCODE, /あ{2}/ does not match "あ\xa0あ\xa0\xff".
            outcome = defaultRegex.LastMatch(RubyEncoding.KCodeSJIS, longText);
            Assert(outcome == null);

            // Without a KCODE, /\x82\xa0{2}/ matches "\x82\xa0\xa0 [ \x82\xa0\xa0 ] \xff".
            outcome = defaultRegex.LastMatch(null, longText);
            Assert(outcome != null && outcome.Index == 3);

            // /あ{2}/s does not match "あ\xa0あ\xa0\xff"; the current KCODE is ignored.
            outcome = sjisRegex.LastMatch(null, longText);
            Assert(outcome == null);

            // /あ{2}/s does not match "あ\xa0あ\xa0\xff"; the current KCODE is ignored.
            outcome = sjisRegex.LastMatch(RubyEncoding.KCodeUTF8, longText);
            Assert(outcome == null);
        }
    }
}