// Emits a RUNE instruction for the given rune set and returns its fragment.
// runes : stored as-is on the new instruction
// flags : parser flags; everything except RE2.FOLD_CASE is discarded
private Frag rune(int[] runes, int flags)
{
    Frag frag = newInst(Inst.InstOp.RUNE);
    Inst inst = prog.getInst(frag.i);
    inst.runes = runes;

    // FOLD_CASE is the only flag that matters here, and it is kept only
    // for a single rune whose simple case fold differs from itself.
    bool foldable = runes.Length == 1 && Unicode.simpleFold(runes[0]) != runes[0];
    flags = foldable ? (flags & RE2.FOLD_CASE) : 0;

    inst.arg = flags;
    frag.@out = frag.i << 1;

    // Special cases for exec machine: pick a more specific opcode when the
    // rune set has one of a few well-known shapes.
    switch (runes.Length)
    {
        case 1:
            if ((flags & RE2.FOLD_CASE) == 0)
            {
                inst.op = Inst.InstOp.RUNE1;
            }
            break;

        case 2:
            if (runes[0] == runes[1])
            {
                inst.op = Inst.InstOp.RUNE1;
            }
            else if (runes[0] == 0 && runes[1] == Unicode.MAX_RUNE)
            {
                inst.op = Inst.InstOp.RUNE_ANY;
            }
            break;

        case 4:
            if (runes[0] == 0
                && runes[1] == '\n' - 1
                && runes[2] == '\n' + 1
                && runes[3] == Unicode.MAX_RUNE)
            {
                inst.op = Inst.InstOp.RUNE_ANY_NOT_NL;
            }
            break;
    }

    return frag;
}
// appendFoldedRange() adds the range [lo-hi] to this CharClass together
// with every rune reachable from it through Unicode.simpleFold.
// Returns the result of appendRange() on the fast paths, otherwise this.
public CharClass appendFoldedRange(int lo, int hi)
{
    // Fast paths: the range either spans the whole foldable window (folding
    // cannot add anything) or lies entirely outside it (nothing to fold).
    bool spansFoldWindow = lo <= Unicode.MIN_FOLD && hi >= Unicode.MAX_FOLD;
    bool outsideFoldWindow = hi < Unicode.MIN_FOLD || lo > Unicode.MAX_FOLD;
    if (spansFoldWindow || outsideFoldWindow)
    {
        return appendRange(lo, hi);
    }

    int first = lo;
    int last = hi;

    // The part below the foldable window, [lo, MIN_FOLD-1], needs no folding.
    if (first < Unicode.MIN_FOLD)
    {
        appendRange(first, Unicode.MIN_FOLD - 1);
        first = Unicode.MIN_FOLD;
    }

    // Likewise for the part above it, [MAX_FOLD+1, hi].
    if (last > Unicode.MAX_FOLD)
    {
        appendRange(Unicode.MAX_FOLD + 1, last);
        last = Unicode.MAX_FOLD;
    }

    // Brute force over what remains; appendRange is relied upon to
    // coalesce adjacent ranges on the fly.
    int current = first;
    while (current <= last)
    {
        appendRange(current, current);

        // Walk the fold orbit until it cycles back to the starting rune.
        int folded = Unicode.simpleFold(current);
        while (folded != current)
        {
            appendRange(folded, folded);
            folded = Unicode.simpleFold(folded);
        }

        current++;
    }

    return this;
}
// Appends a RE2 literal to |out| for rune |rune|,
// with regexp metacharacters escaped.
//
// Printable runes are appended as their UTF-16 text, preceded by a
// backslash when the rune is a regexp metacharacter. Non-printable runes
// are written as a named escape (\t, \n, \r, \b, \f, \", \\) when one
// exists, otherwise as \xNN for values below 0x100 (zero-padded to two hex
// digits) or \x{N...} for larger values.
//
// out  : builder that receives the escaped text
// rune : Unicode code point to escape
public static void escapeRune(StringBuilder @out, int rune)
{
    if (Unicode.isPrint(rune))
    {
        // Only consult the metacharacter table for runes that fit in one
        // UTF-16 code unit. Casting a supplementary code point to char keeps
        // just its low 16 bits, so e.g. U+1005C would be looked up as '\\'
        // and wrongly receive an escaping backslash.
        bool fitsInChar = rune >= char.MinValue && rune <= char.MaxValue;
        if (fitsInChar && METACHARACTERS.IndexOf((char)rune) >= 0)
        {
            @out.Append('\\');
        }

        // A code point above U+FFFF needs a surrogate pair, so convert the
        // UTF-32 value to a UTF-16 string instead of appending a char.
        @out.Append(Char.ConvertFromUtf32(rune));
        return;
    }

    switch (rune)
    {
        case '"':
            @out.Append("\\\"");
            break;

        case '\\':
            @out.Append("\\\\");
            break;

        case '\t':
            @out.Append("\\t");
            break;

        case '\n':
            @out.Append("\\n");
            break;

        case '\r':
            @out.Append("\\r");
            break;

        case '\b':
            @out.Append("\\b");
            break;

        case '\f':
            @out.Append("\\f");
            break;

        default:
        {
            String s = rune.ToString("X");
            if (rune < 0x100)
            {
                // Two-digit form: pad a single hex digit with a leading zero.
                @out.Append("\\x");
                if (s.Length == 1)
                {
                    @out.Append('0');
                }
                @out.Append(s);
            }
            else
            {
                @out.Append("\\x{").Append(s).Append('}');
            }
            break;
        }
    }
}
// matchRune reports whether the instruction matches (and consumes) r.
// It should only be called when op == InstRune.
//
// With exactly one stored rune the comparison is against that rune and,
// when FOLD_CASE is set in arg, against its simple case folds. Otherwise
// the stored runes are read as consecutive [lo, hi] pairs.
public bool matchRune(int r)
{
    // Special case: a single-rune slice comes from a literal string,
    // not from a character class.
    if (runes.Length == 1)
    {
        int literal = runes[0];
        if (r == literal)
        {
            return true;
        }
        if ((arg & RE2.FOLD_CASE) == 0)
        {
            return false;
        }

        // Walk the fold orbit of the literal until it cycles back.
        int folded = Unicode.simpleFold(literal);
        while (folded != literal)
        {
            if (r == folded)
            {
                return true;
            }
            folded = Unicode.simpleFold(folded);
        }
        return false;
    }

    // Peek at the first few pairs linearly; this should handle ASCII well.
    int pairStart = 0;
    while (pairStart < runes.Length && pairStart <= 8)
    {
        if (r < runes[pairStart])
        {
            return false;
        }
        if (r <= runes[pairStart + 1])
        {
            return true;
        }
        pairStart += 2;
    }

    // Otherwise binary search over the pairs.
    int low = 0;
    int high = runes.Length / 2;
    while (low < high)
    {
        int mid = low + (high - low) / 2;
        if (runes[2 * mid] > r)
        {
            high = mid;
            continue;
        }
        if (r <= runes[2 * mid + 1])
        {
            return true;
        }
        low = mid + 1;
    }

    return false;
}