// Resolves a code point that is acting as a "conditional" extender to the
// plain vowel kana it stands for.
//
// Any other extender type, or a character for which Uni.HasSpecialWeight()
// is false, is returned unchanged. Otherwise the low three bits of the
// level 1 weight (values 2..6) pick one of the five vowels A, I, U, E, O,
// and the result is that vowel in the same script form as the input:
//   half-width katakana : U+FF71 .. U+FF75 (consecutive code points)
//   katakana            : U+30A2 .. U+30AA (every second code point)
//   hiragana            : U+3042 .. U+304A (every second code point)
// Any other value of the low three bits also returns the input unchanged.
int FilterExtender (int i, ExtenderType ext, COpt opt)
{
	if (ext != ExtenderType.Conditional || !Uni.HasSpecialWeight ((char) i))
		return i;

	bool halfWidth = IsHalfKana ((char) i, opt);
	bool isKatakana = !Uni.IsHiragana ((char) i);

	// 0 = A, 1 = I, 2 = U, 3 = E, 4 = O
	int vowel = (Level1 (i) & 7) - 2;
	if (vowel < 0 || vowel > 4)
		return i;

	if (halfWidth)
		return 0xFF71 + vowel;

	// Full-width vowels are interleaved with their small forms, hence
	// the stride of two.
	return (isKatakana ? 0x30A2 : 0x3042) + 2 * vowel;
}
// Appends the raw sort key of the single code point `i` to `buf`.
//
// Handled in this order:
//   1. 0x3400..0x4DB5 -> AppendCJKExtension(), weights computed from the
//      offset into the range (254 entries per leading byte).
//   2. Private-use    -> AppendNormal(), weights computed from the offset
//      from 0xE000; levels 2 and 3 are zero.
//   3. Surrogates     -> delegated to FillSurrogateSortKeyRaw().
//   4. Characters without a special weight -> AppendNormal() with the
//      table weights.
//   5. Characters with a special weight -> AppendKana(), followed by an
//      extra (1, 1, 1, 0) entry when the extender is Voiced and
//      IgnoreNonSpace is not set.
void FillSortKeyRaw (int i, ExtenderType ext, SortKeyBuffer buf, CompareOptions opt)
{
	if (i >= 0x3400 && i <= 0x4DB5) {
		int cjkOffset = i - 0x3400;
		buf.AppendCJKExtension (
			(byte) (0x10 + cjkOffset / 254),
			(byte) (cjkOffset % 254 + 2));
		return;
	}

	UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
	if (uc == UnicodeCategory.PrivateUse) {
		int privateOffset = i - 0xE000;
		buf.AppendNormal (
			(byte) (0xE5 + privateOffset / 254),
			(byte) (privateOffset % 254 + 2),
			0,
			0);
		return;
	}
	if (uc == UnicodeCategory.Surrogate) {
		FillSurrogateSortKeyRaw (i, buf);
		return;
	}

	// Level 2 is resolved first for both remaining paths.
	byte lv2 = Level2 (i, ext);

	if (!Uni.HasSpecialWeight ((char) i)) {
		buf.AppendNormal (Category (i), Level1 (i), lv2, Uni.Level3 (i));
		return;
	}

	byte lv1 = Level1 (i);
	buf.AppendKana (
		Category (i),
		lv1,
		lv2,
		Uni.Level3 (i),
		Uni.IsJapaneseSmallLetter ((char) i),
		ToDashTypeValue (ext, opt),
		!Uni.IsHiragana ((char) i),
		IsHalfKana ((char) i, opt));

	// Append voice weight
	bool nonSpaceMatters = (opt & COpt.IgnoreNonSpace) == 0;
	if (nonSpaceMatters && ext == ExtenderType.Voiced)
		buf.AppendNormal (1, 1, 1, 0);
}
// Returns the "dash type" value for an extender type:
//   3 when IgnoreNonSpace is set (whatever the extender is) or when there
//     is no extender,
//   5 for a Conditional extender,
//   4 for every other extender type.
static byte ToDashTypeValue (ExtenderType ext, COpt opt)
{
	// LAMESPEC: huh, why?
	bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
	if (ignoreNonSpace || ext == ExtenderType.None)
		return 3;

	return ext == ExtenderType.Conditional ? (byte) 5 : (byte) 4;
}
// Returns the level 2 weight of code point `cp` under extender `ext`.
//
// A Buggy extender always yields 5 and a Conditional extender always
// yields 0. Otherwise:
//   - below 0x3000, or when no cjkLv2Table is present, the value comes
//     straight from Uni.Level2();
//   - a non-zero entry in cjkLv2Table wins;
//   - failing that, the Uni.Level2() value is passed through level2Maps
//     (Source -> Replace). The early exit on a larger Source assumes the
//     map is sorted ascending by Source.
unsafe byte Level2 (int cp, ExtenderType ext)
{
	switch (ext) {
	case ExtenderType.Buggy:
		return 5;
	case ExtenderType.Conditional:
		return 0;
	}

	if (cp < 0x3000 || cjkLv2Table == null)
		return Uni.Level2 (cp);

	int tableIndex = cjkLv2Indexer.ToIndex (cp);
	if (tableIndex >= 0) {
		byte tailored = cjkLv2Table [tableIndex];
		if (tailored != 0)
			return tailored;
	}

	byte weight = Uni.Level2 (cp);

	// Stop at the first entry whose Source exceeds the weight; an empty
	// map simply falls through.
	for (int n = 0; n < level2Maps.Length && level2Maps [n].Source <= weight; n++) {
		if (level2Maps [n].Source == weight)
			return level2Maps [n].Replace;
	}
	return weight;
}
// Builds the sort key of the source element that ends at s[idx], moving
// backward, and compares it with the 4-byte target key `sortkey` via
// MatchesPrimitive().
//
//   s, idx    : source string and current position. `idx` is passed by
//               reference and is moved backward past whatever was consumed,
//               including on some of the `false` returns.
//   end       : forwarded to GetTailContraction(); not otherwise used here.
//   orgStart  : exclusive upper bound of the trailing category-1 scan.
//   ti        : target code point, forwarded to MatchesPrimitive() and
//               LastIndexOfSortKey().
//   sortkey   : target key; bytes [0]..[3] are read.
//   noLv4     : forwarded to MatchesPrimitive(); when false, any contraction
//               is an immediate mismatch.
//   ext       : extender type of the source character being matched.
//   ct        : by-ref contraction. Assigned here only when ext is None;
//               otherwise the caller's value is used as-is.
//   ctx       : supplies Option and the scratch buffer Buffer1; PrevCode
//               and PrevSortKey are updated on some paths.
//
// NOTE(review): the scratch key lives in ctx.Buffer1 and is not cleared
// here, so a byte not written on a given path keeps its earlier content.
unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
{
	COpt opt = ctx.Option;
	byte* charSortKey = ctx.Buffer1;
	bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
	// Remember the entry position; the trailing scan at the bottom starts
	// right after it.
	int cur = idx;
	int si = -1;
	// To handle extenders in source, we need to
	// check next _primary_ character.
	if (ext != ExtenderType.None) {
		// Level 2 weight of the most recently seen category-1 character
		// while scanning backward (0 if none was seen).
		byte diacritical = 0;
		for (int tmp = idx; ; tmp--) {
			if (tmp < 0) // heading extender
				return false;
			if (IsIgnorable (s [tmp], opt))
				continue;
			int tmpi = FilterOptions (s [tmp], opt);
			byte category = Category (tmpi);
			// Category 1 characters are not used as the base; only their
			// level 2 weight is remembered (presumably these are the
			// nonspacing marks - confirm against the category table).
			if (category == 1) {
				diacritical = Level2 (tmpi, ExtenderType.None);
				continue;
			}
			// Found the base character: build its key as seen through
			// the extender.
			si = FilterExtender (tmpi, ext, opt);
			charSortKey [0] = category;
			charSortKey [1] = Level1 (si);
			if (!ignoreNonSpace)
				charSortKey [2] = Level2 (si, ext);
			charSortKey [3] = Uni.Level3 (si);
			// Fold the remembered diacritic weight into level 2. When
			// ignoreNonSpace is set, charSortKey [2] was not assigned
			// above, so this test reads its earlier content.
			if (ext != ExtenderType.Conditional && diacritical != 0)
				charSortKey [2] = (charSortKey [2] == 0) ? (byte) (diacritical + 2) : diacritical;
			break;
		}
		// Only one position is consumed, however far back the scan went.
		idx--;
	}
	if (ext == ExtenderType.None)
		ct = GetTailContraction (s, idx, end);
	// if lv4 exists, it never matches contraction
	if (ct != null) {
		// Skip the whole contraction source, even when returning false.
		idx -= ct.Source.Length;
		if (!noLv4)
			return false;
		if (ct.SortKey != null) {
			// The target key is copied into the scratch key, so the
			// four key bytes compared in MatchesPrimitive() are equal.
			for (int i = 0; i < 4; i++)
				charSortKey [i] = sortkey [i];
			ctx.PrevCode = -1;
			ctx.PrevSortKey = charSortKey;
		} else {
			// Here is the core of LAMESPEC
			// described at the top of the source.
			// The contraction has a replacement string instead of a
			// key: search that string for the target key, starting at
			// its last character.
			int dummy = ct.Replacement.Length - 1;
			return 0 <= LastIndexOfSortKey (
				ct.Replacement, dummy, dummy,
				ct.Replacement.Length, sortkey,
				ti, noLv4, ref ctx);
		}
	} else if (ext == ExtenderType.None) {
		// Plain character. `si` is still -1 here, because it is only
		// assigned in the extender branch above.
		if (si < 0)
			si = FilterOptions (s [idx], opt);
		idx--;
		bool noMatch = false;
		// Build the key level by level, stopping at the first level
		// that differs from the target.
		charSortKey [0] = Category (si);
		if (charSortKey [0] == sortkey [0])
			charSortKey [1] = Level1 (si);
		else
			noMatch = true;
		// On a category mismatch charSortKey [1] still holds its earlier
		// content; noMatch is already set, so the result is false anyway.
		if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
			charSortKey [2] = Level2 (si, ext);
		else if (!ignoreNonSpace)
			noMatch = true;
		if (noMatch)
			return false;
		charSortKey [3] = Uni.Level3 (si);
		// Category-1 characters are not recorded as the previous code.
		if (charSortKey [0] != 1)
			ctx.PrevCode = si;
	}
	if (ext == ExtenderType.None) {
		// Add the level 2 weights of the category-1 characters that
		// follow the entry position, up to orgStart. The first addition
		// onto a zero level 2 starts from 2.
		for (int tmp = cur + 1; tmp < orgStart; tmp++) {
			if (Category (s [tmp]) != 1)
				break;
			if (ignoreNonSpace)
				continue;
			if (charSortKey [2] == 0)
				charSortKey [2] = 2;
			charSortKey [2] = (byte) (charSortKey [2] + Level2 (s [tmp], ExtenderType.None));
		}
	}
	return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
}
// Compares an already-built 4-byte source key with a 4-byte target key.
//
//   source / target : key bytes [0]..[3]; byte [2] is skipped when
//                     IgnoreNonSpace is set.
//   si / ti         : source and target code points, used only for the
//                     special-weight (kana) attribute checks.
//   noLv4           : when true the kana attributes are not compared; the
//                     keys match only if `si` is negative or has no
//                     special weight.
//   ext             : extender type of the source character.
unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
{
	bool nonSpaceMatters = (opt & COpt.IgnoreNonSpace) == 0;

	bool sameWeights =
		source [0] == target [0] &&
		source [1] == target [1] &&
		(!nonSpaceMatters || source [2] == target [2]) &&
		source [3] == target [3];
	if (!sameWeights)
		return false;

	if (noLv4)
		return si < 0 || !Uni.HasSpecialWeight ((char) si);

	// Since target can never be an extender, if the source
	// is an expander and it matters, then they never match.
	if (nonSpaceMatters && ext == ExtenderType.Conditional)
		return false;

	// All four kana attributes have to agree.
	return Uni.IsJapaneseSmallLetter ((char) si) == Uni.IsJapaneseSmallLetter ((char) ti)
		// FIXME: we will have to specify correct value for target
		&& ToDashTypeValue (ext, opt) == ToDashTypeValue (ExtenderType.None, opt)
		&& Uni.IsHiragana ((char) si) == Uni.IsHiragana ((char) ti)
		&& IsHalfKana ((char) si, opt) == IsHalfKana ((char) ti, opt);
}
// Builds the sort key of the source element that starts at s[idx], moving
// forward, and compares it with the 4-byte target key `sortkey` via
// MatchesPrimitive().
//
// `idx` is passed by reference and is advanced past everything consumed
// (contraction source, base character, following category-1 characters),
// including on the `false` returns. `ct` is assigned here only when `ext`
// is None; otherwise the caller's value is used as-is. ctx.PrevCode and
// ctx.PrevSortKey are read for extenders and updated on some paths.
//
// NOTE(review): the scratch key is not cleared here, so a byte that is not
// written on a given path keeps its earlier content.
unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
{
	COpt options = ctx.Option;
	byte* key = ctx.Buffer1;
	bool skipLv2 = (options & COpt.IgnoreNonSpace) != 0;
	int code = -1;

	if (ext == ExtenderType.None) {
		ct = GetContraction (s, idx, end);
	} else if (ctx.PrevCode >= 0) {
		// An extender is interpreted relative to the previous code.
		code = FilterExtender (ctx.PrevCode, ext, options);
	} else if (ctx.PrevSortKey != null) {
		// No previous code, but a previous key: reuse it as scratch.
		key = ctx.PrevSortKey;
	} else {
		// Extender with nothing before it.
		idx++;
		return false;
	}

	if (ct != null) {
		idx += ct.Source.Length;
		// if lv4 exists, it never matches contraction
		if (!noLv4)
			return false;

		if (ct.SortKey == null) {
			// Here is the core of LAMESPEC
			// described at the top of the source.
			int start = 0;
			return MatchesForward (ct.Replacement, ref start,
				ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
		}

		for (int n = 0; n < 4; n++)
			key [n] = sortkey [n];
		ctx.PrevCode = -1;
		ctx.PrevSortKey = key;
	} else {
		if (code < 0)
			code = FilterOptions (s [idx], options);
		idx++;

		// Build the key level by level, giving up at the first level
		// that differs from the target.
		key [0] = Category (code);
		bool matched = sortkey [0] == key [0];
		if (matched)
			key [1] = Level1 (code);
		if (!skipLv2) {
			if (sortkey [1] == key [1])
				key [2] = Level2 (code, ext);
			else
				matched = false;
		}

		if (!matched) {
			// Still consume the category-1 characters that follow.
			while (idx < end && Category (s [idx]) == 1)
				idx++;
			return false;
		}

		key [3] = Uni.Level3 (code);
		if (key [0] != 1)
			ctx.PrevCode = code;
	}

	// Add the level 2 weights of the following category-1 characters.
	// The first addition onto a zero level 2 starts from 2.
	while (idx < end && Category (s [idx]) == 1) {
		if (!skipLv2) {
			if (key [2] == 0)
				key [2] = 2;
			key [2] = (byte) (key [2] + Level2 (s [idx], ExtenderType.None));
		}
		idx++;
	}

	return MatchesPrimitive (options, key, code, ext, sortkey, ti, noLv4);
}