/// <summary>
/// Debugging aid: appends a picture of <paramref name="pos"/> within
/// <paramref name="input"/> to <paramref name="appendTo"/>. The picture has the
/// form <c>{bbb|ccc|ddd}</c>: the braces mark the context start and limit, the
/// bars mark the start and limit. The text outside the context (the "aaa" and
/// "eee" of the full <c>aaa{bbb|ccc|ddd}eee</c> form) is not emitted.
/// If the indices are not ordered, or run past the end of the input, an
/// "INVALID Position" description is appended instead.
/// </summary>
/// <param name="appendTo">Builder that receives the text.</param>
/// <param name="input">Text the position refers to.</param>
/// <param name="pos">Position to render.</param>
/// <returns><paramref name="appendTo"/>.</returns>
public static StringBuilder FormatInput(StringBuilder appendTo,
    ReplaceableString input, Transliterator.Position pos)
{
    bool wellFormed = 0 <= pos.contextStart
        && pos.contextStart <= pos.start
        && pos.start <= pos.limit
        && pos.limit <= pos.contextLimit
        && pos.contextLimit <= input.Length();

    if (!wellFormed)
    {
        appendTo.Append("INVALID Position {cs=" + pos.contextStart
            + ", s=" + pos.start
            + ", l=" + pos.limit
            + ", cl=" + pos.contextLimit
            + "} on " + input);
        return appendTo;
    }

    // The three pieces are extracted before anything is appended.
    String beforeStart = input.Substring(pos.contextStart, pos.start);
    String startToLimit = input.Substring(pos.start, pos.limit);
    String afterLimit = input.Substring(pos.limit, pos.contextLimit);

    appendTo.Append('{');
    appendTo.Append(beforeStart);
    appendTo.Append('|');
    appendTo.Append(startToLimit);
    appendTo.Append('|');
    appendTo.Append(afterLimit);
    appendTo.Append('}');
    return appendTo;
}
/// <summary>
/// Debugging aid: appends a picture of <paramref name="pos"/> within
/// <paramref name="input"/> to <paramref name="appendTo"/>. The picture has the
/// form <c>{bbb|ccc|ddd}</c>: the braces mark the context start and limit, the
/// bars mark the start and limit. The text outside the context is not emitted.
/// If the indices are not ordered, or run past the end of the input, an
/// "INVALID Position" description is appended instead.
/// </summary>
/// <param name="appendTo">Buffer that receives the text.</param>
/// <param name="input">Text the position refers to.</param>
/// <param name="pos">Position to render.</param>
/// <returns><paramref name="appendTo"/>.</returns>
public static StringBuffer FormatInput(StringBuffer appendTo,
    ReplaceableString input, Transliterator.Position pos)
{
    bool wellFormed = 0 <= pos.ContextStart
        && pos.ContextStart <= pos.Start
        && pos.Start <= pos.Limit
        && pos.Limit <= pos.ContextLimit
        && pos.ContextLimit <= input.Length;

    if (!wellFormed)
    {
        appendTo.Append("INVALID Position {cs=" + pos.ContextStart
            + ", s=" + pos.Start
            + ", l=" + pos.Limit
            + ", cl=" + pos.ContextLimit
            + "} on " + input);
        return appendTo;
    }

    // ICU4N: the second Substring argument is passed as (end - begin) in each call.
    string beforeStart = input.Substring(pos.ContextStart, pos.Start - pos.ContextStart);
    string startToLimit = input.Substring(pos.Start, pos.Limit - pos.Start);
    string afterLimit = input.Substring(pos.Limit, pos.ContextLimit - pos.Limit);

    appendTo.Append('{');
    appendTo.Append(beforeStart);
    appendTo.Append('|');
    appendTo.Append(startToLimit);
    appendTo.Append('|');
    appendTo.Append(afterLimit);
    appendTo.Append('}');
    return appendTo;
}
/// <summary>
/// Transliterates <paramref name="input"/> with <paramref name="t"/> using a
/// position object, then finishes the transliteration, reporting an error when
/// no progress was made or when start and limit differ afterwards.
/// </summary>
/// <param name="t">Transliterator under test.</param>
/// <param name="input">Text to transliterate.</param>
public void CheckIncrementalAux(Transliterator t, String input)
{
    IReplaceable test = new ReplaceableString(input);
    Transliterator.Position pos = new Transliterator.Position(0, test.Length, 0, test.Length);
    t.Transliterate(test, pos);

    bool gotError = false;

    // We have a few special cases: Any-Remove (pos.start = 0, but also = limit)
    // and U+XXXXX?X?
    bool stalled = pos.Start == 0
        && pos.Limit != 0
        && !t.ID.Equals("Hex-Any/Unicode");
    string afterFirstPass = t.ID + ": " + UtilityExtensions.FormatInput(test, pos);
    if (stalled)
    {
        Errln("No Progress, " + afterFirstPass);
        gotError = true;
    }
    else
    {
        Logln("PASS Progress, " + afterFirstPass);
    }

    t.FinishTransliteration(test, pos);
    if (pos.Start != pos.Limit)
    {
        Errln("Incomplete, " + t.ID + ": " + UtilityExtensions.FormatInput(test, pos));
        gotError = true;
    }

    if (!gotError)
    {
        // Intentionally empty; the disabled check was:
        //Errln("FAIL: Did not get expected error");
    }
}
/// <summary>
/// Overrides the transliteration hook. Walks the range [offsets.start,
/// offsets.limit) looking for "safe" characters (combining class 0 and not in
/// <c>unsafeStart</c>) and converts the text between consecutive safe
/// characters; the position indices are then adjusted by the accumulated
/// length change.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="offsets">Position indices; updated on return.</param>
/// <param name="isIncremental">When true, the trailing piece is only accepted
/// if the skippable-restricted conversion does not return
/// <c>Int32.MinValue</c>.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position offsets, bool isIncremental)
{
    int rangeStart = offsets.start;
    int rangeLimit = offsets.limit;
    if (rangeStart >= rangeLimit)
    {
        return;
    }

    int totalDelta = 0;

    // Whenever a safe character is hit, convert from the previous safe
    // character up to just before this one. At the end, convert the tail too
    // (always when not incremental; conditionally when incremental).
    // TODO: fix for surrogates
    // TODO: add QuickCheck, so we rarely convert OK stuff
    int safeStart = rangeStart; // go back to start in any event
    int scan = rangeStart + 1;
    while (scan < rangeLimit)
    {
        int codePoint = text.Char32At(scan);
        bool isSafe = IBM.ICU.Lang.UCharacter.GetCombiningClass(codePoint) == 0
            && !unsafeStart.Contains(codePoint);
        if (isSafe)
        {
            int pieceDelta = Convert(text, safeStart, scan, null);
            scan += pieceDelta;
            rangeLimit += pieceDelta;
            totalDelta += pieceDelta;
            safeStart = scan;
        }
        scan += IBM.ICU.Text.UTF16.GetCharCount(codePoint);
    }

    if (isIncremental)
    {
        // We are incremental, so accept the last characters IF they turn
        // into skippables.
        int tailDelta = Convert(text, safeStart, rangeLimit, skippable);
        if (tailDelta != Int32.MinValue)
        {
            totalDelta += tailDelta;
            safeStart = rangeLimit + tailDelta;
        }
    }
    else
    {
        int tailDelta = Convert(text, safeStart, rangeLimit, null);
        totalDelta += tailDelta;
        safeStart = rangeLimit + tailDelta;
    }

    offsets.contextLimit += totalDelta;
    offsets.limit += totalDelta;
    offsets.start = safeStart;
}
/// <summary>
/// Debugging aid: formats <paramref name="pos"/> within <paramref name="input"/>
/// into a fresh buffer via the buffer-taking <c>FormatInput</c> overload and
/// returns the result after passing it through <c>Utility.Escape</c>.
/// </summary>
/// <param name="input">Text the position refers to.</param>
/// <param name="pos">Position to render.</param>
/// <returns>The escaped rendering.</returns>
public static string FormatInput(ReplaceableString input, Transliterator.Position pos)
{
    StringBuffer rendering = new StringBuffer();
    FormatInput(rendering, input, pos);
    string unescaped = rendering.ToString();
    return Utility.Escape(unescaped);
}
/// <summary>
/// Overrides the transliteration hook by deleting the text in
/// [index.start, index.limit) and shrinking the limit and context limit by the
/// number of removed code units.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="index">Position indices; limit and contextLimit are reduced.</param>
/// <param name="incremental">Not used by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position index, bool incremental)
{
    // Our caller (filteredTransliterate) has already narrowed us
    // to an unfiltered run, so the whole run is removed.
    int runStart = index.start;
    int runLimit = index.limit;
    int removed = runLimit - runStart;

    text.Replace(runStart, runLimit, "");

    index.contextLimit -= removed;
    index.limit -= removed;
}
/// <summary>
/// Overrides the transliteration hook. Iterates over script runs within the
/// context range and, for each run that reaches past the original start, hands
/// the overlapping part of [start, limit) to the transliterator obtained from
/// <c>GetTransliterator</c> for that run's script code.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="pos">Position indices; updated on return.</param>
/// <param name="isIncremental">Whether the overall operation is incremental.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position pos, bool isIncremental)
{
    int overallStart = pos.start;
    int overallLimit = pos.limit;

    AnyTransliterator.ScriptRunIterator runs =
        new AnyTransliterator.ScriptRunIterator(text, pos.contextStart, pos.contextLimit);

    while (runs.Next())
    {
        // Ignore runs in the ante context
        if (runs.limit <= overallStart)
        {
            continue;
        }

        // Try to instantiate transliterator from the run's script code to
        // our target or target/variant
        Transliterator runTrans = GetTransliterator(runs.scriptCode);
        if (runTrans != null)
        {
            // If the run end is before the transliteration limit, do
            // a non-incremental transliteration. Otherwise do an
            // incremental one.
            bool runIncremental = isIncremental && (runs.limit >= overallLimit);

            pos.start = Math.Max(overallStart, runs.start);
            pos.limit = Math.Min(overallLimit, runs.limit);
            int limitBefore = pos.limit;

            runTrans.FilteredTransliterate(text, pos, runIncremental);

            int growth = pos.limit - limitBefore;
            overallLimit += growth;
            runs.AdjustLimit(growth);

            // We're done if we enter the post context
            if (runs.limit >= overallLimit)
            {
                break;
            }
        }
        else
        {
            // We have no transliterator. Do nothing, but keep
            // pos.start up to date.
            pos.start = runs.limit;
        }
    }

    // Restore limit. pos.start is fine where the last transliterator
    // left it, or at the end of the last run.
    pos.limit = overallLimit;
}
/// <summary>
/// Tries the rules indexed under the low byte of the code point at
/// <c>pos.start</c>, in order, until one reports a match or a partial match.
/// If none does, <c>pos.start</c> is advanced past that code point.
/// </summary>
/// <param name="text">The text to be transliterated.</param>
/// <param name="pos">The position indices, which will be updated.</param>
/// <param name="incremental">Passed through to each rule's
/// <c>MatchAndReplace</c>; also selects the wording of the debug output.</param>
/// <returns>false when a rule reports <c>U_PARTIAL_MATCH</c>; true otherwise
/// (a rule matched, or no rule matched and start was advanced).</returns>
public bool Transliterate(Replaceable text, Transliterator.Position pos, bool incremental)
{
    int indexByte = text.Char32At(pos.start) & 0xFF;

    int ruleNo = index[indexByte];
    while (ruleNo < index[indexByte + 1])
    {
        int degree = rules[ruleNo].MatchAndReplace(text, pos, incremental);

        if (degree == IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
        {
            if (IBM.ICU.Text.Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: match " : "Rule: match ";
                System.Console.Out.WriteLine(label
                    + rules[ruleNo].ToRule(true)
                    + " => "
                    + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }

        if (degree == IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH)
        {
            if (IBM.ICU.Text.Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                System.Console.Out.WriteLine(label
                    + rules[ruleNo].ToRule(true)
                    + " => "
                    + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
            }
            return false;
        }

        if (IBM.ICU.Text.Transliterator.DEBUG)
        {
            System.Console.Out.WriteLine("Rule: no match " + rules[ruleNo]);
        }
        ++ruleNo;
    }

    // No match or partial match from any rule
    int skipped = text.Char32At(pos.start);
    pos.start += IBM.ICU.Text.UTF16.GetCharCount(skipped);
    if (IBM.ICU.Text.Transliterator.DEBUG)
    {
        string label = incremental ? "Rule.i: no match => " : "Rule: no match => ";
        System.Console.Out.WriteLine(label
            + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
    }
    return true;
}
/// <summary>
/// Overrides the transliteration hook. Replaces every character in
/// [pos.start, pos.limit) by <c>prefix</c> + its number (formatted with
/// <c>radix</c> and <c>minDigits</c>) + <c>suffix</c>. Characters above 0xFFFF
/// use <c>supplementalHandler</c>'s prefix, radix, minDigits and suffix instead
/// when such a handler is set.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="pos">Position indices; updated on return.</param>
/// <param name="incremental">Not used by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position pos, bool incremental)
{
    int cursor = pos.start;
    int end = pos.limit;

    StringBuilder escaped = new StringBuilder(prefix);
    int prefixLen = prefix.Length;
    // True while the builder holds the supplemental handler's prefix rather than ours.
    bool redoPrefix = false;

    while (cursor < end)
    {
        int c;
        int charLen;
        if (grokSupplementals)
        {
            c = (int)(text.Char32At(cursor));
            charLen = IBM.ICU.Text.UTF16.GetCharCount(c);
        }
        else
        {
            c = (int)(text.CharAt(cursor));
            charLen = 1;
        }

        // -65536 is 0xFFFF0000: non-zero when c does not fit in 16 bits.
        bool useSupplemental = (c & -65536) != 0 && supplementalHandler != null;
        if (useSupplemental)
        {
            escaped.Length = 0;
            escaped.Append(supplementalHandler.prefix);
            IBM.ICU.Impl.Utility.AppendNumber(escaped, c, supplementalHandler.radix,
                supplementalHandler.minDigits);
            escaped.Append(supplementalHandler.suffix);
            redoPrefix = true;
        }
        else
        {
            if (redoPrefix)
            {
                escaped.Length = 0;
                escaped.Append(prefix);
                redoPrefix = false;
            }
            else
            {
                // Our prefix is still at the front; just drop the previous digits and suffix.
                escaped.Length = prefixLen;
            }
            IBM.ICU.Impl.Utility.AppendNumber(escaped, c, radix, minDigits);
            escaped.Append(suffix);
        }

        text.Replace(cursor, cursor + charLen, escaped.ToString());
        cursor += escaped.Length;
        end += escaped.Length - charLen;
    }

    pos.contextLimit += end - pos.limit;
    pos.limit = end;
    pos.start = cursor;
}
/// <summary>
/// Tries the rules indexed under the low byte of the code point at
/// <c>pos.Start</c>, in order, until one reports a match or a partial match.
/// If none does, <c>pos.Start</c> is advanced past that code point.
/// </summary>
/// <param name="text">The text to be transliterated.</param>
/// <param name="pos">The position indices, which will be updated.</param>
/// <param name="incremental">Passed through to each rule's
/// <c>MatchAndReplace</c>; also selects the wording of the debug output.</param>
/// <returns>false when a rule reports <see cref="MatchDegree.PartialMatch"/>;
/// true otherwise (a rule matched, or no rule matched and start was
/// advanced).</returns>
public virtual bool Transliterate(IReplaceable text, Transliterator.Position pos, bool incremental)
{
    int indexByte = text.Char32At(pos.Start) & 0xFF;

    int ruleNo = index[indexByte];
    while (ruleNo < index[indexByte + 1])
    {
        MatchDegree degree = rules[ruleNo].MatchAndReplace(text, pos, incremental);

        if (degree == MatchDegree.Match)
        {
            if (Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: match " : "Rule: match ";
                Console.Out.WriteLine(label
                    + rules[ruleNo].ToRule(true)
                    + " => "
                    + UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }

        if (degree == MatchDegree.PartialMatch)
        {
            if (Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                Console.Out.WriteLine(label
                    + rules[ruleNo].ToRule(true)
                    + " => "
                    + UtilityExtensions.FormatInput(text, pos));
            }
            return false;
        }

        if (Transliterator.DEBUG)
        {
            Console.Out.WriteLine("Rule: no match " + rules[ruleNo]);
        }
        ++ruleNo;
    }

    // No match or partial match from any rule
    int skipped = text.Char32At(pos.Start);
    pos.Start += UTF16.GetCharCount(skipped);
    if (Transliterator.DEBUG)
    {
        string label = incremental ? "Rule.i: no match => " : "Rule: no match => ";
        Console.Out.WriteLine(label + UtilityExtensions.FormatInput(text, pos));
    }
    return true;
}
/// <summary>
/// Checks that <paramref name="t"/> turns <paramref name="source"/> into
/// <paramref name="expectedResult"/> in three ways: on a string, on a
/// <c>ReplaceableString</c>, and character by character ("keyboard" mode)
/// followed by <c>FinishTransliteration</c>.
/// </summary>
/// <param name="t">Transliterator under test.</param>
/// <param name="source">Input text.</param>
/// <param name="expectedResult">Text expected after transliteration.</param>
private void expect(Transliterator t, String source, String expectedResult)
{
    // 1. Plain string.
    String actual = t.Transliterate(source);
    expectAux(t.ID + ":String", source, actual, expectedResult);

    // 2. Replaceable.
    ReplaceableString buffer = new ReplaceableString(source);
    t.Transliterate(buffer);
    actual = buffer.ToString();
    expectAux(t.ID + ":Replaceable", source, actual, expectedResult);

    // 3. Keyboard (incremental) transliteration -- this result
    // must be the same after we finalize (see below).
    buffer.Replace(0, buffer.Length, "");
    Transliterator.Position keyPos = new Transliterator.Position();
    StringBuffer trace = new StringBuffer();

    for (int k = 0; k < source.Length; ++k)
    {
        if (k != 0)
        {
            trace.Append(" + ");
        }
        char typed = source[k];
        trace.Append(typed).Append(" -> ");
        t.Transliterate(buffer, keyPos, typed + "");

        // Append the current text with a vertical bar '|' where
        // the committed index is.
        String snapshot = buffer.ToString();
        String committed = snapshot.Substring(0, keyPos.Start); // ICU4N: Checked 2nd parameter
        String pending = snapshot.Substring(keyPos.Start);
        trace.Append(committed).Append('|').Append(pending);
    }

    // As a final step in keyboard transliteration, we must call
    // FinishTransliteration to finish off any pending partial matches that
    // were waiting for more input.
    t.FinishTransliteration(buffer, keyPos);
    actual = buffer.ToString();
    trace.Append(" => ").Append(buffer.ToString());

    bool passed = actual.Equals(expectedResult);
    expectAux(t.ID + ":Keyboard", trace.ToString(), passed, expectedResult);
}
/// <summary>
/// Overrides the transliteration hook: repeatedly asks the rule set to
/// transliterate at <c>index.start</c> until start reaches limit, the rule
/// set returns false, or the iteration cap is exceeded.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="index">Position indices, updated by the rule set.</param>
/// <param name="incremental">Passed through to the rule set.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position index, bool incremental)
{
    /*
     * We keep start and limit fixed the entire time, relative to the text
     * -- limit may move numerically if text is inserted or removed. The
     * cursor moves from start to limit, with replacements happening under
     * it.
     *
     * Example: rules 1. ab>x|y 2. yc>z
     *
     * |eabcd   start - no match, advance cursor
     * e|abcd   match rule 1 - change text & adjust cursor
     * ex|ycd   match rule 2 - change text & adjust cursor
     * exz|d    no match, advance cursor
     * exzd|    done
     */

    /*
     * A rule like a>b|a creates an infinite loop. To prevent that, we put
     * an arbitrary limit on the number of iterations that we take, one that
     * is high enough that any reasonable rules are ok, but low enough to
     * prevent a server from hanging. The limit is 16 times the number of
     * characters n, capped at 0x7FFFFFFF.
     */
    int loopCount = 0;

    // Compute 16n in 64 bits. A 32-bit shift can wrap around to a small
    // positive value for very large n, which a "< 0" test would not catch and
    // which would end the loop long before the text has been processed.
    long scaled = ((long)index.limit - index.start) << 4;
    int loopLimit = (scaled < 0 || scaled > 0x7FFFFFFF) ? 0x7FFFFFFF : (int)scaled;

    while (index.start < index.limit
        && loopCount <= loopLimit
        && data.ruleSet.Transliterate(text, index, incremental))
    {
        ++loopCount;
    }
}
/// <summary>
/// Overrides the transliteration hook. Replaces each code point in
/// [offsets.start, offsets.limit) for which <c>GetExtendedName</c> returns a
/// name by <c>OPEN_DELIM</c> + name + <c>CLOSE_DELIM</c>; code points without a
/// name are stepped over one code unit at a time.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="offsets">Position indices; updated on return.</param>
/// <param name="isIncremental">Not used by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position offsets, bool isIncremental)
{
    int cursor = offsets.start;
    int end = offsets.limit;

    // The builder always starts with the open delimiter; only the part after
    // it is rewritten for each character.
    StringBuilder replacement = new StringBuilder();
    replacement.Append(OPEN_DELIM);

    while (cursor < end)
    {
        int c = text.Char32At(cursor);
        String charName = IBM.ICU.Lang.UCharacter.GetExtendedName(c);
        if (charName == null)
        {
            ++cursor;
            continue;
        }

        replacement.Length = OPEN_DELIM_LEN;
        replacement.Append(charName).Append(CLOSE_DELIM);

        int clen = IBM.ICU.Text.UTF16.GetCharCount(c);
        text.Replace(cursor, cursor + clen, replacement.ToString());

        int insertedLen = replacement.Length;
        cursor += insertedLen;        // step past the inserted text
        end += insertedLen - clen;    // change in length
    }

    offsets.contextLimit += end - offsets.limit;
    offsets.limit = end;
    offsets.start = cursor;
}
/// <summary>
/// Exercises error handling of a "Latin-Greek" transliterator: out-of-range
/// start/limit arguments, out-of-range position objects, insertion with a
/// string and with a code point, a bogus transliterator ID, the inverse of an
/// unregistered rule-based transliterator, and malformed rules.
/// </summary>
public void TestTransliteratorErrors()
{
    const string Pangram = "A quick fox jumped over the lazy dog.";

    String latinGreekId = "Latin-Greek";
    String bogusID = "LATINGREEK-GREEKLATIN";
    String newID = "Bogus-Latin";
    String newIDRules = "zzz > Z; f <> ph";
    String bogusRules = "a } [b-g m-p ";
    ReplaceableString testString = new ReplaceableString(Pangram);
    String insertString = "cats and dogs";
    Transliterator.Position pos = new Transliterator.Position();

    Transliterator t = Transliterator.GetInstance(latinGreekId, Transliterator.FORWARD);
    if (t == null)
    {
        Errln("FAIL: construction of Latin-Greek");
        return;
    }

    // --- Transliterate(text, start, limit) with a limit past the end ---
    int len = testString.Length;
    int stoppedAt = t.Transliterate(testString, 0, 100);
    if (stoppedAt != -1)
    {
        Errln("FAIL: Out of bounds check failed (1).");
    }
    else if (testString.Length != len)
    {
        testString = new ReplaceableString(Pangram);
        Errln("FAIL: Transliterate fails and the target string was modified.");
    }

    // --- Transliterate(text, start, limit) with a start past the end ---
    stoppedAt = t.Transliterate(testString, 100, testString.Length - 1);
    if (stoppedAt != -1)
    {
        Errln("FAIL: Out of bounds check failed (2).");
    }
    else if (testString.Length != len)
    {
        testString = new ReplaceableString(Pangram);
        Errln("FAIL: Transliterate fails and the target string was modified.");
    }

    // --- Position with start out of bounds ---
    pos.Start = 100;
    pos.Limit = testString.Length;
    try
    {
        t.Transliterate(testString, pos);
        Errln("FAIL: Start offset is out of bounds, error not reported.");
    }
    catch (ArgumentException)
    {
        Logln("Start offset is out of bounds and detected.");
    }

    // --- Position with limit out of bounds ---
    pos.Limit = 100;
    pos.Start = 0;
    try
    {
        t.Transliterate(testString, pos);
        Errln("FAIL: Limit offset is out of bounds, error not reported.\n");
    }
    catch (ArgumentException)
    {
        Logln("Start offset is out of bounds and detected.");
    }

    // --- Insertion of a string ---
    len = testString.Length;
    pos.ContextLimit = len;
    pos.ContextStart = 0;
    pos.Limit = len - 1;
    pos.Start = 5;
    try
    {
        t.Transliterate(testString, pos, insertString);
        if (len == pos.Limit)
        {
            Errln("FAIL: Test insertion with string: the transliteration position limit didn't change as expected.");
        }
    }
    catch (ArgumentException)
    {
        Errln("Insertion test with string failed for some reason.");
    }

    // --- Insertion of a single code point ---
    pos.ContextStart = 0;
    pos.ContextLimit = testString.Length;
    pos.Limit = testString.Length - 1;
    pos.Start = 5;
    try
    {
        t.Transliterate(testString, pos, 0x0061);
        if (len == pos.Limit)
        {
            Errln("FAIL: Test insertion with character: the transliteration position limit didn't change as expected.");
        }
    }
    catch (ArgumentException)
    {
        Errln("FAIL: Insertion test with UTF-16 code point failed for some reason.");
    }

    // --- Insertion with limit greater than contextLimit ---
    len = testString.Length;
    pos.Limit = len;
    pos.ContextStart = 0;
    pos.ContextLimit = testString.Length - 1;
    pos.Start = 5;
    try
    {
        t.Transliterate(testString, pos, insertString);
        Errln("FAIL: Out of bounds check failed (3).");
        if (testString.Length != len)
        {
            Errln("FAIL: The input string was modified though the offsets were out of bounds.");
        }
    }
    catch (ArgumentException)
    {
        Logln("Insertion test with out of bounds indexes.");
    }

    // --- Bogus ID ---
    try
    {
        Transliterator fromBogusId = Transliterator.GetInstance(bogusID, Transliterator.FORWARD);
        if (fromBogusId != null)
        {
            Errln("FAIL: construction of bogus ID \"LATINGREEK-GREEKLATIN\"");
        }
    }
    catch (ArgumentException)
    {
    }

    // --- Inverse of an unregistered rule-based transliterator ---
    //try { // unneeded - Exception cannot be thrown
    Transliterator unregistered = Transliterator.CreateFromRules(
        newID,
        newIDRules,
        Transliterator.FORWARD);
    try
    {
        Transliterator inverse = unregistered.GetInverse();
        Errln("FAIL: The newID transliterator was not registered so createInverse should fail.");
        if (inverse != null)
        {
            Errln("FAIL: The newID transliterator was not registered so createInverse should fail.");
        }
    }
    catch (Exception)
    {
    }
    //} catch (Exception e) { }

    // --- Malformed rules ---
    try
    {
        Transliterator fromBogusRules = Transliterator.CreateFromRules(
            newID,
            bogusRules,
            Transliterator.FORWARD);
        if (fromBogusRules != null)
        {
            Errln("FAIL: The rules is malformed but error was not reported.");
        }
    }
    catch (Exception)
    {
    }
}
/// <exclude/>
/// <summary>
/// Overrides the transliteration hook without touching the text: the start
/// index is simply moved to the limit.
/// </summary>
/// <param name="text">Not used by this implementation.</param>
/// <param name="offsets">Position indices; <c>start</c> is set to <c>limit</c>.</param>
/// <param name="incremental">Not used by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position offsets, bool incremental)
{
    int end = offsets.limit;
    offsets.start = end;
}
/// <summary>
/// Tests transliteration through compound transliterators: first
/// "Any-Hex;Hex-Any" on "abcabc" (via <c>expect</c> and via explicit position
/// objects), then a table of compound IDs with their input and expected output.
/// </summary>
/// <exception cref="ArgumentException">Rethrown, after logging a failure, when
/// a compound transliterator cannot be constructed.</exception>
public void TestTransliterate()
{
    Logln("Testing the handleTransliterate() API of CompoundTransliterator");
    Transliterator ct1 = null;
    try
    {
        ct1 = Transliterator.GetInstance("Any-Hex;Hex-Any");
    }
    catch (ArgumentException)
    {
        Errln("FAIL: construction using CompoundTransliterator(String ID) failed for " + "Any-Hex;Hex-Any");
        // "throw;" (not "throw ex;") keeps the original stack trace.
        throw;
    }

    String s = "abcabc";
    expect(ct1, s, s);

    // Default-constructed position.
    Transliterator.Position index = new Transliterator.Position();
    ReplaceableString rsource2 = new ReplaceableString(s);
    String expectedResult = s;
    ct1.Transliterate(rsource2, index);
    ct1.FinishTransliteration(rsource2, index);
    String result = rsource2.ToString();
    expectAux(ct1.ID + ":ReplaceableString, index(0,0,0,0)", s + "->" + rsource2, result.Equals(expectedResult), expectedResult);

    // Explicit position.
    Transliterator.Position index2 = new Transliterator.Position(1, 3, 2, 3);
    ReplaceableString rsource3 = new ReplaceableString(s);
    ct1.Transliterate(rsource3, index2);
    ct1.FinishTransliteration(rsource3, index2);
    result = rsource3.ToString();
    expectAux(ct1.ID + ":String, index2(1,2,2,3)", s + "->" + rsource3, result.Equals(expectedResult), expectedResult);

    String[] Data = {
        //ID, input string, transliterated string
        "Any-Hex;Hex-Any;Any-Hex", "hello", "\\u0068\\u0065\\u006C\\u006C\\u006F",
        "Any-Hex;Hex-Any", "hello! How are you?", "hello! How are you?",
        "Devanagari-Latin;Latin-Devanagari", "\u092D\u0948'\u0930'\u0935", "\u092D\u0948\u0930\u0935", // quotes lost
        "Latin-Cyrillic;Cyrillic-Latin", "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h", "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h",
        "Latin-Greek;Greek-Latin", "ABGabgAKLMN", "ABGabgAKLMN",
        //"Latin-Arabic;Arabic-Latin", "Ad'r'a'b'i'k'dh'dd'gh", "Adrabikdhddgh",
        "Hiragana-Katakana", "\u3041\u308f\u3099\u306e\u304b\u3092\u3099", "\u30A1\u30f7\u30ce\u30ab\u30fa",
        "Hiragana-Katakana;Katakana-Hiragana", "\u3041\u308f\u3099\u306e\u304b\u3051", "\u3041\u308f\u3099\u306e\u304b\u3051",
        "Katakana-Hiragana;Hiragana-Katakana", "\u30A1\u30f7\u30ce\u30f5\u30f6", "\u30A1\u30f7\u30ce\u30ab\u30b1",
        "Latin-Katakana;Katakana-Latin", "vavivuvevohuzizuzoninunasesuzezu", "vavivuvevohuzizuzoninunasesuzezu",
    };

    // Each table row is a triple: ID, input, expected output.
    Transliterator ct2 = null;
    for (int i = 0; i < Data.Length; i += 3)
    {
        try
        {
            ct2 = Transliterator.GetInstance(Data[i + 0]);
        }
        catch (ArgumentException)
        {
            Errln("FAIL: CompoundTransliterator construction failed for " + Data[i + 0]);
            // "throw;" (not "throw ex;") keeps the original stack trace.
            throw;
        }
        expect(ct2, Data[i + 1], Data[i + 2]);
    }
}
/// <summary>
/// Overrides the transliteration hook. Scans [offsets.start, offsets.limit)
/// for an open pattern (<c>OPEN_PAT</c>) followed by a character name and
/// <c>CLOSE_DELIM</c>; when <c>GetCharFromExtendedName</c> resolves the
/// collected name, the whole sequence is replaced by that character.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="offsets">Position indices; updated on return.</param>
/// <param name="isIncremental">When true and an open-delimiter candidate is
/// still pending, <c>offsets.start</c> is left at that candidate instead of
/// at the cursor.</param>
protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets, bool isIncremental)
{
    int maxLen = IBM.ICU.Impl.UCharacterName.GetInstance().GetMaxCharNameLength() + 1; // allow for temporary trailing space

    StringBuilder name = new StringBuilder(maxLen);

    // Get the legal character set
    UnicodeSet legal = new UnicodeSet();
    IBM.ICU.Impl.UCharacterName.GetInstance().GetCharNameCharacters(legal);

    int cursor = offsets.start;
    int limit = offsets.limit;

    // Modes:
    // 0 - looking for open delimiter
    // 1 - after open delimiter
    int mode = 0;
    int openPos = -1; // open delim candidate pos

    int c;
    while (cursor < limit)
    {
        c = text.Char32At(cursor);

        switch (mode)
        {
            case 0: // looking for open delimiter
                if (c == OPEN_DELIM) // quick check first
                {
                    openPos = cursor;
                    int i = IBM.ICU.Impl.Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                    // Only switch modes when the pattern parsed and text remains after it.
                    if (i >= 0 && i < limit)
                    {
                        mode = 1;
                        name.Length = 0;
                        cursor = i;
                        continue; // *** reprocess char32At(cursor)
                    }
                }
                break;

            case 1: // after open delimiter
                // Look for legal chars. If \s+ is found, convert it
                // to a single space. If closeDelimiter is found, exit
                // the loop. If any other character is found, exit the
                // loop. If the limit is reached, exit the loop.

                // Convert \s+ => SPACE. This assumes there are no
                // runs of >1 space characters in names.
                if (IBM.ICU.Impl.UCharacterProperty.IsRuleWhiteSpace(c))
                {
                    // Ignore leading whitespace
                    if (name.Length > 0 && name[name.Length - 1] != SPACE)
                    {
                        name.Append(SPACE);
                        // If we are too long then abort. maxLen includes
                        // temporary trailing space, so use '>'.
                        if (name.Length > maxLen)
                        {
                            mode = 0;
                        }
                    }
                    break;
                }

                if (c == CLOSE_DELIM)
                {
                    int len = name.Length;

                    // Delete trailing space, if any
                    if (len > 0 && name[len - 1] == SPACE)
                    {
                        name.Length = --len;
                    }

                    // Note: c is reused here to hold the looked-up character (-1 on failure).
                    c = IBM.ICU.Lang.UCharacter.GetCharFromExtendedName(name.ToString());
                    if (c != -1)
                    {
                        // Lookup succeeded

                        // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                        cursor++; // advance over CLOSE_DELIM

                        String str = IBM.ICU.Text.UTF16.ValueOf(c);
                        text.Replace(openPos, cursor, str);

                        // Adjust indices for the change in the length of
                        // the string. Do not assume that str.length() ==
                        // 1, in case of surrogates.
                        int delta = cursor - openPos - str.Length;
                        cursor -= delta;
                        limit -= delta;
                        // assert(cursor == openPos + str.length());
                    }
                    // If the lookup failed, we leave things as-is and
                    // still switch to mode 0 and continue.
                    mode = 0;
                    openPos = -1; // close off candidate
                    continue; // *** reprocess char32At(cursor)
                }

                if (legal.Contains(c))
                {
                    IBM.ICU.Text.UTF16.Append(name, c);
                    // If we go past the longest possible name then abort.
                    // maxLen includes temporary trailing space, so use '>='.
                    if (name.Length >= maxLen)
                    {
                        mode = 0;
                    }
                }
                // Invalid character
                else
                {
                    // The decrement is undone by the cursor advance after the switch.
                    --cursor; // Backup and reprocess this character
                    mode = 0;
                }
                break;
        }

        // Reached via "break" only; the "continue" paths above skip this advance.
        cursor += IBM.ICU.Text.UTF16.GetCharCount(c);
    }

    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;
    // In incremental mode, only advance the cursor up to the last
    // open delimiter candidate.
    offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
}
/// <summary>
/// Convenience overload: casts <paramref name="input"/> to
/// <c>ReplaceableString</c> and forwards to the overload taking that type.
/// </summary>
/// <param name="appendTo">Buffer that receives the text.</param>
/// <param name="input">Text the position refers to; must be a
/// <c>ReplaceableString</c> for the cast to succeed.</param>
/// <param name="pos">Position to render.</param>
/// <returns>The result of the forwarded call.</returns>
public static StringBuffer FormatInput(StringBuffer appendTo, IReplaceable input, Transliterator.Position pos)
{
    ReplaceableString concrete = (ReplaceableString)input;
    return FormatInput(appendTo, concrete, pos);
}
/// <summary>
/// Overrides the transliteration hook. Walks the code points of
/// [offsets.start, offsets.limit) through <c>csp.ToFullUpper</c> and replaces
/// those whose mapping differs, keeping limit and contextLimit in step with the
/// length changes.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="offsets">Position indices; updated on return.</param>
/// <param name="isIncremental">When true and the iterator reports that it
/// reached its limit, processing stops with <c>offsets.start</c> at the start
/// of the current code point.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position offsets, bool isIncremental)
{
    if (csp == null)
    {
        return;
    }
    if (offsets.start >= offsets.limit)
    {
        return;
    }

    iter.SetText(text);
    result.Length = 0;

    // Walk through original string.
    // If there is a case change, modify corresponding position in replaceable.
    iter.SetIndex(offsets.start);
    iter.SetLimit(offsets.limit);
    iter.SetContextLimits(offsets.contextStart, offsets.contextLimit);

    for (;;)
    {
        int c = iter.NextCaseMapCP();
        if (c < 0)
        {
            break;
        }

        c = csp.ToFullUpper(c, iter, result, locale, locCache);

        if (iter.DidReachLimit() && isIncremental)
        {
            // The case mapping function tried to look beyond the context
            // limit; wait for more input.
            offsets.start = iter.GetCaseMapCPStart();
            return;
        }

        /* decode the result */
        if (c < 0)
        {
            /* c mapped to itself, no change */
            continue;
        }

        int lengthChange;
        if (c <= IBM.ICU.Impl.UCaseProps.MAX_STRING_LENGTH)
        {
            /* replace by the mapping string */
            lengthChange = iter.Replace(result.ToString());
            result.Length = 0;
        }
        else
        {
            /* replace by single-code point mapping */
            lengthChange = iter.Replace(IBM.ICU.Text.UTF16.ValueOf(c));
        }

        if (lengthChange != 0)
        {
            offsets.limit += lengthChange;
            offsets.contextLimit += lengthChange;
        }
    }

    offsets.start = offsets.limit;
}
/// <summary>
/// Attempt a match and replacement at the given position. Return
/// the degree of match between this rule and the given text. The
/// degree of match may be mismatch, a partial match, or a full
/// match. A mismatch means at least one character of the text
/// does not match the context or key. A partial match means some
/// context and key characters match, but the text is not long
/// enough to match all of them. A full match means all context
/// and key characters match.
/// <para/>
/// If a full match is obtained, perform a replacement, update pos,
/// and return <see cref="MatchDegree.Match"/>. Otherwise the method returns
/// before the replacement step and before any index in pos is assigned.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="pos">The position indices.</param>
/// <param name="incremental">If true, test for partial matches that may
/// be completed by additional text inserted at pos.Limit.</param>
/// <returns>One of <see cref="MatchDegree.Mismatch"/>,
/// <see cref="MatchDegree.PartialMatch"/>, or <see cref="MatchDegree.Match"/>.
/// (Per the original ICU documentation, a partial match is not returned when
/// <paramref name="incremental"/> is false.)</returns>
public virtual MatchDegree MatchAndReplace(IReplaceable text, Transliterator.Position pos, bool incremental)
{
    // Matching and replacing are done in one method because the
    // replacement operation needs information obtained during the
    // match. Another way to do this is to have the match method
    // create a match result struct with relevant offsets, and to pass
    // this into the replace method.

    // ============================ MATCH ===========================

    // Reset segment match data
    if (segments != null)
    {
        for (int i = 0; i < segments.Length; ++i)
        {
            ((StringMatcher)segments[i]).ResetMatch();
        }
    }

    int keyLimit;
    // One-element array used as an in/out offset for Matches() and Replace().
    int[] intRef = new int[1];

    // ------------------------ Ante Context ------------------------

    // A mismatch in the ante context, or with the start anchor,
    // is an outright mismatch regardless of whether we are
    // incremental or not.
    int oText; // offset into 'text'
    int minOText;

    // Note (1): We process text in 16-bit code units, rather than
    // 32-bit code points. This works because stand-ins are
    // always in the BMP and because we are doing a literal match
    // operation, which can be done 16-bits at a time.

    int anteLimit = PosBefore(text, pos.ContextStart);

    MatchDegree match;

    // Start reverse match at char before pos.start
    intRef[0] = PosBefore(text, pos.Start);

    if (anteContext != null)
    {
        match = anteContext.Matches(text, intRef, anteLimit, false);
        if (match != MatchDegree.Match)
        {
            return(MatchDegree.Mismatch);
        }
    }

    oText = intRef[0];

    // Lower bound later applied to the new value of pos.Start.
    minOText = PosAfter(text, oText);

    // ------------------------ Start Anchor ------------------------

    if (((flags & ANCHOR_START) != 0) && oText != anteLimit)
    {
        return(MatchDegree.Mismatch);
    }

    // -------------------- Key and Post Context --------------------

    intRef[0] = pos.Start;

    if (key != null)
    {
        match = key.Matches(text, intRef, pos.Limit, incremental);
        if (match != MatchDegree.Match)
        {
            return(match);
        }
    }

    keyLimit = intRef[0];

    if (postContext != null)
    {
        if (incremental && keyLimit == pos.Limit)
        {
            // The key matches just before pos.limit, and there is
            // a postContext. Since we are in incremental mode,
            // we must assume more characters may be inserted at
            // pos.limit -- this is a partial match.
            return(MatchDegree.PartialMatch);
        }

        match = postContext.Matches(text, intRef, pos.ContextLimit, incremental);
        if (match != MatchDegree.Match)
        {
            return(match);
        }
    }

    oText = intRef[0];

    // ------------------------- Stop Anchor ------------------------

    if (((flags & ANCHOR_END)) != 0)
    {
        if (oText != pos.ContextLimit)
        {
            return(MatchDegree.Mismatch);
        }
        if (incremental)
        {
            return(MatchDegree.PartialMatch);
        }
    }

    // =========================== REPLACE ==========================

    // We have a full match. The key is between pos.start and
    // keyLimit.

    int newLength = output.Replace(text, pos.Start, keyLimit, intRef);
    int lenDelta = newLength - (keyLimit - pos.Start);
    // Replace() leaves the proposed new start position in intRef[0].
    int newStart = intRef[0];

    oText += lenDelta;
    pos.Limit += lenDelta;
    pos.ContextLimit += lenDelta;
    // Restrict new value of start to [minOText, min(oText, pos.limit)].
    pos.Start = Math.Max(minOText, Math.Min(Math.Min(oText, pos.Limit), newStart));
    return(MatchDegree.Match);
}
/// <exclude/>
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable,IBM.ICU.Text.Transliterator.Position,System.Boolean)"/>
/// by handing the same run of text to every transliterator in <c>trans</c>, in order.
/// </summary>
/// <param name="text">The text that the component transliterators modify in place.</param>
/// <param name="index">
/// Position record for the run. On return, <c>start</c> is wherever the last
/// component left it and <c>limit</c> is the incoming limit adjusted by the
/// accumulated change in length.
/// </param>
/// <param name="incremental">
/// When true, each component after the first only sees the text that the
/// preceding component has already committed.
/// </param>
/// <exception cref="T:System.InvalidOperationException">
/// A component did not advance <c>start</c> to <c>limit</c> although
/// <paramref name="incremental"/> is false.
/// </exception>
protected internal override void HandleTransliterate(Replaceable text,
        Transliterator.Position index, bool incremental)
{
    /*
     * Call each transliterator with the same start value and initial cursor
     * index, but with the limit index as modified by preceding
     * transliterators. The cursor index must be reset for each
     * transliterator to give each a chance to transliterate the text. The
     * initial cursor index is known to still point to the same place after
     * each transliterator is called because each transliterator will not
     * change the text between start and the initial value of cursor.
     *
     * IMPORTANT: After the first transliterator, each subsequent
     * transliterator only gets to transliterate text committed by preceding
     * transliterators; that is, the cursor (output value) of transliterator
     * i becomes the limit (input value) of transliterator i+1. Finally, the
     * overall limit is fixed up before we return.
     *
     * Assumptions we make here:
     * (1) contextStart <= start <= limit <= contextLimit <= text.length()
     * (2) start <= start' <= limit'   ; cursor doesn't move back
     * (3) start <= limit'             ; text before cursor unchanged
     * - start' is the value of start after calling handleKT
     * - limit' is the value of limit after calling handleKT
     */

    /*
     * Example: 3 transliterators. This example illustrates the mechanics we
     * need to implement. C, S, and L are the contextStart, start, and
     * limit. gl is the globalLimit. contextLimit is equal to limit
     * throughout. (Column alignment below is approximate.)
     *
     * 1. h-u, changes hex to Unicode
     *
     *    4  7  a  d  0      4  7  a
     *    abc/u0061/u    =>  abca/u
     *    C  S       L       C   S L     gl=f->a
     *
     * 2. upup, changes "x" to "XX"
     *
     *    4  7  a       4  7  a
     *    abca/u    =>  abcAA/u
     *    C  SL         C    S L         gl=a->b
     *
     * 3. u-h, changes Unicode to hex
     *
     *    4  7  a        4  7  a  d  0  3
     *    abcAA/u    =>  abc/u0041/u0041/u
     *    C  S L         C              S L   gl=b->15
     *
     * 4. return
     *
     *    4  7  a  d  0  3
     *    abc/u0041/u0041/u
     *    C  S  L
     */
    if (trans.Length < 1)
    {
        index.start = index.limit;
        return; // Short circuit for empty compound transliterators
    }

    // compoundLimit is the limit value for the entire compound
    // operation. We overwrite index.limit with the previous
    // index.start. After each transliteration, we update
    // compoundLimit for insertions or deletions that have happened.
    int compoundLimit = index.limit;

    // compoundStart is the start for the entire compound operation.
    int compoundStart = index.start;

    int delta = 0; // delta in length

    // Read the debug flag once: 'log' is only allocated when it is set, so
    // every later use of 'log' must be guarded by the very same value.
    bool debug = IBM.ICU.Text.Transliterator.DEBUG;
    StringBuilder log = null;
    if (debug)
    {
        log = new StringBuilder("CompoundTransliterator{" + GetID()
                + ((incremental) ? "}i: IN=" : "}: IN="));
        IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
        System.Console.Out.WriteLine(IBM.ICU.Impl.Utility.Escape(log.ToString()));
    }

    // Give each transliterator a crack at the run of characters.
    // See comments at the top of the method for more detail.
    for (int i = 0; i < trans.Length; ++i)
    {
        index.start = compoundStart; // Reset start
        int limit = index.limit;

        if (index.start == index.limit)
        {
            // Short circuit for empty range
            if (debug)
            {
                System.Console.Out.WriteLine("CompoundTransliterator[" + i + ".."
                        + (trans.Length - 1)
                        + ((incremental) ? "]i: " : "]: ")
                        + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, index)
                        + " (NOTHING TO DO)");
            }
            break;
        }

        if (debug)
        {
            log.Length = 0;
            log.Append("CompoundTransliterator[" + i + "=" + trans[i].GetID()
                    + ((incremental) ? "]i: " : "]: "));
            IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
        }

        trans[i].FilteredTransliterate(text, index, incremental);

        // In a properly written transliterator, start == limit after
        // handleTransliterate() returns when incremental is false.
        // Catch cases where the subclass doesn't do this, and throw
        // an exception. (Just pinning start to limit is a bad idea,
        // because what's probably happening is that the subclass
        // isn't transliterating all the way to the end, and it should
        // in non-incremental mode.)
        if (!incremental && index.start != index.limit)
        {
            // A specific exception type instead of bare System.Exception;
            // callers that catch Exception still catch this.
            throw new System.InvalidOperationException(
                    "ERROR: Incomplete non-incremental transliteration by "
                            + trans[i].GetID());
        }

        if (debug)
        {
            log.Append(" => ");
            IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
            System.Console.Out.WriteLine(IBM.ICU.Impl.Utility.Escape(log.ToString()));
        }

        // Cumulative delta for insertions/deletions
        delta += index.limit - limit;

        if (incremental)
        {
            // In the incremental case, only allow subsequent
            // transliterators to modify what has already been
            // completely processed by prior transliterators. In the
            // non-incremental case, allow each transliterator to
            // process the entire text.
            index.limit = index.start;
        }
    }

    compoundLimit += delta;

    // Start is good where it is -- where the last transliterator left
    // it. Limit needs to be put back where it was, modulo
    // adjustments for deletions/insertions.
    index.limit = compoundLimit;

    if (debug)
    {
        log.Length = 0;
        log.Append("CompoundTransliterator{" + GetID()
                + ((incremental) ? "}i: OUT=" : "}: OUT="));
        IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
        System.Console.Out.WriteLine(IBM.ICU.Impl.Utility.Escape(log.ToString()));
    }
}
/// <summary>
/// Walks the break-iterator boundaries inside <c>[pos.start, pos.limit)</c>,
/// remembers each boundary that has a letter or mark on both sides, and then
/// inserts <c>insertion</c> at every remembered boundary.
/// </summary>
/// <param name="text">The text that receives the insertions.</param>
/// <param name="pos">
/// Position record; <c>contextLimit</c> and <c>limit</c> grow by the total
/// inserted length and <c>start</c> is advanced on return.
/// </param>
/// <param name="incremental">
/// When true, <c>start</c> ends just after the last insertion point
/// (or at the inserted length when nothing was found); otherwise it ends at
/// <c>limit</c>.
/// </param>
protected internal override void HandleTransliterate(Replaceable text,
        Transliterator.Position pos, bool incremental)
{
    boundaryCount = 0;

    GetBreakIterator(); // Lazy-create it if necessary
    bi.SetText(new BreakTransliterator.ReplaceableCharacterIterator(text,
            pos.start, pos.limit, pos.start));

    // TODO: fix clumsy workaround used below.
    // To make things much easier, the boundaries are stacked first and the
    // insertions are done at the end. Generally there won't be too many,
    // since the input has already been filtered.
    for (int candidate = bi.First();
            candidate != IBM.ICU.Text.BreakIterator.DONE && candidate < pos.limit;
            candidate = bi.Next())
    {
        if (candidate == 0)
        {
            continue;
        }

        // HACK: check that the preceding item is a letter or mark ...
        int codePointBefore = IBM.ICU.Text.UTF16.CharAt(text, candidate - 1);
        int typeBefore = IBM.ICU.Lang.UCharacter.GetType(codePointBefore);
        if (((1 << typeBefore) & LETTER_OR_MARK_MASK) == 0)
        {
            continue;
        }

        // ... and that the following item is one as well.
        int codePointAfter = IBM.ICU.Text.UTF16.CharAt(text, candidate);
        int typeAfter = IBM.ICU.Lang.UCharacter.GetType(codePointAfter);
        if (((1 << typeAfter) & LETTER_OR_MARK_MASK) == 0)
        {
            continue;
        }

        // Double the stack when it is full.
        if (boundaryCount >= boundaries.Length)
        {
            int[] grown = new int[boundaries.Length * 2];
            System.Array.Copy(boundaries, grown, boundaries.Length);
            boundaries = grown;
        }

        boundaries[boundaryCount] = candidate;
        boundaryCount++;
    }

    int insertedLength = 0;
    int lastBoundary = 0;

    if (boundaryCount != 0) // if we found something, adjust
    {
        insertedLength = boundaryCount * insertion.Length;
        lastBoundary = boundaries[boundaryCount - 1];

        // Pop from the top of the stack, i.e. insert from the end of the
        // text backwards, so earlier offsets never need updating.
        while (boundaryCount > 0)
        {
            boundaryCount--;
            int insertAt = boundaries[boundaryCount];
            text.Replace(insertAt, insertAt, insertion);
        }
    }

    // Now fix up the return values
    pos.contextLimit += insertedLength;
    pos.limit += insertedLength;
    if (incremental)
    {
        pos.start = lastBoundary + insertedLength;
    }
    else
    {
        pos.start = pos.limit;
    }
}
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable,IBM.ICU.Text.Transliterator.Position,System.Boolean)"/>:
/// title-cases the first cased character after an uncased, non-ignorable one
/// and lower-cases cased characters that follow another cased character.
/// </summary>
/// <param name="text">The text that is case-mapped in place.</param>
/// <param name="offsets">
/// Position record; <c>limit</c> and <c>contextLimit</c> follow any change in
/// length, and <c>start</c> is advanced on return.
/// </param>
/// <param name="isIncremental">
/// When true and a mapping looked past the available context, processing
/// stops at the start of that code point so it can be retried with more input.
/// </param>
protected internal override void HandleTransliterate(Replaceable text,
        Transliterator.Position offsets, bool isIncremental)
{
    // TODO reimplement, see ustrcase.c
    // using a real word break iterator
    // instead of just looking for a transition between cased and uncased
    // characters
    // call CaseMapTransliterator::handleTransliterate() for lowercasing?
    // (set fMap)
    // needs to take isIncremental into account because case mappings are
    // context-sensitive
    // also detect when lowercasing function did not finish because of
    // context

    if (offsets.start >= offsets.limit)
    {
        return;
    }

    // Mode flag: true means the next cased character is converted with
    // toTitle, false means toLower.
    bool doTitle = true;

    // Look backwards through the preceding context. A cased character
    // (possibly followed by case-ignorable ones) means we start in toLower
    // mode; anything else, including no context at all, keeps toTitle mode.
    int scan = offsets.start - 1;
    while (scan >= offsets.contextStart)
    {
        int previous = text.Char32At(scan);
        // >0 cased (UCaseProps.LOWER etc.), ==0 uncased, <0 case-ignorable
        int previousType = csp.GetTypeOrIgnorable(previous);
        if (previousType > 0)
        {
            doTitle = false;
            break;
        }
        if (previousType == 0)
        {
            break;
        }
        // case-ignorable: keep looking further back
        scan -= IBM.ICU.Text.UTF16.GetCharCount(previous);
    }

    // Convert things after a cased character toLower; things after an
    // uncased, non-case-ignorable character toTitle. Case-ignorable
    // characters are left alone and do not change the mode.
    iter.SetText(text);
    iter.SetIndex(offsets.start);
    iter.SetLimit(offsets.limit);
    iter.SetContextLimits(offsets.contextStart, offsets.contextLimit);

    result.Length = 0;

    // Walk through the original string; wherever the case changes, modify
    // the corresponding position in the replaceable.
    int current;
    while ((current = iter.NextCaseMapCP()) >= 0)
    {
        int currentType = csp.GetTypeOrIgnorable(current);
        if (currentType < 0)
        {
            continue; // case-ignorable
        }

        int mapped = doTitle
                ? csp.ToFullTitle(current, iter, result, locale, locCache)
                : csp.ToFullLower(current, iter, result, locale, locCache);
        doTitle = (currentType == 0); // doTitle = isUncased

        if (iter.DidReachLimit() && isIncremental)
        {
            // The case mapping function tried to look beyond the context
            // limit: wait for more input.
            offsets.start = iter.GetCaseMapCPStart();
            return;
        }

        /* decode the result */
        if (mapped < 0)
        {
            /* mapped to itself, no change */
            continue;
        }

        int lengthChange;
        if (mapped <= IBM.ICU.Impl.UCaseProps.MAX_STRING_LENGTH)
        {
            /* replace by the mapping string */
            lengthChange = iter.Replace(result.ToString());
            result.Length = 0;
        }
        else
        {
            /* replace by single-code point mapping */
            lengthChange = iter.Replace(IBM.ICU.Text.UTF16.ValueOf(mapped));
        }

        if (lengthChange != 0)
        {
            offsets.limit += lengthChange;
            offsets.contextLimit += lengthChange;
        }
    }

    offsets.start = offsets.limit;
}
/// <summary>
/// Convenience overload: casts <paramref name="input"/> to
/// <c>ReplaceableString</c> and forwards to the overload that takes one.
/// </summary>
/// <param name="input">The text to format; the cast below requires it to be a <c>ReplaceableString</c>.</param>
/// <param name="pos">The position whose offsets are rendered.</param>
/// <returns>Whatever the forwarded <c>FormatInput</c> overload returns.</returns>
public static string FormatInput(IReplaceable input, Transliterator.Position pos)
{
    ReplaceableString replaceableString = (ReplaceableString)input;
    return FormatInput(replaceableString, pos);
}
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable,IBM.ICU.Text.Transliterator.Position,System.Boolean)"/>:
/// replaces each escape sequence described by <c>spec</c> with the code point
/// it encodes.
/// </summary>
/// <remarks>
/// As read by this method, <c>spec</c> is a sequence of records terminated by
/// <c>END</c>. Each record is a five-value header (prefix length, suffix
/// length, radix, minimum digits, maximum digits) followed by the prefix
/// characters and then the suffix characters.
/// </remarks>
/// <param name="text">The text that is rewritten in place.</param>
/// <param name="pos">
/// Position record; <c>start</c>, <c>limit</c> and <c>contextLimit</c> are
/// updated on return.
/// </param>
/// <param name="isIncremental">
/// When true, a sequence that is cut off by the limit after at least one
/// character matched stops processing there instead of being skipped.
/// </param>
protected internal override void HandleTransliterate(Replaceable text,
        Transliterator.Position pos, bool isIncremental)
{
    int cursor = pos.start;
    int end = pos.limit;

    while (cursor < end)
    {
        // Try every form in spec[] at the cursor. The inner loop ends when
        // a form matches or the forms run out; a partial match in
        // incremental mode jumps straight to the exit label.
        int next = 0;
        while (spec[next] != END)
        {
            // Read the header
            int prefixLength = spec[next++];
            int suffixLength = spec[next++];
            int radix = spec[next++];
            int minDigits = spec[next++];
            int maxDigits = spec[next++];

            // Remember where this form's prefix/suffix characters start and
            // move 'next' on to the following header right away.
            int affixBase = next;
            next += prefixLength + suffixLength;

            // 'probe' runs ahead of 'cursor' over the characters parsed so far.
            int probe = cursor;
            bool matched = true;

            for (int k = 0; k < prefixLength; ++k)
            {
                if (probe >= end && k > 0)
                {
                    // At least one character already matched, so this is a
                    // partial match: stop in incremental mode, otherwise
                    // go on to the next form.
                    if (isIncremental)
                    {
                        goto finish;
                    }
                    matched = false;
                    break;
                }
                if (text.CharAt(probe++) != spec[affixBase + k])
                {
                    matched = false;
                    break;
                }
            }
            if (!matched)
            {
                continue;
            }

            int value = 0;
            int digitCount = 0;
            while (true)
            {
                if (probe >= end)
                {
                    // Check for partial match in incremental mode.
                    if (probe > cursor && isIncremental)
                    {
                        goto finish;
                    }
                    break;
                }
                int ch = text.Char32At(probe);
                int digit = IBM.ICU.Lang.UCharacter.Digit(ch, radix);
                if (digit < 0)
                {
                    break;
                }
                probe += IBM.ICU.Text.UTF16.GetCharCount(ch);
                value = (value * radix) + digit;
                digitCount++;
                if (digitCount == maxDigits)
                {
                    break;
                }
            }
            if (digitCount < minDigits)
            {
                continue;
            }

            for (int k = 0; k < suffixLength; ++k)
            {
                if (probe >= end)
                {
                    // Check for partial match in incremental mode.
                    if (probe > cursor && isIncremental)
                    {
                        goto finish;
                    }
                    matched = false;
                    break;
                }
                if (text.CharAt(probe++) != spec[affixBase + prefixLength + k])
                {
                    matched = false;
                    break;
                }
            }
            if (!matched)
            {
                continue;
            }

            // Full match: swap the escape for the decoded code point and
            // shrink/grow the limit by the change in length.
            string replacement = IBM.ICU.Text.UTF16.ValueOf(value);
            text.Replace(cursor, probe, replacement);
            end -= probe - cursor - replacement.Length;

            // Stop trying forms; the next input character is parsed below.
            break;
        }

        if (cursor < end)
        {
            cursor += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(cursor));
        }
    }

finish:
    pos.contextLimit += end - pos.limit;
    pos.limit = end;
    pos.start = cursor;
}