/// <summary>
/// Debugging aid that renders the part of <paramref name="input"/> described by
/// <paramref name="pos"/> in the form <c>{bbb|ccc|ddd}</c>: the braces mark the
/// context start and context limit, and the vertical bars mark the start and limit.
/// Text outside the context range is not emitted.
/// </summary>
/// <param name="appendTo">Buffer that receives the formatted text.</param>
/// <param name="input">Text that the position indices refer to.</param>
/// <param name="pos">Position whose four indices are rendered.</param>
/// <returns><paramref name="appendTo"/>. When the indices do not satisfy
/// 0 &lt;= ContextStart &lt;= Start &lt;= Limit &lt;= ContextLimit &lt;= input.Length,
/// an "INVALID Position" diagnostic is appended instead of the formatted text.</returns>
public static StringBuffer FormatInput(StringBuffer appendTo, ReplaceableString input, TransliterationPosition pos)
{
    int contextStart = pos.ContextStart;
    int start = pos.Start;
    int limit = pos.Limit;
    int contextLimit = pos.ContextLimit;

    bool indicesAreOrdered = 0 <= contextStart
        && contextStart <= start
        && start <= limit
        && limit <= contextLimit
        && contextLimit <= input.Length;

    if (!indicesAreOrdered)
    {
        appendTo.Append("INVALID Position {cs=" + contextStart + ", s=" + start + ", l=" + limit + ", cl=" + contextLimit + "} on " + input);
        return appendTo;
    }

    // Substring takes (startIndex, length), so every second argument is a difference of indices.
    string anteContext = input.Substring(contextStart, start - contextStart);
    string active = input.Substring(start, limit - start);
    string postContext = input.Substring(limit, contextLimit - limit);

    appendTo.Append('{');
    appendTo.Append(anteContext);
    appendTo.Append('|');
    appendTo.Append(active);
    appendTo.Append('|');
    appendTo.Append(postContext);
    appendTo.Append('}');
    return appendTo;
}
/// <summary>
/// Runs <paramref name="t"/> incrementally over <paramref name="input"/> and reports an
/// error if the first pass makes no progress or if finishing the transliteration leaves
/// text between start and limit.
/// </summary>
/// <param name="t">Transliterator under test.</param>
/// <param name="input">Text to transliterate.</param>
public void CheckIncrementalAux(Transliterator t, String input)
{
    IReplaceable buffer = new ReplaceableString(input);
    TransliterationPosition position = new TransliterationPosition(0, buffer.Length, 0, buffer.Length);
    t.Transliterate(buffer, position);

    // We have a few special cases: Any-Remove (start = 0, but also = limit) and U+XXXXX?X?
    bool stalled = position.Start == 0
        && position.Limit != 0
        && !t.ID.Equals("Hex-Any/Unicode");

    bool gotError;
    if (stalled)
    {
        Errln("No Progress, " + t.ID + ": " + UtilityExtensions.FormatInput(buffer, position));
        gotError = true;
    }
    else
    {
        Logln("PASS Progress, " + t.ID + ": " + UtilityExtensions.FormatInput(buffer, position));
        gotError = false;
    }

    t.FinishTransliteration(buffer, position);
    bool incomplete = position.Start != position.Limit;
    if (incomplete)
    {
        Errln("Incomplete, " + t.ID + ": " + UtilityExtensions.FormatInput(buffer, position));
        gotError = true;
    }

    if (!gotError)
    {
        //Errln("FAIL: Did not get expected error");
    }
}
/// <summary>
/// Debugging aid that formats <paramref name="input"/> relative to <paramref name="pos"/>
/// (braces around the context range, vertical bars around the start/limit range) and
/// returns the result after passing it through <c>Utility.Escape</c>.
/// </summary>
/// <param name="input">Text that the position indices refer to.</param>
/// <param name="pos">Position whose indices are rendered.</param>
/// <returns>The escaped, formatted string.</returns>
public static string FormatInput(ReplaceableString input, TransliterationPosition pos)
{
    StringBuffer formatted = FormatInput(new StringBuffer(), input, pos);
    string raw = formatted.ToString();
    return Utility.Escape(raw);
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Deletes the text between <c>index.Start</c> and <c>index.Limit</c> and shrinks
/// <c>index.Limit</c> and <c>index.ContextLimit</c> by the number of deleted characters.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="index">The range to delete; its limit and context limit are updated.</param>
/// <param name="incremental">Not used by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition index, bool incremental)
{
    // Our caller (filteredTransliterate) has already narrowed us
    // to an unfiltered run.  Delete it.
    int len = index.Limit - index.Start;

    // The second argument is a character count, not a limit index (this matches the
    // other Replace call sites in this code base). Passing index.Limit would delete
    // too much whenever index.Start > 0.
    text.Replace(index.Start, len, "");

    index.ContextLimit -= len;
    index.Limit -= len;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Walks the script runs between the context start and context limit and hands each run
/// that overlaps the start/limit range to the transliterator obtained for that run's
/// script code.
/// </summary>
/// <param name="text">The text to transliterate.</param>
/// <param name="pos">The position indices, which are updated.</param>
/// <param name="isIncremental">Whether the caller is transliterating incrementally.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition pos, bool isIncremental)
{
    int rangeStart = pos.Start;
    int rangeLimit = pos.Limit;

    ScriptRunIterator runs = new ScriptRunIterator(text, pos.ContextStart, pos.ContextLimit);

    while (runs.Next())
    {
        // Runs that lie entirely in the ante context are ignored.
        if (runs.Limit > rangeStart)
        {
            // Try to instantiate a transliterator from the run's script code to
            // our target or target/variant.
            Transliterator runTransliterator = GetTransliterator(runs.ScriptCode);

            if (runTransliterator != null)
            {
                // A run that ends before the transliteration limit is processed
                // non-incrementally; otherwise the caller's mode applies.
                bool runIsIncremental = isIncremental && (runs.Limit >= rangeLimit);

                pos.Start = Math.Max(rangeStart, runs.Start);
                pos.Limit = Math.Min(rangeLimit, runs.Limit);
                int limitBefore = pos.Limit;

                runTransliterator.FilteredTransliterate(text, pos, runIsIncremental);

                // Propagate any change in length to the overall limit and to the iterator.
                int change = pos.Limit - limitBefore;
                rangeLimit += change;
                runs.AdjustLimit(change);

                // We're done if we enter the post context.
                if (runs.Limit >= rangeLimit)
                {
                    break;
                }
            }
            else
            {
                // We have no transliterator. Do nothing, but keep pos.Start up to date.
                pos.Start = runs.Limit;
            }
        }
    }

    // Restore limit. pos.Start is fine where the last transliterator
    // left it, or at the end of the last run.
    pos.Limit = rangeLimit;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Replaces every character (or code point, when supplementals are handled) between
/// <c>pos.Start</c> and <c>pos.Limit</c> with prefix + number + suffix.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="pos">The position indices, which are updated.</param>
/// <param name="incremental">Not used by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
{
    int cursor = pos.Start;
    int end = pos.Limit;

    StringBuilder escaped = new StringBuilder(prefix);
    int ownPrefixLength = prefix.Length;
    // True while the builder holds the supplemental handler's prefix instead of ours.
    bool prefixIsStale = false;

    while (cursor < end)
    {
        int codePoint;
        int sourceLength;
        if (grokSupplementals)
        {
            codePoint = text.Char32At(cursor);
            sourceLength = UTF16.GetCharCount(codePoint);
        }
        else
        {
            codePoint = text[cursor];
            sourceLength = 1;
        }

        bool useSupplementalHandler = (codePoint & 0xFFFF0000) != 0 && supplementalHandler != null;
        if (useSupplementalHandler)
        {
            escaped.Length = 0;
            escaped.Append(supplementalHandler.prefix);
            Utility.AppendNumber(escaped, codePoint, supplementalHandler.radix, supplementalHandler.minDigits);
            escaped.Append(supplementalHandler.suffix);
            prefixIsStale = true;
        }
        else
        {
            if (!prefixIsStale)
            {
                // The builder still starts with our prefix; just drop the old tail.
                escaped.Length = ownPrefixLength;
            }
            else
            {
                escaped.Length = 0;
                escaped.Append(prefix);
                prefixIsStale = false;
            }
            Utility.AppendNumber(escaped, codePoint, radix, minDigits);
            escaped.Append(suffix);
        }

        // Replace takes (startIndex, count).
        text.Replace(cursor, sourceLength, escaped.ToString());
        int escapedLength = escaped.Length;
        cursor += escapedLength;
        end += escapedLength - sourceLength;
    }

    pos.ContextLimit += end - pos.Limit;
    pos.Limit = end;
    pos.Start = cursor;
}
/// <summary>
/// Applies the rules indexed under the low byte of the code point at <c>pos.Start</c>
/// until one of them reports a match or a partial match. When none does, the start
/// index is advanced past that code point.
/// </summary>
/// <param name="text">The text to be transliterated.</param>
/// <param name="pos">The position indices, which will be updated.</param>
/// <param name="incremental">Passed through to each rule's <c>MatchAndReplace</c>.</param>
/// <returns><c>false</c> when a rule reports a partial match, meaning transliteration
/// should stop until more text arrives; otherwise <c>true</c>.</returns>
public virtual bool Transliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
{
    int bucket = text.Char32At(pos.Start) & 0xFF;

    for (int ruleIndex = index[bucket]; ruleIndex < index[bucket + 1]; ++ruleIndex)
    {
        MatchDegree degree = rules[ruleIndex].MatchAndReplace(text, pos, incremental);

        if (degree == MatchDegree.Match)
        {
            if (Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: match " : "Rule: match ";
                Console.Out.WriteLine(label + rules[ruleIndex].ToRule(true) + " => " + UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }

        if (degree == MatchDegree.PartialMatch)
        {
            if (Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                Console.Out.WriteLine(label + rules[ruleIndex].ToRule(true) + " => " + UtilityExtensions.FormatInput(text, pos));
            }
            return false;
        }

        if (Transliterator.DEBUG)
        {
            Console.Out.WriteLine("Rule: no match " + rules[ruleIndex]);
        }
    }

    // No match or partial match from any rule: step over the current code point.
    int skipped = UTF16.GetCharCount(text.Char32At(pos.Start));
    pos.Start += skipped;

    if (Transliterator.DEBUG)
    {
        string label = incremental ? "Rule.i: no match => " : "Rule: no match => ";
        Console.Out.WriteLine(label + UtilityExtensions.FormatInput(text, pos));
    }
    return true;
}
// Warning 809 is suppressed for the declaration only and restored right after it.
#pragma warning disable 809
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition index, bool incremental)
#pragma warning restore 809
{
    /* We keep start and limit fixed the entire time,
     * relative to the text -- limit may move numerically if text is
     * inserted or removed.  The cursor moves from start to limit, with
     * replacements happening under it.
     *
     * Example: rules 1. ab>x|y
     *                2. yc>z
     *
     * |eabcd   start - no match, advance cursor
     * e|abcd   match rule 1 - change text & adjust cursor
     * ex|ycd   match rule 2 - change text & adjust cursor
     * exz|d    no match, advance cursor
     * exzd|    done
     */

    /* A rule like
     *   a>b|a
     * creates an infinite loop. To prevent that, we put an arbitrary
     * limit on the number of iterations that we take, one that is
     * high enough that any reasonable rules are ok, but low enough to
     * prevent a server from hanging.  The limit is 16 times the
     * number of characters n, unless 16n overflows to a negative int,
     * in which case int.MaxValue is used.
     */
    lock (data)
    {
        int loopCount = 0;
        int loopLimit = (index.Limit - index.Start) << 4;
        if (loopLimit < 0)
        {
            loopLimit = int.MaxValue;
        }

        // Keep applying the rule set until the range is consumed, the iteration
        // budget is exhausted, or the rule set returns false.
        while (index.Start < index.Limit
            && loopCount <= loopLimit
            && data.RuleSet.Transliterate(text, index, incremental))
        {
            ++loopCount;
        }
    }
}
/// <summary>
/// Checks that <paramref name="t"/> turns <paramref name="source"/> into
/// <paramref name="expectedResult"/> in three ways: on a string, on a
/// <c>ReplaceableString</c>, and by feeding the source one character at a time
/// (keyboard style) followed by <c>FinishTransliteration</c>.
/// </summary>
/// <param name="t">Transliterator under test.</param>
/// <param name="source">Input text.</param>
/// <param name="expectedResult">Expected output text.</param>
private void expect(Transliterator t, String source, String expectedResult)
{
    // 1. Plain string transliteration.
    String stringResult = t.Transliterate(source);
    expectAux(t.ID + ":String", source, stringResult, expectedResult);

    // 2. Transliteration of a replaceable buffer.
    ReplaceableString buffer = new ReplaceableString(source);
    t.Transliterate(buffer);
    String replaceableResult = buffer.ToString();
    expectAux(t.ID + ":Replaceable", source, replaceableResult, expectedResult);

    // 3. Keyboard (incremental) transliteration -- this result
    // must be the same after we finalize (see below).
    buffer.Replace(0, buffer.Length, "");
    TransliterationPosition position = new TransliterationPosition();
    StringBuffer trace = new StringBuffer();

    int typedIndex = 0;
    while (typedIndex < source.Length)
    {
        char typed = source[typedIndex];
        if (typedIndex != 0)
        {
            trace.Append(" + ");
        }
        trace.Append(typed);
        trace.Append(" -> ");

        t.Transliterate(buffer, position, typed + "");

        // Record the buffer with a vertical bar '|' where the committed index is.
        String snapshot = buffer.ToString();
        int committed = position.Start;
        trace.Append(snapshot.Substring(0, committed));
        trace.Append('|');
        trace.Append(snapshot.Substring(committed));

        ++typedIndex;
    }

    // As a final step in keyboard transliteration, we must call
    // FinishTransliteration to finish off any pending partial matches that
    // were waiting for more input.
    t.FinishTransliteration(buffer, position);
    String keyboardResult = buffer.ToString();
    trace.Append(" => ");
    trace.Append(buffer.ToString());

    expectAux(t.ID + ":Keyboard", trace.ToString(), keyboardResult.Equals(expectedResult), expectedResult);
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Replaces each code point that has an extended name with
/// <c>OPEN_DELIM</c> + name + <c>CLOSE_DELIM</c>; code points without a name are skipped.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="offsets">The position indices, which are updated.</param>
/// <param name="isIncremental">Not used by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition offsets, bool isIncremental)
{
    int position = offsets.Start;
    int end = offsets.Limit;

    // The builder always starts with the open delimiter; only the tail is rewritten.
    StringBuilder replacement = new StringBuilder();
    replacement.Append(OPEN_DELIM);

    while (position < end)
    {
        int codePoint = text.Char32At(position);
        string extendedName = UChar.GetExtendedName(codePoint);
        if (extendedName == null)
        {
            ++position;
            continue;
        }

        replacement.Length = OPEN_DELIM_LEN;
        replacement.Append(extendedName);
        replacement.Append(CLOSE_DELIM);

        int sourceLength = UTF16.GetCharCount(codePoint);
        // Replace takes (startIndex, count).
        text.Replace(position, sourceLength, replacement.ToString());

        int insertedLength = replacement.Length;
        position += insertedLength;              // move past the inserted text
        end += insertedLength - sourceLength;    // account for the change in length
    }

    offsets.ContextLimit += end - offsets.Limit;
    offsets.Limit = end;
    offsets.Start = position;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Applies <c>ToFullFolding</c> to each code point between <c>offsets.Start</c> and
/// <c>offsets.Limit</c>, replacing it in the text when the mapping differs.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="offsets">The position indices, which are updated.</param>
/// <param name="isIncremental">When true, processing stops early if the iterator
/// reports that it reached its limit.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition offsets, bool isIncremental)
{
    lock (syncLock)
    {
        if (csp == null || offsets.Start >= offsets.Limit)
        {
            return;
        }

        iter.SetText(text);
        result.Length = 0;

        // Walk through the original string.
        // If there is a case change, modify the corresponding position in the replaceable.
        iter.SetIndex(offsets.Start);
        iter.SetLimit(offsets.Limit);
        iter.SetContextLimits(offsets.ContextStart, offsets.ContextLimit);

        for (;;)
        {
            int codePoint = iter.NextCaseMapCP();
            if (codePoint < 0)
            {
                break;
            }

            int mapped = csp.ToFullFolding(codePoint, result, 0);

            if (iter.DidReachLimit && isIncremental)
            {
                // The case mapping function tried to look beyond the context limit;
                // wait for more input.
                offsets.Start = iter.CaseMapCPStart;
                return;
            }

            // Decode the result.
            if (mapped < 0)
            {
                // The code point mapped to itself, no change.
                continue;
            }

            // Values up to MaxStringLength mean the mapping string is in 'result';
            // larger values are a single-code-point mapping.
            bool isStringMapping = mapped <= UCaseProperties.MaxStringLength;
            string replacement = isStringMapping ? result.ToString() : UTF16.ValueOf(mapped);
            int change = iter.Replace(replacement);
            if (isStringMapping)
            {
                result.Length = 0;
            }

            if (change != 0)
            {
                offsets.Limit += change;
                offsets.ContextLimit += change;
            }
        }

        offsets.Start = offsets.Limit;
    }
}
/// <summary>
/// Inserts the <c>insertion</c> string at every break-iterator boundary between
/// <c>pos.Start</c> and <c>pos.Limit</c> whose neighbouring characters on both sides
/// fall in <c>LETTER_OR_MARK_MASK</c>.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="pos">The position indices, which are updated for the inserted text.</param>
/// <param name="incremental">When true, <c>pos.Start</c> is left just after the last
/// insertion point rather than at <c>pos.Limit</c>.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
{
    lock (this)
    {
        boundaryCount = 0;
        int boundary = 0;
        GetBreakIterator(); // Lazy-create it if necessary
        bi.SetText(new ReplaceableCharacterIterator(text, pos.Start, pos.Limit, pos.Start));
        // TODO: fix clumsy workaround used below.
        /*
         * char[] tempBuffer = new char[text.length()];
         * text.getChars(0, text.length(), tempBuffer, 0);
         * bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
         */
        // end debugging

        // To make things much easier, we will stack the boundaries, and then insert at the end.
        // Generally, we won't need too many, since we will be filtered.
        for (boundary = bi.First(); boundary != BreakIterator.Done && boundary < pos.Limit; boundary = bi.Next())
        {
            if (boundary == 0)
            {
                continue;
            }

            // HACK: Check to see that the preceding item was a letter
            int cp = UTF16.CharAt(text, boundary - 1);
            int type = UChar.GetUnicodeCategory(cp).ToInt32();
            if (((1 << type) & LETTER_OR_MARK_MASK) == 0)
            {
                continue;
            }

            // ... and that the following item is a letter too.
            cp = UTF16.CharAt(text, boundary);
            type = UChar.GetUnicodeCategory(cp).ToInt32();
            if (((1 << type) & LETTER_OR_MARK_MASK) == 0)
            {
                continue;
            }

            if (boundaryCount >= boundaries.Length)
            {
                // realloc if necessary
                int[] temp = new int[boundaries.Length * 2];
                System.Array.Copy(boundaries, 0, temp, 0, boundaries.Length);
                boundaries = temp;
            }

            boundaries[boundaryCount++] = boundary;
        }

        int delta = 0;
        int lastBoundary = 0;

        if (boundaryCount != 0)
        {
            // If we found something, adjust
            delta = boundaryCount * insertion.Length;
            lastBoundary = boundaries[boundaryCount - 1];

            // We do this from the end backwards, so that we don't have to keep updating.
            while (boundaryCount > 0)
            {
                boundary = boundaries[--boundaryCount];
                // Pure insertion: the second argument is a character count (matching the
                // other Replace call sites in this code base), so it must be 0. Passing
                // 'boundary' would delete that many characters after the boundary.
                text.Replace(boundary, 0, insertion);
            }
        }

        // Now fix up the return values
        pos.ContextLimit += delta;
        pos.Limit += delta;
        pos.Start = incremental ? lastBoundary + delta : pos.Limit;
    }
}
//    /**
//     * Returns the set of all characters that may be generated as
//     * replacement text by this transliterator.
//     */
//    public UnicodeSet getTargetSet() {
//        UnicodeSet set = new UnicodeSet();
//        for (int i=0; i<trans.length; ++i) {
//            // This is a heuristic, and not 100% reliable.
//            set.addAll(trans[i].getTargetSet());
//        }
//        return set;
//    }

/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Runs each component transliterator in turn over the same start position and
/// restores the overall limit, adjusted for insertions and deletions, before returning.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="index">The position indices, which are updated.</param>
/// <param name="incremental">Whether the caller is transliterating incrementally.</param>
/// <exception cref="InvalidOperationException">A component transliterator did not advance
/// start to limit in non-incremental mode.</exception>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition index, bool incremental)
{
    /* Call each transliterator with the same start value and
     * initial cursor index, but with the limit index as modified
     * by preceding transliterators.  The cursor index must be
     * reset for each transliterator to give each a chance to
     * transliterate the text.  The initial cursor index is known
     * to still point to the same place after each transliterator
     * is called because each transliterator will not change the
     * text between start and the initial value of cursor.
     *
     * IMPORTANT: After the first transliterator, each subsequent
     * transliterator only gets to transliterate text committed by
     * preceding transliterators; that is, the cursor (output
     * value) of transliterator i becomes the limit (input value)
     * of transliterator i+1.  Finally, the overall limit is fixed
     * up before we return.
     *
     * Assumptions we make here:
     * (1) contextStart <= start <= limit <= contextLimit <= text.length()
     * (2) start <= start' <= limit'  ;cursor doesn't move back
     * (3) start <= limit'            ;text before cursor unchanged
     * - start' is the value of start after calling handleKT
     * - limit' is the value of limit after calling handleKT
     */

    /*
     * Example: 3 transliterators.  This example illustrates the
     * mechanics we need to implement.  C, S, and L are the contextStart,
     * start, and limit.  gl is the globalLimit.  contextLimit is
     * equal to limit throughout.
     *
     * 1. h-u, changes hex to Unicode
     *
     *    4  7  a  d  0      4  7  a
     *    abc/u0061/u    =>  abca/u
     *    C  S       L       C  S L   gl=f->a
     *
     * 2. upup, changes "x" to "XX"
     *
     *    4  7  a       4  7  a
     *    abca/u    =>  abcAA/u
     *    C  SL         C    S
     *                       L    gl=a->b
     * 3. u-h, changes Unicode to hex
     *
     *    4  7  a        4  7  a  d  0  3
     *    abcAA/u    =>  abc/u0041/u0041/u
     *    C  S L         C              S
     *                                  L   gl=b->15
     * 4. return
     *
     *    4  7  a  d  0  3
     *    abc/u0041/u0041/u
     *    C S L
     */

    if (trans.Length < 1)
    {
        index.Start = index.Limit;
        return; // Short circuit for empty compound transliterators
    }

    // compoundLimit is the limit value for the entire compound
    // operation.  We overwrite index.Limit with the previous
    // index.Start.  After each transliteration, we update
    // compoundLimit for insertions or deletions that have happened.
    int compoundLimit = index.Limit;

    // compoundStart is the start for the entire compound
    // operation.
    int compoundStart = index.Start;

    int delta = 0; // delta in length

    // Only allocated and used when DEBUG is on.
    StringBuffer log = null;

    // CLOVER:OFF
    if (DEBUG)
    {
        log = new StringBuffer("CompoundTransliterator{" + ID + (incremental ? "}i: IN=" : "}: IN="));
        UtilityExtensions.FormatInput(log, text, index);
        Console.Out.WriteLine(Utility.Escape(log.ToString()));
    }
    // CLOVER:ON

    // Give each transliterator a crack at the run of characters.
    // See comments at the top of the method for more detail.
    for (int i = 0; i < trans.Length; ++i)
    {
        index.Start = compoundStart; // Reset start
        int limit = index.Limit;

        if (index.Start == index.Limit)
        {
            // Short circuit for empty range
            // CLOVER:OFF
            if (DEBUG)
            {
                Console.Out.WriteLine("CompoundTransliterator[" + i + ".." + (trans.Length - 1) + (incremental ? "]i: " : "]: ") + UtilityExtensions.FormatInput(text, index) + " (NOTHING TO DO)");
            }
            // CLOVER:ON
            break;
        }

        // CLOVER:OFF
        if (DEBUG)
        {
            log.Length = 0;
            log.Append("CompoundTransliterator[" + i + "=" + trans[i].ID + (incremental ? "]i: " : "]: "));
            UtilityExtensions.FormatInput(log, text, index);
        }
        // CLOVER:ON

        trans[i].FilteredTransliterate(text, index, incremental);

        // In a properly written transliterator, start == limit after
        // HandleTransliterate() returns when incremental is false.
        // Catch cases where the subclass doesn't do this, and throw
        // an exception.  (Just pinning start to limit is a bad idea,
        // because what's probably happening is that the subclass
        // isn't transliterating all the way to the end, and it should
        // in non-incremental mode.)
        if (!incremental && index.Start != index.Limit)
        {
            // A specific exception type instead of bare System.Exception; callers that
            // catch Exception are unaffected.
            throw new InvalidOperationException("ERROR: Incomplete non-incremental transliteration by " + trans[i].ID);
        }

        // CLOVER:OFF
        if (DEBUG)
        {
            log.Append(" => ");
            UtilityExtensions.FormatInput(log, text, index);
            Console.Out.WriteLine(Utility.Escape(log.ToString()));
        }
        // CLOVER:ON

        // Cumulative delta for insertions/deletions
        delta += index.Limit - limit;

        if (incremental)
        {
            // In the incremental case, only allow subsequent
            // transliterators to modify what has already been
            // completely processed by prior transliterators.  In the
            // non-incremental case, allow each transliterator to
            // process the entire text.
            index.Limit = index.Start;
        }
    }

    compoundLimit += delta;

    // Start is good where it is -- where the last transliterator left
    // it.  Limit needs to be put back where it was, modulo
    // adjustments for deletions/insertions.
    index.Limit = compoundLimit;

    // CLOVER:OFF
    if (DEBUG)
    {
        log.Length = 0;
        log.Append("CompoundTransliterator{" + ID + (incremental ? "}i: OUT=" : "}: OUT="));
        UtilityExtensions.FormatInput(log, text, index);
        Console.Out.WriteLine(Utility.Escape(log.ToString()));
    }
    // CLOVER:ON
}
/// <summary>
/// Convenience overload that casts <paramref name="input"/> to
/// <c>ReplaceableString</c> and delegates to the <c>ReplaceableString</c> overload.
/// </summary>
/// <param name="appendTo">Buffer that receives the formatted text.</param>
/// <param name="input">Text to format; the cast fails if it is not a <c>ReplaceableString</c>.</param>
/// <param name="pos">Position whose indices are rendered.</param>
/// <returns>The buffer returned by the delegated call.</returns>
public static StringBuffer FormatInput(StringBuffer appendTo, IReplaceable input, TransliterationPosition pos)
{
    ReplaceableString concrete = (ReplaceableString)input;
    return FormatInput(appendTo, concrete, pos);
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Converts each non-case-ignorable code point between <c>offsets.Start</c> and
/// <c>offsets.Limit</c> with either <c>ToFullTitle</c> or <c>ToFullLower</c>, depending
/// on the case type of the previous non-case-ignorable code point.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="offsets">The position indices, which are updated.</param>
/// <param name="isIncremental">When true, processing stops early if the iterator
/// reports that it reached its limit.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition offsets, bool isIncremental)
{
    lock (this)
    {
        // TODO reimplement, see ustrcase.c
        // using a real word break iterator
        //   instead of just looking for a transition between cased and uncased characters
        // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
        // needs to take isIncremental into account because case mappings are context-sensitive
        //   also detect when lowercasing function did not finish because of context

        // Nothing to do for an empty range.
        if (offsets.Start >= offsets.Limit)
        {
            return;
        }

        // case type: >0 cased (UCaseProps.LOWER etc.)  ==0 uncased  <0 case-ignorable
        CaseType type;

        // Our mode; we are either converting letter toTitle or
        // toLower.
        bool doTitle = true;

        // Determine if there is a preceding context of cased case-ignorable*,
        // in which case we want to start in toLower mode.  If the
        // prior context is anything else (including empty) then start
        // in toTitle mode.
        // The scan runs backwards from just before offsets.Start down to ContextStart.
        int c, start;
        for (start = offsets.Start - 1; start >= offsets.ContextStart; start -= UTF16.GetCharCount(c))
        {
            c = text.Char32At(start);
            // ICU4N: Simplified version of GetTypeOrIgnorable
            if (!csp.IsCaseIgnorable(c, out type))
            {
                if (type > 0)
                { // cased
                    doTitle = false;
                    break;
                }
                else if (type == 0)
                { // uncased but not ignorable
                    break;
                }
            }
            // else case-ignorable: continue
        }

        // Convert things after a cased character toLower; things
        // after a uncased, non-case-ignorable character toTitle.  Case-ignorable
        // characters are copied directly and do not change the mode.
        iter.SetText(text);
        iter.SetIndex(offsets.Start);
        iter.SetLimit(offsets.Limit);
        iter.SetContextLimits(offsets.ContextStart, offsets.ContextLimit);

        result.Length = 0;

        // Walk through original string
        // If there is a case change, modify corresponding position in replaceable
        int delta;

        while ((c = iter.NextCaseMapCP()) >= 0)
        {
            // ICU4N: Simplified version of GetTypeOrIgnorable
            if (!csp.IsCaseIgnorable(c, out type))
            {// not case-ignorable
                if (doTitle)
                {
                    c = csp.ToFullTitle(c, iter, result, caseLocale);
                }
                else
                {
                    c = csp.ToFullLower(c, iter, result, caseLocale);
                }
                // The mode for the next code point is decided before the early-return
                // check below, from the type of the code point just mapped.
                doTitle = type == CaseType.None; // doTitle=isUncased

                if (iter.DidReachLimit && isIncremental)
                {
                    // the case mapping function tried to look beyond the context limit
                    // wait for more input
                    offsets.Start = iter.CaseMapCPStart;
                    return;
                }

                /* decode the result */
                if (c < 0)
                {
                    /* c mapped to itself, no change */
                    continue;
                }
                else if (c <= UCaseProperties.MaxStringLength)
                {
                    /* replace by the mapping string */
                    delta = iter.Replace(result.ToString());
                    result.Length = 0;
                }
                else
                {
                    /* replace by single-code point mapping */
                    delta = iter.Replace(UTF16.ValueOf(c));
                }

                // Keep the limits in step with the change in text length.
                if (delta != 0)
                {
                    offsets.Limit += delta;
                    offsets.ContextLimit += delta;
                }
            }
        }
        // The whole range has been processed.
        offsets.Start = offsets.Limit;
    }
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Leaves the text untouched and marks the whole range as processed by moving
/// <c>offsets.Start</c> to <c>offsets.Limit</c>.
/// </summary>
/// <param name="text">Not modified.</param>
/// <param name="offsets">The position indices; only <c>Start</c> is updated.</param>
/// <param name="incremental">Not used by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition offsets, bool incremental)
{
    int end = offsets.Limit;
    offsets.Start = end;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Scans for an open pattern followed by a character name and <c>CLOSE_DELIM</c>, and
/// replaces the whole sequence with the character returned by
/// <c>UChar.GetCharFromExtendedName</c> when the lookup succeeds.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="offsets">The position indices, which are updated.</param>
/// <param name="isIncremental">When true, <c>offsets.Start</c> is not advanced past a
/// pending open-delimiter candidate.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition offsets, bool isIncremental)
{
    int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

    StringBuffer name = new StringBuffer(maxLen);

    // Get the legal character set
    UnicodeSet legal = new UnicodeSet();
    UCharacterName.Instance.GetCharNameCharacters(legal);

    int cursor = offsets.Start;
    int limit = offsets.Limit;

    // Modes:
    // 0 - looking for open delimiter
    // 1 - after open delimiter
    int mode = 0;
    int openPos = -1; // open delim candidate pos

    int c;
    while (cursor < limit)
    {
        c = text.Char32At(cursor);

        switch (mode)
        {
            case 0: // looking for open delimiter
                if (c == OPEN_DELIM)
                { // quick check first
                    openPos = cursor;
                    int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                    if (i >= 0 && i < limit)
                    {
                        mode = 1;
                        name.Length = 0;
                        cursor = i;
                        continue; // *** reprocess char32At(cursor)
                    }
                }
                break;

            case 1: // after open delimiter
                // Look for legal chars.  If \s+ is found, convert it
                // to a single space.  If closeDelimiter is found, exit
                // the loop.  If any other character is found, exit the
                // loop.  If the limit is reached, exit the loop.

                // Convert \s+ => SPACE.  This assumes there are no
                // runs of >1 space characters in names.
                if (PatternProps.IsWhiteSpace(c))
                {
                    // Ignore leading whitespace
                    if (name.Length > 0 && name[name.Length - 1] != SPACE)
                    {
                        name.Append(SPACE);
                        // If we are too long then abort.  maxLen includes
                        // temporary trailing space, so use '>'.
                        if (name.Length > maxLen)
                        {
                            mode = 0;
                        }
                    }
                    break;
                }

                if (c == CLOSE_DELIM)
                {
                    int len = name.Length;

                    // Delete trailing space, if any
                    if (len > 0 && name[len - 1] == SPACE)
                    {
                        name.Length = --len;
                    }

                    c = UChar.GetCharFromExtendedName(name.ToString());
                    if (c != -1)
                    {
                        // Lookup succeeded

                        // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                        cursor++; // advance over CLOSE_DELIM

                        string str = UTF16.ValueOf(c);
                        // The second argument is a character count (matching the other
                        // Replace call sites in this code base), so pass the length of
                        // the matched sequence rather than its limit index.
                        text.Replace(openPos, cursor - openPos, str);

                        // Adjust indices for the change in the length of
                        // the string.  Do not assume that str.Length ==
                        // 1, in case of surrogates.
                        int delta = cursor - openPos - str.Length;
                        cursor -= delta;
                        limit -= delta;
                        // assert(cursor == openPos + str.length());
                    }
                    // If the lookup failed, we leave things as-is and
                    // still switch to mode 0 and continue.
                    mode = 0;
                    openPos = -1; // close off candidate
                    continue; // *** reprocess char32At(cursor)
                }

                if (legal.Contains(c))
                {
                    UTF16.Append(name, c);
                    // If we go past the longest possible name then abort.
                    // maxLen includes temporary trailing space, so use '>='.
                    if (name.Length >= maxLen)
                    {
                        mode = 0;
                    }
                }
                // Invalid character
                else
                {
                    --cursor; // Backup and reprocess this character
                    mode = 0;
                }

                break;
        }

        cursor += UTF16.GetCharCount(c);
    }

    offsets.ContextLimit += limit - offsets.Limit;
    offsets.Limit = limit;
    // In incremental mode, only advance the cursor up to the last
    // open delimiter candidate.
    offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// At each position, tries every form in <c>spec</c> (prefix, digits in the form's
/// radix, suffix) and replaces the first one that matches with the code point whose
/// value the digits encode.
/// </summary>
/// <param name="text">The text to modify.</param>
/// <param name="pos">The position indices, which are updated.</param>
/// <param name="isIncremental">When true, processing stops at a partial match that
/// runs into the limit.</param>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition pos, bool isIncremental)
{
    int start = pos.Start;
    int limit = pos.Limit;
    int i, ipat;

    //loop:
    while (start < limit)
    {
        // Loop over the forms in spec[].  Exit this loop when we
        // match one of the specs.  Exit the outer loop if a
        // partial match is detected and isIncremental is true.
        for (ipat = 0; spec[ipat] != END;)
        {
            // Read the header
            int prefixLen = spec[ipat++];
            int suffixLen = spec[ipat++];
            int radix = spec[ipat++];
            int minDigits = spec[ipat++];
            int maxDigits = spec[ipat++];

            // s is a copy of start that is advanced over the
            // characters as we parse them.
            int s = start;
            bool match = true;

            for (i = 0; i < prefixLen; ++i)
            {
                if (s >= limit)
                {
                    if (i > 0)
                    {
                        // We've already matched a character.  This is
                        // a partial match, so we return if in
                        // incremental mode.  In non-incremental mode,
                        // go to the next spec.
                        if (isIncremental)
                        {
                            goto loop_break;
                        }
                        match = false;
                        break;
                    }
                }
                char c = text[s++];
                if (c != spec[ipat + i])
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                int u = 0;
                int digitCount = 0;
                for (; ; )
                {
                    if (s >= limit)
                    {
                        // Check for partial match in incremental mode.
                        if (s > start && isIncremental)
                        {
                            goto loop_break;
                        }
                        break;
                    }
                    int ch = text.Char32At(s);
                    int digit = UChar.Digit(ch, radix);
                    if (digit < 0)
                    {
                        break;
                    }
                    s += UTF16.GetCharCount(ch);
                    u = (u * radix) + digit;
                    if (++digitCount == maxDigits)
                    {
                        break;
                    }
                }

                match = (digitCount >= minDigits);

                if (match)
                {
                    for (i = 0; i < suffixLen; ++i)
                    {
                        if (s >= limit)
                        {
                            // Check for partial match in incremental mode.
                            if (s > start && isIncremental)
                            {
                                goto loop_break;
                            }
                            match = false;
                            break;
                        }
                        char c = text[s++];
                        if (c != spec[ipat + prefixLen + i])
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        // At this point, we have a match
                        string str = UTF16.ValueOf(u);
                        // The second argument is a character count (matching the other
                        // Replace call sites in this code base), so pass the length of
                        // the matched escape rather than its limit index.
                        text.Replace(start, s - start, str);
                        limit -= s - start - str.Length;
                        // The following break statement leaves the
                        // loop that is traversing the forms in
                        // spec[].  We then parse the next input
                        // character.
                        break;
                    }
                }
            }

            // Skip over this form's prefix and suffix characters to the next header.
            ipat += prefixLen + suffixLen;
        }

        if (start < limit)
        {
            start += UTF16.GetCharCount(text.Char32At(start));
        }
    }

loop_break:
    pos.ContextLimit += limit - pos.Limit;
    pos.Limit = limit;
    pos.Start = start;
}
public void TestTransliteratorErrors()
{
    // Exercises the error paths of the Transliterator API: out-of-bounds offsets for
    // the (start, limit) and position-based overloads, insertion with valid and invalid
    // positions, a bogus ID, an unregistered inverse, and malformed rules.
    String trans = "Latin-Greek";
    String bogusID = "LATINGREEK-GREEKLATIN";
    String newID = "Bogus-Latin";
    String newIDRules = "zzz > Z; f <> ph";
    String bogusRules = "a } [b-g m-p ";
    ReplaceableString testString = new ReplaceableString("A quick fox jumped over the lazy dog.");
    String insertString = "cats and dogs";
    int stoppedAt = 0, len;
    TransliterationPosition pos = new TransliterationPosition();

    Transliterator t = Transliterator.GetInstance(trans, Transliterator.Forward);
    if (t == null)
    {
        Errln("FAIL: construction of Latin-Greek");
        return;
    }

    len = testString.Length;

    // Limit beyond the end of the text.
    stoppedAt = t.Transliterate(testString, 0, 100);
    if (stoppedAt != -1)
    {
        Errln("FAIL: Out of bounds check failed (1).");
    }
    else if (testString.Length != len)
    {
        testString = new ReplaceableString("A quick fox jumped over the lazy dog.");
        Errln("FAIL: Transliterate fails and the target string was modified.");
    }

    // Start beyond the end of the text.
    stoppedAt = t.Transliterate(testString, 100, testString.Length - 1);
    if (stoppedAt != -1)
    {
        Errln("FAIL: Out of bounds check failed (2).");
    }
    else if (testString.Length != len)
    {
        testString = new ReplaceableString("A quick fox jumped over the lazy dog.");
        Errln("FAIL: Transliterate fails and the target string was modified.");
    }

    pos.Start = 100;
    pos.Limit = testString.Length;
    try
    {
        t.Transliterate(testString, pos);
        Errln("FAIL: Start offset is out of bounds, error not reported.");
    }
    catch (ArgumentException)
    {
        Logln("Start offset is out of bounds and detected.");
    }

    pos.Limit = 100;
    pos.Start = 0;
    try
    {
        t.Transliterate(testString, pos);
        Errln("FAIL: Limit offset is out of bounds, error not reported.\n");
    }
    catch (ArgumentException)
    {
        // This case tests the limit; the message used to repeat the "Start offset" text.
        Logln("Limit offset is out of bounds and detected.");
    }

    len = pos.ContextLimit = testString.Length;
    pos.ContextStart = 0;
    pos.Limit = len - 1;
    pos.Start = 5;
    try
    {
        t.Transliterate(testString, pos, insertString);
        if (len == pos.Limit)
        {
            Errln("FAIL: Test insertion with string: the transliteration position limit didn't change as expected.");
        }
    }
    catch (ArgumentException)
    {
        Errln("Insertion test with string failed for some reason.");
    }

    pos.ContextStart = 0;
    pos.ContextLimit = testString.Length;
    pos.Limit = testString.Length - 1;
    pos.Start = 5;
    try
    {
        t.Transliterate(testString, pos, 0x0061);
        if (len == pos.Limit)
        {
            Errln("FAIL: Test insertion with character: the transliteration position limit didn't change as expected.");
        }
    }
    catch (ArgumentException)
    {
        Errln("FAIL: Insertion test with UTF-16 code point failed for some reason.");
    }

    // Limit greater than the context limit: the call is expected to throw.
    len = pos.Limit = testString.Length;
    pos.ContextStart = 0;
    pos.ContextLimit = testString.Length - 1;
    pos.Start = 5;
    try
    {
        t.Transliterate(testString, pos, insertString);
        Errln("FAIL: Out of bounds check failed (3).");
        if (testString.Length != len)
        {
            Errln("FAIL: The input string was modified though the offsets were out of bounds.");
        }
    }
    catch (ArgumentException)
    {
        Logln("Insertion test with out of bounds indexes.");
    }

    Transliterator t1 = null;
    try
    {
        t1 = Transliterator.GetInstance(bogusID, Transliterator.Forward);
        if (t1 != null)
        {
            Errln("FAIL: construction of bogus ID \"LATINGREEK-GREEKLATIN\"");
        }
    }
    catch (ArgumentException)
    {
        // Expected: the ID is bogus.
    }

    //try { // unneeded - Exception cannot be thrown
    Transliterator t2 = Transliterator.CreateFromRules(
        newID,
        newIDRules,
        Transliterator.Forward);
    try
    {
        Transliterator t3 = t2.GetInverse();
        Errln("FAIL: The newID transliterator was not registered so createInverse should fail.");
        if (t3 != null)
        {
            Errln("FAIL: The newID transliterator was not registered so createInverse should fail.");
        }
    }
    catch (Exception)
    {
        // Expected: the inverse cannot be created.
    }
    //} catch (Exception e) { }

    try
    {
        Transliterator t4 = Transliterator.CreateFromRules(
            newID,
            bogusRules,
            Transliterator.Forward);
        if (t4 != null)
        {
            Errln("FAIL: The rules is malformed but error was not reported.");
        }
    }
    catch (Exception)
    {
        // Expected: the rules are malformed.
    }
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>.
/// Walks the range [offsets.Start, offsets.Limit) one normalization chunk at a
/// time, replacing each chunk whose normalized form differs from the input,
/// and then writes the adjusted indexes back into <paramref name="offsets"/>.
/// </summary>
/// <param name="text">The text to normalize in place.</param>
/// <param name="offsets">Position whose Start, Limit and ContextLimit are updated.</param>
/// <param name="isIncremental">
/// When true, a trailing chunk that reaches the limit and has no boundary
/// after its last code point is left untouched.
/// </param>
protected override void HandleTransliterate(IReplaceable text,
    TransliterationPosition offsets, bool isIncremental)
{
    int cursor = offsets.Start;
    int end = offsets.Limit;
    if (cursor >= end)
    {
        return;
    }

    /*
     * Work on chunks that are as short as possible, even in bulk mode,
     * so that styled text is disturbed as little as possible.
     * In incremental mode a chunk that ends at offsets.Limit must not
     * be normalized.
     *
     * If the input were known to be unstyled, a single bulk
     * normalization could be used instead.
     * (For details, see the comment in the C++ version.)
     */
    StringBuilder chunk = new StringBuilder();
    StringBuilder chunkNormalized = new StringBuilder();
    int codePoint = text.Char32At(cursor);
    do
    {
        int chunkStart = cursor;

        // Always consume at least one code point so the loop makes progress;
        // codePoint holds the character at cursor on entry.
        chunk.Length = 0;
        while (true)
        {
            chunk.AppendCodePoint(codePoint);
            cursor += Character.CharCount(codePoint);
            if (cursor >= end)
            {
                // Ran out of input; codePoint still holds the last consumed character.
                break;
            }
            codePoint = text.Char32At(cursor);
            if (norm2.HasBoundaryBefore(codePoint))
            {
                // codePoint starts the next chunk.
                break;
            }
        }

        if (cursor == end && isIncremental && !norm2.HasBoundaryAfter(codePoint))
        {
            // Incremental mode at the input limit: later characters could still
            // change how this chunk normalizes, so rewind and stop here.
            cursor = chunkStart;
            break;
        }

        norm2.Normalize(chunk, chunkNormalized);
        if (!UTF16Plus.Equal(chunk, chunkNormalized))
        {
            // Swap the input chunk for its normalized form
            // (second argument is a length, not an end index).
            int chunkLength = cursor - chunkStart;
            text.Replace(chunkStart, chunkLength, chunkNormalized.ToString());

            // Shift the running indexes by the change in length.
            int growth = chunkNormalized.Length - chunkLength;
            cursor += growth;
            end += growth;
        }
    } while (cursor < end);

    offsets.Start = cursor;
    // ContextLimit must be adjusted before Limit is overwritten.
    offsets.ContextLimit += end - offsets.Limit;
    offsets.Limit = end;
}
/// <summary>
/// Tests transliteration through compound transliterators: first the
/// incremental Transliterate/FinishTransliteration pair on "Any-Hex;Hex-Any"
/// (which is expected to leave the text unchanged), then a table of
/// compound IDs with their input and expected output.
/// </summary>
public void TestTransliterate()
{
    Logln("Testing the handleTransliterate() API of CompoundTransliterator");
    Transliterator ct1 = null;
    try
    {
        ct1 = Transliterator.GetInstance("Any-Hex;Hex-Any");
    }
    catch (ArgumentException)
    {
        Errln("FAIL: construction using CompoundTransliterator(String ID) failed for " + "Any-Hex;Hex-Any");
        // Rethrow without resetting the stack trace.
        throw;
    }

    String s = "abcabc";
    expect(ct1, s, s);

    // Incremental transliteration with a default position.
    TransliterationPosition index = new TransliterationPosition();
    ReplaceableString rsource2 = new ReplaceableString(s);
    String expectedResult = s;
    ct1.Transliterate(rsource2, index);
    ct1.FinishTransliteration(rsource2, index);
    String result = rsource2.ToString();
    expectAux(ct1.ID + ":ReplaceableString, index(0,0,0,0)",
        s + "->" + rsource2,
        result.Equals(expectedResult),
        expectedResult);

    // Incremental transliteration with an explicit position; the label
    // mirrors the constructor arguments.
    TransliterationPosition index2 = new TransliterationPosition(1, 3, 2, 3);
    ReplaceableString rsource3 = new ReplaceableString(s);
    ct1.Transliterate(rsource3, index2);
    ct1.FinishTransliteration(rsource3, index2);
    result = rsource3.ToString();
    expectAux(ct1.ID + ":String, index2(1,3,2,3)",
        s + "->" + rsource3,
        result.Equals(expectedResult),
        expectedResult);

    String[] Data = {
        //ID, input string, transliterated string
        "Any-Hex;Hex-Any;Any-Hex", "hello", "\\u0068\\u0065\\u006C\\u006C\\u006F",
        "Any-Hex;Hex-Any", "hello! How are you?", "hello! How are you?",
        "Devanagari-Latin;Latin-Devanagari", "\u092D\u0948'\u0930'\u0935", "\u092D\u0948\u0930\u0935", // quotes lost
        "Latin-Cyrillic;Cyrillic-Latin", "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h", "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h",
        "Latin-Greek;Greek-Latin", "ABGabgAKLMN", "ABGabgAKLMN",
        //"Latin-Arabic;Arabic-Latin", "Ad'r'a'b'i'k'dh'dd'gh", "Adrabikdhddgh",
        "Hiragana-Katakana", "\u3041\u308f\u3099\u306e\u304b\u3092\u3099", "\u30A1\u30f7\u30ce\u30ab\u30fa",
        "Hiragana-Katakana;Katakana-Hiragana", "\u3041\u308f\u3099\u306e\u304b\u3051", "\u3041\u308f\u3099\u306e\u304b\u3051",
        "Katakana-Hiragana;Hiragana-Katakana", "\u30A1\u30f7\u30ce\u30f5\u30f6", "\u30A1\u30f7\u30ce\u30ab\u30b1",
        "Latin-Katakana;Katakana-Latin", "vavivuvevohuzizuzoninunasesuzezu", "vavivuvevohuzizuzoninunasesuzezu",
    };

    // Each table row is a triple: compound ID, input, expected output.
    Transliterator ct2 = null;
    for (int i = 0; i < Data.Length; i += 3)
    {
        try
        {
            ct2 = Transliterator.GetInstance(Data[i + 0]);
        }
        catch (ArgumentException)
        {
            Errln("FAIL: CompoundTransliterator construction failed for " + Data[i + 0]);
            // Rethrow without resetting the stack trace.
            throw;
        }
        expect(ct2, Data[i + 1], Data[i + 2]);
    }
}
/// <summary>
/// Attempt a match and replacement at the given position, and report the
/// degree of match between this rule and the text: a mismatch, a partial
/// match, or a full match. A mismatch means at least one character of the
/// text does not match the context or key. A partial match means some
/// context and key characters match, but the text is not long enough to
/// match all of them. A full match means all context and key characters match.
/// <para/>
/// On a full match the replacement is performed, <paramref name="pos"/> is
/// updated, and <see cref="MatchDegree.Match"/> is returned. Otherwise both
/// <paramref name="text"/> and <paramref name="pos"/> are left unchanged.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="pos">The position indices.</param>
/// <param name="incremental">If true, test for partial matches that may
/// be completed by additional text inserted at pos.Limit.</param>
/// <returns>One of <see cref="MatchDegree.Mismatch"/>,
/// <see cref="MatchDegree.PartialMatch"/>, or <see cref="MatchDegree.Match"/>.
/// If <paramref name="incremental"/> is false then
/// <see cref="MatchDegree.PartialMatch"/> will not be returned.</returns>
public virtual MatchDegree MatchAndReplace(IReplaceable text,
    TransliterationPosition pos, bool incremental)
{
    // Matching and replacing live in one method because the replacement
    // needs offsets discovered during the match. The alternative would be
    // a match-result struct handed from a match method to a replace method.

    // ============================ MATCH ===========================

    // Clear match data left over in the segments.
    if (segments != null)
    {
        for (int s = 0; s < segments.Length; ++s)
        {
            ((StringMatcher)segments[s]).ResetMatch();
        }
    }

    // One-element array used as an in/out offset for the matchers.
    int[] cursor = new int[1];

    // ------------------------ Ante Context ------------------------

    // A mismatch in the ante context, or against the start anchor, is an
    // outright mismatch whether or not we are incremental.

    // Note (1): Text is processed in 16-bit code units rather than 32-bit
    // code points. This works because stand-ins are always in the BMP and
    // because this is a literal match, which can be done 16 bits at a time.

    int anteLimit = PosBefore(text, pos.ContextStart);

    // The reverse match begins at the char before pos.Start.
    cursor[0] = PosBefore(text, pos.Start);

    if (anteContext != null
        && anteContext.Matches(text, cursor, anteLimit, false) != MatchDegree.Match)
    {
        return MatchDegree.Mismatch;
    }

    int anteEnd = cursor[0];
    int minStart = PosAfter(text, anteEnd);

    // ------------------------ Start Anchor ------------------------

    if ((flags & ANCHOR_START) != 0 && anteEnd != anteLimit)
    {
        return MatchDegree.Mismatch;
    }

    // -------------------- Key and Post Context --------------------

    cursor[0] = pos.Start;

    if (key != null)
    {
        MatchDegree keyDegree = key.Matches(text, cursor, pos.Limit, incremental);
        if (keyDegree != MatchDegree.Match)
        {
            return keyDegree;
        }
    }

    int keyLimit = cursor[0];

    if (postContext != null)
    {
        if (incremental && keyLimit == pos.Limit)
        {
            // The key ends exactly at pos.Limit and a post context exists.
            // In incremental mode more characters may still be inserted at
            // pos.Limit, so this can only be a partial match.
            return MatchDegree.PartialMatch;
        }

        MatchDegree postDegree = postContext.Matches(text, cursor, pos.ContextLimit, incremental);
        if (postDegree != MatchDegree.Match)
        {
            return postDegree;
        }
    }

    int matchEnd = cursor[0];

    // ------------------------- Stop Anchor ------------------------

    if ((flags & ANCHOR_END) != 0)
    {
        if (matchEnd != pos.ContextLimit)
        {
            return MatchDegree.Mismatch;
        }
        if (incremental)
        {
            return MatchDegree.PartialMatch;
        }
    }

    // =========================== REPLACE ==========================

    // Full match: the key occupies [pos.Start, keyLimit).

    int newLength = output.Replace(text, pos.Start, keyLimit, cursor);
    int lenDelta = newLength - (keyLimit - pos.Start);
    int newStart = cursor[0];

    matchEnd += lenDelta;
    pos.Limit += lenDelta;
    pos.ContextLimit += lenDelta;

    // Clamp the new start into [minStart, min(matchEnd, pos.Limit)].
    int upperBound = Math.Min(matchEnd, pos.Limit);
    pos.Start = Math.Max(minStart, Math.Min(upperBound, newStart));
    return MatchDegree.Match;
}
/// <summary>
/// Convenience overload: casts <paramref name="input"/> to
/// <see cref="ReplaceableString"/> and forwards to the overload taking that type.
/// </summary>
/// <param name="input">The text to format; the explicit cast fails if it is not a <see cref="ReplaceableString"/>.</param>
/// <param name="pos">The position whose indexes are rendered.</param>
/// <returns>The formatted string produced by the forwarded call.</returns>
public static string FormatInput(IReplaceable input, TransliterationPosition pos)
{
    ReplaceableString concrete = (ReplaceableString)input;
    return FormatInput(concrete, pos);
}