/// <summary>
/// Runs <paramref name="t"/> over <paramref name="input"/> in two steps
/// (a call to <c>Transliterate</c> followed by <c>FinishTransliteration</c>)
/// and reports through <c>Errln</c> when the first step makes no progress or
/// when the second step leaves text unprocessed.
/// </summary>
/// <param name="t">The transliterator under test.</param>
/// <param name="input">The text to wrap in a replaceable buffer and transliterate.</param>
public void CheckIncrementalAux(Transliterator t, String input)
{
    IReplaceable buffer = new ReplaceableString(input);
    TransliterationPosition position =
        new TransliterationPosition(0, buffer.Length, 0, buffer.Length);
    bool failed = false;

    t.Transliterate(buffer, position);

    // Progress means the start index moved. Two situations are exempt:
    // an empty remaining range (start == limit == 0, e.g. Any-Remove) and
    // the "Hex-Any/Unicode" transliterator (U+XXXXX?X? may still be pending).
    bool progressed = position.Start != 0
        || position.Limit == 0
        || t.ID.Equals("Hex-Any/Unicode");
    if (progressed)
    {
        Logln("PASS Progress, " + t.ID + ": " + UtilityExtensions.FormatInput(buffer, position));
    }
    else
    {
        Errln("No Progress, " + t.ID + ": " + UtilityExtensions.FormatInput(buffer, position));
        failed = true;
    }

    t.FinishTransliteration(buffer, position);

    // After finishing, nothing may remain between start and limit.
    if (position.Start != position.Limit)
    {
        Errln("Incomplete, " + t.ID + ": " + UtilityExtensions.FormatInput(buffer, position));
        failed = true;
    }

    if (!failed)
    {
        // Check intentionally disabled:
        //Errln("FAIL: Did not get expected error");
    }
}
/// <summary>
/// Tries the candidate rules for the code point at <c>pos.Start</c>, in
/// index order, and applies the first one that matches.
/// </summary>
/// <remarks>
/// Candidate rules are selected through the <c>index</c> table using the
/// low byte of the code point at <c>pos.Start</c>. If no candidate reports a
/// match or a partial match, <c>pos.Start</c> is advanced past that code point.
/// A partial match is expected only when <paramref name="incremental"/> is
/// <c>true</c>; that guarantee comes from the rules themselves, not from this method.
/// </remarks>
/// <param name="text">The text to be transliterated.</param>
/// <param name="pos">The position indices, which will be updated.</param>
/// <param name="incremental">If <c>true</c>, assume new text may be inserted
/// at <c>pos.Limit</c>, so a rule may report a partial match.</param>
/// <returns><c>false</c> if a rule reported a partial match, meaning
/// transliteration should stop until more text arrives; otherwise <c>true</c>.</returns>
public virtual bool Transliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
{
    int bucket = text.Char32At(pos.Start) & 0xFF;

    for (int ruleIndex = index[bucket]; ruleIndex < index[bucket + 1]; ++ruleIndex)
    {
        var rule = rules[ruleIndex];
        MatchDegree degree = rule.MatchAndReplace(text, pos, incremental);

        if (degree == MatchDegree.Match)
        {
            if (Transliterator.DEBUG)
            {
                string prefix = incremental ? "Rule.i: match " : "Rule: match ";
                Console.Out.WriteLine(prefix + rule.ToRule(true) + " => " + UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }

        if (degree == MatchDegree.PartialMatch)
        {
            if (Transliterator.DEBUG)
            {
                string prefix = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                Console.Out.WriteLine(prefix + rule.ToRule(true) + " => " + UtilityExtensions.FormatInput(text, pos));
            }
            return false;
        }

        // Any other result: this rule does not apply, try the next candidate.
        if (Transliterator.DEBUG)
        {
            Console.Out.WriteLine("Rule: no match " + rule);
        }
    }

    // No candidate matched, fully or partially: skip one code point.
    pos.Start += UTF16.GetCharCount(text.Char32At(pos.Start));
    if (Transliterator.DEBUG)
    {
        string prefix = incremental ? "Rule.i: no match => " : "Rule: no match => ";
        Console.Out.WriteLine(prefix + UtilityExtensions.FormatInput(text, pos));
    }
    return true;
}
//    /**
//     * Returns the set of all characters that may be generated as
//     * replacement text by this transliterator.
//     */
//    public UnicodeSet getTargetSet() {
//        UnicodeSet set = new UnicodeSet();
//        for (int i=0; i<trans.length; ++i) {
//            // This is a heuristic, and not 100% reliable.
//            set.addAll(trans[i].getTargetSet());
//        }
//        return set;
//    }

/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>
/// by running every component transliterator in <c>trans</c>, in order, over the
/// range described by <paramref name="index"/>.
/// </summary>
/// <param name="text">The text to be transliterated in place.</param>
/// <param name="index">The position indices. On return, <c>Start</c> is where the
/// last component left it and <c>Limit</c> is the original limit adjusted by the
/// net length change made by all components.</param>
/// <param name="incremental">If <c>true</c>, each component only sees the text
/// already fully processed by the components before it; if <c>false</c>, each
/// component processes the entire range and must consume all of it.</param>
/// <exception cref="InvalidOperationException">A component left
/// <c>Start != Limit</c> when <paramref name="incremental"/> is <c>false</c>.</exception>
protected override void HandleTransliterate(IReplaceable text, TransliterationPosition index, bool incremental)
{
    /* Call each transliterator with the same start value and
     * initial cursor index, but with the limit index as modified
     * by preceding transliterators.  The cursor index must be
     * reset for each transliterator to give each a chance to
     * transliterate the text.  The initial cursor index is known
     * to still point to the same place after each transliterator
     * is called because each transliterator will not change the
     * text between start and the initial value of cursor.
     *
     * IMPORTANT: After the first transliterator, each subsequent
     * transliterator only gets to transliterate text committed by
     * preceding transliterators; that is, the cursor (output
     * value) of transliterator i becomes the limit (input value)
     * of transliterator i+1.  Finally, the overall limit is fixed
     * up before we return.
     *
     * Assumptions we make here:
     * (1) contextStart <= start <= limit <= contextLimit <= text.length()
     * (2) start <= start' <= limit'  ;cursor doesn't move back
     * (3) start <= limit'            ;text before cursor unchanged
     * - start' is the value of start after calling handleKT
     * - limit' is the value of limit after calling handleKT
     */

    /*
     * Example: 3 transliterators.  This example illustrates the
     * mechanics we need to implement.  C, S, and L are the contextStart,
     * start, and limit.  gl is the globalLimit.  contextLimit is
     * equal to limit throughout.
     *
     * 1. h-u, changes hex to Unicode
     *
     *    4  7  a  d  0      4  7  a
     *    abc/u0061/u    =>  abca/u
     *    C  S       L       C   S L   gl=f->a
     *
     * 2. upup, changes "x" to "XX"
     *
     *    4  7  a       4  7  a
     *    abca/u    =>  abcAA/u
     *    C  SL         C    S
     *                       L    gl=a->b
     * 3. u-h, changes Unicode to hex
     *
     *    4  7  a        4  7  a  d  0  3
     *    abcAA/u    =>  abc/u0041/u0041/u
     *    C  S L         C              S
     *                                  L   gl=b->15
     * 4. return
     *
     *    4  7  a  d  0  3
     *    abc/u0041/u0041/u
     *    C S L
     */

    if (trans.Length < 1)
    {
        index.Start = index.Limit;
        return; // Short circuit for empty compound transliterators
    }

    // compoundLimit is the limit value for the entire compound
    // operation.  We overwrite index.limit with the previous
    // index.start.  After each transliteration, we update
    // compoundLimit for insertions or deletions that have happened.
    int compoundLimit = index.Limit;

    // compoundStart is the start for the entire compound
    // operation.
    int compoundStart = index.Start;

    int delta = 0; // delta in length

    // Only allocated and used when DEBUG is set.
    StringBuffer log = null;

    // CLOVER:OFF
    if (DEBUG)
    {
        log = new StringBuffer("CompoundTransliterator{" + ID +
                               (incremental ? "}i: IN=" : "}: IN="));
        UtilityExtensions.FormatInput(log, text, index);
        Console.Out.WriteLine(Utility.Escape(log.ToString()));
    }
    // CLOVER:ON

    // Give each transliterator a crack at the run of characters.
    // See comments at the top of the method for more detail.
    for (int i = 0; i < trans.Length; ++i)
    {
        index.Start = compoundStart; // Reset start
        int limit = index.Limit;     // Limit before this component runs

        if (index.Start == index.Limit)
        {
            // Short circuit for empty range
            // CLOVER:OFF
            if (DEBUG)
            {
                Console.Out.WriteLine("CompoundTransliterator[" + i +
                                      ".." + (trans.Length - 1) +
                                      (incremental ? "]i: " : "]: ") +
                                      UtilityExtensions.FormatInput(text, index) +
                                      " (NOTHING TO DO)");
            }
            // CLOVER:ON
            break;
        }

        // CLOVER:OFF
        if (DEBUG)
        {
            log.Length = 0;
            log.Append("CompoundTransliterator[" + i + "=" +
                       trans[i].ID +
                       (incremental ? "]i: " : "]: "));
            UtilityExtensions.FormatInput(log, text, index);
        }
        // CLOVER:ON

        trans[i].FilteredTransliterate(text, index, incremental);

        // In a properly written transliterator, start == limit after
        // handleTransliterate() returns when incremental is false.
        // Catch cases where the subclass doesn't do this, and throw
        // an exception.  (Just pinning start to limit is a bad idea,
        // because what's probably happening is that the subclass
        // isn't transliterating all the way to the end, and it should
        // in non-incremental mode.)
        if (!incremental && index.Start != index.Limit)
        {
            // A specific exception type instead of the bare System.Exception;
            // callers that catch Exception still catch this.
            throw new InvalidOperationException("ERROR: Incomplete non-incremental transliteration by " + trans[i].ID);
        }

        // CLOVER:OFF
        if (DEBUG)
        {
            log.Append(" => ");
            UtilityExtensions.FormatInput(log, text, index);
            Console.Out.WriteLine(Utility.Escape(log.ToString()));
        }
        // CLOVER:ON

        // Cumulative delta for insertions/deletions
        delta += index.Limit - limit;

        if (incremental)
        {
            // In the incremental case, only allow subsequent
            // transliterators to modify what has already been
            // completely processed by prior transliterators.  In the
            // non-incremental case, allow each transliterator to
            // process the entire text.
            index.Limit = index.Start;
        }
    }

    compoundLimit += delta;

    // Start is good where it is -- where the last transliterator left
    // it.  Limit needs to be put back where it was, modulo
    // adjustments for deletions/insertions.
    index.Limit = compoundLimit;

    // CLOVER:OFF
    if (DEBUG)
    {
        log.Length = 0;
        log.Append("CompoundTransliterator{" + ID +
                   (incremental ? "}i: OUT=" : "}: OUT="));
        UtilityExtensions.FormatInput(log, text, index);
        Console.Out.WriteLine(Utility.Escape(log.ToString()));
    }
    // CLOVER:ON
}