/// <summary>
/// Matches one <c>&lt;If_IsUnicode&gt;...&lt;/EndIf&gt;</c> block (case-insensitive) and
/// captures its body in the <c>template</c> group. Built once and shared, because
/// constructing a <see cref="RegexOptions.Compiled"/> regex on every call is costly.
/// </summary>
private static readonly Regex IfIsUnicodeBlockRegex = new Regex(
    @"<If_IsUnicode>(?<template>((?!</EndIf>).|\n)*)</EndIf>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Replaces every <c>&lt;If_IsUnicode&gt;...&lt;/EndIf&gt;</c> block in
/// <c>context.Output</c> with the text produced by resolving that block's own body
/// against <c>context.Column.DbType</c>.
/// </summary>
/// <remarks>
/// Each block is resolved from its own captured template, so several blocks with
/// different bodies are handled independently. The resolved text is inserted
/// literally: a <c>$</c> in it is not treated as a regex substitution token.
/// When the output contains no block, nothing is resolved and the output is unchanged.
/// </remarks>
/// <param name="context">Resolve context whose <c>Output</c> is read and rewritten.</param>
public override void Interpret(ColumnResolveContext context)
{
    // Resolution is delegated to a checker -> replacer chain.
    UnicodeChecker checker = new UnicodeChecker();
    UnicodeReplacer replacer = new UnicodeReplacer();
    checker.SetSuccessor(replacer);

    // A MatchEvaluator returns the replacement verbatim, unlike the string overload,
    // which parses "$..." sequences as substitutions.
    context.Output = IfIsUnicodeBlockRegex.Replace(
        context.Output,
        match => checker.Resolve(match.Groups["template"].Value, context.Column.DbType));
}
/// <summary>
/// Adds to <paramref name="toUnionTo"/> every character this object may emit.
/// Each character of <c>output</c> is looked up via <c>data.LookupReplacer</c>:
/// if no replacer is registered for it, the character itself is added; otherwise
/// the nested replacer is asked to contribute its own replacement set.
/// </summary>
/// <param name="toUnionTo">the set into which to union the output characters</param>
public virtual void AddReplacementSetTo(UnicodeSet toUnionTo)
{
    int index = 0;
    while (index < output.Length)
    {
        int codePoint = IBM.ICU.Text.UTF16.CharAt(output, index);
        UnicodeReplacer nested = data.LookupReplacer(codePoint);
        if (nested != null)
        {
            // Stand-in for a nested replacer: let it report what it can produce.
            nested.AddReplacementSetTo(toUnionTo);
        }
        else
        {
            toUnionTo.Add(codePoint);
        }

        // Advance by the number of UTF-16 units the character just read occupies.
        index += IBM.ICU.Text.UTF16.GetCharCount(codePoint);
    }
}
/// <summary>
/// Creates a replacer that takes the output of the given replacer, passes it
/// through the given transliterator, and emits the result as output.
/// </summary>
/// <param name="theTranslit">transliterator stored in <c>translit</c></param>
/// <param name="theReplacer">inner replacer stored in <c>replacer</c></param>
public FunctionReplacer(Transliterator theTranslit, UnicodeReplacer theReplacer)
{
    this.replacer = theReplacer;
    this.translit = theTranslit;
}
/// <summary>
/// UnicodeReplacer API. Builds the rule-pattern text for this replacer by appending
/// cursor markers (<c>@</c>, <c>|</c>), the characters of <c>output</c>, and the
/// patterns of any nested replacers through <c>Utility.AppendToRule</c>.
/// </summary>
/// <param name="escapeUnprintable">forwarded unchanged to every <c>AppendToRule</c>
/// call and to nested replacers</param>
/// <returns>the accumulated rule text</returns>
public virtual String ToReplacerPattern(bool escapeUnprintable)
{
    StringBuilder pattern = new StringBuilder();
    StringBuilder pendingQuote = new StringBuilder();
    int cursorIndex = cursorPos;

    // Cursor located before the output: one '@' per position.
    if (hasCursor && cursorIndex < 0)
    {
        // NOTE(review): because of the post-increment, this loop exits with
        // cursorIndex == 1, so the '|' check below fires at index 1 - confirm intended.
        while (cursorIndex++ < 0)
        {
            IBM.ICU.Impl.Utility.AppendToRule(pattern, '@', true, escapeUnprintable, pendingQuote);
        }
    }

    int position = 0;
    while (position < output.Length)
    {
        if (hasCursor && position == cursorIndex)
        {
            IBM.ICU.Impl.Utility.AppendToRule(pattern, '|', true, escapeUnprintable, pendingQuote);
        }

        char unit = output[position]; // 16-bit units are sufficient here
        UnicodeReplacer nested = data.LookupReplacer(unit);
        if (nested != null)
        {
            // Nested replacer: emit its own pattern surrounded by single spaces.
            string nestedPattern = " " + nested.ToReplacerPattern(escapeUnprintable) + " ";
            IBM.ICU.Impl.Utility.AppendToRule(pattern, nestedPattern, true, escapeUnprintable, pendingQuote);
        }
        else
        {
            IBM.ICU.Impl.Utility.AppendToRule(pattern, unit, false, escapeUnprintable, pendingQuote);
        }

        ++position;
    }

    // Cursor located after the output. Strictly greater-than: a cursor equal to
    // output.Length sits at the end of the output, the default, and is not emitted.
    if (hasCursor && cursorIndex > output.Length)
    {
        for (int remaining = cursorIndex - output.Length; remaining > 0; --remaining)
        {
            IBM.ICU.Impl.Utility.AppendToRule(pattern, '@', true, escapeUnprintable, pendingQuote);
        }
        IBM.ICU.Impl.Utility.AppendToRule(pattern, '|', true, escapeUnprintable, pendingQuote);
    }

    // Flush whatever is still held in the quote buffer into the result.
    IBM.ICU.Impl.Utility.AppendToRule(pattern, -1, true, escapeUnprintable, pendingQuote);
    return pattern.ToString();
}
// Disabled factory method retained from the Java original:
// = public static UnicodeReplacer valueOf(String output,
// =                                       int cursorPos,
// =                                       RuleBasedTransliterator.Data data) {
// =     if (output.length() == 1) {
// =         char c = output.charAt(0);
// =         UnicodeReplacer r = data.lookupReplacer(c);
// =         if (r != null) {
// =             return r;
// =         }
// =     }
// =     return new StringReplacer(output, cursorPos, data);
// = }

/// <summary>
/// UnicodeReplacer API. Replaces the range [<paramref name="start"/>,
/// <paramref name="limit"/>) of <paramref name="text"/> with this replacer's output.
/// When <c>isComplex</c> is false, <c>output</c> is inserted directly. Otherwise the
/// output is assembled in a temporary region appended to the end of
/// <paramref name="text"/>, delegating to nested replacers found through
/// <c>data.LookupReplacer</c>, and is then copied over the key.
/// </summary>
/// <remarks>
/// The complex path resets the <c>isComplex</c> field to false and sets it back to
/// true only if a nested replacer is encountered during this call.
/// </remarks>
/// <param name="text">the text being modified in place</param>
/// <param name="start">start index of the range to replace</param>
/// <param name="limit">end index (exclusive) of the range to replace</param>
/// <param name="cursor">array whose element 0 receives the new cursor position;
/// written only when <c>hasCursor</c> is true</param>
/// <returns>the length of the text that replaced the range</returns>
public virtual int Replace(Replaceable text, int start, int limit, int[] cursor) {
    int outLen;
    int newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower. If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) processing code:
    if (!isComplex) {
        text.Replace(start, limit, output);
        outLen = output.Length;

        // Set up the default cursor position (for cursorPos within output)
        newStart = cursorPos;
    }

    // Complex (nested replacers) processing code:
    else {
        /*
         * When there are segments to be copied, use the Replaceable.copy()
         * API in order to retain out-of-band data. Copy everything to the
         * end of the string, then copy them back over the key. This
         * preserves the integrity of indices into the key and surrounding
         * context while generating the output text.
         */
        StringBuilder buf = new StringBuilder();
        int oOutput; // offset into 'output'
        isComplex = false;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit + tempExtra. The start of the buffer has a single
        // character from before the key. This provides style
        // data when additional characters are filled into the
        // temporary buffer. If there is nothing to the left, use
        // the non-character U+FFFF, which Replaceable subclasses
        // should treat specially as a "no-style character."
        // destStart points to the point after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int tempStart = text.Length(); // start of temp buffer
        int destStart = tempStart; // copy new text to here
        if (start > 0) {
            int len = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start - 1));
            text.Copy(start - len, start, tempStart);
            destStart += len;
        } else {
            text.Replace(tempStart, tempStart, "\uFFFF");
            destStart++;
        }
        int destLimit = destStart;
        int tempExtra = 0; // temp chars after destLimit

        for (oOutput = 0; oOutput < output.Length;) {
            if (oOutput == cursorPos) {
                // Record the position of the cursor, relative to start.
                newStart = buf.Length + destLimit - destStart;
                // The buf.Length term was inserted for bug 5789.
                // The problem is that if we are accumulating into a buffer
                // (when r == null below), then the actual length of the text
                // at that point needs to add the buf length.
                // There was an alternative suggested in #5789, but that
                // looks like it won't work if we have accumulated some stuff
                // in the dest part AND have a non-zero buffer.
            }
            int c = IBM.ICU.Text.UTF16.CharAt(output, oOutput);

            // When we are at the last position copy the right style
            // context character into the temporary buffer. We don't
            // do this before because it will provide an incorrect
            // right context for previous replace() operations.
            int nextIndex = oOutput + IBM.ICU.Text.UTF16.GetCharCount(c);
            if (nextIndex == output.Length) {
                tempExtra = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(limit));
                text.Copy(limit, limit + tempExtra, destLimit);
            }

            UnicodeReplacer r = data.LookupReplacer(c);
            if (r == null) {
                // Accumulate straight (non-segment) text.
                IBM.ICU.Text.UTF16.Append(buf, c);
            } else {
                isComplex = true;

                // Insert any accumulated straight text.
                if (buf.Length > 0) {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                    buf.Length = 0;
                }

                // Delegate output generation to replacer object
                int len_0 = r.Replace(text, destLimit, destLimit, cursor);
                destLimit += len_0;
            }
            oOutput = nextIndex;
        }

        // Insert any accumulated straight text.
        if (buf.Length > 0) {
            text.Replace(destLimit, destLimit, buf.ToString());
            destLimit += buf.Length;
        }
        if (oOutput == cursorPos) {
            // Record the position of the cursor, relative to start.
            newStart = destLimit - destStart;
        }

        outLen = destLimit - destStart;

        // Copy new text to start, and delete it
        text.Copy(destStart, destLimit, start);
        // The copy above inserted outLen units at start, so the temporary
        // region's indices are shifted by outLen here.
        text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

        // Delete the old text (the key)
        text.Replace(start + outLen, limit + outLen, "");
    }

    if (hasCursor) {
        // Adjust the cursor for positions outside the key. These
        // refer to code points rather than code units. If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0) {
            newStart = start;
            int n = cursorPos;
            // Outside the output string, cursorPos counts code points
            while (n < 0 && newStart > 0) {
                newStart -= IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart - 1));
                ++n;
            }
            // Any count left over after reaching index 0 is applied as-is.
            newStart += n;
        } else if (cursorPos > output.Length) {
            newStart = start + outLen;
            int n_1 = cursorPos - output.Length;
            // Outside the output string, cursorPos counts code points
            while (n_1 > 0 && newStart < text.Length()) {
                newStart += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart));
                --n_1;
            }
            // Any count left over after reaching the end of text is applied as-is.
            newStart += n_1;
        } else {
            // Cursor is within output string. It has been set up above
            // to be relative to start.
            newStart += start;
        }

        cursor[0] = newStart;
    }

    return(outLen);
}
/// <summary>
/// Builds a rule from the given input pattern, output text, and other attributes.
/// The input is split into optional ante context, key, and optional post context;
/// a cursor position may be specified for the output text.
/// </summary>
/// <param name="input">input string, including key and optional ante and post context</param>
/// <param name="anteContextPos">offset into input to the end of the ante context, or
/// negative if none. Must be &lt;= <c>input.Length</c> when not negative.</param>
/// <param name="postContextPos">offset into input to the start of the post context, or
/// negative if none. When not negative it must be &gt;= the ante context length and
/// &lt;= <c>input.Length</c>.</param>
/// <param name="output_0">output string</param>
/// <param name="cursorPos">offset into the output at which the cursor is located. A
/// negative value places the cursor after the output, i.e. it is treated as
/// <c>output_0.Length</c>. A value greater than <c>output_0.Length</c> is rejected.</param>
/// <param name="cursorOffset">offset added to <paramref name="cursorPos"/> to move the
/// cursor into the ante context (if &lt; 0) or the post context (if &gt; 0). For example,
/// the rule "abc{def} &gt; | @@@ xyz;" changes "def" to "xyz" and moves the cursor to
/// before "a"; it would have a cursorOffset of -3.</param>
/// <param name="segs">array of UnicodeMatcher corresponding to input pattern segments,
/// or null if there are none</param>
/// <param name="anchorStart">true if the rule is anchored on the left to the context start</param>
/// <param name="anchorEnd">true if the rule is anchored on the right to the context limit</param>
/// <param name="theData">rule data stored in <c>data</c> and handed to the matchers and
/// replacer created here</param>
/// <exception cref="ArgumentException">if the ante context, post context, or cursor
/// position is out of range</exception>
public TransliterationRule(String input, int anteContextPos, int postContextPos, String output_0, int cursorPos, int cursorOffset, UnicodeMatcher[] segs, bool anchorStart, bool anchorEnd, RuleBasedTransliterator.Data theData)
{
    data = theData;

    // Range checks run only for positions that were actually supplied.
    if (anteContextPos >= 0)
    {
        if (anteContextPos > input.Length)
        {
            throw new ArgumentException("Invalid ante context");
        }
        anteContextLength = anteContextPos;
    }
    else
    {
        anteContextLength = 0;
    }

    if (postContextPos >= 0)
    {
        bool postContextInRange = postContextPos >= anteContextLength && postContextPos <= input.Length;
        if (!postContextInRange)
        {
            throw new ArgumentException("Invalid post context");
        }
        keyLength = postContextPos - anteContextLength;
    }
    else
    {
        // No post context: the key runs to the end of the input.
        keyLength = input.Length - anteContextLength;
    }

    if (cursorPos > output_0.Length)
    {
        throw new ArgumentException("Invalid cursor position");
    }
    if (cursorPos < 0)
    {
        cursorPos = output_0.Length;
    }

    // The segments array is not validated. The caller must guarantee that
    // the segments are well-formed (that is, that all $n references in the
    // output refer to indices of this array, and that no elements are null).
    segments = segs;
    pattern = input;

    flags = 0;
    if (anchorStart)
    {
        flags |= ANCHOR_START;
    }
    if (anchorEnd)
    {
        flags |= ANCHOR_END;
    }

    // A matcher is created only for each non-empty part of the pattern.
    anteContext = anteContextLength > 0
        ? new StringMatcher(pattern.Substring(0, anteContextLength), 0, data)
        : null;

    key = keyLength > 0
        ? new StringMatcher(pattern.Substring(anteContextLength, keyLength), 0, data)
        : null;

    int keyEnd = anteContextLength + keyLength;
    int postContextLength = pattern.Length - keyLength - anteContextLength;
    postContext = postContextLength > 0
        ? new StringMatcher(pattern.Substring(keyEnd), 0, data)
        : null;

    output = new StringReplacer(output_0, cursorPos + cursorOffset, data);
}