/// <summary>
/// Creates a matcher for an entire pattern string.
/// </summary>
///
/// <param name="theString">the pattern to be matched, possibly containing stand-ins that
/// represent nested UnicodeMatcher objects.</param>
/// <param name="segmentNum">the segment number from 1..n, or 0 if this is not a segment.</param>
/// <param name="theData">context object mapping stand-ins to UnicodeMatcher objects.</param>
public StringMatcher(String theString, int segmentNum, RuleBasedTransliterator.Data theData)
{
    pattern = theString;
    segmentNumber = segmentNum;
    data = theData;

    // Both match bounds start out at -1.
    matchLimit = -1;
    matchStart = -1;
}
/// <summary>
/// Creates a StringReplacer that emits the given output text and does
/// not modify the cursor.
/// </summary>
///
/// <param name="theOutput">text that will replace input text when the replace() method is
/// called. May contain stand-in characters that represent nested replacers.</param>
/// <param name="theData">transliterator context object that translates stand-in
/// characters to UnicodeReplacer objects.</param>
public StringReplacer(String theOutput, RuleBasedTransliterator.Data theData)
{
    data = theData;
    output = theOutput;

    // No cursor is associated with this replacer.
    hasCursor = false;
    cursorPos = 0;

    // NOTE(review): presumably marks the output as possibly containing
    // stand-ins that need further processing -- confirm against replace().
    isComplex = true;
}
/// <summary>
/// Builds a compound transliterator from this entry's ID blocks and rule
/// data. For each index, the ID block (if present and non-empty) is
/// instantiated first, followed by a RuleBasedTransliterator for the rule
/// data at the same index (if present).
/// </summary>
///
/// <returns>a CompoundTransliterator carrying this entry's ID and, when one
/// is set, its compound filter.</returns>
public Transliterator GetInstance()
{
    ArrayList parts = new ArrayList();
    int idBlockCount = idBlockVector.Count;
    int dataCount = dataVector.Count;
    int slotCount = Math.Max(idBlockCount, dataCount);

    // Number of rule-based passes created so far; passes are named
    // "%Pass1", "%Pass2", ...
    int passCount = 0;

    for (int slot = 0; slot < slotCount; slot++)
    {
        if (slot < idBlockCount)
        {
            String idBlock = (String)idBlockVector[slot];
            // Empty ID blocks contribute nothing.
            if (idBlock.Length != 0)
            {
                parts.Add(IBM.ICU.Text.Transliterator.GetInstance(idBlock));
            }
        }

        if (slot < dataCount)
        {
            RuleBasedTransliterator.Data ruleData = (RuleBasedTransliterator.Data)dataVector[slot];
            passCount++;
            parts.Add(new RuleBasedTransliterator("%Pass" + passCount, ruleData, null));
        }
    }

    Transliterator result = new CompoundTransliterator(parts, passCount);
    result.SetID(ID);
    if (compoundFilter != null)
    {
        result.SetFilter(compoundFilter);
    }
    return result;
}
/// <summary>
/// Creates a matcher for the part of a pattern string lying in the range
/// [start, limit). The substring is extracted and passed to the
/// whole-string constructor.
/// </summary>
///
/// <param name="theString">the pattern to be matched, possibly containing stand-ins that
/// represent nested UnicodeMatcher objects.</param>
/// <param name="start">first character of theString to be matched.</param>
/// <param name="limit">index after the last character of theString to be matched.</param>
/// <param name="segmentNum">the segment number from 1..n, or 0 if this is not a segment.</param>
/// <param name="theData">context object mapping stand-ins to UnicodeMatcher objects.</param>
public StringMatcher(String theString, int start, int limit, int segmentNum, RuleBasedTransliterator.Data theData)
    : this(theString.Substring(start, limit - start), segmentNum, theData)
{
}
/// <summary>
/// Creates a rule from its input pattern, output text and other
/// attributes. A cursor position may be specified for the output text.
/// </summary>
///
/// <param name="input">input string, including key and optional ante and post context.</param>
/// <param name="anteContextPos">offset into input to end of ante context, or -1 if none.
/// Must not exceed input.Length if not -1.</param>
/// <param name="postContextPos">offset into input to start of post context, or -1 if none.
/// Must not exceed input.Length if not -1, and must not be less than
/// anteContextPos.</param>
/// <param name="output_0">output string.</param>
/// <param name="cursorPos">offset into output at which cursor is located, or -1 if none.
/// If less than zero, the cursor is placed after the output; that is, -1
/// is equivalent to the output length. If greater than the output length,
/// an exception is thrown.</param>
/// <param name="cursorOffset">an offset to be added to cursorPos to position the cursor
/// either in the ante context, if negative, or in the post context, if
/// positive. For example, the rule "abc{def} &gt; | @@@ xyz;" changes "def"
/// to "xyz" and moves the cursor to before "a". It would have a
/// cursorOffset of -3.</param>
/// <param name="segs">array of UnicodeMatcher corresponding to input pattern
/// segments, or null if there are none.</param>
/// <param name="anchorStart">true if the rule is anchored on the left to the context start.</param>
/// <param name="anchorEnd">true if the rule is anchored on the right to the context limit.</param>
/// <param name="theData">context object stored on the rule and handed to the matchers
/// and the replacer created here.</param>
/// <exception cref="System.ArgumentException">if the ante context, post context or cursor
/// position is out of range.</exception>
public TransliterationRule(String input, int anteContextPos, int postContextPos, String output_0, int cursorPos, int cursorOffset, UnicodeMatcher[] segs, bool anchorStart, bool anchorEnd, RuleBasedTransliterator.Data theData)
{
    data = theData;

    // Range checks are only performed when a position was actually given.
    if (anteContextPos >= 0)
    {
        if (anteContextPos > input.Length)
        {
            throw new ArgumentException("Invalid ante context");
        }
        anteContextLength = anteContextPos;
    }
    else
    {
        anteContextLength = 0;
    }

    if (postContextPos >= 0)
    {
        bool postContextInRange = postContextPos >= anteContextLength && postContextPos <= input.Length;
        if (!postContextInRange)
        {
            throw new ArgumentException("Invalid post context");
        }
        keyLength = postContextPos - anteContextLength;
    }
    else
    {
        // No post context: the key runs to the end of the input.
        keyLength = input.Length - anteContextLength;
    }

    if (cursorPos > output_0.Length)
    {
        throw new ArgumentException("Invalid cursor position");
    }
    if (cursorPos < 0)
    {
        // A negative cursor position means "after the output".
        cursorPos = output_0.Length;
    }

    // The segments array is not validated. The caller must guarantee
    // that the segments are well-formed (that is, that all $n references
    // in the output refer to indices of this array, and that no array
    // elements are null).
    this.segments = segs;

    pattern = input;

    flags = 0;
    if (anchorStart)
    {
        flags |= ANCHOR_START;
    }
    if (anchorEnd)
    {
        flags |= ANCHOR_END;
    }

    // The pattern is laid out as [ante context][key][post context]; each
    // non-empty part gets its own matcher, and empty parts stay null.
    int keyStart = anteContextLength;
    int postContextStart = keyStart + keyLength;

    anteContext = (keyStart > 0)
        ? new StringMatcher(pattern.Substring(0, keyStart), 0, data)
        : null;

    key = (keyLength > 0)
        ? new StringMatcher(pattern.Substring(keyStart, keyLength), 0, data)
        : null;

    postContext = (postContextStart < pattern.Length)
        ? new StringMatcher(pattern.Substring(postContextStart), 0, data)
        : null;

    this.output = new StringReplacer(output_0, cursorPos + cursorOffset, data);
}
/// <summary>
/// Given an entry object, instantiate it. Returns null on failure.
/// If the ID points to an alias, null is returned and the alias is
/// appended to aliasReturn. The alias cannot be instantiated here because
/// it may contain filters or compounds, which this method does not
/// understand. The caller should make aliasReturn empty before calling.
/// The entry object is assumed to reside in the dynamic store. It may be
/// modified.
/// </summary>
///
/// <param name="ID_0">the ID given to a newly created transliterator.</param>
/// <param name="entryWrapper">array whose element 0 holds the entry; that element may be
/// replaced with a more processed form of the entry.</param>
/// <param name="aliasReturn">receives the alias ID when the entry is an alias.</param>
/// <returns>the instantiated transliterator, or null on failure or when
/// the entry is an alias.</returns>
private Transliterator InstantiateEntry(String ID_0, Object[] entryWrapper, StringBuilder aliasReturn)
{
    // We actually modify the entry object in some cases. If it
    // is a string, we may partially parse it and turn it into a
    // more processed precursor. This makes the next
    // instantiation faster and allows sharing of immutable
    // components like the RuleBasedTransliterator.Data objects.
    // For this reason, the entry object is an Object[] of length
    // 1.
    for (;;)
    {
        Object entry = entryWrapper[0];

        if (entry is RuleBasedTransliterator.Data)
        {
            RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data)entry;
            return new RuleBasedTransliterator(ID_0, data, null);
        }
        else if (entry is Type)
        {
            try
            {
                return (Transliterator)Activator.CreateInstance((Type)entry);
            }
            catch (TargetException)
            {
                // Deliberately ignored: failure is reported by returning null.
            }
            catch (MemberAccessException)
            {
                // Deliberately ignored: failure is reported by returning null.
            }
            return null;
        }
        else if (entry is TransliteratorRegistry.AliasEntry)
        {
            aliasReturn.Append(((TransliteratorRegistry.AliasEntry)entry).alias);
            return null;
        }
        else if (entry is Transliterator.Factory)
        {
            return ((Transliterator.Factory)entry).GetInstance(ID_0);
        }
        else if (entry is TransliteratorRegistry.CompoundRBTEntry)
        {
            return ((TransliteratorRegistry.CompoundRBTEntry)entry).GetInstance();
        }
        else if (entry is Transliterator)
        {
            return (Transliterator)entry;
        }

        // At this point entry type must be either RULES_FORWARD or
        // RULES_REVERSE. We process the rule data into a
        // TransliteratorRuleData object, and possibly also into an
        // .id header and/or footer. Then we modify the registry with
        // the parsed data and retry.
        TransliteratorParser parser = new TransliteratorParser();
        if (entry is TransliteratorRegistry.ResourceEntry)
        {
            TransliteratorRegistry.ResourceEntry re = (TransliteratorRegistry.ResourceEntry)entry;
            parser.Parse(re.resource, re.direction);
        }
        else
        {
            // If we pull a rule from a locale resource bundle it will
            // be a LocaleEntry. The type is tested up front instead of
            // catching InvalidCastException, so that an exception raised
            // inside Parse is never mistaken for a type mismatch. An entry
            // of any other type still fails this cast with
            // InvalidCastException.
            TransliteratorRegistry.LocaleEntry le = (TransliteratorRegistry.LocaleEntry)entry;
            parser.Parse(le.rule, le.direction);
        }

        // Reset entry to something that we process at the
        // top of the loop, then loop back to the top. As long as we
        // do this, we only loop through twice at most.
        // NOTE: The logic here matches that in
        // Transliterator.createFromRules().
        int idBlockCount = parser.idBlockVector.Count;
        int dataCount = parser.dataVector.Count;

        if (idBlockCount == 0 && dataCount == 0)
        {
            // No idBlock, no data -- this is just an
            // alias for Null
            entryWrapper[0] = new TransliteratorRegistry.AliasEntry(IBM.ICU.Text.NullTransliterator._ID);
        }
        else if (idBlockCount == 0 && dataCount == 1)
        {
            // No idBlock, data != 0 -- this is an
            // ordinary RBT_DATA
            entryWrapper[0] = parser.dataVector[0];
        }
        else if (idBlockCount == 1 && dataCount == 0)
        {
            // idBlock, no data -- this is an alias. The ID has
            // been munged from reverse into forward mode, if
            // necessary, so instantiate the ID in the forward
            // direction.
            if (parser.compoundFilter != null)
            {
                entryWrapper[0] = new TransliteratorRegistry.AliasEntry(
                    parser.compoundFilter.ToPattern(false) + ";" + (String)parser.idBlockVector[0]);
            }
            else
            {
                entryWrapper[0] = new TransliteratorRegistry.AliasEntry(
                    (String)parser.idBlockVector[0]);
            }
        }
        else
        {
            entryWrapper[0] = new TransliteratorRegistry.CompoundRBTEntry(ID_0,
                parser.idBlockVector, parser.dataVector, parser.compoundFilter);
        }
    }
}
/// <summary>
/// Creates a rule-based transliterator from already-parsed rule data.
/// The maximum context length is taken from the data's rule set.
/// </summary>
///
/// <param name="ID">the ID passed to the base class constructor.</param>
/// <param name="data_0">the rule data this transliterator operates on.</param>
/// <param name="filter">the filter passed to the base class constructor; callers in
/// this file pass null.</param>
internal RuleBasedTransliterator(String ID, RuleBasedTransliterator.Data data_0, UnicodeFilter filter)
    : base(ID, filter)
{
    data = data_0;
    SetMaximumContextLength(data_0.ruleSet.GetMaximumContextLength());
}