/// <summary>
/// Reports whether this engine handles the code point <paramref name="c"/>
/// for the given <paramref name="breakType"/>.
/// </summary>
/// <param name="c">The code point to test.</param>
/// <param name="breakType">The kind of break iteration being performed.</param>
/// <returns>
/// <c>true</c> when <paramref name="breakType"/> is a word or line break and
/// the script property of <paramref name="c"/> is Myanmar; otherwise <c>false</c>.
/// </returns>
public override bool Handles(int c, int breakType)
{
    // Only word and line breaking are served by this engine.
    if (breakType != BreakIterator.KIND_WORD && breakType != BreakIterator.KIND_LINE)
    {
        return false;
    }

    int scriptOfChar = UChar.GetIntPropertyValue(c, UProperty.Script);
    return scriptOfChar == UScript.Myanmar;
}
/// <summary>
/// Extends the set of unhandled characters recorded for <paramref name="breakType"/>
/// so that it also covers every character sharing the script of <paramref name="c"/>.
/// May be called concurrently with <see cref="Handles(int, int)"/> or <see cref="FindBreaks(CharacterIterator, int, int, int, DictionaryBreakEngine.DequeI)"/>.
/// Must not be called concurrently with itself.
/// </summary>
/// <param name="c">
/// The code point whose script should be added. Ignored when it equals
/// <c>CharacterIteration.Done32</c>.
/// </param>
/// <param name="breakType">
/// Index into the per-break-type table; values outside the table are ignored.
/// </param>
public void HandleChar(int c, int breakType)
{
    if (breakType < 0 || breakType >= fHandled.Length || c == CharacterIteration.Done32)
    {
        return;
    }

    UnicodeSet current = fHandled[breakType];
    if (current.Contains(c))
    {
        // Already covered; nothing to add.
        return;
    }

    int scriptOfChar = UChar.GetIntPropertyValue(c, UProperty.Script);

    // Build the replacement set completely before storing it, so the table
    // slot only ever refers to a fully populated set.
    UnicodeSet expanded = new UnicodeSet();
    expanded.ApplyInt32PropertyValue(UProperty.Script, scriptOfChar);
    expanded.AddAll(current);
    fHandled[breakType] = expanded;
}
// Populates a UnicodeMap and a sorted dictionary with the long property-value
// name (for the property identified by propEnum) of every code point in
// 0..SET_LIMIT, then verifies that both containers agree on their entries,
// on their distinct values, and on the set of code points mapped to each value.
[Timeout(120000)] // ICU4N: This test can take awhile because of the slowness of adding items to SortedSet
public void TestUnicodeMapGeneralCategory()
{
    Logln("Setting General Category");

    // Each container is created exactly once; the reference dictionary is
    // sorted so it can be compared against the UnicodeMap.
    UnicodeMap<String> map1 = new UnicodeMap<String>();
    IDictionary<Integer, String> map2 = new JCG.SortedDictionary<Integer, String>();

    for (int cp = 0; cp <= SET_LIMIT; ++cp)
    {
        int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
        //if (enumValue <= 0) continue; // for smaller set
        String value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
        map1.Put(cp, value);
        map2[new Integer(cp)] = value;
    }

    checkNext(map1, map2, int.MaxValue);

    Logln("Comparing General Category");
    check(map1, map2, -1);

    Logln("Comparing Values");
    ISet<String> values1 = new JCG.SortedSet<String>(StringComparer.Ordinal);
    map1.GetAvailableValues(values1);
    ISet<String> values2 = new JCG.SortedSet<String>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()
    if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
    {
        throw new ArgumentException("Halting");
    }

    Logln("Comparing Sets");
    foreach (string value in values1)
    {
        Logln(value ?? "null");
        UnicodeSet set1 = map1.KeySet(value);
        UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);
        if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
        {
            throw new ArgumentException("Halting");
        }
    }
}
//-----------------------------------------------------------------------------
//
//   calcChainedFollowPos.    Adjust the followPos sets that were computed
//                            earlier so that rule chaining works.
//                            NOT described by Aho
//
//-----------------------------------------------------------------------------
internal virtual void CalcChainedFollowPos(RBBINode tree)
{
    // Every end-marker node in the tree.
    IList<RBBINode> endMarkers = new JCG.List<RBBINode>();
    tree.FindNodes(endMarkers, RBBINode.endMark);

    // Every leaf node in the tree.
    IList<RBBINode> leaves = new JCG.List<RBBINode>();
    tree.FindNodes(leaves, RBBINode.leafChar);

    // The nodes that can begin a match for a rule with inbound chaining
    // enabled: the union of the firstPosition sets of those rule roots.
    IList<RBBINode> ruleRoots = new JCG.List<RBBINode>();
    AddRuleRootNodes(ruleRoots, tree);

    ISet<RBBINode> chainStartNodes = new JCG.HashSet<RBBINode>();
    foreach (RBBINode root in ruleRoots)
    {
        if (!root.fChainIn)
        {
            continue;
        }
        chainStartNodes.UnionWith(root.fFirstPosSet);
    }

    foreach (RBBINode leaf in leaves)
    {
        // A leaf sits at an overall rule match position when its followPos
        // set contains at least one end-marker node.
        bool canEndMatch = false;
        foreach (RBBINode marker in endMarkers)
        {
            if (leaf.fFollowPos.Contains(marker))
            {
                canEndMatch = true;
                break;
            }
        }
        if (!canEndMatch)
        {
            // Not an end node; move on to the next leaf.
            continue;
        }

        // This leaf can end a match.
        // Line Break specific hack: do not chain from a node whose val
        // corresponds to the $CM char class.
        // TODO: Add rule syntax for this behavior, get specifics out of here and
        //       into the rule file.
        if (fRB.fLBCMNoChain)
        {
            // -1 is returned for sets containing only the {eof} marker string.
            int firstChar = this.fRB.fSetBuilder.GetFirstChar(leaf.fVal);
            if (firstChar != -1)
            {
                int lineBreakValue = UChar.GetIntPropertyValue(firstChar, UProperty.Line_Break);
                if (lineBreakValue == LineBreak.CombiningMark)
                {
                    continue;
                }
            }
        }

        // Look through the nodes that can start a match for ones whose char
        // class equals that of this ending leaf.
        foreach (RBBINode candidate in chainStartNodes)
        {
            if (candidate.fType == RBBINode.leafChar && leaf.fVal == candidate.fVal)
            {
                // One match can end on the same character class that another
                // begins with. Merging the start node's followPos into the end
                // node's followPos lets a match state at the end node
                // transition to the second char of a match that begins with
                // the start node.
                leaf.fFollowPos.UnionWith(candidate.fFollowPos);
            }
        }
    }
}