예제 #1
0
 /// <summary>
 /// Reports whether this engine handles the code point <paramref name="c"/> for the
 /// given <paramref name="breakType"/>.
 /// </summary>
 /// <param name="c">The code point to test.</param>
 /// <param name="breakType">The break iterator kind being processed.</param>
 /// <returns>
 /// <c>true</c> when <paramref name="breakType"/> is <see cref="BreakIterator.KIND_WORD"/> or
 /// <see cref="BreakIterator.KIND_LINE"/> and the Script property of <paramref name="c"/>
 /// is Myanmar; otherwise <c>false</c>.
 /// </returns>
 public override bool Handles(int c, int breakType)
 {
     // Only word and line breaking are served by this engine.
     bool isSupportedKind = breakType == BreakIterator.KIND_WORD
                            || breakType == BreakIterator.KIND_LINE;
     if (!isSupportedKind)
     {
         return false;
     }

     return UChar.GetIntPropertyValue(c, UProperty.Script) == UScript.Myanmar;
 }
예제 #2
0
 /// <summary>
 /// Grows the set of unhandled characters kept for <paramref name="breakType"/> so that it
 /// also contains every character whose script matches that of <paramref name="c"/>.
 /// <para/>
 /// May be called concurrently with <see cref="Handles(int, int)"/> or
 /// <see cref="FindBreaks(CharacterIterator, int, int, int, DictionaryBreakEngine.DequeI)"/>.
 /// Must not be called concurrently with itself.
 /// </summary>
 /// <param name="c">
 /// The code point whose script should be added. Ignored when equal to
 /// <see cref="CharacterIteration.Done32"/>.
 /// </param>
 /// <param name="breakType">
 /// Index into the per-break-type table; values outside the table's bounds are ignored.
 /// </param>
 public void HandleChar(int c, int breakType)
 {
     bool breakTypeInRange = breakType >= 0 && breakType < fHandled.Length;
     if (!breakTypeInRange || c == CharacterIteration.Done32)
     {
         return;
     }

     UnicodeSet current = fHandled[breakType];
     if (current.Contains(c))
     {
         // Already covered; nothing to add.
         return;
     }

     // Build the replacement set completely before storing it, so the table
     // entry is swapped in a single assignment rather than mutated in place.
     int scriptCode = UChar.GetIntPropertyValue(c, UProperty.Script);
     UnicodeSet expanded = new UnicodeSet();
     expanded.ApplyInt32PropertyValue(UProperty.Script, scriptCode);
     expanded.AddAll(current);

     fHandled[breakType] = expanded;
 }
예제 #3
0
        [Timeout(120000)] // ICU4N: This test can take awhile because of the slowness of adding items to SortedSet
        /// <summary>
        /// Fills a <c>UnicodeMap</c> and a reference sorted dictionary with the long property
        /// value name of every code point from 0 through <c>SET_LIMIT</c>, then verifies that
        /// both agree on their entries, on their sets of distinct values, and on the set of
        /// code points mapped to each value.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// Thrown with the message "Halting" as soon as a value-set or key-set comparison fails.
        /// </exception>
        public void TestUnicodeMapGeneralCategory()
        {
            Logln("Setting General Category");

            // Each container is created exactly once (earlier code allocated throwaway
            // instances that were immediately replaced).
            UnicodeMap<string> map1 = new UnicodeMap<string>();
            IDictionary<Integer, string> map2 = new JCG.SortedDictionary<Integer, string>();

            for (int cp = 0; cp <= SET_LIMIT; ++cp)
            {
                int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
                //if (enumValue <= 0) continue; // for smaller set
                string value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
                map1.Put(cp, value);
                map2[new Integer(cp)] = value;
            }
            checkNext(map1, map2, int.MaxValue);

            Logln("Comparing General Category");
            check(map1, map2, -1);

            Logln("Comparing Values");
            ISet<string> values1 = new JCG.SortedSet<string>(StringComparer.Ordinal);
            map1.GetAvailableValues(values1);
            ISet<string> values2 = new JCG.SortedSet<string>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()

            if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
            {
                throw new ArgumentException("Halting");
            }

            Logln("Comparing Sets");
            foreach (string value in values1)
            {
                Logln(value ?? "null");
                UnicodeSet set1 = map1.KeySet(value);
                UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);

                if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
                {
                    throw new ArgumentException("Halting");
                }
            }
        }
예제 #4
0
        //-----------------------------------------------------------------------------
        //
        //   calcChainedFollowPos.    Adjusts the followPos sets that were computed
        //                            earlier so that rule chaining works: a leaf that
        //                            can end a match inherits the followPos of every
        //                            chain-in start leaf with the same value.
        //                            This step is NOT described by Aho.
        //
        //-----------------------------------------------------------------------------
        internal virtual void CalcChainedFollowPos(RBBINode tree)
        {
            // All end-marker nodes in the tree.
            IList<RBBINode> endMarks = new JCG.List<RBBINode>();
            tree.FindNodes(endMarks, RBBINode.endMark);

            // All leaf nodes in the tree.
            IList<RBBINode> leaves = new JCG.List<RBBINode>();
            tree.FindNodes(leaves, RBBINode.leafChar);

            // The leaves that can begin a match for a rule with inbound chaining
            // enabled: the union of the firstPosition sets of those rule roots.
            IList<RBBINode> ruleRoots = new JCG.List<RBBINode>();
            AddRuleRootNodes(ruleRoots, tree);

            ISet<RBBINode> chainStarts = new JCG.HashSet<RBBINode>();
            foreach (RBBINode root in ruleRoots)
            {
                if (!root.fChainIn)
                {
                    continue;
                }
                chainStarts.UnionWith(root.fFirstPosSet);
            }

            foreach (RBBINode leaf in leaves)
            {
                // A leaf ends an overall rule match when its followPos set
                // contains at least one end-marker node.
                bool endsMatch = false;
                foreach (RBBINode marker in endMarks)
                {
                    if (leaf.fFollowPos.Contains(marker))
                    {
                        endsMatch = true;
                        break;
                    }
                }
                if (!endsMatch)
                {
                    // Not an end node; move on to the next leaf.
                    continue;
                }

                // Line Break specific hack:  a node whose value corresponds to the $CM
                //                            char class is never chained from.
                // TODO:  Add rule syntax for this behavior, get specifics out of here and
                //        into the rule file.
                if (fRB.fLBCMNoChain)
                {
                    // firstChar == -1 occurs with sets containing only the {eof} marker string.
                    int firstChar = fRB.fSetBuilder.GetFirstChar(leaf.fVal);
                    if (firstChar != -1
                        && UChar.GetIntPropertyValue(firstChar, UProperty.Line_Break) == LineBreak.CombiningMark)
                    {
                        continue;
                    }
                }

                // Look through the possible match starts for leaves carrying the same
                //   value (character class) as this ending leaf.
                foreach (RBBINode start in chainStarts)
                {
                    bool sameClassLeaf = start.fType == RBBINode.leafChar
                                         && leaf.fVal == start.fVal;
                    if (!sameClassLeaf)
                    {
                        continue;
                    }

                    // Merging the start leaf's followPos into the ending leaf's
                    //  followPos lets a match state at the ending leaf transition
                    //  to the second char of a match that begins with the start leaf.
                    leaf.fFollowPos.UnionWith(start.fFollowPos);
                }
            }
        }