Ejemplo n.º 1
0
        /// <summary>
        /// Compares <c>WholeBreakIterator</c> with the invariant-culture sentence iterator
        /// on the text "000ab000", passing 3, 2 and 4 to <c>assertSameBreaks</c>
        /// (presumably slice offset, slice length and the position to start from --
        /// confirm against that helper).
        /// </summary>
        public void TestFirstPosition()
        {
            BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
            BreakIterator wholeIterator = new WholeBreakIterator();

            assertSameBreaks(
                "000ab000", 3, 2, 4,
                sentenceIterator,
                wholeIterator);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Chooses the <see cref="BreakIterator"/> to use for titlecasing.
        /// </summary>
        /// <param name="locale">Locale handed to the word or sentence iterator factory.</param>
        /// <param name="options">Option bits; only those in <c>TITLECASE_ITERATOR_MASK</c> are examined.</param>
        /// <param name="iter">Explicit iterator supplied by the caller, or <c>null</c> to derive one from <paramref name="options"/>.</param>
        /// <returns>
        /// <paramref name="iter"/> when it is non-null; otherwise a word iterator (no option bits),
        /// a <c>WholeStringBreakIterator</c> (<c>TITLECASE_WHOLE_STRING</c>) or a sentence iterator
        /// (<c>TITLECASE_SENTENCES</c>).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// An iterator option is combined with an explicit iterator, or the masked option value is not recognized.
        /// </exception>
        public static BreakIterator GetTitleBreakIterator(
            ULocale locale, int options, BreakIterator iter)
        {
            options &= TITLECASE_ITERATOR_MASK;

            // An explicit iterator wins, but it may not be combined with an iterator option.
            if (iter != null)
            {
                if (options != 0)
                {
                    throw new ArgumentException(
                        "titlecasing iterator option together with an explicit iterator");
                }
                return iter;
            }

            switch (options)
            {
                case 0:
                    return BreakIterator.GetWordInstance(locale);

                case TITLECASE_WHOLE_STRING:
                    return new WholeStringBreakIterator();

                case TITLECASE_SENTENCES:
                    return BreakIterator.GetSentenceInstance(locale);

                default:
                    throw new ArgumentException("unknown titlecasing iterator option");
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates the tokenizer over <paramref name="reader"/>, handing the base constructor an
 /// invariant-culture sentence <see cref="BreakIterator"/>, and obtains the term, offset
 /// and position-increment attributes through <c>AddAttribute</c>.
 /// </summary>
 /// <param name="reader">Text source; forwarded unchanged to the base constructor.</param>
 public SentenceAndWordTokenizer(TextReader reader)
     : base(
         reader,
         BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture))
 {
     // The three attributes are requested in the same order as before.
     termAtt = AddAttribute<ICharTermAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();
     posIncAtt = AddAttribute<IPositionIncrementAttribute>();
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Compares <c>WholeBreakIterator</c> with the invariant-culture sentence iterator
        /// for four texts, each checked with second argument 3 and a varying third argument
        /// (presumably slice offset and slice length -- confirm against <c>assertSameBreaks</c>).
        /// </summary>
        public void TestSliceMiddle()
        {
            BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
            BreakIterator wholeIterator = new WholeBreakIterator();

            // Parallel tables: texts[i] is checked together with sliceLengths[i].
            string[] texts = { "000a000", "000ab000", "000abc000", "000000" };
            int[] sliceLengths = { 1, 2, 3, 0 };

            for (int i = 0; i < texts.Length; i++)
            {
                assertSameBreaks(texts[i], 3, sliceLengths[i], sentenceIterator, wholeIterator);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Compares <c>WholeBreakIterator</c> with the invariant-culture sentence iterator
        /// on the texts "a", "ab", "abc" and the empty string, in that order.
        /// </summary>
        public void TestSingleSentences()
        {
            BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
            BreakIterator wholeIterator = new WholeBreakIterator();

            string[] texts = { "a", "ab", "abc", "" };
            foreach (string text in texts)
            {
                assertSameBreaks(text, sentenceIterator, wholeIterator);
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Assigns the character, word, line, sentence and title break iterator members
 /// from the parameterless <see cref="BreakIterator"/> factory methods.
 /// </summary>
 public void Init()
 {
     characterBreak = BreakIterator.GetCharacterInstance();
     wordBreak = BreakIterator.GetWordInstance();
     lineBreak = BreakIterator.GetLineInstance();
     sentenceBreak = BreakIterator.GetSentenceInstance();
     titleBreak = BreakIterator.GetTitleInstance();
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Wraps the Japanese sentence iterator with the Japanese filtered-break builder and
        /// checks that the five-character text "OKです。" yields position 0 initially, a single
        /// break at offset 5, and then <c>BreakIterator.Done</c>.
        /// </summary>
        public void TestFilteredJapanese()
        {
            ULocale japanese = ULocale.JAPANESE;

            // Builder first, then the base iterator: same evaluation order as the chained form.
            var filterBuilder = FilteredBreakIteratorBuilder.GetInstance(japanese);
            var sentenceIterator = BreakIterator.GetSentenceInstance(japanese);
            BreakIterator filtered = filterBuilder.WrapIteratorWithFilter(sentenceIterator);

            filtered.SetText("OKです。");

            assertEquals("Starting point", 0, filtered.Current);
            assertEquals("Next point", 5, filtered.Next());
            assertEquals("Last point", BreakIterator.Done, filtered.Next());
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Obtains the invariant-culture sentence <see cref="BreakIterator"/> while
 /// <c>syncLock</c> is entered through <c>UninterruptableMonitor</c>.
 /// </summary>
 /// <returns>The iterator produced by <c>BreakIterator.GetSentenceInstance</c>.</returns>
 private static BreakIterator LoadSentenceProto()
 {
     BreakIterator prototype;

     UninterruptableMonitor.Enter(syncLock);
     try
     {
         prototype = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
     }
     finally
     {
         // Always leave the monitor, even when the factory call throws.
         UninterruptableMonitor.Exit(syncLock);
     }

     return prototype;
 }
Ejemplo n.º 9
0
            /// <summary>
            /// Runs one break-iterator check: creates the iterator selected by <c>type</c> for
            /// <c>locale</c>, collects up to <c>maxOffsetCount</c> offsets while iterating forward
            /// over <c>text</c>, validates them with <c>offsetsMatchExpected</c>, and -- only if the
            /// forward pass matched -- walks backward verifying that <c>Previous()</c> revisits the
            /// same offsets in reverse. Failures are reported through <c>Errln</c>.
            /// </summary>
            public void doTest()
            {
                BreakIterator iterator;
                if (type == BreakIterator.KIND_CHARACTER)
                {
                    iterator = BreakIterator.GetCharacterInstance(locale);
                }
                else if (type == BreakIterator.KIND_WORD)
                {
                    iterator = BreakIterator.GetWordInstance(locale);
                }
                else if (type == BreakIterator.KIND_LINE)
                {
                    iterator = BreakIterator.GetLineInstance(locale);
                }
                else if (type == BreakIterator.KIND_SENTENCE)
                {
                    iterator = BreakIterator.GetSentenceInstance(locale);
                }
                else
                {
                    Errln("Unsupported break iterator type " + type);
                    return;
                }

                iterator.SetText(text);

                // Forward pass: record offsets until the buffer is full or the iterator is done.
                int[] found = new int[maxOffsetCount];
                int foundCount = 0;
                while (foundCount < maxOffsetCount)
                {
                    int next = iterator.Next();
                    if (next == BreakIterator.Done)
                    {
                        break;
                    }
                    found[foundCount++] = next;
                }

                if (!offsetsMatchExpected(found, foundCount))
                {
                    // Only the first 16 characters of the text are shown in the report.
                    String shown = (text.Length > 16) ? text.Substring(0, 16) : text;
                    String prefix = "For type " + type + " " + locale + ", text \"" + shown + "...\"" +
                                    "; expect " + expectOffsets.Length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.Length);
                    Errln(prefix +
                          "; found " + foundCount + " offsets fwd:" + formatOffsets(found, foundCount));
                    return;
                }

                // Backward pass: the iterator sits on the last recorded offset, so the first
                // Previous() must land on the second-to-last one, and so on down to index 0.
                for (int idx = foundCount - 2; idx >= 0; idx--)
                {
                    int previous = iterator.Previous();
                    if (previous == found[idx])
                    {
                        continue;
                    }

                    String shown = (text.Length > 16) ? text.Substring(0, 16) : text;
                    String prefix = "For type " + type + " " + locale + ", text \"" + shown + "...\"" +
                                    "; expect " + expectOffsets.Length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.Length);
                    Errln(prefix +
                          "; found rev offset " + previous + " where expect " + found[idx]);
                    break;
                }
            }
Ejemplo n.º 10
0
        /// <summary>
        /// Feeds 10000 random Unicode strings through a sentence <c>CharArrayIterator</c> and
        /// hands each one, together with the current-culture sentence <see cref="BreakIterator"/>,
        /// to <c>Consume</c>.
        /// </summary>
        public virtual void TestConsumeSentenceInstance()
        {
            // The current culture is used deliberately: LuceneTestCase randomizes it.
            BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
            var charIterator = CharArrayIterator.NewSentenceInstance();

            for (int round = 1; round <= 10000; round++)
            {
                var chars = TestUtil.RandomUnicodeString(Random()).toCharArray();
                charIterator.SetText(chars, 0, chars.Length);
                Consume(sentenceIterator, charIterator);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Checks a <c>BreakIteratorBoundaryScanner</c> built on the current-culture sentence
        /// iterator: starting from "any application" in <c>TEXT</c>, the start offset is expected
        /// at "It is a" and the end offset just past "application that requires\n".
        /// </summary>
        public void TestICUSentenceBoundary()
        {
            StringBuilder buffer = new StringBuilder(TEXT);
            // The current culture is used deliberately: LuceneTestCase randomizes it.
            BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(sentenceIterator);

            int start = TEXT.IndexOf("any application");

            int expectedStart = TEXT.IndexOf("It is a");
            TestFindStartOffset(buffer, start, expectedStart, scanner);

            int expectedEnd = TEXT.IndexOf("application that requires")
                              + "application that requires\n".Length;
            TestFindEndOffset(buffer, start, expectedEnd, scanner);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Titlecases <paramref name="name"/> when it starts with a lowercase code point and either
        /// the display context asks for beginning-of-sentence capitalization or
        /// <c>capitalizationUsage</c> has the entry for <paramref name="usage"/> set.
        /// </summary>
        /// <param name="usage">Usage whose integer value indexes <c>capitalizationUsage</c>.</param>
        /// <param name="name">Display name to adjust; may be <c>null</c> or empty.</param>
        /// <returns>The titlecased name, or <paramref name="name"/> unchanged when no adjustment applies.</returns>
        private string AdjustForUsageAndContext(CapitalizationContextUsage usage, string name)
        {
            // Nothing to do for null/empty names or names that do not start in lowercase.
            if (name == null || name.Length <= 0 || !UChar.IsLower(name.CodePointAt(0)))
            {
                return name;
            }

            // Note: capitalizationUsage is only expected to be populated when capitalization is
            // CAPITALIZATION_FOR_UI_LIST_OR_MENU or CAPITALIZATION_FOR_STANDALONE.
            bool titlecaseRequested =
                displayContextOptions.Capitalization == Capitalization.BeginningOfSentence
                || (capitalizationUsage != null && capitalizationUsage[(int)usage]);
            if (!titlecaseRequested)
            {
                return name;
            }

            // Creating the iterator here should only happen when deserializing, etc.
            LazyInitializer.EnsureInitialized(
                ref capitalizationBrkIter,
                () => BreakIterator.GetSentenceInstance(locale));

            return UChar.ToTitleCase(
                locale,
                name,
                capitalizationBrkIter,
                UChar.TitleCaseNoLowerCase | UChar.TitleCaseNoBreakAdjustment);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Returns a sentence <see cref="BreakIterator"/> for <paramref name="culture"/>.
        /// For most cultures this is <c>SentenceInstance</c>, which is described as ignoring
        /// newline characters and breaking on sentences that do not start with capital letters,
        /// similar to the JDK, while otherwise following the default break behavior described at
        /// <a href="http://userguide.icu-project.org/boundaryanalysis">http://userguide.icu-project.org/boundaryanalysis</a>.
        /// </summary>
        /// <remarks>
        /// NOTE: If the culture is Thai, Lao, Burmese, Khmer, Japanese, Korean, or Chinese,
        /// the result is whatever <see cref="BreakIterator.GetSentenceInstance(CultureInfo)"/>
        /// returns for that culture. See the section titled
        /// "Details about Dictionary-Based Break Iteration" at
        /// <a href="http://userguide.icu-project.org/boundaryanalysis">http://userguide.icu-project.org/boundaryanalysis</a>.
        /// </remarks>
        /// <param name="culture">The culture of the <see cref="BreakIterator"/> instance to return.</param>
        /// <returns>A sentence <see cref="BreakIterator"/> instance.</returns>
        public static BreakIterator GetSentenceInstance(CultureInfo culture)
        {
            string language = culture.TwoLetterISOLanguageName;

            bool useCultureSpecificIterator =
                language == "th"       // Thai
                || language == "lo"    // Lao
                || language == "my"    // Burmese
                || language == "km"    // Khmer
                || language == "ja"    // Japanese
                || language == "ko"    // Korean
                || language == "zh";   // Chinese

            if (useCultureSpecificIterator)
            {
                // NOTE(review): the call is qualified with BreakIterator, so this assumes the
                // enclosing type is not BreakIterator itself -- confirm.
                return BreakIterator.GetSentenceInstance(culture);
            }

            return SentenceInstance;
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Titlecases <paramref name="name"/> when it starts with a lowercase code point and either
 /// <c>capitalization</c> is beginning-of-sentence or <c>capitalizationUsage</c> has the entry
 /// for <paramref name="usage"/> set.
 /// </summary>
 /// <param name="usage">Usage whose integer value indexes <c>capitalizationUsage</c>.</param>
 /// <param name="name">Display name to adjust; may be <c>null</c> or empty.</param>
 /// <returns>The titlecased name, or <paramref name="name"/> unchanged when no adjustment applies.</returns>
 private string AdjustForUsageAndContext(CapitalizationContextUsage usage, String name)
 {
     // Nothing to do for null/empty names or names that do not start in lowercase.
     if (name == null || name.Length <= 0 || !UCharacter.IsLowerCase(name.CodePointAt(0)))
     {
         return name;
     }

     // Note: capitalizationUsage is only expected to be populated when capitalization is
     // CAPITALIZATION_FOR_UI_LIST_OR_MENU or CAPITALIZATION_FOR_STANDALONE.
     bool titlecaseRequested =
         capitalization == DisplayContext.CapitalizationForBeginningOfSentence
         || (capitalizationUsage != null && capitalizationUsage[(int)usage]);
     if (!titlecaseRequested)
     {
         return name;
     }

     // NOTE(review): locking on 'this' is generally discouraged; it is kept because other
     // code outside this block may synchronize on the same object -- confirm before changing.
     lock (this)
     {
         if (capitalizationBrkIter == null)
         {
             // Should only happen when deserializing, etc.
             capitalizationBrkIter = BreakIterator.GetSentenceInstance(locale);
         }

         return UCharacter.ToTitleCase(
             locale,
             name,
             capitalizationBrkIter,
             UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
     }
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Runs <c>doOtherInvariantTest</c> on the default sentence iterator using
        /// <c>cannedTestChars</c> extended with a few extra punctuation, kana and BOM characters.
        /// </summary>
        public void TestSentenceInvariants()
        {
            BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance();

            // Extra characters appended to the canned set for the sentence case.
            const string extraChars = ".,\u3001\u3002\u3041\u3042\u3043\ufeff";

            doOtherInvariantTest(sentenceIterator, cannedTestChars + extraChars);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Verifies that every <see cref="BreakIterator"/> factory method (character, line,
        /// sentence, title, word) throws <see cref="ArgumentNullException"/> when given a null
        /// <see cref="CultureInfo"/> or a null <c>ULocale</c>. A call that returns normally is
        /// reported through <c>Errln</c>.
        /// </summary>
        public void TestNullLocale()
        {
            // Typed nulls: the static type selects the CultureInfo or the ULocale overload.
            CultureInfo nullCulture = null;
            ULocale nullULocale = null;

            // Parallel tables: factoryCalls[i] is expected to throw; otherwise messages[i] is logged.
            Action[] factoryCalls =
            {
                () => BreakIterator.GetCharacterInstance(nullCulture),
                () => BreakIterator.GetCharacterInstance(nullULocale),
                () => BreakIterator.GetLineInstance(nullCulture),
                () => BreakIterator.GetLineInstance(nullULocale),
                () => BreakIterator.GetSentenceInstance(nullCulture),
                () => BreakIterator.GetSentenceInstance(nullULocale),
                () => BreakIterator.GetTitleInstance(nullCulture),
                () => BreakIterator.GetTitleInstance(nullULocale),
                () => BreakIterator.GetWordInstance(nullCulture),
                () => BreakIterator.GetWordInstance(nullULocale),
            };
            string[] messages =
            {
                "getCharacterInstance((Locale)null) did not throw NPE.",
                "getCharacterInstance((ULocale)null) did not throw NPE.",
                "getLineInstance((Locale)null) did not throw NPE.",
                "getLineInstance((ULocale)null) did not throw NPE.",
                "getSentenceInstance((Locale)null) did not throw NPE.",
                "getSentenceInstance((ULocale)null) did not throw NPE.",
                "getTitleInstance((Locale)null) did not throw NPE.",
                "getTitleInstance((ULocale)null) did not throw NPE.",
                "getWordInstance((Locale)null) did not throw NPE.",
                "getWordInstance((ULocale)null) did not throw NPE.",
            };

            for (int i = 0; i < factoryCalls.Length; i++)
            {
                try
                {
                    factoryCalls[i]();
                    Errln(messages[i]);
                }
                catch (ArgumentNullException)
                {
                    /* OK */
                }
            }
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Supplies the <see cref="BreakIterator"/> used to divide text into passages.
 /// The default implementation returns
 /// <see cref="BreakIterator.GetSentenceInstance(CultureInfo)"/> for the invariant culture;
 /// subclasses can override this method to customize.
 /// </summary>
 /// <param name="field">Field name; not used by this default implementation.</param>
 /// <returns>An invariant-culture sentence <see cref="BreakIterator"/>.</returns>
 protected virtual BreakIterator GetBreakIterator(string field)
 {
     BreakIterator sentenceIterator = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
     return sentenceIterator;
 }
Ejemplo n.º 18
0
        /// <summary>
        /// Exercises <c>FilteredBreakIteratorBuilder</c> and the <c>ss=standard</c> locale keyword
        /// against one English sample text containing the abbreviations "Mr.", "Capt." and "Sgt.".
        /// Each braced section below builds a fresh builder and/or iterator and checks the
        /// resulting sentence breaks.
        /// </summary>
        public void TestFilteredBreakIteratorBuilder()
        {
            FilteredBreakIteratorBuilder builder;
            BreakIterator baseBI;
            BreakIterator filteredBI;

            // Sample text. Offset 84 is just past "recovered. " and offset 90 is just past "Capt. ".
            String text      = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."; // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
            String ABBR_MR   = "Mr.";
            String ABBR_CAPT = "Capt.";

            // 1. Empty builder wrapped around the English sentence iterator: expects the
            //    behavior checked by assertDefaultBreakBehavior.
            {
                Logln("Constructing empty builder\n");
                builder = FilteredBreakIteratorBuilder.GetEmptyInstance();

                Logln("Constructing base BI\n");
                baseBI = BreakIterator.GetSentenceInstance(new CultureInfo("en"));

                Logln("Building new BI\n");
                filteredBI = builder.WrapIteratorWithFilter(baseBI);

                assertDefaultBreakBehavior(filteredBI, text);
            }

            // 2. Empty builder with only "Mr." suppressed. Also checks the return values of
            //    SuppressBreakAfter/UnsuppressBreakAfter: true on the first add/remove, false when
            //    repeated. Expected breaks: 84, 90, 278.
            {
                Logln("Constructing empty builder\n");
                builder = FilteredBreakIteratorBuilder.GetEmptyInstance();

                Logln("Adding Mr. as an exception\n");

                assertEquals("2.1 suppressBreakAfter", true, builder.SuppressBreakAfter(ABBR_MR));
                assertEquals("2.2 suppressBreakAfter", false, builder.SuppressBreakAfter(ABBR_MR));
                assertEquals("2.3 unsuppressBreakAfter", true, builder.UnsuppressBreakAfter(ABBR_MR));
                assertEquals("2.4 unsuppressBreakAfter", false, builder.UnsuppressBreakAfter(ABBR_MR));
                assertEquals("2.5 suppressBreakAfter", true, builder.SuppressBreakAfter(ABBR_MR));

                Logln("Constructing base BI\n");
                baseBI = BreakIterator.GetSentenceInstance(new CultureInfo("en"));

                Logln("Building new BI\n");
                filteredBI = builder.WrapIteratorWithFilter(baseBI);

                Logln("Testing:");
                filteredBI.SetText(text);
                assertEquals("2nd next", 84, filteredBI.Next());
                assertEquals("2nd next", 90, filteredBI.Next());
                assertEquals("2nd next", 278, filteredBI.Next());
                filteredBI.First();
            }


            // 3. Empty builder with both "Mr." and "Capt." suppressed: the break at 90
            //    (after "Capt. ") is no longer expected. Expected breaks: 84, 278.
            {
                Logln("Constructing empty builder\n");
                builder = FilteredBreakIteratorBuilder.GetEmptyInstance();

                Logln("Adding Mr. and Capt as an exception\n");
                assertEquals("3.1 suppressBreakAfter", true, builder.SuppressBreakAfter(ABBR_MR));
                assertEquals("3.2 suppressBreakAfter", true, builder.SuppressBreakAfter(ABBR_CAPT));

                Logln("Constructing base BI\n");
                baseBI = BreakIterator.GetSentenceInstance(new CultureInfo("en"));

                Logln("Building new BI\n");
                filteredBI = builder.WrapIteratorWithFilter(baseBI);

                Logln("Testing:");
                filteredBI.SetText(text);
                assertEquals("3rd next", 84, filteredBI.Next());
                assertEquals("3rd next", 278, filteredBI.Next());
                filteredBI.First();
            }

            // 4. English builder with "Capt." removed from its suppressions (the removal is
            //    expected to return true). Expected breaks: 84, 90, 278.
            //    NOTE(review): the assertion label below says "suppressBreakAfter" although the
            //    call is UnsuppressBreakAfter.
            {
                Logln("Constructing English builder\n");
                builder = FilteredBreakIteratorBuilder.GetInstance(ULocale.ENGLISH);

                Logln("Constructing base BI\n");
                baseBI = BreakIterator.GetSentenceInstance(new CultureInfo("en"));

                Logln("unsuppressing 'Capt'");
                assertEquals("1st suppressBreakAfter", true, builder.UnsuppressBreakAfter(ABBR_CAPT));

                Logln("Building new BI\n");
                filteredBI = builder.WrapIteratorWithFilter(baseBI);

                if (filteredBI != null)
                {
                    Logln("Testing:");
                    filteredBI.SetText(text);
                    assertEquals("4th next", 84, filteredBI.Next());
                    assertEquals("4th next", 90, filteredBI.Next());
                    assertEquals("4th next", 278, filteredBI.Next());
                    filteredBI.First();
                }
            }

            // 5. Unmodified English builder: expects the behavior checked by
            //    assertEnglishBreakBehavior.
            {
                Logln("Constructing English builder\n");
                builder = FilteredBreakIteratorBuilder.GetInstance(ULocale.ENGLISH);

                Logln("Constructing base BI\n");
                baseBI = BreakIterator.GetSentenceInstance(new CultureInfo("en"));

                Logln("Building new BI\n");
                filteredBI = builder.WrapIteratorWithFilter(baseBI);

                if (filteredBI != null)
                {
                    assertEnglishBreakBehavior(filteredBI, text);
                }
            }

            // 6. Sentence iterator requested directly for "en-US-u-ss-standard": expected to
            //    behave like the English filtered iterator.
            {
                Logln("Constructing English @ss=standard\n");
                filteredBI = BreakIterator.GetSentenceInstance(ULocale.ForLanguageTag("en-US-u-ss-standard"));

                if (filteredBI != null)
                {
                    assertEnglishBreakBehavior(filteredBI, text);
                }
            }

            // 7. Afrikaans with ss=standard: expected to match the default behavior.
            {
                Logln("Constructing Afrikaans @ss=standard - should be == default\n");
                filteredBI = BreakIterator.GetSentenceInstance(ULocale.ForLanguageTag("af-u-ss-standard"));

                assertDefaultBreakBehavior(filteredBI, text);
            }

            // 8. Japanese with ss=standard: expected to match the default behavior.
            {
                Logln("Constructing Japanese @ss=standard - should be == default\n");
                filteredBI = BreakIterator.GetSentenceInstance(ULocale.ForLanguageTag("ja-u-ss-standard"));

                assertDefaultBreakBehavior(filteredBI, text);
            }
            // 9. Language tag "tfg" with ss=standard: expected to match the default behavior.
            {
                Logln("Constructing tfg @ss=standard - should be == default\n");
                filteredBI = BreakIterator.GetSentenceInstance(ULocale.ForLanguageTag("tfg-u-ss-standard"));

                assertDefaultBreakBehavior(filteredBI, text);
            }

            // 10. French builder wrapped around the French sentence iterator: expects the
            //     behavior checked by assertFrenchBreakBehavior.
            {
                Logln("Constructing French builder");
                builder = FilteredBreakIteratorBuilder.GetInstance(ULocale.FRENCH);

                Logln("Constructing base BI\n");
                baseBI = BreakIterator.GetSentenceInstance(new CultureInfo("fr"));

                Logln("Building new BI\n");
                filteredBI = builder.WrapIteratorWithFilter(baseBI);

                if (filteredBI != null)
                {
                    assertFrenchBreakBehavior(filteredBI, text);
                }
            }
        }
Ejemplo n.º 19
0
        public void TestExtended()
        {
            TestParams tp = new TestParams();


            //
            //  Open and read the test data file.
            //
            StringBuilder testFileBuf = new StringBuilder();
            Stream        @is         = null;

            try
            {
                @is = typeof(RBBITestExtended).GetTypeInfo().Assembly.GetManifestResourceStream("ICU4N.Dev.Test.Rbbi.rbbitst.txt");
                if (@is == null)
                {
                    Errln("Could not open test data file rbbitst.txt");
                    return;
                }
                StreamReader isr = new StreamReader(@is, Encoding.UTF8);
                try
                {
                    int c;
                    int count = 0;
                    for (; ;)
                    {
                        c = isr.Read();
                        if (c < 0)
                        {
                            break;
                        }
                        count++;
                        if (c == 0xFEFF && count == 1)
                        {
                            // BOM in the test data file. Discard it.
                            continue;
                        }

                        testFileBuf.AppendCodePoint(c);
                    }
                }
                finally
                {
                    isr.Dispose();
                }
            }
            catch (IOException e)
            {
                Errln(e.ToString());
                try
                {
                    @is.Dispose();
                }
                catch (IOException ignored)
                {
                }
                return;
            }

            String testString = testFileBuf.ToString();


            const int PARSE_COMMENT = 1;
            const int PARSE_TAG     = 2;
            const int PARSE_DATA    = 3;
            const int PARSE_NUM     = 4;
            const int PARSE_RULES   = 5;

            int parseState = PARSE_TAG;

            int savedState = PARSE_TAG;

            int lineNum  = 1;
            int colStart = 0;
            int column   = 0;
            int charIdx  = 0;
            int i;

            int tagValue = 0;                                   // The numeric value of a <nnn> tag.

            StringBuilder rules          = new StringBuilder(); // Holds rules from a <rules> ... </rules> block
            int           rulesFirstLine = 0;                   // Line number of the start of current <rules> block

            int len = testString.Length;

            for (charIdx = 0; charIdx < len;)
            {
                int c = testString.CodePointAt(charIdx);
                charIdx++;
                if (c == '\r' && charIdx < len && testString[charIdx] == '\n')
                {
                    // treat CRLF as a unit
                    c = '\n';
                    charIdx++;
                }
                if (c == '\n' || c == '\r')
                {
                    lineNum++;
                    colStart = charIdx;
                }
                column = charIdx - colStart + 1;

                switch (parseState)
                {
                case PARSE_COMMENT:
                    if (c == 0x0a || c == 0x0d)
                    {
                        parseState = savedState;
                    }
                    break;

                case PARSE_TAG:
                {
                    if (c == '#')
                    {
                        parseState = PARSE_COMMENT;
                        savedState = PARSE_TAG;
                        break;
                    }
                    if (UCharacter.IsWhitespace(c))
                    {
                        break;
                    }
                    if (testString.StartsWith("<word>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetWordInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<char>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetCharacterInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<line>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetLineInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<sent>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetSentenceInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<title>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetTitleInstance(tp.currentLocale);
                        charIdx += 6;
                        break;
                    }
                    if (testString.StartsWith("<rules>", charIdx - 1) ||
                        testString.StartsWith("<badrules>", charIdx - 1))
                    {
                        charIdx        = testString.IndexOf('>', charIdx) + 1;
                        parseState     = PARSE_RULES;
                        rules.Length   = (0);
                        rulesFirstLine = lineNum;
                        break;
                    }

                    if (testString.StartsWith("<locale ", charIdx - 1))
                    {
                        int closeIndex = testString.IndexOf(">", charIdx);
                        if (closeIndex < 0)
                        {
                            Errln("line" + lineNum + ": missing close on <locale  tag.");
                            break;
                        }
                        String localeName = testString.Substring(charIdx + 6, closeIndex - (charIdx + 6));         // ICU4N: Corrected 2nd parameter
                        localeName       = localeName.Trim();
                        tp.currentLocale = new ULocale(localeName);
                        charIdx          = closeIndex + 1;
                        break;
                    }
                    if (testString.StartsWith("<data>", charIdx - 1))
                    {
                        parseState            = PARSE_DATA;
                        charIdx              += 5;
                        tp.dataToBreak.Length = (0);
                        Arrays.Fill(tp.expectedBreaks, 0);
                        Arrays.Fill(tp.srcCol, 0);
                        Arrays.Fill(tp.srcLine, 0);
                        break;
                    }

                    Errln("line" + lineNum + ": Tag expected in test file.");
                    return;
                    //parseState = PARSE_COMMENT;
                    //savedState = PARSE_DATA;
                }

                case PARSE_RULES:
                    if (testString.StartsWith("</rules>", charIdx - 1))
                    {
                        charIdx   += 7;
                        parseState = PARSE_TAG;
                        try
                        {
                            tp.bi = new RuleBasedBreakIterator(rules.ToString());
                        }
                        catch (ArgumentException e)
                        {
                            Errln(String.Format("rbbitst.txt:{0}  Error creating break iterator from rules.  {1}", lineNum, e));
                        }
                    }
                    else if (testString.StartsWith("</badrules>", charIdx - 1))
                    {
                        charIdx   += 10;
                        parseState = PARSE_TAG;
                        bool goodRules = true;
                        try
                        {
                            new RuleBasedBreakIterator(rules.ToString());
                        }
                        catch (ArgumentException e)
                        {
                            goodRules = false;
                        }
                        if (goodRules)
                        {
                            Errln(String.Format(
                                      "rbbitst.txt:{0}  Expected, but did not get, a failure creating break iterator from rules.",
                                      lineNum));
                        }
                    }
                    else
                    {
                        rules.AppendCodePoint(c);
                    }
                    break;

                case PARSE_DATA:
                    if (c == '•')
                    {
                        int breakIdx = tp.dataToBreak.Length;
                        tp.expectedBreaks[breakIdx] = -1;
                        tp.srcLine[breakIdx]        = lineNum;
                        tp.srcCol[breakIdx]         = column;
                        break;
                    }

                    if (testString.StartsWith("</data>", charIdx - 1))
                    {
                        // Add final entry to mappings from break location to source file position.
                        //  Need one extra because last break position returned is after the
                        //    last char in the data, not at the last char.
                        int idx = tp.dataToBreak.Length;
                        tp.srcLine[idx] = lineNum;
                        tp.srcCol[idx]  = column;

                        parseState = PARSE_TAG;
                        charIdx   += 6;

                        // RUN THE TEST!
                        executeTest(tp);
                        break;
                    }

                    if (testString.StartsWith("\\N{", charIdx - 1))
                    {
                        int nameEndIdx = testString.IndexOf('}', charIdx);
                        if (nameEndIdx == -1)
                        {
                            Errln("Error in named character in test file at line " + lineNum +
                                  ", col " + column);
                        }
                        // Named character, e.g. \N{COMBINING GRAVE ACCENT}
                        // Get the code point from the name and insert it into the test data.
                        String charName = testString.Substring(charIdx + 2, nameEndIdx - (charIdx + 2));     // ICU4N: Corrected 2nd parameter
                        c = UCharacter.GetCharFromName(charName);
                        if (c == -1)
                        {
                            Errln("Error in named character in test file at line " + lineNum +
                                  ", col " + column);
                        }
                        else
                        {
                            // Named code point was recognized.  Insert it
                            //   into the test data.
                            tp.dataToBreak.AppendCodePoint(c);
                            for (i = tp.dataToBreak.Length - 1; i >= 0 && tp.srcLine[i] == 0; i--)
                            {
                                tp.srcLine[i] = lineNum;
                                tp.srcCol[i]  = column;
                            }
                        }
                        if (nameEndIdx > charIdx)
                        {
                            charIdx = nameEndIdx + 1;
                        }
                        break;
                    }

                    if (testString.StartsWith("<>", charIdx - 1))
                    {
                        charIdx++;
                        int breakIdx = tp.dataToBreak.Length;
                        tp.expectedBreaks[breakIdx] = -1;
                        tp.srcLine[breakIdx]        = lineNum;
                        tp.srcCol[breakIdx]         = column;
                        break;
                    }

                    if (c == '<')
                    {
                        tagValue   = 0;
                        parseState = PARSE_NUM;
                        break;
                    }

                    if (c == '#' && column == 3)
                    {       // TODO:  why is column off so far?
                        parseState = PARSE_COMMENT;
                        savedState = PARSE_DATA;
                        break;
                    }

                    if (c == '\\')
                    {
                        // Check for \ at end of line, a line continuation.
                        //     Advance over (discard) the newline
                        int cp = testString.CodePointAt(charIdx);
                        if (cp == '\r' && charIdx < len && testString.CodePointAt(charIdx + 1) == '\n')
                        {
                            // We have a CR LF
                            //  Need an extra increment of the input ptr to move over both of them
                            charIdx++;
                        }
                        if (cp == '\n' || cp == '\r')
                        {
                            lineNum++;
                            column = 0;
                            charIdx++;
                            colStart = charIdx;
                            break;
                        }

                        // Let unescape handle the back slash.
                        int[] charIdxAr = new int[1];
                        charIdxAr[0] = charIdx;
                        cp           = Utility.UnescapeAt(testString, charIdxAr);
                        if (cp != -1)
                        {
                            // Escape sequence was recognized.  Insert the char
                            //   into the test data.
                            charIdx = charIdxAr[0];
                            tp.dataToBreak.AppendCodePoint(cp);
                            for (i = tp.dataToBreak.Length - 1; i >= 0 && tp.srcLine[i] == 0; i--)
                            {
                                tp.srcLine[i] = lineNum;
                                tp.srcCol[i]  = column;
                            }

                            break;
                        }


                        // Not a recognized backslash escape sequence.
                        // Take the next char as a literal.
                        //  TODO:  Should this be an error?
                        c       = testString.CodePointAt(charIdx);
                        charIdx = testString.OffsetByCodePoints(charIdx, 1);
                    }

                    // Normal, non-escaped data char.
                    tp.dataToBreak.AppendCodePoint(c);

                    // Save the mapping from offset in the data to line/column numbers in
                    //   the original input file.  Will be used for better error messages only.
                    //   If there's an expected break before this char, the slot in the mapping
                    //     vector will already be set for this char; don't overwrite it.
                    for (i = tp.dataToBreak.Length - 1; i >= 0 && tp.srcLine[i] == 0; i--)
                    {
                        tp.srcLine[i] = lineNum;
                        tp.srcCol[i]  = column;
                    }
                    break;


                case PARSE_NUM:
                    // We are parsing an expected numeric tag value, like <1234>,
                    //   within a chunk of data.
                    if (UCharacter.IsWhitespace(c))
                    {
                        break;
                    }

                    if (c == '>')
                    {
                        // Finished the number.  Add the info to the expected break data,
                        //   and switch parse state back to doing plain data.
                        parseState = PARSE_DATA;
                        if (tagValue == 0)
                        {
                            tagValue = -1;
                        }
                        int breakIdx = tp.dataToBreak.Length;
                        tp.expectedBreaks[breakIdx] = tagValue;
                        tp.srcLine[breakIdx]        = lineNum;
                        tp.srcCol[breakIdx]         = column;
                        break;
                    }

                    if (UCharacter.IsDigit(c))
                    {
                        tagValue = tagValue * 10 + UCharacter.Digit(c);
                        break;
                    }

                    Errln(String.Format("Syntax Error in rbbitst.txt at line {0}, col {1}", lineNum, column));
                    return;
                }
            }

            // Reached end of test file. Raise an error if parseState indicates that we are
            //   within a block that should have been terminated.
            if (parseState == PARSE_RULES)
            {
                Errln(String.Format("rbbitst.txt:{0} <rules> block beginning at line {1} is not closed.",
                                    lineNum, rulesFirstLine));
            }
            if (parseState == PARSE_DATA)
            {
                Errln(String.Format("rbbitst.txt:{0} <data> block not closed.", lineNum));
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates a tokenizer over <paramref name="reader"/>, passing the base class a
 /// sentence <see cref="BreakIterator"/> obtained for the invariant culture.
 /// </summary>
 /// <param name="reader">Text source handed to the base tokenizer.</param>
 public WholeSentenceTokenizer(TextReader reader)
     : base(reader, BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture))
 {
     // Register the term and offset attributes (in that order) and keep the
     // returned references for later use.
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.offsetAtt = AddAttribute<IOffsetAttribute>();
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Exercises <c>BreakIterator.RegisterInstance</c> and <c>BreakIterator.Unregister</c>.
        /// Two iterators (a clone of the en-US sentence iterator, then a clone of the Thai word
        /// iterator) are registered as the en-US word iterator; the test then checks what
        /// <c>GetWordInstance</c>/<c>GetSentenceInstance</c> return while both are registered,
        /// after the second registration is removed, and after the first is removed.
        /// </summary>
        public void TestRegUnreg()
        {
            CultureInfo thailand_locale = new CultureInfo("th-TH");
            // ICU4N: Arbitrary locales are not allowed in .NET
            //CultureInfo foo_locale = new CultureInfo("fu-FU");

            // Baseline iterators captured before anything is registered.
            BreakIterator jwbi = BreakIterator.GetWordInstance(new CultureInfo("ja"));
            BreakIterator uwbi = BreakIterator.GetWordInstance(new CultureInfo("en-US"));
            BreakIterator usbi = BreakIterator.GetSentenceInstance(new CultureInfo("en-US"));
            BreakIterator twbi = BreakIterator.GetWordInstance(thailand_locale);
            // Only referenced by the disabled foo_locale assertions below.
            BreakIterator rwbi = BreakIterator.GetWordInstance(CultureInfo.InvariantCulture);  // (new Locale("", "", ""));

            BreakIterator sbi = (BreakIterator)usbi.Clone();

            // todo: this will cause the test to fail, no way to set a breakiterator to null text so can't fix yet.
            // String text = "This is some test, by golly. Boy, they don't make tests like they used to, do they?  This here test ain't worth $2.50.  Nope.";
            // sbi.setText(text);

            assertTrue(!BreakIterator.Unregister(""), "unregister before register"); // coverage

            // ICU4N: Arbitrary locales are not allowed in .NET
            //object key0 = BreakIterator.RegisterInstance((BreakIterator)twbi.Clone(), foo_locale, BreakIterator.KIND_WORD);
            // Both registrations target the same slot (en-US, word); key2 is registered last.
            object key1 = BreakIterator.RegisterInstance(sbi, new CultureInfo("en-US"), BreakIterator.KIND_WORD);
            object key2 = BreakIterator.RegisterInstance((BreakIterator)twbi.Clone(), new CultureInfo("en-US"), BreakIterator.KIND_WORD);

            {
                BreakIterator test0 = BreakIterator.GetWordInstance(new CultureInfo("ja"));
                BreakIterator test1 = BreakIterator.GetWordInstance(new CultureInfo("en-US"));
                BreakIterator test2 = BreakIterator.GetSentenceInstance(new CultureInfo("en-US"));
                BreakIterator test3 = BreakIterator.GetWordInstance(thailand_locale);
                // ICU4N: Arbitrary locales are not allowed in .NET
                //BreakIterator test4 = BreakIterator.GetWordInstance(foo_locale);

                assertEqual(test0, jwbi, "japan word == japan word");
                // The most recent registration (Thai word clone) is expected to win for en-US.
                assertEqual(test1, twbi, "us word == thai word");
                assertEqual(test2, usbi, "us sentence == us sentence");
                assertEqual(test3, twbi, "thai word == thai word");
                // ICU4N: Arbitrary locales are not allowed in .NET
                //assertEqual(test4, twbi, "foo word == thai word");
            }

            //Locale[] locales = BreakIterator.getAvailableLocales();

            assertTrue(BreakIterator.Unregister(key2), "unregister us word (thai word)");
            assertTrue(!BreakIterator.Unregister(key2), "unregister second time");
            bool error = false;

            try
            {
                BreakIterator.Unregister(null);
            }
            catch (ArgumentException)
            {
                // Expected: a null key must be rejected. The exception object itself is not needed.
                error = true;
            }

            assertTrue(error, "unregister null");

            {
                // A freshly obtained iterator is expected to carry no text.
                CharacterIterator sci = BreakIterator.GetWordInstance(new CultureInfo("en-US")).Text;
                int len = sci.EndIndex - sci.BeginIndex;
                assertEqual(len, 0, "us word text: " + getString(sci));
            }

            // ICU4N: Arbitrary locales are not allowed in .NET
            //assertTrue((BreakIterator.GetAvailableLocales().ToList()).Contains(foo_locale), "foo_locale");
            //assertTrue(BreakIterator.Unregister(key0), "unregister foo word (thai word)");
            //assertTrue(!(BreakIterator.GetAvailableLocales().ToList()).Contains(foo_locale), "no foo_locale");
            // With key2 gone, the earlier registration (sentence clone) is expected to be visible.
            assertEqual(BreakIterator.GetWordInstance(new CultureInfo("en-US")), usbi, "us word == us sentence");

            assertTrue(BreakIterator.Unregister(key1), "unregister us word (us sentence)");
            {
                // With both registrations removed, every lookup should match its baseline again.
                BreakIterator test0 = BreakIterator.GetWordInstance(new CultureInfo("ja"));
                BreakIterator test1 = BreakIterator.GetWordInstance(new CultureInfo("en-US"));
                BreakIterator test2 = BreakIterator.GetSentenceInstance(new CultureInfo("en-US"));
                BreakIterator test3 = BreakIterator.GetWordInstance(thailand_locale);
                // ICU4N: Arbitrary locales are not allowed in .NET
                //BreakIterator test4 = BreakIterator.GetWordInstance(foo_locale);

                assertEqual(test0, jwbi, "japanese word break");
                assertEqual(test1, uwbi, "us sentence-word break");
                assertEqual(test2, usbi, "us sentence break");
                assertEqual(test3, twbi, "thai word break");
                // ICU4N: Arbitrary locales are not allowed in .NET
                //assertEqual(test4, rwbi, "root word break");

                CharacterIterator sci = test1.Text;
                int len = sci.EndIndex - sci.BeginIndex;
                assertEqual(len, 0, "us sentence-word break text: " + getString(sci));
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Walks word, sentence and line break iterators backwards over fixed English strings
        /// using <c>Last()</c>, <c>Previous()</c> and <c>Preceding(int)</c>, comparing every
        /// reported boundary (via <c>doTest</c>) with the expected offset and text segment, and
        /// checks that stepping back from the first boundary yields <c>BreakIterator.Done</c>.
        /// </summary>
        public void TestLastPreviousPreceding()
        {
            int    p, q;
            String testString = "This is a word break. Isn't it? 2.25 dollars";

            Logln("Testing last(),previous(), preceding() with custom rules");
            Logln("testing word iteration for string \"" + testString + "\"");
            RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(new CultureInfo("en"));

            wordIter1.SetText(testString);
            p = wordIter1.Last();
            if (p != testString.Length)
            {
                Errln("ERROR: last() returned " + p + " instead of " + testString.Length);
            }
            q = wordIter1.Previous();
            doTest(testString, p, q, 37, "dollars");
            p = q;
            q = wordIter1.Previous();
            doTest(testString, p, q, 36, " ");
            q = wordIter1.Preceding(25);
            doTest(testString, 25, q, 22, "Isn");
            p = q;
            q = wordIter1.Previous();
            doTest(testString, p, q, 21, " ");
            q = wordIter1.Preceding(20);
            doTest(testString, 20, q, 15, "break");
            // Nothing precedes the first boundary, so Done is the expected answer.
            p = wordIter1.Preceding(wordIter1.First());
            if (p != BreakIterator.Done)
            {
                Errln("ERROR: preceding()  at starting position returned #" + p + " instead of " + BreakIterator.Done);
            }
            testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This  costs $20,00,000.";
            Logln("testing sentence iter - String:- \"" + testString + "\"");
            // NOTE(review): the sentence and line iterators use CurrentCulture while the expected
            // offsets look English-specific - confirm the test is meant to depend on the ambient culture.
            RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);

            sentIter1.SetText(testString);
            p = sentIter1.Last();
            if (p != testString.Length)
            {
                Errln("ERROR: last() returned " + p + " instead of " + testString.Length);
            }
            q = sentIter1.Previous();
            doTest(testString, p, q, 60, "This  costs $20,00,000.");
            p = q;
            q = sentIter1.Previous();
            doTest(testString, p, q, 41, "How are you doing? ");
            q = sentIter1.Preceding(40);
            doTest(testString, 40, q, 31, "Thankyou.");
            q = sentIter1.Preceding(25);
            doTest(testString, 25, q, 20, "I'am ");
            sentIter1.First();
            p = sentIter1.Previous();
            q = sentIter1.Preceding(sentIter1.First());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: previous()/preceding() at starting position returned #"
                      + p + " and " + q + " instead of " + BreakIterator.Done + "\n");
            }
            testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.";
            Logln("testing line iter - String:- \"" + testString + "\"");
            RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator)BreakIterator.GetLineInstance(CultureInfo.CurrentCulture);

            lineIter1.SetText(testString);
            p = lineIter1.Last();
            if (p != testString.Length)
            {
                Errln("ERROR: last() returned " + p + " instead of " + testString.Length);
            }
            q = lineIter1.Previous();
            doTest(testString, p, q, 72, "$20,00,000.");
            p = q;
            q = lineIter1.Previous();
            doTest(testString, p, q, 66, "costs ");
            q = lineIter1.Preceding(40);
            doTest(testString, 40, q, 31, "Thankyou.");
            q = lineIter1.Preceding(25);
            doTest(testString, 25, q, 20, "I'am ");
            lineIter1.First();
            p = lineIter1.Previous();
            // Query the line iterator's own first boundary (previously this used sentIter1 by mistake).
            q = lineIter1.Preceding(lineIter1.First());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: previous()/preceding() at starting position returned #"
                      + p + " and " + q + " instead of " + BreakIterator.Done + "\n");
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Walks word, character, sentence and line break iterators forwards over fixed strings
        /// using <c>First()</c>, <c>Next()</c>, <c>Next(int)</c> and <c>Following(int)</c>,
        /// comparing every reported boundary (via <c>doTest</c>) with the expected offset and
        /// text segment, and checks that advancing past the last boundary yields
        /// <c>BreakIterator.Done</c>.
        /// </summary>
        public void TestFirstNextFollowing()
        {
            int    p, q;
            String testString = "This is a word break. Isn't it? 2.25";

            Logln("Testing first() and next(), following() with custom rules");
            Logln("testing word iterator - string :- \"" + testString + "\"\n");
            // NOTE(review): all iterators here use CurrentCulture while the expected offsets look
            // English-specific - confirm the test is meant to depend on the ambient culture.
            RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator)BreakIterator.GetWordInstance(CultureInfo.CurrentCulture);

            wordIter1.SetText(testString);
            p = wordIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned " + p + " instead of 0");
            }
            q = wordIter1.Next(9);
            doTest(testString, p, q, 20, "This is a word break");
            p = q;
            q = wordIter1.Next();
            doTest(testString, p, q, 21, ".");
            p = q;
            q = wordIter1.Next(3);
            doTest(testString, p, q, 28, " Isn't ");
            p = q;
            q = wordIter1.Next(2);
            doTest(testString, p, q, 31, "it?");
            q = wordIter1.Following(2);
            doTest(testString, 2, q, 4, "is");
            q = wordIter1.Following(22);
            doTest(testString, 22, q, 27, "Isn't");
            wordIter1.Last();
            p = wordIter1.Next();
            q = wordIter1.Following(wordIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: next()/following() at last position returned #"
                      + p + " and " + q + " instead of " + BreakIterator.Done + "\n");
            }
            RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator)BreakIterator.GetCharacterInstance(CultureInfo.CurrentCulture);

            testString = "Write hindi here. ";
            Logln("testing char iter - string:- \"" + testString + "\"");
            charIter1.SetText(testString);
            p = charIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned " + p + " instead of 0");
            }
            q = charIter1.Next();
            doTest(testString, p, q, 1, "W");
            p = q;
            q = charIter1.Next(4);
            doTest(testString, p, q, 5, "rite");
            p = q;
            q = charIter1.Next(12);
            doTest(testString, p, q, 17, " hindi here.");
            p = q;
            // A negative count moves backwards.
            q = charIter1.Next(-6);
            doTest(testString, p, q, 11, " here.");
            p = q;
            q = charIter1.Next(6);
            doTest(testString, p, q, 17, " here.");
            p = charIter1.Following(charIter1.Last());
            q = charIter1.Next(charIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: following()/next() at last position returned #"
                      + p + " and " + q + " instead of " + BreakIterator.Done);
            }
            testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This  costs $20,00,000.";
            RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);

            Logln("testing sentence iter - String:- \"" + testString + "\"");
            sentIter1.SetText(testString);
            p = sentIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned " + p + " instead of 0");
            }
            q = sentIter1.Next();
            doTest(testString, p, q, 7, "Hello! ");
            p = q;
            q = sentIter1.Next(2);
            doTest(testString, p, q, 31, "how are you? I'am fine. ");
            p = q;
            q = sentIter1.Next(-2);
            doTest(testString, p, q, 7, "how are you? I'am fine. ");
            p = q;
            q = sentIter1.Next(4);
            doTest(testString, p, q, 60, "how are you? I'am fine. Thankyou. How are you doing? ");
            p = q;
            q = sentIter1.Next();
            doTest(testString, p, q, 83, "This  costs $20,00,000.");
            q = sentIter1.Following(1);
            doTest(testString, 1, q, 7, "ello! ");
            q = sentIter1.Following(10);
            doTest(testString, 10, q, 20, " are you? ");
            q = sentIter1.Following(20);
            doTest(testString, 20, q, 31, "I'am fine. ");
            p = sentIter1.Following(sentIter1.Last());
            q = sentIter1.Next(sentIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: following()/next() at last position returned #"
                      + p + " and " + q + " instead of " + BreakIterator.Done);
            }
            testString = "Hello! how\r\n (are)\r you? I'am fine- Thankyou. foo\u00a0bar How, are, you? This, costs $20,00,000.";
            Logln("(UnicodeString)testing line iter - String:- \"" + testString + "\"");
            RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator)BreakIterator.GetLineInstance(CultureInfo.CurrentCulture);

            lineIter1.SetText(testString);
            p = lineIter1.First();
            if (p != 0)
            {
                Errln("ERROR: first() returned " + p + " instead of 0");
            }
            q = lineIter1.Next();
            doTest(testString, p, q, 7, "Hello! ");
            p = q;
            q = lineIter1.Next(4);
            doTest(testString, p, q, 20, "how\r\n (are)\r ");
            p = q;
            q = lineIter1.Next(-4);
            doTest(testString, p, q, 7, "how\r\n (are)\r ");
            p = q;
            q = lineIter1.Next(6);
            doTest(testString, p, q, 30, "how\r\n (are)\r you? I'am ");
            p = q;
            q = lineIter1.Next();
            doTest(testString, p, q, 36, "fine- ");
            p = q;
            q = lineIter1.Next(2);
            doTest(testString, p, q, 54, "Thankyou. foo\u00a0bar ");
            q = lineIter1.Following(60);
            doTest(testString, 60, q, 64, "re, ");
            q = lineIter1.Following(1);
            doTest(testString, 1, q, 7, "ello! ");
            q = lineIter1.Following(10);
            doTest(testString, 10, q, 12, "\r\n");
            q = lineIter1.Following(20);
            doTest(testString, 20, q, 25, "you? ");
            p = lineIter1.Following(lineIter1.Last());
            q = lineIter1.Next(lineIter1.Last());
            if (p != BreakIterator.Done || q != BreakIterator.Done)
            {
                Errln("ERROR: following()/next() at last position returned #"
                      + p + " and " + q + " instead of " + BreakIterator.Done);
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Builds the display-name provider for <paramref name="locale"/>: resolves the requested
        /// display contexts, loads the language and region data tables, compiles the separator,
        /// locale and key/type patterns, and creates a sentence break iterator when the chosen
        /// capitalization context needs one.
        /// </summary>
        /// <param name="locale">Locale whose display data is loaded.</param>
        /// <param name="contexts">
        /// Display context settings; for each context type the last value supplied wins.
        /// Types not supplied fall back to standard names, no capitalization, full length and
        /// substitution enabled.
        /// </param>
        public LocaleDisplayNamesImpl(ULocale locale, params DisplayContext[] contexts)
#pragma warning disable 612, 618
            : base()
#pragma warning restore 612, 618
        {
            DialectHandling dialectHandling    = DialectHandling.STANDARD_NAMES;
            DisplayContext  capitalization     = DisplayContext.CapitalizationNone;
            DisplayContext  nameLength         = DisplayContext.LengthFull;
            DisplayContext  substituteHandling = DisplayContext.Substitute;

            foreach (DisplayContext contextItem in contexts)
            {
                switch (contextItem.Type())
                {
                case DisplayContextType.DialectHandling:
                    dialectHandling = (contextItem.Value() == DisplayContext.StandardNames.Value()) ?
                                      DialectHandling.STANDARD_NAMES : DialectHandling.DIALECT_NAMES;
                    break;

                case DisplayContextType.Capitalization:
                    capitalization = contextItem;
                    break;

                case DisplayContextType.DisplayLength:
                    nameLength = contextItem;
                    break;

                case DisplayContextType.SubstituteHandling:
                    substituteHandling = contextItem;
                    break;

                default:
                    break;
                }
            }

            this.dialectHandling    = dialectHandling;
            this.capitalization     = capitalization;
            this.nameLength         = nameLength;
            this.substituteHandling = substituteHandling;
            this.langData           = LangDataTables.impl.Get(locale, substituteHandling == DisplayContext.NoSubstitute);
            this.regionData         = RegionDataTables.impl.Get(locale, substituteHandling == DisplayContext.NoSubstitute);
            // Use the language table's locale unless it resolved to ROOT; then use the region table's.
            this.locale             = ULocale.ROOT.Equals(langData.GetLocale()) ? regionData.GetLocale() :
                                      langData.GetLocale();

            // Note, by going through DataTable, this uses table lookup rather than straight lookup.
            // That should get us the same data, I think.  This way we don't have to explicitly
            // load the bundle again.  Using direct lookup didn't seem to make an appreciable
            // difference in performance.
            string sep = langData.Get("localeDisplayPattern", "separator");

            // A null result, or the key echoed back, is treated as "no data": use the default.
            if (sep == null || "separator".Equals(sep))
            {
                sep = "{0}, {1}";
            }
            StringBuilder sb = new StringBuilder();

            this.separatorFormat = SimpleFormatterImpl.CompileToStringMinMaxArguments(sep, sb, 2, 2);

            string pattern = langData.Get("localeDisplayPattern", "pattern");

            if (pattern == null || "pattern".Equals(pattern))
            {
                pattern = "{0} ({1})";
            }
            this.format = SimpleFormatterImpl.CompileToStringMinMaxArguments(pattern, sb, 2, 2);

            // Patterns written with full-width parentheses (U+FF08/U+FF09) need the full-width
            // bracket set for paren replacement. Both branches previously used the ASCII
            // characters, which made this test meaningless.
            if (pattern.Contains("\uFF08"))
            {
                formatOpenParen         = '\uFF08'; // FULLWIDTH LEFT PARENTHESIS
                formatCloseParen        = '\uFF09'; // FULLWIDTH RIGHT PARENTHESIS
                formatReplaceOpenParen  = '\uFF3B'; // FULLWIDTH LEFT SQUARE BRACKET
                formatReplaceCloseParen = '\uFF3D'; // FULLWIDTH RIGHT SQUARE BRACKET
            }
            else
            {
                formatOpenParen         = '(';
                formatCloseParen        = ')';
                formatReplaceOpenParen  = '[';
                formatReplaceCloseParen = ']';
            }

            string keyTypePattern = langData.Get("localeDisplayPattern", "keyTypePattern");

            if (keyTypePattern == null || "keyTypePattern".Equals(keyTypePattern))
            {
                keyTypePattern = "{0}={1}";
            }
            this.keyTypeFormat = SimpleFormatterImpl.CompileToStringMinMaxArguments(
                keyTypePattern, sb, 2, 2);

            // Get values from the contextTransforms data if we need them
            // Also check whether we will need a break iterator (depends on the data)
            bool needBrkIter = false;

            if (capitalization == DisplayContext.CapitalizationForUIListOrMenu ||
                capitalization == DisplayContext.CapitalizationForStandalone)
            {
                capitalizationUsage = new bool[Enum.GetValues(typeof(CapitalizationContextUsage)).Length]; // initialized to all false
                ICUResourceBundle         rb   = (ICUResourceBundle)UResourceBundle.GetBundleInstance(ICUData.ICU_BASE_NAME, locale);
                CapitalizationContextSink sink = new CapitalizationContextSink(this);
                try
                {
                    rb.GetAllItemsWithFallback("contextTransforms", sink);
                }
                catch (MissingManifestResourceException)
                {
                    // Silently ignore.  Not every locale has contextTransforms.
                }
                needBrkIter = sink.hasCapitalizationUsage;
            }
            // Get a sentence break iterator if we will need it
            if (needBrkIter || capitalization == DisplayContext.CapitalizationForBeginningOfSentence)
            {
                capitalizationBrkIter = BreakIterator.GetSentenceInstance(locale);
            }

            this.currencyDisplayInfo = CurrencyData.Provider.GetInstance(locale, false);
        }
Ejemplo n.º 25
0
 /// <summary>
 /// Type initializer: switches <see cref="CultureInfo.CurrentCulture"/> to the invariant
 /// culture and then assigns <c>sentenceProto</c> a sentence break iterator for that culture.
 /// </summary>
 static CodeTokenizer()
 {
     CultureInfo invariant = CultureInfo.InvariantCulture;

     // Workaround for an ICU4N bug, see https://github.com/NightOwl888/ICU4N/issues/29
     CultureInfo.CurrentCulture = invariant;

     sentenceProto = BreakIterator.GetSentenceInstance(invariant);
 }
Ejemplo n.º 26
0
        /// <summary>
        /// Builds the display-name formatter state for <paramref name="culture"/>: loads the
        /// language and region data tables, compiles the separator, locale and key/type
        /// patterns (falling back to built-in defaults when the data table has no entry),
        /// chooses the parenthesis characters used when composing names, and, depending on
        /// the capitalization option, loads context-transform data and a sentence break iterator.
        /// </summary>
        /// <param name="culture">The culture whose display-name data tables are loaded.</param>
        /// <param name="options">
        /// Display context options. The instance is frozen via <c>Freeze()</c> and stored;
        /// its <c>SubstituteHandling</c> and <c>Capitalization</c> values drive the setup below.
        /// </param>
        public DataTableCultureDisplayNames(UCultureInfo culture, DisplayContextOptions options)
#pragma warning disable 612, 618
            : base()
#pragma warning restore 612, 618
        {
            this.displayContextOptions = options.Freeze();

            // Both data tables are requested with the same "no substitute" flag.
            bool noSubstitute = options.SubstituteHandling == SubstituteHandling.NoSubstitute;
            this.langData   = languageDataTableProvider.GetDataTable(culture, noSubstitute);
            this.regionData = regionDataTableProvider.GetDataTable(culture, noSubstitute);

            // If the language table resolved to the invariant culture, use the region table's culture.
            // NOTE(review): when langData.CultureInfo is null the else branch still calls
            // ToUCultureInfo() on it - confirm that the data tables never report a null culture.
            this.locale     = langData.CultureInfo != null && langData.CultureInfo.Equals(CultureInfo.InvariantCulture)
                ? regionData.CultureInfo.ToUCultureInfo()
                : langData.CultureInfo.ToUCultureInfo();

            // Note, by going through DataTable, this uses table lookup rather than straight lookup.
            // That should get us the same data, I think.  This way we don't have to explicitly
            // load the bundle again.  Using direct lookup didn't seem to make an appreciable
            // difference in performance.
            string sep = langData.Get("localeDisplayPattern", "separator");

            // A null result, or the key echoed back, is treated as "no data": use the default.
            if (sep == null || "separator".Equals(sep))
            {
                sep = "{0}, {1}";
            }
            StringBuilder sb = new StringBuilder();

            this.separatorFormat = SimpleFormatterImpl.CompileToStringMinMaxArguments(sep, sb, 2, 2);

            string pattern = langData.Get("localeDisplayPattern", "pattern");

            if (pattern == null || "pattern".Equals(pattern))
            {
                pattern = "{0} ({1})";
            }
            this.format = SimpleFormatterImpl.CompileToStringMinMaxArguments(pattern, sb, 2, 2);

            // Patterns that use the full-width left parenthesis (U+FF08) get full-width
            // parentheses and brackets, as in upstream ICU4J; all others use ASCII ones.
            // Previously both branches assigned the ASCII characters, so the test had no effect.
            if (pattern.Contains("\uFF08"))
            {
                formatOpenParen         = '\uFF08'; // FULLWIDTH LEFT PARENTHESIS
                formatCloseParen        = '\uFF09'; // FULLWIDTH RIGHT PARENTHESIS
                formatReplaceOpenParen  = '\uFF3B'; // FULLWIDTH LEFT SQUARE BRACKET
                formatReplaceCloseParen = '\uFF3D'; // FULLWIDTH RIGHT SQUARE BRACKET
            }
            else
            {
                formatOpenParen         = '(';
                formatCloseParen        = ')';
                formatReplaceOpenParen  = '[';
                formatReplaceCloseParen = ']';
            }

            string keyTypePattern = langData.Get("localeDisplayPattern", "keyTypePattern");

            if (keyTypePattern == null || "keyTypePattern".Equals(keyTypePattern))
            {
                keyTypePattern = "{0}={1}";
            }
            this.keyTypeFormat = SimpleFormatterImpl.CompileToStringMinMaxArguments(
                keyTypePattern, sb, 2, 2);

            // Get values from the contextTransforms data if we need them
            // Also check whether we will need a break iterator (depends on the data)
            bool needBrkIter = false;

            if (options.Capitalization == Capitalization.UIListOrMenu ||
                options.Capitalization == Capitalization.Standalone)
            {
                capitalizationUsage = new bool[Enum.GetValues(typeof(CapitalizationContextUsage)).Length]; // initialized to all false
                ICUResourceBundle         rb   = (ICUResourceBundle)UResourceBundle.GetBundleInstance(ICUData.IcuBaseName, locale);
                CapitalizationContextSink sink = new CapitalizationContextSink(this);
                try
                {
                    rb.GetAllItemsWithFallback("contextTransforms", sink);
                }
                catch (MissingManifestResourceException)
                {
                    // Silently ignore.  Not every locale has contextTransforms.
                }
                needBrkIter = sink.hasCapitalizationUsage;
            }
            // Get a sentence break iterator if we will need it
            if (needBrkIter || options.Capitalization == Capitalization.BeginningOfSentence)
            {
                capitalizationBrkIter = BreakIterator.GetSentenceInstance(locale);
            }

            this.currencyDisplayInfo = CurrencyData.Provider.GetInstance(locale, false);
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Creates a sentence break iterator for the invariant culture while holding
 /// <c>syncLock</c>.
 /// </summary>
 /// <returns>The newly obtained sentence <see cref="BreakIterator"/>.</returns>
 private static BreakIterator LoadSentenceProto()
 {
     BreakIterator proto;

     // The iterator is requested only while syncLock is held.
     lock (syncLock)
     {
         proto = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
     }

     return proto;
 }