예제 #1
0
 /// <summary>
 /// Type initializer: builds <c>SentenceInstance</c> and <c>WordInstance</c> from the
 /// compiled break rules stored in the "jdksent.brk" and "jdkword.brk" manifest resources.
 /// </summary>
 static JdkBreakIterator()
 {
     // Sentence rules are loaded first; the stream is released as soon as the rules are read.
     using (Stream sentenceRules = typeof(JdkBreakIterator).FindAndGetManifestResourceStream("jdksent.brk"))
     {
         SentenceInstance = RuleBasedBreakIterator.GetInstanceFromCompiledRules(sentenceRules);
     }

     // Word rules follow the same pattern with their own resource.
     using (Stream wordRules = typeof(JdkBreakIterator).FindAndGetManifestResourceStream("jdkword.brk"))
     {
         WordInstance = RuleBasedBreakIterator.GetInstanceFromCompiledRules(wordRules);
     }
 }
예제 #2
0
 /// <summary>
 /// Builds a <see cref="RuleBasedBreakIterator"/> from the compiled rules found in the
 /// manifest resource named <paramref name="filename"/>.
 /// </summary>
 /// <param name="filename">Resource name passed to <c>FindAndGetManifestResourceStream</c>.</param>
 /// <returns>The iterator returned by <c>GetInstanceFromCompiledRules</c>.</returns>
 /// <exception cref="Exception">
 /// Wraps any <see cref="IOException"/> raised while the compiled rules are being read.
 /// </exception>
 private static RuleBasedBreakIterator ReadBreakIterator(string filename)
 {
     using (Stream rulesStream = typeof(DefaultICUTokenizerConfig).FindAndGetManifestResourceStream(filename))
     {
         try
         {
             return RuleBasedBreakIterator.GetInstanceFromCompiledRules(rulesStream);
         }
         catch (IOException ioe)
         {
             // Re-raise as a general exception, keeping the I/O failure as the inner exception.
             throw new Exception(ioe.ToString(), ioe);
         }
     }
 }
예제 #3
0
        /// <summary>
        /// Creates a break iterator of the requested <paramref name="kind"/> for
        /// <paramref name="locale"/> by loading the matching compiled rules from the
        /// break-iterator resource bundle.
        /// </summary>
        /// <param name="locale">
        /// Locale used to open the bundle; its "lb" keyword (line iterators) and "ss" keyword
        /// (sentence iterators) are also consulted.
        /// </param>
        /// <param name="kind">Iterator kind; used as an index into <c>KIND_NAMES</c>.</param>
        /// <returns>
        /// The rule-based iterator, or that iterator wrapped by
        /// <c>FilteredBreakIteratorBuilder</c> when a sentence iterator is requested with
        /// <c>ss=standard</c>.
        /// </returns>
        /// <exception cref="MissingManifestResourceException">
        /// Thrown when resolving the rules file name or loading its data fails for any reason.
        /// </exception>
        private static BreakIterator CreateBreakInstance(ULocale locale, int kind)
        {
            ICUResourceBundle bundle = ICUResourceBundle.GetBundleInstance(
                ICUData.ICU_BRKITR_BASE_NAME, locale, ICUResourceBundle.OpenType.LOCALE_ROOT);

            // Line iterators may select a rule variant through the "lb" keyword;
            // only the three recognised values contribute a suffix to the resource key.
            string keySuffix = null;
            if (kind == BreakIterator.KIND_LINE)
            {
                string lineBreakStyle = locale.GetKeywordValue("lb");
                switch (lineBreakStyle)
                {
                    case "strict":
                    case "normal":
                    case "loose":
                        keySuffix = "_" + lineBreakStyle;
                        break;
                }
            }

            // Resolve the rules file through the bundle and load its binary contents.
            ByteBuffer ruleData = null;
            try
            {
                string resourceKey = KIND_NAMES[kind];
                if (keySuffix != null)
                {
                    resourceKey += keySuffix;
                }

                string ruleFile = bundle.GetStringWithFallback("boundaries/" + resourceKey);
                ruleData = ICUBinary.GetData(ICUData.ICU_BRKITR_NAME + '/' + ruleFile);
            }
            catch (Exception ex)
            {
                throw new MissingManifestResourceException(ex.ToString(), ex /*, "", ""*/);
            }

            // Build a plain rule-based iterator from the loaded data.
            RuleBasedBreakIterator ruleIterator = null;
            try
            {
#pragma warning disable 612, 618
                ruleIterator = RuleBasedBreakIterator.GetInstanceFromCompiledRules(ruleData);
#pragma warning restore 612, 618
            }
            catch (IOException ex)
            {
                // Not expected in practice; reaching here suggests the compiled rules are corrupted.
                Assert.Fail(ex);
            }

            // TODO: Determine valid and actual locale correctly.
            ULocale bundleLocale = ULocale.ForLocale(bundle.GetLocale());
            ruleIterator.SetLocale(bundleLocale, bundleLocale);
            ruleIterator.BreakType = kind;

            // Only sentence iterators can be wrapped with the filter.
            if (kind != BreakIterator.KIND_SENTENCE)
            {
                return ruleIterator;
            }

            string suppression = locale.GetKeywordValue("ss");
            if (suppression == "standard")
            {
                ULocale baseLocale = new ULocale(locale.GetBaseName());
                return FilteredBreakIteratorBuilder.GetInstance(baseLocale).WrapIteratorWithFilter(ruleIterator);
            }

            return ruleIterator;
        }
예제 #4
0
        // Word break iterator, created once during type initialization from the "jdkword.brk" rules resource.
        private static readonly RuleBasedBreakIterator WordInstance     = LoadBreakRules("jdkword.brk"); // LUCENENET: CA1810: Initialize reference type static fields inline

        /// <summary>
        /// Opens the manifest resource named <paramref name="fileName"/> and builds a
        /// <see cref="RuleBasedBreakIterator"/> from the compiled rules it contains.
        /// </summary>
        /// <param name="fileName">Resource name passed to <c>FindAndGetManifestResourceStream</c>.</param>
        /// <returns>The iterator returned by <c>GetInstanceFromCompiledRules</c>.</returns>
        private static RuleBasedBreakIterator LoadBreakRules(string fileName)
        {
            using (Stream compiledRules = typeof(JdkBreakIterator).FindAndGetManifestResourceStream(fileName))
            {
                return RuleBasedBreakIterator.GetInstanceFromCompiledRules(compiledRules);
            }
        }