/// <summary>
        /// Creates the number FST for a culture: obtains the culture's number format data,
        /// computes an FST pattern from it, builds the FST and makes it deterministic.
        /// </summary>
        /// <param name="culture">Culture whose number format data drives the pattern.</param>
        /// <param name="appendWordTerminator">
        /// Forwarded unchanged as the last argument of <c>ComputeFSTPattern</c>
        /// (presumably controls whether a word terminator is appended - confirm against that method).
        /// </param>
        /// <returns>The FST created from the computed pattern, after <c>MakeDeterministic()</c>.</returns>
        internal static LanguagePlatform.Lingua.FST.FST CreateFST(System.Globalization.CultureInfo culture,
                                                                  bool appendWordTerminator)
        {
            // NOTE(review): the meaning of the literal 'true' flags is defined by NumberPatternComputer.
            NumberFormatData formatData = NumberPatternComputer.GetNumberFormatData(culture, true, true);
            string pattern
                = Lingua.Tokenization.NumberPatternComputer.ComputeFSTPattern(formatData, true, appendWordTerminator);

            LanguagePlatform.Lingua.FST.FST numberFst = LanguagePlatform.Lingua.FST.FST.Create(pattern);
            numberFst.MakeDeterministic();

#if DEBUG
            // Flip manually while debugging to write the FST to disk.
            bool writeDump = false;
            if (writeDump)
            {
                string dumpPath = String.Format("d:/temp/number-fst-{0}.txt", culture.Name);
                numberFst.Dump(dumpPath);
            }
#endif

            return numberFst;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates the measure FST for a culture. The pattern is assembled as:
        /// the culture's number pattern, an optional group built from the blank characters,
        /// and a group that is the alternation of all physical units known for the culture.
        /// The resulting FST is made deterministic before it is returned.
        /// </summary>
        /// <param name="culture">Culture used for both the number format data and the unit list.</param>
        /// <param name="appendWordTerminator">
        /// When <c>true</c>, the literal <c>#&gt;</c> ("word terminator") is appended to the end
        /// of the assembled pattern.
        /// </param>
        /// <returns>The FST created from the assembled pattern, after <c>MakeDeterministic()</c>.</returns>
        internal static LanguagePlatform.Lingua.FST.FST CreateFST(System.Globalization.CultureInfo culture,
                                                                  bool appendWordTerminator)
        {
            NumberFormatData nfd
                = NumberPatternComputer.GetNumberFormatData(culture, true, true);

            // The last argument is false here: the terminator, if requested, is appended
            // after the unit group below rather than directly after the number.
            string numberPattern = Lingua.Tokenization.NumberPatternComputer.ComputeFSTPattern(nfd,
                                                                                               true, false);

            System.Text.StringBuilder sb = new StringBuilder(numberPattern);

            // Optional separator group built from the blank characters
            // (presumably each blank emits 'U' - confirm against AppendDisjunction).
            sb.Append("(");
            bool first = true;
            NumberPatternComputer.AppendDisjunction(sb, Core.CharacterProperties.Blanks, 'U', ref first);
            sb.Append(")?(");

            // Alternation of all units.
            first = true;
            Core.Wordlist units = Core.Tokenization.PhysicalUnit.GetUnits(culture, false);
            foreach (string unit in units.Items)
            {
                // A null or empty entry has no first character to index; skip it before
                // emitting a separator so no empty alternative ends up in the pattern.
                if (String.IsNullOrEmpty(unit))
                {
                    continue;
                }

                if (!first)
                {
                    sb.Append("|");
                }
                first = false;

                // append single unit, make sure that first char emits 'U' (in case no whitespace
                //  sep is in the input)
                sb.AppendFormat("(<{0}:U>", FST.FST.EscapeSpecial(unit[0]));
                if (unit.Length > 1)
                {
                    sb.Append(FST.FST.EscapeSpecial(unit.Substring(1)));
                }
                sb.Append(")");
            }

            sb.Append(")");

            if (appendWordTerminator)
            {
                // Append "word terminator"
                sb.Append("#>");
            }

            LanguagePlatform.Lingua.FST.FST fst = LanguagePlatform.Lingua.FST.FST.Create(sb.ToString());

            fst.MakeDeterministic();

#if DEBUG
            // Flip manually while debugging to write the FST to disk.
            bool dump = false;
            if (dump)
            {
                fst.Dump(String.Format("d:/temp/measure-fst-{0}.txt", culture.Name));
            }
#endif

            return fst;
        }