コード例 #1
0
        /// <summary>
        /// Builds the date/time patterns of the current culture from precompiled FSTs
        /// that are fetched through the resource accessor.
        /// </summary>
        /// <param name="types">Flags selecting which pattern types should be loaded.</param>
        /// <param name="allCalendars">Not consulted by the current implementation.</param>
        /// <returns>
        /// <c>null</c> if no accessor is set or none of the requested resources is available;
        /// otherwise a list with a single <see cref="CalendarDateTimePatterns"/> entry that
        /// holds one pattern per resource that could be loaded.
        /// </returns>
        private List <CalendarDateTimePatterns> LoadPatterns(Core.Tokenization.DateTimePatternType types, bool allCalendars)
        {
            if (_Accessor == null)
            {
                return null;
            }

            // Probing order; it also fixes the order in which loaded patterns are appended.
            DateTimePatternType[] candidates =
            {
                DateTimePatternType.LongDate,
                DateTimePatternType.ShortDate,
                DateTimePatternType.LongTime,
                DateTimePatternType.ShortTime
            };

            // Both stay null until the first resource is actually found.
            // Only one culture pattern is supported at the moment.
            List<CalendarDateTimePatterns> loaded = null;
            CalendarDateTimePatterns culturePatterns = null;

            for (int i = 0; i < candidates.Length; ++i)
            {
                DateTimePatternType candidate = candidates[i];

                // skip pattern types the caller did not ask for
                if ((types & candidate) == 0)
                {
                    continue;
                }

                // map the pattern type to the resource type that stores its FST
                Core.Resources.LanguageResourceType resourceType;
                if (candidate == DateTimePatternType.LongDate)
                {
                    resourceType = Core.Resources.LanguageResourceType.LongDateFST;
                }
                else if (candidate == DateTimePatternType.ShortDate)
                {
                    resourceType = Core.Resources.LanguageResourceType.ShortDateFST;
                }
                else if (candidate == DateTimePatternType.ShortTime)
                {
                    resourceType = Core.Resources.LanguageResourceType.ShortTimeFST;
                }
                else if (candidate == DateTimePatternType.LongTime)
                {
                    resourceType = Core.Resources.LanguageResourceType.LongTimeFST;
                }
                else
                {
                    throw new Exception("Cannot map token type to corresponding resource type");
                }

                Core.Resources.ResourceStatus status = _Accessor.GetResourceStatus(_Culture, resourceType, false);
                if (status == Core.Resources.ResourceStatus.NotAvailable)
                {
                    continue;
                }

                byte[] rawFst = _Accessor.GetResourceData(_Culture, resourceType, false);
                LanguagePlatform.Lingua.FST.FST automaton = LanguagePlatform.Lingua.FST.FST.Create(rawFst);

                if (culturePatterns == null)
                {
                    // TODO support the case where some (not all) FSTs are loaded from the resources
                    // TODO support all calendars
                    culturePatterns = new CalendarDateTimePatterns(_Culture, null);
                    loaded = new List<CalendarDateTimePatterns>();
                    loaded.Add(culturePatterns);
                }

                // TODO compute FIRST() for the FST at load time (or persistently store it?)
                // "(unavailable)" is a placeholder text; presumably it fills the format-string slot,
                // which cannot be recovered from a stored FST - confirm against DateTimePattern.
                DateTimePattern loadedPattern = new DateTimePattern(candidate, _Culture, "(unavailable)", automaton);
                culturePatterns.Patterns.Add(loadedPattern);
            }

            return loaded;
        }
コード例 #2
0
        /// <summary>
        /// Computes the date/time patterns of the current culture for a single calendar by
        /// turning each usable format string into an FST and then merging all FSTs that
        /// share a pattern type into one automaton.
        /// </summary>
        /// <param name="types">Flags selecting which pattern types should be built.</param>
        /// <param name="cal">
        /// Calendar whose format strings are probed, or <c>null</c> to use the culture's own calendar.
        /// </param>
        /// <returns>
        /// The patterns container for the culture/calendar. After merging it holds at most one
        /// pattern per pattern type; that pattern is the one which sorted first within its type,
        /// and its FST has been combined with the FSTs of the other patterns of the same type.
        /// </returns>
        private CalendarDateTimePatterns ComputeSinglePattern(Core.Tokenization.DateTimePatternType types, System.Globalization.Calendar cal)
        {
            // TODO this doesn't yet work with alternate calendars

            CalendarDateTimePatterns result = new CalendarDateTimePatterns(_Culture, cal ?? _Culture.Calendar);

            System.Globalization.DateTimeFormatInfo formatInfo = _Culture.DateTimeFormat;
            if (cal != null)
            {
                // work on a copy so that the culture's own format info is not modified
                formatInfo = (System.Globalization.DateTimeFormatInfo)formatInfo.Clone();
                formatInfo.Calendar = cal;
            }

            List<string> probePatterns = new List<string>(formatInfo.GetAllDateTimePatterns());

            // manually augment list of date/time patterns for some languages
            List<string> customPatterns = GetCustomPatterns(_Culture);
            if (customPatterns != null)
            {
                probePatterns.AddRange(customPatterns);
            }

            // format strings already handled; a set keeps the duplicate check O(1) per pattern
            HashSet<string> seenPatterns = new HashSet<string>();

            // TODO

            /*
             * The current approach computes a transducer which will emit a canonicalized
             * representation of the token value which will later be parsed during
             * tokenization. Alternatively, we could directly emit the parse pattern and
             * skip the canonicalization.
             * */

            foreach (string p in probePatterns)
            {
                if (seenPatterns.Contains(p) || IgnorePattern(_Culture, p))
                {
                    continue;
                }

                seenPatterns.Add(p);

                string rx;
                DateTimePatternType patternType = ClassifyFormatString(p, out rx);
                if (patternType == DateTimePatternType.Unknown)
                {
                    continue;
                }

                if ((types & patternType) == 0)
                {
                    continue;
                }

                // TODO support addWordBoundary
#if DEBUG
                // flip to true locally to trace the generated expressions
                bool log = false;
                if (log)
                {
                    // Path.Combine avoids a doubled separator and a hard-coded backslash
                    string logPath = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "builder.log");
                    using (System.IO.StreamWriter output = new System.IO.StreamWriter(logPath, true, System.Text.Encoding.UTF8))
                    {
                        output.WriteLine("{0}:\r\nPattern: {1}\r\nExpression: {2}\r\n",
                                         _Culture.Name, p, rx);
                    }
                }
#endif

                LanguagePlatform.Lingua.FST.FST f = LanguagePlatform.Lingua.FST.FST.Create(rx);
                f.MakeDeterministic();
                result.Patterns.Add(new DateTimePattern(patternType, _Culture, p, f));
            }

            if (result.Patterns.Count > 0)
            {
                // combine FSTs of each pattern type into a single automaton

                // sorting makes patterns of the same type adjacent; CompareTo cannot overflow
                result.Patterns.Sort((a, b) => ((int)a.PatternType).CompareTo((int)b.PatternType));

                List<DateTimePattern> combined = new List<DateTimePattern>();
                int groupStart = 0;
                while (groupStart < result.Patterns.Count)
                {
                    DateTimePattern head = result.Patterns[groupStart];
                    List<FST.FST> alternatives = new List<FST.FST>();

                    // intentionally starting at the pattern after the head
                    int groupEnd = groupStart + 1;
                    while (groupEnd < result.Patterns.Count && result.Patterns[groupEnd].PatternType == head.PatternType)
                    {
                        alternatives.Add(result.Patterns[groupEnd].FST);
                        ++groupEnd;
                    }

                    // fold the remaining FSTs of this type into the head's FST
                    head.FST.Disjunct(alternatives);
                    head.FST.MakeDeterministic();

                    combined.Add(head);
                    groupStart = groupEnd;
                }

                // keep only the merged head of each group
                result.Patterns.Clear();
                result.Patterns.AddRange(combined);
            }

            return result;
        }