/// <summary>
/// Tries to obtain precompiled date/time FSTs for the current culture from the resource accessor.
/// </summary>
/// <param name="types">Bit mask selecting which pattern types should be loaded.</param>
/// <param name="allCalendars">Not evaluated by this method at present.</param>
/// <returns>
/// <c>null</c> when no accessor is set or when none of the requested resources is available;
/// otherwise a list holding a single <see cref="CalendarDateTimePatterns"/> instance that
/// contains one pattern for every resource that could be loaded.
/// </returns>
private List<CalendarDateTimePatterns> LoadPatterns(Core.Tokenization.DateTimePatternType types, bool allCalendars)
{
    if (_Accessor == null)
    {
        return null;
    }

    // the order of this array is the order in which patterns end up in the result
    DateTimePatternType[] candidates =
    {
        DateTimePatternType.LongDate,
        DateTimePatternType.ShortDate,
        DateTimePatternType.LongTime,
        DateTimePatternType.ShortTime
    };

    List<CalendarDateTimePatterns> loaded = null;

    // we currently only support one culture pattern
    CalendarDateTimePatterns culturePatterns = null;

    foreach (DateTimePatternType candidate in candidates)
    {
        // skip pattern types the caller did not ask for
        if ((types & candidate) == 0)
        {
            continue;
        }

        // translate the pattern type into the resource type that stores its FST
        Core.Resources.LanguageResourceType resourceType;
        switch (candidate)
        {
            case DateTimePatternType.LongDate:
                resourceType = Core.Resources.LanguageResourceType.LongDateFST;
                break;
            case DateTimePatternType.ShortDate:
                resourceType = Core.Resources.LanguageResourceType.ShortDateFST;
                break;
            case DateTimePatternType.LongTime:
                resourceType = Core.Resources.LanguageResourceType.LongTimeFST;
                break;
            case DateTimePatternType.ShortTime:
                resourceType = Core.Resources.LanguageResourceType.ShortTimeFST;
                break;
            default:
                throw new Exception("Cannot map token type to corresponding resource type");
        }

        if (_Accessor.GetResourceStatus(_Culture, resourceType, false) == Core.Resources.ResourceStatus.NotAvailable)
        {
            continue;
        }

        byte[] rawFst = _Accessor.GetResourceData(_Culture, resourceType, false);
        LanguagePlatform.Lingua.FST.FST automaton = LanguagePlatform.Lingua.FST.FST.Create(rawFst);

        // the result list and its single entry are created lazily, on the first successful load
        if (culturePatterns == null)
        {
            // TODO support the case where some (not all) FSTs are loaded from the resources
            loaded = new List<CalendarDateTimePatterns>();

            // TODO support all calendars
            culturePatterns = new CalendarDateTimePatterns(_Culture, null);
            loaded.Add(culturePatterns);
        }

        // TODO compute FIRST() for the FST at load time (or persistently store it?)
        // a placeholder is passed where a computed pattern would carry its format string
        culturePatterns.Patterns.Add(new DateTimePattern(candidate, _Culture, "(unavailable)", automaton));
    }

    return loaded;
}
/// <summary>
/// Builds the date/time recognizer patterns of the current culture for a single calendar.
/// </summary>
/// <remarks>
/// Every format string reported by the culture (plus any custom additions) is classified and
/// compiled into a deterministic FST. Afterwards all FSTs that share a pattern type are merged
/// into one automaton, so the result holds at most one pattern per pattern type.
/// </remarks>
/// <param name="types">Bit mask of the pattern types to generate; other types are skipped.</param>
/// <param name="cal">
/// Calendar to compute the patterns for, or <c>null</c> to use the culture's own calendar.
/// </param>
/// <returns>
/// The pattern set for the culture/calendar pair; its pattern list is empty when no format
/// string qualified.
/// </returns>
private CalendarDateTimePatterns ComputeSinglePattern(Core.Tokenization.DateTimePatternType types, System.Globalization.Calendar cal)
{
    CalendarDateTimePatterns result;
    List<string> probePatterns;

    // TODO this doesn't yet work with alternate calendars
    if (cal == null)
    {
        result = new CalendarDateTimePatterns(_Culture, _Culture.Calendar);
        probePatterns = new List<string>(_Culture.DateTimeFormat.GetAllDateTimePatterns());
    }
    else
    {
        result = new CalendarDateTimePatterns(_Culture, cal);

        // switch the calendar on a clone so that the culture's own format info stays untouched
        System.Globalization.DateTimeFormatInfo tmp = (System.Globalization.DateTimeFormatInfo)_Culture.DateTimeFormat.Clone();
        tmp.Calendar = cal;
        probePatterns = new List<string>(tmp.GetAllDateTimePatterns());
    }

    // manually augment list of date/time patterns for some languages
    List<string> customPatterns = GetCustomPatterns(_Culture);
    if (customPatterns != null)
    {
        probePatterns.AddRange(customPatterns);
    }

    // format strings that were already processed; a set keeps the duplicate check O(1)
    HashSet<string> seen = new HashSet<string>();

    // TODO
    /*
     * The current approach computes a transducer which will emit a canonicalized
     * representation of the token value which will later be parsed during
     * tokenization. Alternatively, we could directly emit the parse pattern and
     * skip the canonicalization.
     */
    foreach (string p in probePatterns)
    {
        if (seen.Contains(p) || IgnorePattern(_Culture, p))
        {
            continue;
        }

        // only non-ignored format strings are remembered, same as before
        seen.Add(p);

        string rx;
        DateTimePatternType patternType = ClassifyFormatString(p, out rx);
        if (patternType == DateTimePatternType.Unknown)
        {
            continue;
        }

        if ((types & patternType) == 0)
        {
            continue;
        }

        // TODO support addWordBoundary

#if DEBUG
        // flip to true locally to trace the generated expressions
        bool log = false;
        if (log)
        {
            // Path.Combine avoids the hard-coded (and doubled) Windows separator
            string logFile = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "builder.log");
            using (System.IO.StreamWriter output = new System.IO.StreamWriter(logFile, true, System.Text.Encoding.UTF8))
            {
                output.WriteLine("{0}:\r\nPattern: {1}\r\nExpression: {2}\r\n", _Culture.Name, p, rx);
            }
        }
#endif

        LanguagePlatform.Lingua.FST.FST f = LanguagePlatform.Lingua.FST.FST.Create(rx);
        f.MakeDeterministic();
        result.Patterns.Add(new DateTimePattern(patternType, _Culture, p, f));
    }

    if (result.Patterns.Count > 0)
    {
        // combine FSTs of each pattern type into a single automaton

        // Sorting makes patterns of the same type adjacent. CompareTo is used instead of a
        // subtraction so that the comparison cannot overflow.
        // NOTE(review): List<T>.Sort is not stable, so which pattern of a type ends up first
        // (and therefore lends its format string to the combined pattern) is not guaranteed.
        result.Patterns.Sort((a, b) => ((int)a.PatternType).CompareTo((int)b.PatternType));

        List<DateTimePattern> combined = new List<DateTimePattern>();

        int groupStart = 0;
        while (groupStart < result.Patterns.Count)
        {
            DateTimePattern head = result.Patterns[groupStart];

            // gather the FSTs of all following patterns of the same type;
            // intentionally starting at the pattern after the head
            List<LanguagePlatform.Lingua.FST.FST> alternatives = new List<LanguagePlatform.Lingua.FST.FST>();
            int next = groupStart + 1;
            while (next < result.Patterns.Count && result.Patterns[next].PatternType == head.PatternType)
            {
                alternatives.Add(result.Patterns[next].FST);
                ++next;
            }

            // fold the alternatives into the head's automaton, which represents the whole group
            head.FST.Disjunct(alternatives);
            head.FST.MakeDeterministic();
            combined.Add(head);

            groupStart = next;
        }

        // replace the individual patterns with the one-per-type combined patterns
        result.Patterns.Clear();
        result.Patterns.AddRange(combined);
    }

    return result;
}