Ejemplo n.º 1
0
        /// <summary>
        /// Classifies a date/time format string into one of the known pattern types.
        /// </summary>
        /// <remarks>
        /// The classification is delegated to a <c>DateTimePatternComputer</c> created for the
        /// invariant culture; the expression produced alongside the classification is discarded.
        /// </remarks>
        /// <param name="formatString">A format string, as returned from a culture's date/time pattern list.</param>
        /// <returns>The pattern type, which may be "Unknown" for unsupported patterns.</returns>
        public static Core.Tokenization.DateTimePatternType ClassifyPattern(string formatString)
        {
            // only the classification is of interest here, the generated expression is dropped
            string discardedExpression;

            var classifier = new DateTimePatternComputer(System.Globalization.CultureInfo.InvariantCulture, null);

            return classifier.ClassifyFormatString(formatString, out discardedExpression);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Formats a date/time value as text to be sent to the client.
        /// </summary>
        /// <param name="datetime">The value to format.</param>
        /// <param name="dateTimePattern">
        /// Selects the short date or short time pattern of the culture; any other value
        /// falls back to the culture's default formatting.
        /// </param>
        /// <returns>The formatted text.</returns>
        private static string GetDateTimeTextForClient(DateTime datetime, DateTimePatternType dateTimePattern = DateTimePatternType.Default)
        {
            // We need to format everything in English for the client, as Javascript in some of
            // the browsers does not understand datetime values in other language formats.
            // NOTE(review): this relies on DateTimeField.DefaultUICulture being an English culture - confirm.
            var culture = DateTimeField.DefaultUICulture;

            if (dateTimePattern == DateTimePatternType.ShortDate)
            {
                string shortDateFormat = culture.DateTimeFormat.ShortDatePattern;
                return datetime.ToString(shortDateFormat, culture);
            }

            if (dateTimePattern == DateTimePatternType.ShortTime)
            {
                string shortTimeFormat = culture.DateTimeFormat.ShortTimePattern;
                return datetime.ToString(shortTimeFormat, culture);
            }

            return datetime.ToString(culture);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Scans a custom date/time format string, classifies it, and builds the expression
        /// that corresponds to it.
        /// </summary>
        /// <remarks>
        /// The string is processed character by character: runs of format characters are
        /// translated by <c>GetPattern</c>, quoted or escaped text and all other characters are
        /// appended as literals, and ':' and '/' are replaced by the time and date separators
        /// of the culture.
        /// </remarks>
        /// <param name="formatString">The format string to analyze. A null or empty string is classified as Unknown.</param>
        /// <param name="rx">
        /// Receives the expression built for the format string. It is only set when a
        /// classification other than an early "Unknown" is returned; otherwise it is empty.
        /// </param>
        /// <returns>
        /// The pattern type. Unknown is returned if the string contains an unsupported element,
        /// if a date pattern lacks a day, month or year component, or if the pattern contains
        /// both date and time components (or neither).
        /// </returns>
        private Core.Tokenization.DateTimePatternType ClassifyFormatString(string formatString, out string rx)
        {
            const string formatChars = "dfFhmstyzgHM";

            rx = String.Empty;

            if (String.IsNullOrEmpty(formatString))
            {
                // nothing to classify; avoids a NullReferenceException on formatString.Length
                return(DateTimePatternType.Unknown);
            }

            int len = formatString.Length;
            int p   = 0;

            DateTimePatternType resultPatternType = DateTimePatternType.Unknown;

            bool dateHasDay   = false;
            bool dateHasMonth = false;
            bool dateHasYear  = false;

            bool hasDateComponents = false;
            bool hasTimeComponents = false;

            System.Text.StringBuilder result = new StringBuilder();

            // true while we are inside a '...' quoted literal
            bool escapeMode = false;

            while (p < len)
            {
                char c = formatString[p];

                if (escapeMode)
                {
                    if (c == '\'')
                    {
                        if (p + 1 < len && formatString[p + 1] == '\'')
                        {
                            // two adjacent single quotes: append a single quote
                            AppendLiteral(result, c);
                            ++p;
                        }
                        else
                        {
                            // single single quote ends escape mode
                            escapeMode = false;
                        }
                    }
                    else if (c == '\\')
                    {
                        // backslash followed by quote also represents a literal quote
                        if (p + 1 < len && formatString[p + 1] == '\'')
                        {
                            AppendLiteral(result, '\'');
                            ++p;
                        }
                        else
                        {
                            AppendLiteral(result, c);
                        }
                    }
                    else
                    {
                        if (c == '\u5e74' || c == '\u6708' || c == '\u65e5')
                        {
                            // ja, zh: indicates long date pattern
                            resultPatternType = DateTimePatternType.LongDate;
                        }

                        AppendLiteral(result, c);
                    }

                    // advance past the character handled above (the branches which consume
                    // two characters have already advanced once)
                    ++p;
                    continue;
                }

                if (formatChars.IndexOf(c) >= 0)
                {
                    // format character -- count the length of the run and process accordingly
                    int formatLength = 0;

                    while (p < len && formatString[p] == c)
                    {
                        ++formatLength;
                        ++p;
                    }

                    switch (c)
                    {
                        case 'd':
                            dateHasDay = true;
                            break;

                        case 'M':
                            dateHasMonth = true;
                            break;

                        case 'y':
                            dateHasYear = true;
                            break;

                        default:
                            break;
                    }

                    string pattern;
                    DateTimePatternType type = GetPattern(c, formatLength,
                                                          ref hasDateComponents, ref hasTimeComponents,
                                                          out pattern);
                    if (pattern == null || pattern.Length == 0)
                    {
                        // if a pattern string contains unknown or unprocessable elements, we don't
                        //  construct an RX for it
                        return(DateTimePatternType.Unknown);
                    }

                    result.Append(pattern);

                    switch (resultPatternType)
                    {
                        case DateTimePatternType.Unknown:
                            resultPatternType = type;
                            break;

                        case DateTimePatternType.LongDate:
                            // long date patterns never "downgrade" to simpler ones
                            break;

                        case DateTimePatternType.ShortDate:
                            // short date patterns only upgrade to long date patterns
                            if (type == DateTimePatternType.LongDate)
                            {
                                resultPatternType = DateTimePatternType.LongDate;
                            }
                            break;

                        case DateTimePatternType.LongTime:
                            // time patterns upgrade to date patterns only
                            if (type == DateTimePatternType.LongDate || type == DateTimePatternType.ShortDate)
                            {
                                resultPatternType = type;
                            }
                            break;

                        case DateTimePatternType.ShortTime:
                            // short time patterns upgrade to all other patterns
                            if (type != DateTimePatternType.Unknown)
                            {
                                resultPatternType = type;
                            }
                            break;

                        default:
                            throw new Exception("Invalid switch constant");
                    }
                }
                else if (c == '\'')
                {
                    // opening quote: everything up to the closing quote is literal text
                    ++p;
                    escapeMode = true;
                }
                else if (c == '%')
                {
                    // '%' is not expected in the patterns processed here: flag it in debug
                    // builds and skip it
                    System.Diagnostics.Debug.Assert(false);
                    ++p;
                }
                else if (c == ':')
                {
                    // default time separator
                    // TODO the default date/time separator may be a blank in which case we maybe should
                    //  append \s instead of ' '
                    AppendLiteral(result, _Culture.DateTimeFormat.TimeSeparator);
                    ++p;
                }
                else if (c == '/')
                {
                    // default date separator
                    // TODO the default date/time separator may be a blank in which case we maybe should
                    //  append \s instead of ' '
                    AppendLiteral(result, _Culture.DateTimeFormat.DateSeparator);
                    ++p;
                }
                else if (c == '\\')
                {
                    if (p + 1 < len)
                    {
                        // literally append next char and skip BOTH the backslash and the
                        // escaped character; advancing by one only would make the loop read
                        // the escaped character again and treat it as a format specifier
                        AppendLiteral(result, formatString[p + 1]);
                        p += 2;
                    }
                    else
                    {
                        // trailing backslash - may also yield an error
                        AppendLiteral(result, c);
                        ++p;
                    }
                }
                else if (c == ' ')
                {
                    // append single whitespace
                    result.Append(@"<\s:>");
                    // NOTE we assume that patterns don't include leading blanks and therefore don't add them to first
                    ++p;
                }
                else
                {
                    // literally append char
                    AppendLiteral(result, c);
                    ++p;
                }
            }

            if (resultPatternType == DateTimePatternType.LongDate ||
                resultPatternType == DateTimePatternType.ShortDate)
            {
                if (!(dateHasDay && dateHasMonth && dateHasYear))
                {
                    // #33803: don't recognize "July 08" without a year
                    return(DateTimePatternType.Unknown);
                }
            }

            // #38942
            if ((hasDateComponents && hasTimeComponents) ||
                (!hasDateComponents && !hasTimeComponents))
            {
                // pattern either specifies neither date nor time, or both date and time
                // TODO allow "mixed" patterns (f, F, g, G pattern classes of DateTimeFormatInfo)
                return(DateTimePatternType.Unknown);
            }

            rx = result.ToString();
            return(resultPatternType);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Attempts to load the FSTs for the requested pattern types from the resource accessor.
        /// </summary>
        /// <param name="types">Flags selecting the pattern types to load.</param>
        /// <param name="allCalendars">Not used by this method.</param>
        /// <returns>
        /// A list holding a single pattern collection for the culture, or null if there is
        /// no resource accessor or none of the requested resources is available.
        /// </returns>
        private List <CalendarDateTimePatterns> LoadPatterns(Core.Tokenization.DateTimePatternType types, bool allCalendars)
        {
            if (_Accessor == null)
            {
                return(null);
            }

            // the pattern types are probed in this fixed order
            DateTimePatternType[] candidates =
            {
                DateTimePatternType.LongDate,
                DateTimePatternType.ShortDate,
                DateTimePatternType.LongTime,
                DateTimePatternType.ShortTime
            };

            List <CalendarDateTimePatterns> loaded = null;
            // we currently only support one culture pattern
            CalendarDateTimePatterns culturePatterns = null;

            foreach (DateTimePatternType candidate in candidates)
            {
                bool requested = (types & candidate) != 0;
                if (!requested)
                {
                    continue;
                }

                // map the pattern type to the resource which stores its FST
                Core.Resources.LanguageResourceType resourceType;

                if (candidate == DateTimePatternType.LongDate)
                {
                    resourceType = Core.Resources.LanguageResourceType.LongDateFST;
                }
                else if (candidate == DateTimePatternType.ShortDate)
                {
                    resourceType = Core.Resources.LanguageResourceType.ShortDateFST;
                }
                else if (candidate == DateTimePatternType.ShortTime)
                {
                    resourceType = Core.Resources.LanguageResourceType.ShortTimeFST;
                }
                else if (candidate == DateTimePatternType.LongTime)
                {
                    resourceType = Core.Resources.LanguageResourceType.LongTimeFST;
                }
                else
                {
                    throw new Exception("Cannot map token type to corresponding resource type");
                }

                if (_Accessor.GetResourceStatus(_Culture, resourceType, false) == Core.Resources.ResourceStatus.NotAvailable)
                {
                    continue;
                }

                byte[] rawFst = _Accessor.GetResourceData(_Culture, resourceType, false);
                LanguagePlatform.Lingua.FST.FST automaton = LanguagePlatform.Lingua.FST.FST.Create(rawFst);

                if (culturePatterns == null)
                {
                    // the result is created lazily, once the first resource turns out to be available
                    // TODO support the case where some (not all) FSTs are loaded from the resources
                    loaded = new List <CalendarDateTimePatterns>();
                    // TODO support all calendars
                    culturePatterns = new CalendarDateTimePatterns(_Culture, null);
                    loaded.Add(culturePatterns);
                }

                // TODO compute FIRST() for the FST at load time (or persistently store it?)
                culturePatterns.Patterns.Add(new DateTimePattern(candidate, _Culture, "(unavailable)", automaton));
            }

            return(loaded);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Translates a run of identical format characters into the corresponding expression
        /// fragment and determines the pattern type which that run indicates.
        /// </summary>
        /// <param name="designator">The format character.</param>
        /// <param name="length">The number of times the format character is repeated.</param>
        /// <param name="hasDateComponents">Set to true if the designator is a date designator (d, y, g, M).</param>
        /// <param name="hasTimeComponents">Set to true if the designator is a time designator (h, m, s, t, H).</param>
        /// <param name="pattern">
        /// Receives the expression fragment. It remains empty if the designator or its length
        /// is not supported.
        /// </param>
        /// <returns>The pattern type indicated by the run, or Unknown if it is not supported.</returns>
        private Core.Tokenization.DateTimePatternType GetPattern(char designator, int length,
                                                                 ref bool hasDateComponents, ref bool hasTimeComponents,
                                                                 out string pattern)
        {
            pattern = String.Empty;

            switch (designator)
            {
                case 'd':
                    hasDateComponents = true;

                    if (length == 1 || length == 2)
                    {
                        // day, with or without leading zero (some date patterns only contain the 'dd' variant)
                        pattern = "(<:d>((1|2)(0|1|2|3|4|5|6|7|8|9)|3(0|1)|0?(1|2|3|4|5|6|7|8|9)))";
                        return(DateTimePatternType.ShortDate);
                    }

                    if (length == 3)
                    {
                        // abbreviated day names
                        pattern = AppendDayNames(true);
                        return(DateTimePatternType.ShortDate);
                    }

                    if (length == 4)
                    {
                        // full day names
                        pattern = AppendDayNames(false);
                        return(DateTimePatternType.LongDate);
                    }

                    return(DateTimePatternType.Unknown);

                case 'f':
                case 'F':
                    // we don't process these patterns during tokenization
                    return(DateTimePatternType.Unknown);

                case 'h':
                    hasTimeComponents = true;

                    if (length != 1 && length != 2)
                    {
                        return(DateTimePatternType.Unknown);
                    }

                    // 12h clock with or without leading 0
                    pattern = "(<:h>(1(0|1|2)|0?(0|1|2|3|4|5|6|7|8|9)))";
                    return(DateTimePatternType.ShortTime);

                case 'm':
                case 's':
                    hasTimeComponents = true;

                    if (length == 1)
                    {
                        // minute or second, with or without leading 0
                        pattern = "(<:" + designator + ">(((0|1|2|3|4|5)(0|1|2|3|4|5|6|7|8|9))|(0|1|2|3|4|5|6|7|8|9)))";
                    }
                    else if (length == 2)
                    {
                        // minute or second, with leading 0
                        pattern = "(<:" + designator + ">((0|1|2|3|4|5)(0|1|2|3|4|5|6|7|8|9)))";
                    }
                    else
                    {
                        return(DateTimePatternType.Unknown);
                    }

                    // the only difference between long and short time patterns is that the long pattern
                    //  includes seconds
                    return(designator == 's' ? DateTimePatternType.LongTime : DateTimePatternType.ShortTime);

                case 't':
                {
                    hasTimeComponents = true;

                    // a single 't' selects the other variant of the AM/PM designators than 'tt'
                    string designators = AppendAMPM(length == 1);
                    if (designators != null)
                    {
                        pattern = designators;
                    }

                    return(DateTimePatternType.ShortTime);
                }

                case 'y':
                    hasDateComponents = true;

                    if (length == 1)
                    {
                        // 2-digit year w/o century, w/o leading 0
                        pattern = "(<:y>((1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)|(0|1|2|3|4|5|6|7|8|9)))";
                        return(DateTimePatternType.ShortDate);
                    }

                    if (length == 2)
                    {
                        // 2-digit year w/o century, w/ leading 0
                        pattern = "(<:y>((0|1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)))";
                        return(DateTimePatternType.ShortDate);
                    }

                    if (length == 4)
                    {
                        // 4-digit year
                        pattern = "(<:y>((0|1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)))";
                        return(DateTimePatternType.ShortDate);
                    }

                    return(DateTimePatternType.Unknown);

                case 'g':
                {
                    hasDateComponents = true;

                    if (length != 2)
                    {
                        return(DateTimePatternType.Unknown);
                    }

                    // TODO preceding whitespace in case eras are undefined
                    string eraNames = AppendEras();
                    if (eraNames != null)
                    {
                        pattern = eraNames;
                    }

                    return(DateTimePatternType.ShortDate);
                }

                case 'z':
                    // time zone offset: only in special time patterns
                    return(DateTimePatternType.Unknown);

                case 'H':
                    hasTimeComponents = true;

                    if (length != 1 && length != 2)
                    {
                        return(DateTimePatternType.Unknown);
                    }

                    // hours in 24h format, with or without leading 0
                    pattern = "(<:H>(1(0|1|2|3|4|5|6|7|8|9)|2(0|1|2|3|4)|0?(0|1|2|3|4|5|6|7|8|9)))";
                    return(DateTimePatternType.ShortTime);

                case 'M':
                    hasDateComponents = true;

                    if (length == 1 || length == 2)
                    {
                        // numeric month, with or without leading 0
                        pattern = "(<:M>(1(0|1|2)|0?(1|2|3|4|5|6|7|8|9)))";
                        return(DateTimePatternType.ShortDate);
                    }

                    if (length == 3)
                    {
                        // abbreviated month names
                        pattern = AppendMonthNames(true);
                        return(DateTimePatternType.ShortDate);
                    }

                    if (length == 4)
                    {
                        // full month names
                        pattern = AppendMonthNames(false);
                        return(DateTimePatternType.LongDate);
                    }

                    return(DateTimePatternType.Unknown);

                default:
                    // the caller should only pass characters from its list of format characters
                    System.Diagnostics.Debug.Assert(false, "Error in pattern char list");
                    return(DateTimePatternType.Unknown);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Computes the date/time patterns of the culture for a single calendar.
        /// </summary>
        /// <remarks>
        /// Every distinct date/time pattern of the culture (plus the custom patterns for it) is
        /// classified and converted into an FST. Afterwards the FSTs which share a pattern type
        /// are merged so that the result holds one pattern per type.
        /// </remarks>
        /// <param name="types">Flags selecting the pattern types to compute.</param>
        /// <param name="cal">The calendar to use, or null to use the culture's own calendar.</param>
        /// <returns>The computed patterns; the collection is empty if no pattern qualified.</returns>
        private CalendarDateTimePatterns ComputeSinglePattern(Core.Tokenization.DateTimePatternType types, System.Globalization.Calendar cal)
        {
            CalendarDateTimePatterns result;
            List <string>            probePatterns;

            // TODO this doesn't yet work with alternate calendars

            if (cal == null)
            {
                result = new CalendarDateTimePatterns(_Culture, _Culture.Calendar);

                probePatterns = new List <string>(_Culture.DateTimeFormat.GetAllDateTimePatterns());
            }
            else
            {
                result = new CalendarDateTimePatterns(_Culture, cal);

                // work on a copy so that the format info of the culture itself is not modified
                System.Globalization.DateTimeFormatInfo tmp = (System.Globalization.DateTimeFormatInfo)_Culture.DateTimeFormat.Clone();
                tmp.Calendar = cal;

                probePatterns = new List <string>(tmp.GetAllDateTimePatterns());
            }

            // manually augment list of date/time patterns for some languages
            List <string> customPatterns = GetCustomPatterns(_Culture);

            if (customPatterns != null)
            {
                probePatterns.AddRange(customPatterns);
            }

            // patterns already processed; a set keeps the duplicate check cheap
            HashSet <string> seenPatterns = new HashSet <string>();

            // TODO

            /*
             * The current approach computes a transducer which will emit a canonicalized
             * representation of the token value which will later be parsed during
             * tokenization. Alternatively, we could directly emit the parse pattern and
             * skip the canonicalization.
             * */

            foreach (string p in probePatterns)
            {
                if (seenPatterns.Contains(p) || IgnorePattern(_Culture, p))
                {
                    continue;
                }

                seenPatterns.Add(p);

                string rx;
                DateTimePatternType patternType = ClassifyFormatString(p, out rx);
                if (patternType == DateTimePatternType.Unknown)
                {
                    continue;
                }

                if ((types & patternType) == 0)
                {
                    // pattern type was not requested
                    continue;
                }

                // TODO support addWordBoundary
#if DEBUG
                // flip to true (e.g. in the debugger) to trace the generated expressions
                bool log = false;
                if (log)
                {
                    string logPath = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "builder.log");
                    using (System.IO.StreamWriter output = new System.IO.StreamWriter(logPath, true, System.Text.Encoding.UTF8))
                    {
                        output.WriteLine("{0}:\r\nPattern: {1}\r\nExpression: {2}\r\n",
                                         _Culture.Name, p, rx);
                    }
                }
#endif

                LanguagePlatform.Lingua.FST.FST f = LanguagePlatform.Lingua.FST.FST.Create(rx);
                f.MakeDeterministic();
                result.Patterns.Add(new DateTimePattern(patternType, _Culture, p, f));
            }

            if (result.Patterns.Count > 0)
            {
                // combine FSTs of each pattern type into a single automaton

                // sorting makes patterns of the same type adjacent
                result.Patterns.Sort((a, b) => ((int)a.PatternType).CompareTo((int)b.PatternType));

                List <DateTimePattern> combined = new List <DateTimePattern>();
                while (result.Patterns.Count > 0)
                {
                    DateTimePattern     head = result.Patterns[0];
                    DateTimePatternType t    = head.PatternType;

                    // number of leading patterns sharing the type of the head pattern;
                    // intentionally starting at 1 as the head itself is not an alternative
                    int groupSize = 1;

                    List <FST.FST> alternatives = new List <Sdl.LanguagePlatform.Lingua.FST.FST>();

                    while (groupSize < result.Patterns.Count && result.Patterns[groupSize].PatternType == t)
                    {
                        alternatives.Add(result.Patterns[groupSize].FST);
                        ++groupSize;
                    }

                    // merge the other FSTs of the group into the FST of the head pattern
                    head.FST.Disjunct(alternatives);
                    head.FST.MakeDeterministic();

                    combined.Add(head);
                    result.Patterns.RemoveRange(0, groupSize);
                }

                result.Patterns.AddRange(combined);
            }

            return(result);
        }