//
// This is the lexer. It examines the character at the current index of 'str',
// stores the recognized token in 'dtok' and records raw date/time information in 'raw'.
//
// Parameters:
//   dps    - the current parsing state; only DS_T_NNt and DS_T_Nt are examined here, to decide
//            whether to look ahead for a fraction or a timezone offset after a number.
//   str    - the input cursor; its index is advanced past the consumed token.
//   dtok   - receives the token type (dtt), and for numbers the value (num) and suffix.
//   raw    - receives month, day of week, era, time mark, year, fraction and collected numbers.
//   result - receives timezone information when a timezone designator or offset is found.
//   dtfi   - the format info used for name/separator matching; passed by ref to GetEra().
//
// Throws FormatException for an invalid separator, an unknown word, a fourth number,
// a second timezone offset, a second 3+ digit number, or a number that does not fit in an Int32.
//
private static void Lex(
    int dps, __DTString str, DateTimeToken dtok, DateTimeRawInfo raw, DateTimeResult result, ref DateTimeFormatInfo dtfi)
{
    int sep;
    dtok.dtt = DTT_Unk;     // Assume the token is unknown.

    //
    // Skip any white spaces.
    //
    if (!str.SkipWhiteSpaceComma())
    {
        //
        // A false return from SkipWhiteSpaceComma() is treated as end of string.
        //
        dtok.dtt = DTT_End;
        return;
    }

    char ch = str.GetChar();
    if (Char.IsLetter(ch))
    {
        //
        // This is a letter.
        //
        int month, dayOfWeek, era, timeMark;

        //
        // Check if this is a beginning of a month name.
        // And check if this is a day of week name.
        //
        if (raw.month == -1 && (month = GetMonthNumber(str, dtfi)) >= 1)
        {
            //
            // This is a month name.
            //
            switch (sep = GetSeparator(str, raw, dtfi))
            {
                case SEP_End:
                    dtok.dtt = DTT_MonthEnd;
                    break;
                case SEP_Space:
                    dtok.dtt = DTT_MonthSpace;
                    break;
                case SEP_Date:
                    dtok.dtt = DTT_MonthDatesep;
                    break;
                default:
                    // Invalid separator after month name.
                    throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
            }
            raw.month = month;
        }
        else if (raw.dayOfWeek == -1 && (dayOfWeek = GetDayOfWeekNumber(str, dtfi)) >= 0)
        {
            //
            // This is a day of week name.
            //
            raw.dayOfWeek = dayOfWeek;
            dtok.dtt = DTT_DayOfWeek;
            //
            // Discard the separator.
            //
            GetSeparator(str, raw, dtfi);
        }
        else if (GetTimeZoneName(str))
        {
            //
            // This is a timezone designator.
            //
            // NOTENOTE : for now, we only support "GMT" and "Z" (for Zulu time).
            //
            dtok.dtt = DTT_TimeZone;
            result.timeZoneUsed = true;
            result.timeZoneOffset = new TimeSpan(0);
        }
        else if ((raw.era == -1) && ((era = GetEra(str, result, ref dtfi)) != -1))
        {
            raw.era = era;
            dtok.dtt = DTT_Era;
        }
        else if (raw.timeMark == -1 && (timeMark = GetTimeMark(str, dtfi)) != -1)
        {
            // A time mark only updates raw; dtok.dtt is left as DTT_Unk.
            raw.timeMark = timeMark;
            GetSeparator(str, raw, dtfi);
        }
        else
        {
            //
            // Not a month name, not a day of week name. Check if this is one of the
            // known date words. This is used to deal with cases like Spanish cultures,
            // which use 'de' in their date strings.
            //
            if (!str.MatchWords(dtfi.DateWords))
            {
                throw new FormatException(
                    String.Format(Environment.GetResourceString("Format_UnknowDateTimeWord"), str.Index));
            }
            GetSeparator(str, raw, dtfi);
        }
    }
    else if (Char.IsDigit(ch))
    {
        if (raw.numCount == 3)
        {
            // Three numbers were already collected; a fourth is not allowed.
            throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
        }

        //
        // This is a digit.
        //
        int number = ch - '0';
        int digitCount = 1;
        bool overflowed = false;

        //
        // Collect other digits. Each step is computed in 64-bit arithmetic so that a value
        // outside the non-negative Int32 range is detected reliably. Relying on the sign of a
        // wrapped Int32 misses digit runs that wrap around to a non-negative value.
        // Digits are still consumed after an overflow so the cursor ends up in the same place.
        //
        while (str.GetNextDigit())
        {
            int digit = str.GetDigit();
            if (!overflowed)
            {
                long widened = (long)number * 10 + digit;
                if (widened < 0 || widened > Int32.MaxValue)
                {
                    overflowed = true;
                }
                else
                {
                    number = (int)widened;
                }
            }
            digitCount++;
        }

        // If the previous parsing state is DS_T_NNt (like 12:01), and we got another number,
        // so we will have a terminal state DS_TX_NNN (like 12:01:02).
        // If the previous parsing state is DS_T_Nt (like 12:), and we got another number,
        // so we will have a terminal state DS_TX_NN (like 12:01).
        //
        // Look ahead to see if the following character is a decimal point or timezone offset.
        // This enables us to parse time in the forms of:
        //    "11:22:33.1234" or "11:22:33-08".
        if (dps == DS_T_NNt || dps == DS_T_Nt)
        {
            char nextCh;
            // Only look ahead when at least one more character follows the current one.
            if ((str.Index < str.len - 1))
            {
                nextCh = str.Value[str.Index];
                switch (nextCh)
                {
                    case '.':
                        // A fraction is only accepted after the seconds field (DS_T_NNt).
                        if (dps == DS_T_NNt)
                        {
                            // Yes, advance to the next character.
                            str.Index++;
                            // Collect the second fraction.
                            raw.fraction = ParseFraction(str);
                        }
                        break;
                    case '+':
                    case '-':
                        if (result.timeZoneUsed)
                        {
                            // Should not have two timezone offsets.
                            throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
                        }
                        result.timeZoneUsed = true;
                        result.timeZoneOffset = ParseTimeZone(str, nextCh);
                        break;
                }
            }
        }

        if (overflowed)
        {
            //
            // number is overflowed.
            //
            throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
        }

        dtok.num = number;
        if (digitCount >= 3)
        {
            if (raw.year == -1)
            {
                raw.year = number;
                //
                // If we have a number which has 3 or more digits (like "001" or "0001"),
                // we assume this number is a year and save it in raw.year.
                //
                switch (sep = GetSeparator(str, raw, dtfi))
                {
                    case SEP_End:
                        dtok.dtt = DTT_YearEnd;
                        break;
                    case SEP_Am:
                    case SEP_Pm:
                    case SEP_Space:
                        dtok.dtt = DTT_YearSpace;
                        break;
                    case SEP_Date:
                        dtok.dtt = DTT_YearDateSep;
                        break;
                    case SEP_YearSuff:
                    case SEP_MonthSuff:
                    case SEP_DaySuff:
                        dtok.dtt = DTT_NumDatesuff;
                        dtok.suffix = sep;
                        break;
                    case SEP_HourSuff:
                    case SEP_MinuteSuff:
                    case SEP_SecondSuff:
                        dtok.dtt = DTT_NumTimesuff;
                        dtok.suffix = sep;
                        break;
                    default:
                        // Invalid separator after number.
                        throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
                }
                //
                // Found the token already. Let's bail.
                //
                return;
            }
            // A year was already recorded, so a second 3+ digit number is an error.
            throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
        }

        switch (sep = GetSeparator(str, raw, dtfi))
        {
            //
            // Note that raw.numCount was checked to be less than three at the top of this branch,
            // so storing into raw.num[raw.numCount++] below does not exceed three numbers.
            //
            case SEP_End:
                dtok.dtt = DTT_NumEnd;
                raw.num[raw.numCount++] = dtok.num;
                break;
            case SEP_Am:
            case SEP_Pm:
                dtok.dtt = DTT_NumAmpm;
                raw.num[raw.numCount++] = dtok.num;
                break;
            case SEP_Space:
                dtok.dtt = DTT_NumSpace;
                raw.num[raw.numCount++] = dtok.num;
                break;
            case SEP_Date:
                dtok.dtt = DTT_NumDatesep;
                raw.num[raw.numCount++] = dtok.num;
                break;
            case SEP_Time:
                if (!result.timeZoneUsed)
                {
                    dtok.dtt = DTT_NumTimesep;
                    raw.num[raw.numCount++] = dtok.num;
                }
                else
                {
                    // If we already got timezone, there should be no
                    // time separator again.
                    throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
                }
                break;
            case SEP_YearSuff:
                dtok.num = dtfi.Calendar.ToFourDigitYear(number);
                dtok.dtt = DTT_NumDatesuff;
                dtok.suffix = sep;
                break;
            case SEP_MonthSuff:
            case SEP_DaySuff:
                dtok.dtt = DTT_NumDatesuff;
                dtok.suffix = sep;
                break;
            case SEP_HourSuff:
            case SEP_MinuteSuff:
            case SEP_SecondSuff:
                dtok.dtt = DTT_NumTimesuff;
                dtok.suffix = sep;
                break;
            case SEP_LocalTimeMark:
                dtok.dtt = DTT_NumLocalTimeMark;
                raw.num[raw.numCount++] = dtok.num;
                break;
            default:
                // Invalid separator after number.
                throw new FormatException(Environment.GetResourceString("Format_BadDateTime"));
        }
    }
    else
    {
        //
        // Not a letter, not a digit. Just ignore it.
        //
        str.Index++;
    }
    return;
}
//
// Starting at str.Index, classify the separator that follows the current token.
//
// Returns one of the SEP_* constants. SEP_Space is the fallback when nothing more specific
// is recognized, and SEP_End is returned when SkipWhiteSpaceComma() reports false.
// When an AM/PM designator is recognized, raw.timeMark is updated as a side effect.
//
private static int GetSeparator(__DTString str, DateTimeRawInfo raw, DateTimeFormatInfo dtfi)
{
    //
    // Skip white spaces; a false return is treated as end of string.
    //
    if (!str.SkipWhiteSpaceComma())
    {
        return SEP_End;
    }

    if (Char.IsLetter(str.GetChar()))
    {
        //
        // This is the beginning of a word. If no time mark has been recorded yet,
        // try an AM/PM designator first.
        //
        if (raw.timeMark == -1)
        {
            int mark = GetTimeMark(str, dtfi);
            if (mark != -1)
            {
                raw.timeMark = mark;
                return (mark == TM_AM) ? SEP_Am : SEP_Pm;
            }
        }

        //
        // Otherwise try the known suffix words, in a fixed order.
        //
        if (MatchWord(str, LocalTimeMark, false))
        {
            return SEP_LocalTimeMark;
        }
        if (MatchWord(str, CJKYearSuff, false) || MatchWord(str, KoreanYearSuff, false))
        {
            return SEP_YearSuff;
        }
        if (MatchWord(str, CJKMonthSuff, false) || MatchWord(str, KoreanMonthSuff, false))
        {
            return SEP_MonthSuff;
        }
        if (MatchWord(str, CJKDaySuff, false) || MatchWord(str, KoreanDaySuff, false))
        {
            return SEP_DaySuff;
        }
        if (MatchWord(str, CJKHourSuff, false) || MatchWord(str, ChineseHourSuff, false))
        {
            return SEP_HourSuff;
        }
        if (MatchWord(str, CJKMinuteSuff, false))
        {
            return SEP_MinuteSuff;
        }
        if (MatchWord(str, CJKSecondSuff, false))
        {
            return SEP_SecondSuff;
        }

        // An unrecognized word: fall back to a plain space separator.
        return SEP_Space;
    }

    //
    // Not a letter. Check if this is a date separator.
    //
    // NOTENOTE : alternativeDateSeparator is a special case because some cultures
    // (e.g. the invariant culture) use "/". However, in RFC format, we use "-" as the
    // date separator. Therefore, we should check for it.
    //
    if (MatchWord(str, dtfi.DateSeparator, false)
        || MatchWord(str, invariantInfo.DateSeparator, false)
        || MatchWord(str, alternativeDateSeparator, false))
    {
        return SEP_Date;
    }

    //
    // Check if this is a time separator.
    //
    if (MatchWord(str, dtfi.TimeSeparator, false)
        || MatchWord(str, invariantInfo.TimeSeparator, false))
    {
        return SEP_Time;
    }

    //
    // Special case for sq-AL (0x041c), whose time pattern is "h:mm:ss.tt":
    // a '.' may be directly followed by the AM/PM designator.
    //
    if (dtfi.CultureID == 0x041c && str.GetChar() == '.' && raw.timeMark == -1)
    {
        // Step over the '.' and try to read a time mark.
        str.Index++;
        int mark = GetTimeMark(str, dtfi);
        if (mark != -1)
        {
            raw.timeMark = mark;
            return (mark == TM_AM) ? SEP_Am : SEP_Pm;
        }
        // No time mark followed the '.', so put the cursor back on it.
        str.Index--;
    }

    return SEP_Space;
}