/// <summary>
/// Reads the day number out of the "DayNum" group of a regex match.
/// </summary>
/// <param name="m">The regex Match instance whose "DayNum" captures are inspected.</param>
/// <returns>
/// The day number obtained from the last "DayNum" capture, or -1 when there is no capture
/// or the last capture could not be turned into a number.
/// </returns>
private int ExtractDayNum(Match m)
{
    int result = -1;

    foreach (Capture capture in m.Groups["DayNum"].Captures)
    {
        string text = capture.Value;

        if (!Char.IsDigit(text[0]))
        {
            // Not written with digits: hand the text to the Persian number parser.
            // The last pattern it reports wins; no pattern at all means "not found".
            result = -1;
            foreach (PersianNumberPatternInfo info in persianNumberParser.FindAndParse(text))
            {
                result = (int)info.Number;
            }
            continue;
        }

        // Written with digits: run it through ConvertNumber2English, then parse as an integer.
        int parsed;
        string converted = ParsingUtils.ConvertNumber2English(text);
        result = Int32.TryParse(converted, out parsed) ? parsed : -1;
    }

    return result;
}
/// <summary>
/// Extracts and parses the week-day from the given regex Match instance.
/// </summary>
/// <param name="m">The regex Match instance to extract week-day from.</param>
/// <returns>
/// The week-day resolved from the "Weekday" captures (later captures override earlier ones),
/// or <c>Weekdays.Illeagal</c> when nothing could be resolved.
/// </returns>
private Weekdays ExtractWeekday(Match m)
{
    Weekdays w = Weekdays.Illeagal;

    foreach (Capture c in m.Groups["Weekday"].Captures)
    {
        if (c.Value == "جمعه")
        {
            w = Weekdays.Fri;
        }
        // "else if" (not a second independent "if"): otherwise a Friday capture would fall
        // through to the numbered-weekday branch below and could be overwritten there.
        else if (c.Value == "شنبه")
        {
            w = Weekdays.Sat;
        }
        else
        {
            // Any other week-day is identified by the "WeekdayNum" captures of the match,
            // written either with digits or as a Persian number word.
            foreach (Capture cc in m.Groups["WeekdayNum"].Captures)
            {
                string value = cc.Value;

                if (Char.IsDigit(value[0]))
                {
                    int digitValue;
                    value = ParsingUtils.ConvertNumber2English(value);
                    w = Int32.TryParse(value, out digitValue)
                        ? NthWeekday(digitValue)
                        : Weekdays.Illeagal;
                }
                else
                {
                    Int64 literalValue;
                    w = PersianLiteral2NumMap.TryPersianString2Num(value, out literalValue)
                        ? NthWeekday((int)literalValue)
                        : Weekdays.Illeagal;
                }
            }
        }
    }

    return w;
}
/// <summary>
/// Extracts and parses the three digit block from the specified string.
/// </summary>
/// <param name="str">The string to extract number from.</param>
/// <returns>
/// The parsed value; 0 when <paramref name="str"/> is null or empty, or when nothing in it
/// could be parsed.
/// </returns>
private long ExtractThreeDB(string str)
{
    long num = 0L;

    // Nothing to inspect; previously this case failed on the str[0] access.
    if (String.IsNullOrEmpty(str))
    {
        return num;
    }

    // Decide "digits or words" on the trimmed text, i.e. on the same text that is parsed
    // below, so that leading whitespace does not send a digit string to the word branch.
    string trimmed = str.Trim();

    if (trimmed.Length > 0 && Char.IsDigit(trimmed[0]))
    {
        int n;
        if (Int32.TryParse(ParsingUtils.ConvertNumber2English(trimmed), out n))
        {
            num = n;
        }
    }
    else
    {
        Regex regex = new Regex(ThreeDigitBlockPattern());
        foreach (Match m in regex.Matches(str))
        {
            // The ones, tens and ten-ones groups are all handled the same way:
            // every capture that maps to a number is added to the total.
            Group[] literalGroups =
            {
                m.Groups[ONES_NAME],
                m.Groups[TENS_NAME],
                m.Groups[TENONES_NAME]
            };

            foreach (Group group in literalGroups)
            {
                foreach (Capture c in group.Captures)
                {
                    long tempNum;
                    if (PersianLiteral2NumMap.TryPersianString2Num(c.Value, out tempNum))
                    {
                        num += tempNum;
                    }
                }
            }

            num += ExtractHundreds(m);
        }
    }

    return num;
}
/// <summary>
/// Tokenizes the given string and collects every number token that can be parsed as a double.
/// </summary>
/// <param name="str">The string to be searched.</param>
/// <returns>A sequence of <see cref="DigitizedNumberPatternInfo"/> objects, one per number token
/// that was parsed successfully, in the order the tokenizer produced them.</returns>
public IEnumerable<DigitizedNumberPatternInfo> FindAndParse(string str)
{
    var found = new List<DigitizedNumberPatternInfo>();
    var tokenizer = new WordTokenizer(WordTokenizerOptions.None);

    foreach (var token in tokenizer.Tokenize(str))
    {
        // Only tokens the tokenizer flagged as numbers are of interest.
        if (!token.IsNumber)
        {
            continue;
        }

        var refined = PostProcessNumberWordInfo(token);
        var convertedText = ParsingUtils.ConvertNumber2English(refined.Value);

        double number;
        if (!Double.TryParse(convertedText, out number))
        {
            // Flagged as a number but not parseable: skipped silently.
            continue;
        }

        found.Add(new DigitizedNumberPatternInfo(refined.Value, refined.Index, refined.Length, number));
    }

    return found;
}
/// <summary>
/// Reads the day number out of the "DayNum" group of a regex match.
/// Only captures that start with a digit are considered.
/// </summary>
/// <param name="m">The regex Match object to parse.</param>
/// <returns>
/// The value of the last digit-led "DayNum" capture, or -1 when there is none or that
/// capture could not be parsed as an integer.
/// </returns>
private int ExtractDayNum(Match m)
{
    int result = -1;

    foreach (Capture capture in m.Groups["DayNum"].Captures)
    {
        string text = capture.Value;

        // Captures that do not start with a digit leave the current result untouched.
        if (!Char.IsDigit(text[0]))
        {
            continue;
        }

        int parsed;
        string converted = ParsingUtils.ConvertNumber2English(text);
        result = Int32.TryParse(converted, out parsed) ? parsed : -1;
    }

    return result;
}
/// <summary>
/// Returns the month number from the input string. The month number ranges vary for different calendar types.
/// The return value ranges are as follows:
/// -1          Illegal
/// 1 - 12      Jalali
/// 13 - 24     Gregorian
/// 25 - 36     HijriGhamari
/// </summary>
/// <param name="str">The month name written in Latin script; matching ignores letter case.</param>
/// <returns>The month number as described above, or -1 for null or an unrecognized name.</returns>
private int EnglishMonthNum(string str)
{
    if (str == null)
    {
        return -1;
    }

    // Lower-case with the invariant culture: culture-sensitive lower-casing (e.g. Turkish,
    // where "I" becomes a dotless "ı") would keep names such as "APRIL" from matching.
    str = ParsingUtils.NormalizeSpaces(str.ToLowerInvariant());

    switch (str)
    {
        // Jalali
        case "farvardin":
            return 1;
        case "ordibehesht":
            return 2;
        case "khordād":
        case "khordad":
            return 3;
        case "tir":
            return 4;
        case "mordād":
        case "amordād":
        case "mordad":
        case "amordad":
            return 5;
        case "shahrivar":
            return 6;
        case "mehr":
            return 7;
        case "ābān":
        case "aban":
            return 8;
        case "āzar":
        case "azar":
            return 9;
        case "dey":
            return 10;
        case "bahman":
            return 11;
        case "esfand":
        case "espand":
            return 12;

        // Gregorian Begins Here
        // (the items that end in dot are for the abbreviated form of the dates)
        case "january":
        case "jan":
        case "jan.":
            return 13;
        case "february":
        case "feb":
        case "feb.":
            return 14;
        case "march":
        case "mar":
        case "mar.":
            return 15;
        case "april":
        case "apr":
        case "apr.":
            return 16;
        case "may":
            return 17;
        case "june":
        case "jun":
        case "jun.":
            return 18;
        case "july":
        case "jul":
        case "jul.":
            return 19;
        case "august":
        case "aug":
        case "aug.":
            return 20;
        case "september":
        case "sep":
        case "sep.":
            return 21;
        case "october":
        case "oct":
        case "oct.":
            return 22;
        case "november":
        case "nov":
        case "nov.":
            return 23;
        case "december":
        case "dec":
        case "dec.":
            return 24;

        // Hijri Ghamari Begins Here
        case "muharram":
        case "muḥarram ul ḥaram":
        case "muharram ul haram":
            return 25;
        case "safar":
        case "ṣafar ul muzaffar":
        case "safar ul muzaffar":
            return 26;
        case "rabi' al-awwal":
            return 27;
        case "rabi' al-thani":
            return 28;
        case "jumada al-ula":
            return 29;
        case "jumada al-thani":
            return 30;
        case "rajab":
        case "rajab al murajab":
            return 31;
        case "sha'aban":
        case "sha'abān ul moazam":
        case "sha'aban ul moazam":
            return 32;
        case "ramadan":
        case "ramazān":
        case "ramaḍān ul mubarak":
        case "ramadan ul mubarak":
        case "ramazan":
        case "ramazan ul mubarak":
            return 33;
        case "shawwal":
        case "shawwal ul mukarram":
            return 34;
        case "dhu al-qi'dah":
            return 35;
        case "dhu al-hijjah":
            return 36;

        default:
            return -1;
    }
}
/// <summary>
/// Returns the month number from the input string. The month number ranges vary for different calendar types.
/// The return value ranges are as follows:
/// -1          Illegal
/// 1 - 12      Jalali
/// 13 - 24     Gregorian
/// 25 - 36     HijriGhamari
/// </summary>
/// <param name="str">The month name written in Persian script.</param>
/// <returns>The month number as described above, or -1 for an unrecognized name.</returns>
private int PersianMonthNum(string str)
{
    str = ParsingUtils.NormalizeSpaces(str);

    switch (str)
    {
        // Jalali
        case "فروردین": return 1;
        case "اردیبهشت": return 2;
        case "خرداد": return 3;
        case "تیر": return 4;
        case "مرداد": return 5;
        case "امرداد": return 5;
        case "شهریور": return 6;
        case "مهر": return 7;
        case "آبان": return 8;
        case "ابان": return 8;
        case "آذر": return 9;
        case "دی": return 10;
        case "بهمن": return 11;
        case "اسفند": return 12;
        case "اسپند": return 12;

        // Gregorian Begins Here
        case "ژانویه": return 13;
        case "جنوری": return 13;
        case "فوریه": return 14;
        case "فبروری": return 14;
        case "مارس": return 15;
        case "مارچ": return 15;
        case "آوریل": return 16;
        case "آپریل": return 16;
        case "مه": return 17;
        case "می": return 17;
        case "ژوئن": return 18;
        case "جون": return 18;
        case "ژوئیه": return 19;
        case "ژوییه": return 19;
        case "جولای": return 19;
        case "ژولای": return 19;
        case "اوت": return 20;
        case "اگوست": return 20;
        case "آگوست": return 20;
        case "سپتامبر": return 21;
        case "سپتمبر": return 21;
        case "اکتبر": return 22;
        case "نوامبر": return 23;
        case "نومبر": return 23;
        case "دسامبر": return 24;
        // Correct spelling of the short form of December; it was previously unreachable
        // because only the transposed spelling below was listed.
        case "دسمبر": return 24;
        // Transposed spelling kept so that input which matched before still matches.
        case "دسبمر": return 24;

        // Hijri Ghamari Begins Here
        case "محرم": return 25;
        case "محرمالحرام": return 25;
        case "محرم الحرام": return 25;
        case "صفر": return 26;
        case "ربیعالاول": return 27;
        case "ربیع الاول": return 27;
        case "ربیعالثانی": return 28;
        case "ربیع الثانی": return 28;
        case "جمادیالاولی": return 29;
        case "جمادیالاول": return 29;
        case "جمادی الاولی": return 29;
        case "جمادی الاول": return 29;
        case "جمادیالثانی": return 30;
        case "جمادی الثانی": return 30;
        case "رجب": return 31;
        case "شعبان": return 32;
        case "رمضان": return 33;
        case "شوال": return 34;
        case "ذیالقعده": return 35;
        case "ذی القعده": return 35;
        case "ذوالقعده": return 35;
        case "ذو القعده": return 35;
        case "ذیالحجه": return 36;
        case "ذوالحجه": return 36;
        case "ذی الحجه": return 36;
        case "ذو الحجه": return 36;

        default: return -1;
    }
}