/// <summary>
/// Converts each regex match into an <see cref="ExtractedDate"/> and appends it to
/// <paramref name="datesResult"/>. Matches whose groups cannot be turned into a valid
/// date are skipped without raising an error.
/// </summary>
/// <param name="datesResult">List that receives one entry per successfully converted match.</param>
/// <param name="matchResultFactor">Value stored unchanged as the match factor of every date added.</param>
/// <param name="textElem">Source text element; its text and bounds are copied onto every date added.</param>
/// <param name="matches">Regex matches to convert.</param>
/// <param name="matchType">Match type recorded on every date added.</param>
/// <param name="yearGroupIdx">Index of the capture group holding the year.</param>
/// <param name="monthGroupIdx">Index of the capture group holding the month (digits or a month name).</param>
/// <param name="dayGroupIdx">Index of the capture group holding the day; a blank group means "day missing".</param>
private static void CoerceMatchesToDates(List<ExtractedDate> datesResult, double matchResultFactor, ScanTextElem textElem, MatchCollection matches, ExtractedDate.DateMatchType matchType, int yearGroupIdx, int monthGroupIdx, int dayGroupIdx)
{
    foreach (Match match in matches)
    {
        ExtractedDate fd = new ExtractedDate();
        try
        {
            // Year: strip spaces and map the letters 'l' / 'o' to the digits '1' / '0'
            // (presumably to repair character-recognition misreads) before parsing.
            string yrStr = match.Groups[yearGroupIdx].Value.Replace(" ", "");
            yrStr = yrStr.ToLower().Replace("l", "1").Replace("o", "0");
            int year = Convert.ToInt32(yrStr);
            if (year < 80)
            {
                // Two-digit year below 80 is taken as 20xx.
                year += 2000;
                fd.yearWas2Digit = true;
            }
            else if (year < 100)
            {
                // Two-digit year 80..99 is taken as 19xx.
                year += 1900;
                fd.yearWas2Digit = true;
            }

            // Month: numeric when the group starts with a digit, otherwise looked up in
            // monthDict by the lower-cased first three letters of the month name.
            // The numeric branch reads the same group that was tested (monthGroupIdx),
            // not a fixed group number.
            string monthStr = match.Groups[monthGroupIdx].Value;
            int month;
            if (Char.IsDigit(monthStr, 0))
                month = Convert.ToInt32(monthStr);
            else
                month = monthDict[monthStr.ToLower().Substring(0, 3)];

            // Day: defaults to the 1st when the day group is blank.
            int day = 1;
            fd.dayWasMissing = true;
            string dayStr = match.Groups[dayGroupIdx].Value;
            if (dayStr.Trim() != "")
            {
                day = Convert.ToInt32(dayStr);
                fd.dayWasMissing = false;
            }

            // Clamp year and day into the range DateTime can represent.
            if (year > DateTime.MaxValue.Year)
                year = DateTime.MaxValue.Year;
            if (year < DateTime.MinValue.Year)
                year = DateTime.MinValue.Year;
            int daysInMonth = DateTime.DaysInMonth(year, month);
            if (day > daysInMonth)
                day = daysInMonth;
            if (day < 1)
                day = 1;
            DateTime dt = new DateTime(year, month, day);

            // Add date to list
            fd.foundInText = textElem.text;
            fd.posnInText = match.Index;
            fd.matchLength = match.Length;
            fd.dateTime = dt;
            fd.dateMatchType = matchType;
            fd.locationOfDateOnPagePercent = textElem.bounds;
            fd.matchFactor = matchResultFactor;
            datesResult.Add(fd);
        }
        catch (Exception)
        {
            // Best effort: a match that cannot be converted (non-numeric text, unknown
            // month name, month outside 1..12, empty month group, ...) is skipped and
            // the remaining matches are still processed.
        }
    }
}
/// <summary>
/// Builds an <see cref="ExtractedDate"/> from already-parsed year/month/day components,
/// adjusts its match factor according to how the date was found, and appends it to
/// <paramref name="datesResult"/>. If the components do not form a valid date, the entry
/// is still added with its date set to <see cref="DateTime.MinValue"/>.
/// </summary>
/// <param name="srcStr">Text the date was found in; stored on the result.</param>
/// <param name="textBounds">Location of the text on the page; stored on the result and used for the top-of-page bump.</param>
/// <param name="matchFactor">Starting match factor before any bumps are applied.</param>
/// <param name="year">Year; values below 100 are treated as two-digit years.</param>
/// <param name="month">Month number.</param>
/// <param name="day">Day of month, or -1 when no day was found.</param>
/// <param name="bMonthFromChars">True when the month came from text rather than digits.</param>
/// <param name="bRangeIndicatorFound">True when a range indicator was found; adds 10 to the match factor.</param>
/// <param name="firstMatchPos">Position of the first matched character in the text.</param>
/// <param name="lastMatchPos">Position of the last matched character in the text (inclusive).</param>
/// <param name="dateSrchInfo">Search settings; only <c>bPlusOneMonth</c> is read here.</param>
/// <param name="pageNum">Page number stored on the result.</param>
/// <param name="datesResult">List that receives the new entry.</param>
private static void AddCompletedDateToList(string srcStr, DocRectangle textBounds, double matchFactor, int year, int month, int day, bool bMonthFromChars, bool bRangeIndicatorFound, int firstMatchPos, int lastMatchPos, DateSrchInfo dateSrchInfo, int pageNum, List<ExtractedDate> datesResult)
{
    double finalMatchFactor = matchFactor;
    ExtractedDate fd = new ExtractedDate();
    if (bRangeIndicatorFound)
        finalMatchFactor += 10;

    // Bump the match factor for dates in the top 40% of page - letterhead dates
    if (textBounds.Y < 40)
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_TOP_40_PC_OF_PAGE;

    // Year: two-digit years below 80 become 20xx, 80..99 become 19xx;
    // anything else earns the four-digit-year bump.
    if (year < 80)
    {
        year += 2000;
        fd.yearWas2Digit = true;
    }
    else if (year < 100)
    {
        year += 1900;
        fd.yearWas2Digit = true;
    }
    else
    {
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_4_DIGIT_YEAR;
    }

    // Month
    if (bMonthFromChars)
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_TEXT_MONTH;

    // Optionally shift the date forward one month, rolling over into the next year.
    if (dateSrchInfo.bPlusOneMonth)
    {
        month += 1;
        if (month > 12)
        {
            month = 1;
            year++;
        }
    }

    // Day: -1 means no day was found, so fall back to the 1st.
    if (day == -1)
    {
        day = 1;
        fd.dayWasMissing = true;
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_DAY_MISSING;
    }

    // Create datetime. DaysInMonth rejects an out-of-range year or month exactly as the
    // DateTime constructor does, so the day clamp sits inside the guarded region too;
    // on failure the date stays at DateTime.MinValue.
    DateTime dt = DateTime.MinValue;
    try
    {
        int daysInMonth = DateTime.DaysInMonth(year, month);
        if (day > daysInMonth)
            day = daysInMonth;
        if (day < 1)
            day = 1;
        dt = new DateTime(year, month, day);
    }
    catch (ArgumentOutOfRangeException)
    {
        // Year or month outside the range DateTime supports: keep DateTime.MinValue.
    }

    // Add date to list
    fd.foundInText = srcStr;
    fd.pageNum = pageNum;
    fd.posnInText = firstMatchPos;
    fd.matchLength = lastMatchPos - firstMatchPos + 1;
    fd.dateTime = dt;
    fd.dateMatchType = ExtractedDate.DateMatchType.LongDate;
    fd.locationOfDateOnPagePercent = textBounds;
    fd.matchFactor = finalMatchFactor;
    datesResult.Add(fd);
}