/// <summary>
/// Mouse-move handler for <c>imageDocToFile</c>. Shows a tooltip holding the text of the first
/// scanned text element under the pointer on the displayed page, and hides the tooltip when
/// no element is hit.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Mouse event data; marked handled unless the handler exits early.</param>
private void imageDocToFile_MouseMove(object sender, MouseEventArgs e)
{
    // Tooltips are suppressed while a touch-select is in progress
    if (_touchFromPageText != TouchFromPageText.TOUCH_NONE)
    {
        return;
    }

    // A window that is not in the foreground must not leave its popup showing
    if (!IsActive)
    {
        imageDocToFileToolTip.IsOpen = false;
        return;
    }

    // Express the pointer position as a zero-sized rectangle in document coordinates
    Point pointerPos = e.GetPosition(imageDocToFile);
    Point pointerInDoc = ConvertImagePointToDocPoint(imageDocToFile, pointerPos.X, pointerPos.Y);
    DocRectangle pointerRect = new DocRectangle(pointerInDoc.X, pointerInDoc.Y, 0, 0);

    bool tooltipTextFound = false;
    bool docLoaded = (_curDocScanDocInfo != null) && (_curDocScanPages != null);
    if (docLoaded
        && (_curDocDisplay_pageNum > 0)
        && (_curDocDisplay_pageNum <= _curDocScanPages.scanPagesText.Count))
    {
        // Open the tooltip and keep it next to the pointer
        if (!imageDocToFileToolTip.IsOpen)
        {
            imageDocToFileToolTip.IsOpen = true;
        }
        imageDocToFileToolTip.HorizontalOffset = pointerPos.X - 50;
        imageDocToFileToolTip.VerticalOffset = pointerPos.Y;

        // The first text element that intersects the pointer supplies the tooltip text
        List<ScanTextElem> pageElems = _curDocScanPages.scanPagesText[_curDocDisplay_pageNum - 1];
        for (int elemIdx = 0; elemIdx < pageElems.Count; elemIdx++)
        {
            ScanTextElem elem = pageElems[elemIdx];
            if (!elem.bounds.Intersects(pointerRect))
            {
                continue;
            }
            imageDocToFileToolText.Text = elem.text;
            tooltipTextFound = true;
            break;
        }
    }

    // Nothing under the pointer: blank the text and hide the tooltip again
    if (!tooltipTextFound)
    {
        imageDocToFileToolText.Text = "";
        imageDocToFileToolTip.IsOpen = false;
    }

    e.Handled = true;
}
/// <summary>
/// Builds the comma-separated "X,Y,Width,Height" text for a rectangle, formatting each
/// value with the "0" numeric format.
/// </summary>
/// <param name="docRect">Rectangle whose X, Y, Width and Height are written out.</param>
/// <returns>The formatted location string.</returns>
private string FormatLocationStr(DocRectangle docRect)
{
    return String.Format(
        "{0:0},{1:0},{2:0},{3:0}",
        docRect.X,
        docRect.Y,
        docRect.Width,
        docRect.Height);
}
/// <summary>
/// Converts a rectangle given in percentages of the master image into a rectangle in the
/// coordinate space of the overlay canvas.
/// </summary>
/// <param name="docPercentRect">Rectangle whose X/Y/Width/Height are percentages (value / 100 of the image size).</param>
/// <returns>A new rectangle spanning the translated top-left and bottom-right corners.</returns>
private DocRectangle ConvertDocPercentRectToCanvas(DocRectangle docPercentRect)
{
    // Scale the percentage values to positions/sizes on the image element
    double leftOnImage = _masterImage.ActualWidth * docPercentRect.X / 100;
    double topOnImage = _masterImage.ActualHeight * docPercentRect.Y / 100;
    double widthOnImage = _masterImage.ActualWidth * docPercentRect.Width / 100;
    double heightOnImage = _masterImage.ActualHeight * docPercentRect.Height / 100;

    // Translate both corners from the image element to the overlay canvas
    Point topLeft = _masterImage.TranslatePoint(
        new Point(leftOnImage, topOnImage), _uiOverlayCanvas);
    Point bottomRight = _masterImage.TranslatePoint(
        new Point(leftOnImage + widthOnImage, topOnImage + heightOnImage), _uiOverlayCanvas);

    double canvasWidth = bottomRight.X - topLeft.X;
    double canvasHeight = bottomRight.Y - topLeft.Y;
    return new DocRectangle(topLeft.X, topLeft.Y, canvasWidth, canvasHeight);
}
/// <summary>
/// Mouse-down handler for <c>imageDocToFile</c>. When a touch-select mode is active, picks the
/// date, text or money amount under the pointer on the current page, copies it into the
/// matching input control, and then cancels the touch-select mode.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Mouse event data; always marked handled.</param>
private void imageDocToFile_MouseDown(object sender, MouseButtonEventArgs e)
{
    e.Handled = true;
    if (_curDocScanPages == null)
        return;
    if (_touchFromPageText == TouchFromPageText.TOUCH_NONE)
        return;

    // A document without pages has nothing to pick from. Bail out here because the date
    // search indexes the page list directly and would throw on an empty list.
    int pageCount = _curDocScanPages.scanPagesText.Count;
    if (pageCount < 1)
    {
        _touchFromPageText = TouchFromPageText.TOUCH_NONE;
        return;
    }

    // Express the touch point as a zero-sized rectangle in document coordinates
    Point curMousePoint = e.GetPosition(imageDocToFile);
    Point docCoords = ConvertImagePointToDocPoint(imageDocToFile, curMousePoint.X, curMousePoint.Y);
    DocRectangle docRect = new DocRectangle(docCoords.X, docCoords.Y, 0, 0);

    // Clamp the displayed page number into 1..pageCount
    int pgNum = Math.Max(1, Math.Min(_curDocDisplay_pageNum, pageCount));

    switch (_touchFromPageText)
    {
        case TouchFromPageText.TOUCH_DATE:
        case TouchFromPageText.TOUCH_EVENT_DATE:
            ApplyPickedDate(docRect, pgNum);
            break;

        case TouchFromPageText.TOUCH_SUFFIX:
        case TouchFromPageText.TOUCH_EVENT_NAME:
        case TouchFromPageText.TOUCH_EVENT_DESC:
        case TouchFromPageText.TOUCH_EVENT_LOCN:
            ApplyPickedText(docRect, pgNum);
            break;

        case TouchFromPageText.TOUCH_MONEY:
            ApplyPickedMoney(docRect, pgNum);
            break;
    }

    // Cancel touch activity
    _touchFromPageText = TouchFromPageText.TOUCH_NONE;
}

/// <summary>Finds a date at the touch point and applies it to the date rollers or the event date picker.</summary>
private void ApplyPickedDate(DocRectangle docRect, int pgNum)
{
    bool earliestDateReq = false;
    bool latestDateReq = false;
    List<ExtractedDate> extractedDates = new List<ExtractedDate>();

    // First attempt honours whitespace; if nothing is found retry with whitespace ignored
    DocTextAndDateExtractor.SearchForDateItem(_curDocScanPages, "", docRect, 0, extractedDates, ref latestDateReq, ref earliestDateReq, pgNum, false);
    if (extractedDates.Count <= 0)
        DocTextAndDateExtractor.SearchForDateItem(_curDocScanPages, "", docRect, 0, extractedDates, ref latestDateReq, ref earliestDateReq, pgNum, true);
    if (extractedDates.Count <= 0)
        return;

    DateTime picked = extractedDates[0].dateTime;
    if (_touchFromPageText == TouchFromPageText.TOUCH_DATE)
        SetDateRollers(picked.Year, picked.Month, picked.Day, dateRollerChange.none);
    else
        datePickerEventDate.SelectedDate = new DateTime(picked.Year, picked.Month, picked.Day);
}

/// <summary>Appends the text at the touch point to the text box selected by the current touch mode.</summary>
private void ApplyPickedText(DocRectangle docRect, int pgNum)
{
    string extractedText = DocTextAndDateExtractor.ExtractTextFromPage(_curDocScanPages, docRect, pgNum);
    if (extractedText == "")
        return;

    switch (_touchFromPageText)
    {
        case TouchFromPageText.TOUCH_SUFFIX:
            txtDestFileSuffix.Text = AppendPickedText(txtDestFileSuffix.Text, extractedText);
            break;
        case TouchFromPageText.TOUCH_EVENT_NAME:
            txtEventName.Text = AppendPickedText(txtEventName.Text, extractedText);
            break;
        case TouchFromPageText.TOUCH_EVENT_DESC:
            txtEventDesc.Text = AppendPickedText(txtEventDesc.Text, extractedText);
            break;
        case TouchFromPageText.TOUCH_EVENT_LOCN:
            txtEventLocn.Text = AppendPickedText(txtEventLocn.Text, extractedText);
            break;
    }
}

/// <summary>Extracts a money amount (with optional $, £ or € symbol) at the touch point into <c>txtMoneySum</c>.</summary>
private void ApplyPickedMoney(DocRectangle docRect, int pgNum)
{
    string extractedText = DocTextAndDateExtractor.ExtractTextFromPage(_curDocScanPages, docRect, pgNum);
    if (extractedText == "")
        return;

    // Get currency symbol if available
    const int currencyLen = 1;
    int currencyPos = extractedText.IndexOf('$');
    if (currencyPos < 0)
        currencyPos = extractedText.IndexOf('£');
    if (currencyPos < 0)
        currencyPos = extractedText.IndexOf('€');

    // Candidate number: the text after the currency symbol up to the next space,
    // or the whole text when no symbol (or nothing after it) was found
    string noCurrencyStr = extractedText;
    if ((currencyPos >= 0) && (extractedText.Length > currencyPos + 1))
    {
        noCurrencyStr = extractedText.Substring(currencyPos + 1);
        int whitespacePos = noCurrencyStr.IndexOf(' ');
        if (whitespacePos > 0)
            noCurrencyStr = noCurrencyStr.Substring(0, whitespacePos);
    }

    Match match = Regex.Match(noCurrencyStr, @"((?:^\d{1,3}(?:\.?\d{3})*(?:,\d{2})?$))|((?:^\d{1,3}(?:,?\d{3})*(?:\.\d{2})?$)((\d+)?(\.\d{1,2})?))");
    if ((match.Success) && (match.Groups.Count > 1))
    {
        // Found string may be ###,###.## or ###.###,##
        // Group 1 is the dot-grouped form (dots stripped); group 2 the comma-grouped form (commas stripped)
        string foundStr = match.Groups[1].Value;
        if (foundStr.Trim() == "")
        {
            foundStr = match.Groups[2].Value;
            foundStr = foundStr.Replace(",", "");
        }
        else
        {
            foundStr = foundStr.Replace(".", "");
        }

        // Form string
        string numberText = (currencyPos >= 0 ? extractedText.Substring(currencyPos, currencyLen) : "") + foundStr;
        txtMoneySum.Text = AppendPickedText(txtMoneySum.Text, numberText);
    }
    else if (currencyPos >= 0)
    {
        // No number recognised: only the currency symbol is put in front of the existing text
        txtMoneySum.Text = extractedText.Substring(currencyPos, currencyLen) + txtMoneySum.Text;
    }
}

/// <summary>Joins picked text onto existing field text with a single space, or with no separator when the field is blank.</summary>
private static string AppendPickedText(string existingText, string pickedText)
{
    return existingText + (existingText.Trim() == "" ? "" : " ") + pickedText;
}
/// <summary>
/// Writes a changed or newly drawn location rectangle back into the expression text of the
/// relevant rich text box. An existing location term with the matching bracket index is
/// replaced; otherwise a new "{...}" location is inserted after the last text term that starts
/// at or before the caret.
/// </summary>
/// <param name="nameOfMatchingTextBox">"Date" selects the date-locations box; "New" selects it only when it has keyboard focus; anything else selects the match-expression box.</param>
/// <param name="docRectIdx">Bracket index of the location term to replace.</param>
/// <param name="rectInDocPercent">Rectangle to write, formatted by <c>FormatLocationStr</c>.</param>
private void docRectChangesComplete(string nameOfMatchingTextBox, int docRectIdx, DocRectangle rectInDocPercent)
{
    // Check which text box the rectangle is in
    RichTextBox rtb = txtMatchExpression;
    string cacheTermForRtb = "Match";
    if ((nameOfMatchingTextBox == "Date") || ((nameOfMatchingTextBox == "New") && txtDateLocations.IsKeyboardFocused))
    {
        rtb = txtDateLocations;
        cacheTermForRtb = "Date";
    }

    // Extract string
    string txtExpr = GetTextFromRichTextBox(rtb);

    // Parse using our grammar
    List<ExprParseTerm> exprParseTermList = GetParseResultForMatchText(txtExpr, cacheTermForRtb);

    // Get current caret position
    int curCaretPos = GetCaretPos(rtb);

    // Find where to change/insert the location
    bool bInserted = false;
    int bestNewRectPos = txtExpr.Length;
    string newTextExpr = txtExpr;
    foreach (ExprParseTerm parseTerm in exprParseTermList)
    {
        if (parseTerm.termType == ExprParseTerm.ExprParseTermType.exprTerm_Location)
        {
            // Replace the existing location term in place
            if (parseTerm.locationBracketIdx == docRectIdx)
            {
                newTextExpr = txtExpr.Substring(0, parseTerm.stPos)
                    + FormatLocationStr(rectInDocPercent)
                    + txtExpr.Substring(parseTerm.stPos + parseTerm.termLen);
                bInserted = true;
                break;
            }
        }
        else if (parseTerm.termType == ExprParseTerm.ExprParseTermType.exprTerm_Text)
        {
            // Remember the end of the last text term that starts at or before the caret
            if (curCaretPos >= parseTerm.stPos)
                bestNewRectPos = parseTerm.stPos + parseTerm.termLen;
        }
    }

    if (!bInserted)
    {
        newTextExpr = txtExpr.Substring(0, bestNewRectPos) + "{" + FormatLocationStr(rectInDocPercent) + "}";

        // If the remainder starts with a "{...}" group, drop it so the new location replaces it
        string endOfStr = txtExpr.Substring(bestNewRectPos);
        string trimmedEnd = endOfStr.Trim();
        if ((trimmedEnd.Length > 0) && (trimmedEnd[0] == '{'))
        {
            int closePos = endOfStr.IndexOf('}');
            if (closePos > 0)
                endOfStr = endOfStr.Substring(closePos + 1);
        }
        newTextExpr += endOfStr;
    }

    // All rectangles will get redrawn as text expression is changed and causes trigger to refresh
    SetTextInRichTextBox(rtb, newTextExpr);
}
/// <summary>
/// Searches every page for text elements that intersect <paramref name="docRectPercent"/> and
/// contain <paramref name="str"/> (trimmed, ordinal case-insensitive).
/// </summary>
/// <param name="str">Text to look for; leading/trailing whitespace is ignored. A null value matches nothing.</param>
/// <param name="docRectPercent">Region that a text element's bounds must intersect.</param>
/// <param name="scanPages">Scanned pages to search; null yields false.</param>
/// <param name="exprIdx">Expression index recorded on each match location.</param>
/// <param name="matchingTextLocs">Receives one entry per match; when null the search stops at the first match.</param>
/// <returns>True when at least one matching element was found.</returns>
private bool MatchString(string str, DocRectangle docRectPercent, ScanPages scanPages, int exprIdx, List<DocMatchingTextLoc> matchingTextLocs)
{
#if TEST_PERF_MATCHSTRING
    Stopwatch stopWatch1 = new Stopwatch();
    stopWatch1.Start();
#endif
    bool result = false;
    if ((scanPages == null) || (str == null))
        return result;

    // The search text does not change between elements, so trim it once up front
    string searchText = str.Trim();

    int elemCount = 0;
    for (int pageIdx = 0; pageIdx < scanPages.scanPagesText.Count; pageIdx++)
    {
        List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];
        for (int elemIdx = 0; elemIdx < scanPageText.Count; elemIdx++)
        {
            ScanTextElem textElem = scanPageText[elemIdx];

            // Check bounds
            if (docRectPercent.Intersects(textElem.bounds))
            {
                int mtchPos = textElem.text.IndexOf(searchText, StringComparison.OrdinalIgnoreCase);
                if (mtchPos >= 0)
                {
                    result = true;
                    if (matchingTextLocs != null)
                    {
                        DocMatchingTextLoc dtml = new DocMatchingTextLoc();
                        dtml.pageIdx = pageIdx;
                        dtml.elemIdx = elemIdx;
                        dtml.exprIdx = exprIdx;
                        dtml.posInText = mtchPos;
                        dtml.matchLen = searchText.Length;
                        dtml.foundInTxtLen = textElem.text.Length;
                        matchingTextLocs.Add(dtml);
                    }
                    else
                    {
                        // If not compiling all text match locations then return immediately to save time
                        return true;
                    }
                }
            }
            elemCount++;
        }
    }
#if TEST_PERF_MATCHSTRING
    stopWatch1.Stop();
    logger.Info("CheckForNewDocs : {0:0.00} uS, count {1}", stopWatch1.ElapsedTicks * 1000000.0 / Stopwatch.Frequency, elemCount);
#endif
    return result;
}
/// <summary>
/// Regex-based date search: runs the long/short date regular expressions over every text
/// element that intersects <paramref name="dateDocRect"/> and contains at least two adjacent
/// digits, passing the matches to <c>CoerceMatchesToDates</c>.
/// </summary>
/// <param name="scanPages">Scanned pages to search.</param>
/// <param name="dateSearchTerm">
/// Optional match text followed by "~" options (~long, ~short, ~US, ~No0, ~Spaces, case-insensitive).
/// When neither ~long nor ~short is present all formats are tried. Null is treated as empty.
/// </param>
/// <param name="dateDocRect">Region that a text element's bounds must intersect.</param>
/// <param name="matchFactor">Factor passed on for elements whose text contains the match text; other elements get 0.</param>
/// <param name="datesResult">List handed to <c>CoerceMatchesToDates</c> to receive results.</param>
/// <param name="limitToPageNumN">1-based page to restrict the search to, or -1 for all pages. A page outside the document searches nothing.</param>
public static void SearchForDateItem2(ScanPages scanPages, string dateSearchTerm, DocRectangle dateDocRect, double matchFactor, List<ExtractedDate> datesResult, int limitToPageNumN = -1)
{
    // Work out the page range, clamped to the pages that exist so that an out-of-range
    // page number results in an empty search rather than an indexing exception
    int pageCount = scanPages.scanPagesText.Count;
    int firstPageIdx = 0;
    int lastPageIdxPlusOne = pageCount;
    if (limitToPageNumN != -1)
    {
        firstPageIdx = limitToPageNumN - 1;
        lastPageIdxPlusOne = limitToPageNumN;
    }
    if (firstPageIdx < 0)
        firstPageIdx = 0;
    if (lastPageIdxPlusOne > pageCount)
        lastPageIdxPlusOne = pageCount;

    // The options in the search term are the same for every element, so parse them once
    string searchTerm = dateSearchTerm ?? "";
    bool bTryLong = searchTerm.IndexOf("~long", StringComparison.OrdinalIgnoreCase) >= 0;
    bool bTryShort = searchTerm.IndexOf("~short", StringComparison.OrdinalIgnoreCase) >= 0;
    bool bTryUS = searchTerm.IndexOf("~US", StringComparison.OrdinalIgnoreCase) >= 0;
    bool bTryNoZeroes = searchTerm.IndexOf("~No0", StringComparison.OrdinalIgnoreCase) >= 0;
    bool bTrySpaceSeparated = searchTerm.IndexOf("~Spaces", StringComparison.OrdinalIgnoreCase) >= 0;
    if (!(bTryLong | bTryShort))
    {
        // Neither long nor short was asked for explicitly: try everything
        bTryLong = true;
        bTryShort = true;
        bTryUS = true;
        bTryNoZeroes = true;
        bTrySpaceSeparated = true;
    }

    // Match text is whatever precedes the first '~'
    string matchText = searchTerm;
    int squigPos = searchTerm.IndexOf('~');
    if (squigPos >= 0)
        matchText = searchTerm.Substring(0, squigPos);

    for (int pageIdx = firstPageIdx; pageIdx < lastPageIdxPlusOne; pageIdx++)
    {
        List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];
        foreach (ScanTextElem textElem in scanPageText)
        {
            // Check if there are at least two digits together in the text (any date format requires this at least)
            if (!Regex.IsMatch(textElem.text, @"\d\d"))
                continue;

            // Check bounds
            if (!dateDocRect.Intersects(textElem.bounds))
                continue;

            // Elements containing the match text carry the caller's match factor
            double matchResultFactor = 0;
            if (textElem.text.IndexOf(matchText, StringComparison.OrdinalIgnoreCase) >= 0)
                matchResultFactor = matchFactor;

            // Try to find dates
            if (bTryLong)
            {
                MatchCollection ldMatches = Regex.Matches(textElem.text, longDateRegex, RegexOptions.IgnoreCase);
                CoerceMatchesToDates(datesResult, matchResultFactor, textElem, ldMatches, ExtractedDate.DateMatchType.LongDate, 13, 11, 1);
                if (bTryUS)
                {
                    MatchCollection usldMatches = Regex.Matches(textElem.text, USlongDateRegex, RegexOptions.IgnoreCase);
                    CoerceMatchesToDates(datesResult, matchResultFactor, textElem, usldMatches, ExtractedDate.DateMatchType.USLongDate, 14, 1, 4);
                }
            }
            if (bTryShort)
            {
                MatchCollection sdlzMatches = Regex.Matches(textElem.text, shortDateLeadingZeroesRegex, RegexOptions.IgnoreCase);
                CoerceMatchesToDates(datesResult, matchResultFactor, textElem, sdlzMatches, ExtractedDate.DateMatchType.ShortDateLeadingZeroes, 3, 2, 1);
                if (bTryNoZeroes)
                {
                    MatchCollection sdnlzMatches = Regex.Matches(textElem.text, shortDateNoLeadingZeroesRegex, RegexOptions.IgnoreCase);
                    CoerceMatchesToDates(datesResult, matchResultFactor, textElem, sdnlzMatches, ExtractedDate.DateMatchType.ShortDateNoLeadingZeroes, 3, 2, 1);
                }
                if (bTrySpaceSeparated)
                {
                    // NOTE(review): space-separated matches are reported as ShortDateNoLeadingZeroes - confirm this is intended
                    MatchCollection sdspMatches = Regex.Matches(textElem.text, shortDateSpacesRegex, RegexOptions.IgnoreCase);
                    CoerceMatchesToDates(datesResult, matchResultFactor, textElem, sdspMatches, ExtractedDate.DateMatchType.ShortDateNoLeadingZeroes, 3, 2, 1);
                }
            }
        }
    }
}
/// <summary>
/// Mouse-enter handler for the date result text. Lists every date found by the last match
/// in a popup (one per line) and highlights the dates located on the displayed page.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Mouse event data; always marked handled.</param>
private void txtDateResult_MouseEnter(object sender, MouseEventArgs e)
{
    // popupDateResult.HorizontalOffset = ptOnImage.X - 100;
    // popupDateResult.VerticalOffset = ptOnImage.Y;

    // Remove highlights left over from a previous hover
    locRectHandler.ClearTextMatchRect("dateMatch");

    if (_curDocDisplay_lastMatchResult != null)
    {
        List<string> popupLines = new List<string>();
        foreach (ExtractedDate foundDate in _curDocDisplay_lastMatchResult.datesFoundInDoc)
        {
            // One popup line per date: long date, match factor and page number
            string line = foundDate.dateTime.ToLongDateString();
            line += " (" + foundDate.matchFactor.ToString() + "%) Page " + foundDate.pageNum;
            popupLines.Add(line);

            // Only dates on the page being displayed get a highlight rectangle
            if (foundDate.pageNum != _curDocDisplay_pageNum)
            {
                continue;
            }

            // Work on a copy of the element rectangle, then narrow it horizontally in
            // proportion to where the date text sits within the element's text
            DocRectangle elemRect = foundDate.locationOfDateOnPagePercent;
            DocRectangle highlightRect = new DocRectangle(elemRect.X, elemRect.Y, elemRect.Width, elemRect.Height);
            double fullWidth = highlightRect.Width;
            double leftEdge = highlightRect.X;
            highlightRect.X = leftEdge + foundDate.posnInText * fullWidth / foundDate.foundInText.Length;
            highlightRect.Width = foundDate.matchLength * fullWidth / foundDate.foundInText.Length;

            Brush highlightBrush = Brushes.Firebrick;
            locRectHandler.DrawTextMatchRect(highlightRect, highlightBrush, "dateMatch");
        }

        popupDateResultText.Text = string.Join("\n", popupLines.ToArray());
        if (!popupDateResult.IsOpen)
        {
            popupDateResult.IsOpen = true;
        }
    }

    e.Handled = true;
}
/// <summary>
/// Returns the text of the first text element on a page whose bounds intersect the given rectangle.
/// </summary>
/// <param name="scanPages">Scanned pages holding the per-page text elements.</param>
/// <param name="docRect">Rectangle to test each element's bounds against.</param>
/// <param name="pageNum">1-based page number.</param>
/// <returns>The element's text, or an empty string when the page does not exist or nothing intersects.</returns>
public static string ExtractTextFromPage(ScanPages scanPages, DocRectangle docRect, int pageNum)
{
    int pageIdx = pageNum - 1;
    bool pageExists = (pageIdx >= 0) && (pageIdx < scanPages.scanPagesText.Count);
    if (!pageExists)
    {
        return "";
    }

    // Walk the page's elements in order and stop at the first hit
    List<ScanTextElem> pageElems = scanPages.scanPagesText[pageIdx];
    for (int elemIdx = 0; elemIdx < pageElems.Count; elemIdx++)
    {
        ScanTextElem candidate = pageElems[elemIdx];
        if (docRect.Intersects(candidate.bounds))
        {
            return candidate.text;
        }
    }

    return "";
}
/// <summary>
/// Searches the text elements that intersect <paramref name="dateDocRect"/> for dates, using
/// <c>SearchWithinString</c> on each element, or once per page on the joined text when the
/// search term requests joining.
/// </summary>
/// <param name="scanPages">Scanned pages to search.</param>
/// <param name="dateSearchTerm">Search term, interpreted by <c>GetDateSearchInfo</c>.</param>
/// <param name="dateDocRect">Region that a text element's bounds must intersect.</param>
/// <param name="matchFactor">Base match factor; a bump is added for page 1 and page 2.</param>
/// <param name="datesResult">List passed to <c>SearchWithinString</c> to receive the dates found.</param>
/// <param name="latestDateRequested">Set to true when the search term asks for the latest date; never reset to false here.</param>
/// <param name="earliestDateRequested">Set to true when the search term asks for the earliest date; never reset to false here.</param>
/// <param name="limitToPageNumN">1-based page to restrict the search to, or -1 for all pages. A page outside the document searches nothing.</param>
/// <param name="ignoreWhitespace">Passed through to <c>SearchWithinString</c>.</param>
public static void SearchForDateItem(ScanPages scanPages, string dateSearchTerm, DocRectangle dateDocRect, double matchFactor, List<ExtractedDate> datesResult, ref bool latestDateRequested, ref bool earliestDateRequested, int limitToPageNumN = -1, bool ignoreWhitespace = false)
{
    // Get date search info
    DateSrchInfo dateSrchInfo = GetDateSearchInfo(dateSearchTerm);
    if (dateSrchInfo.bEarliestDate)
        earliestDateRequested = true;
    if (dateSrchInfo.bLatestDate)
        latestDateRequested = true;

    // Find first and last pages to search
    int pageCount = scanPages.scanPagesText.Count;
    int firstPageIdx = 0;
    int lastPageIdxPlusOne = pageCount;
    if (limitToPageNumN != -1)
    {
        firstPageIdx = limitToPageNumN - 1;
        lastPageIdxPlusOne = limitToPageNumN;
    }

    // Clamp to the pages that actually exist: a page number of 0, or one beyond the end of
    // the document, must give an empty search instead of indexing outside the page list
    if (firstPageIdx < 0)
        firstPageIdx = 0;
    if (lastPageIdxPlusOne > pageCount)
        lastPageIdxPlusOne = pageCount;

    // Iterate pages
    for (int pageIdx = firstPageIdx; pageIdx < lastPageIdxPlusOne; pageIdx++)
    {
        List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];
        string joinedText = "";     // This maybe used if ~join macrocommand used
        int joinCount = 0;
        double matchFactorForThisPage = matchFactor + (pageIdx == 0 ? MATCH_FACTOR_BUMP_FOR_PAGE1 : (pageIdx == 1 ? MATCH_FACTOR_BUMP_FOR_PAGE2 : 0));

        // Iterate text elements
        foreach (ScanTextElem textElem in scanPageText)
        {
            // Check that the text contains at least two digits together to avoid wasting time looking for dates where there can be none
            if (!Regex.IsMatch(textElem.text, @"\d\d"))
                continue;

            // Check rectangle bounds
            if (!dateDocRect.Intersects(textElem.bounds))
                continue;

            // Check for join - only the first MAX_TEXT_ELEMS_TO_JOIN elements are accumulated
            if (dateSrchInfo.bJoinTextInRect)
            {
                if (joinCount < MAX_TEXT_ELEMS_TO_JOIN)
                    joinedText += textElem.text + " ";
                joinCount++;
                continue;
            }

            // Search within the found text
            SearchWithinString(textElem.text, textElem.bounds, dateSearchTerm, dateSrchInfo, matchFactorForThisPage, pageIdx, datesResult, ignoreWhitespace);
        }

        // If joined then search just once (the search rectangle stands in for the text bounds)
        if (dateSrchInfo.bJoinTextInRect)
            SearchWithinString(joinedText, dateDocRect, dateSearchTerm, dateSrchInfo, matchFactorForThisPage, pageIdx, datesResult, ignoreWhitespace);
    }

    // TEST TEST TEST
    // NOTE(review): stp and stp2 are not declared in this method - confirm they exist before enabling this symbol
#if TEST_AGAINST_OLD_DATE_ALGORITHM
    {
        List<ExtractedDate> testDatesResult = new List<ExtractedDate>();
        SearchForDateItem2(scanPages, dateSearchTerm, dateDocRect, matchFactor, testDatesResult, limitToPageNumN);
        stp2.Stop();
        Console.WriteLine("File: " + scanPages.uniqName + " OldTime = " + stp2.ElapsedMilliseconds.ToString() + " NewTime = " + stp.ElapsedMilliseconds.ToString());
        foreach (ExtractedDate newD in datesResult)
        {
            bool bFound = false;
            foreach (ExtractedDate oldD in testDatesResult)
            {
                if (oldD.dateTime == newD.dateTime)
                {
                    bFound = true;
                    break;
                }
            }
            if (!bFound)
            {
                Console.WriteLine("Date Mismatch New=" + newD.dateTime.ToLongDateString());
            }
        }
        foreach (ExtractedDate oldD in testDatesResult)
        {
            bool bFound = false;
            foreach (ExtractedDate newD in datesResult)
            {
                if (oldD.dateTime == newD.dateTime)
                {
                    bFound = true;
                    break;
                }
            }
            if (!bFound)
            {
                Console.WriteLine("Date Mismatch Old=" + oldD.dateTime.ToLongDateString());
            }
        }
    }
#endif
}
/// <summary>
/// Tests whether this rectangle and <paramref name="rect"/> overlap. Rectangles whose edges
/// coincide are reported as intersecting because only strict comparisons rule an overlap out.
/// </summary>
/// <param name="rect">Rectangle to test against.</param>
/// <returns>False when one rectangle lies entirely to one side of the other; otherwise true.</returns>
public bool Intersects(DocRectangle rect)
{
    bool isSeparated =
        (X > rect.BottomRightX)         // this is entirely to the right of rect
        || (BottomRightX < rect.X)      // this is entirely to the left of rect
        || (Y > rect.BottomRightY)      // this is entirely below rect
        || (BottomRightY < rect.Y);     // this is entirely above rect
    return !isSeparated;
}
/// <summary>
/// Tests whether <paramref name="rect"/> lies completely inside this rectangle (shared edges allowed).
/// </summary>
/// <param name="rect">Rectangle that must fit within this one.</param>
/// <returns>True when all four edges of <paramref name="rect"/> are within this rectangle.</returns>
public bool Contains(DocRectangle rect)
{
    // Left edge
    if (!(this.X <= rect.X))
    {
        return false;
    }
    // Right edge
    if (!((rect.X + rect.Width) <= (this.X + this.Width)))
    {
        return false;
    }
    // Top edge
    if (!(this.Y <= rect.Y))
    {
        return false;
    }
    // Bottom edge decides the result
    return (rect.Y + rect.Height) <= (this.Y + this.Height);
}
/// <summary>
/// Adds a half-transparent, non-hit-testable filled rectangle to the overlay canvas at the
/// canvas position corresponding to a document-percent rectangle.
/// </summary>
/// <param name="docRect">Location in document percent, converted with <c>ConvertDocPercentRectToCanvas</c>.</param>
/// <param name="colr">Fill brush.</param>
/// <param name="txtMatchStr">Stored in the shape's <c>Tag</c>.</param>
public void DrawTextMatchRect(DocRectangle docRect, Brush colr, string txtMatchStr)
{
    DocRectangle placement = ConvertDocPercentRectToCanvas(docRect);

    Rectangle highlight = new Rectangle
    {
        Opacity = 0.5,
        Fill = colr,
        Width = placement.Width,
        Height = placement.Height,
        Tag = txtMatchStr,
        IsHitTestVisible = false
    };

    // Add to the canvas, then position it
    _uiOverlayCanvas.Children.Add(highlight);
    highlight.SetValue(Canvas.LeftProperty, placement.X);
    highlight.SetValue(Canvas.TopProperty, placement.Y);
}
/// <summary>
/// Returns the text of the first element on the displayed page whose bounds intersect the
/// given rectangle. The displayed page number is used as an index without range checking.
/// </summary>
/// <param name="ptInDocPercent">Rectangle to test against each element's bounds.</param>
/// <returns>The element's text, or an empty string when nothing intersects.</returns>
private string GetTextFromPageIntersectWithRect(DocRectangle ptInDocPercent)
{
    int pageIdx = _curDocDisplay_pageNum - 1;
    List<ScanTextElem> pageElems = _curDocDisplay_scanPages.scanPagesText[pageIdx];

    for (int elemIdx = 0; elemIdx < pageElems.Count; elemIdx++)
    {
        ScanTextElem candidate = pageElems[elemIdx];
        if (candidate.bounds.Intersects(ptInDocPercent))
        {
            return candidate.text;
        }
    }

    return "";
}
/// <summary>
/// Normalises a parsed year/month/day, computes its match factor and appends the resulting
/// <c>ExtractedDate</c> to <paramref name="datesResult"/>. Values that cannot form a valid
/// date (year outside the <c>DateTime</c> range or month outside 1-12) are skipped.
/// </summary>
/// <param name="srcStr">Text the date was found in.</param>
/// <param name="textBounds">Bounds of that text; a Y below 40 earns a match-factor bump.</param>
/// <param name="matchFactor">Base match factor before bumps.</param>
/// <param name="year">Year; values below 80 become 20xx and values from 80 to 99 become 19xx.</param>
/// <param name="month">Month number.</param>
/// <param name="day">Day of month, or -1 when missing (day 1 is used). Clamped to the length of the month.</param>
/// <param name="bMonthFromChars">True when the month came from a month name; earns a bump.</param>
/// <param name="bRangeIndicatorFound">True when a range indicator was seen; adds 10 to the factor.</param>
/// <param name="firstMatchPos">Index of the first matched character in the text.</param>
/// <param name="lastMatchPos">Index of the last matched character in the text (inclusive).</param>
/// <param name="dateSrchInfo">Search options; <c>bPlusOneMonth</c> moves the date on by one month.</param>
/// <param name="pageNum">Page number stored on the result.</param>
/// <param name="datesResult">List the new date is appended to.</param>
private static void AddCompletedDateToList(string srcStr, DocRectangle textBounds, double matchFactor, int year, int month, int day, bool bMonthFromChars, bool bRangeIndicatorFound, int firstMatchPos, int lastMatchPos, DateSrchInfo dateSrchInfo, int pageNum, List<ExtractedDate> datesResult)
{
    double finalMatchFactor = matchFactor;
    ExtractedDate fd = new ExtractedDate();
    if (bRangeIndicatorFound)
        finalMatchFactor += 10;

    // Bump the match factor for dates in the top 40% of page - letterhead dates
    if (textBounds.Y < 40)
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_TOP_40_PC_OF_PAGE;

    // Year - expand two digit years and reward four digit ones
    if (year < 80)
    {
        year += 2000;
        fd.yearWas2Digit = true;
    }
    else if (year < 100)
    {
        year += 1900;
        fd.yearWas2Digit = true;
    }
    else
    {
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_4_DIGIT_YEAR;
    }

    // Month
    if (bMonthFromChars)
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_TEXT_MONTH;

    // Check for bump
    if (dateSrchInfo.bPlusOneMonth)
    {
        month += 1;
        if (month > 12)
        {
            month = 1;
            year++;
        }
    }

    // DateTime.DaysInMonth and the DateTime constructor both throw for a year or month
    // outside their valid range, so a value that cannot be a date is dropped here
    if ((year < DateTime.MinValue.Year) || (year > DateTime.MaxValue.Year) || (month < 1) || (month > 12))
        return;

    // Day
    if (day == -1)
    {
        day = 1;
        fd.dayWasMissing = true;
        finalMatchFactor += MATCH_FACTOR_BUMP_FOR_DAY_MISSING;
    }
    int daysInMonth = DateTime.DaysInMonth(year, month);
    if (day > daysInMonth)
        day = daysInMonth;
    if (day < 1)
        day = 1;

    // Create datetime - cannot fail now that year, month and day are all in range
    DateTime dt = new DateTime(year, month, day);

    // Add date to list
    fd.foundInText = srcStr;
    fd.pageNum = pageNum;
    fd.posnInText = firstMatchPos;
    fd.matchLength = lastMatchPos - firstMatchPos + 1;
    fd.dateTime = dt;
    fd.dateMatchType = ExtractedDate.DateMatchType.LongDate;
    fd.locationOfDateOnPagePercent = textBounds;
    fd.matchFactor = finalMatchFactor;
    datesResult.Add(fd);
}
/// <summary>
/// Mouse-move callback for the example file image: shows a tooltip with the page text found
/// under the pointer, or hides the tooltip when there is none.
/// </summary>
/// <param name="ptOnImage">Pointer position used to place the tooltip.</param>
/// <param name="ptInDocPercent">Pointer position as a document rectangle, used for the text lookup.</param>
private void tooltipCallback_MouseMove(Point ptOnImage, DocRectangle ptInDocPercent)
{
    // Close tooltip if window isn't active
    if (!IsActive)
    {
        exampleFileImageToolTip.IsOpen = false;
        return;
    }

    bool tooltipTextFound = false;
    bool pageIsValid = (_curDocDisplay_scanPages != null)
        && (_curDocDisplay_pageNum > 0)
        && (_curDocDisplay_pageNum <= _curDocDisplay_scanPages.scanPagesText.Count);
    if (pageIsValid)
    {
        // Open the tooltip and keep it next to the pointer
        if (!exampleFileImageToolTip.IsOpen)
        {
            exampleFileImageToolTip.IsOpen = true;
        }
        exampleFileImageToolTip.HorizontalOffset = ptOnImage.X - 100;
        exampleFileImageToolTip.VerticalOffset = ptOnImage.Y;

        string textUnderPointer = GetTextFromPageIntersectWithRect(ptInDocPercent);
        if (textUnderPointer != "")
        {
            exampleFileImageToolText.Text = textUnderPointer;
            tooltipTextFound = true;
        }
    }

    // Nothing to show: blank the text and hide the tooltip again
    if (!tooltipTextFound)
    {
        exampleFileImageToolText.Text = "";
        exampleFileImageToolTip.IsOpen = false;
    }
}
/// <summary>
/// Extracts dates from a scanned document according to a date expression made of text terms,
/// location rectangles and match factors, then reports the index of the date with the highest
/// match factor.
/// </summary>
/// <param name="scanPages">Scanned pages; null yields an empty list.</param>
/// <param name="dateExpr">Date expression, parsed with <c>DocTypesMatcher.ParseDocMatchExpression</c>.</param>
/// <param name="bestDateIdx">Index into the result of the highest-scoring date; 0 when no date scores above 0.</param>
/// <returns>All dates found, in the order the searches produced them.</returns>
public static List<ExtractedDate> ExtractDatesFromDoc(ScanPages scanPages, string dateExpr, out int bestDateIdx)
{
    bestDateIdx = 0;
    List<ExtractedDate> foundDates = new List<ExtractedDate>();
    if (scanPages == null)
    {
        return foundDates;
    }

    // Extract location rectangles from doctype
    List<ExprParseTerm> terms = DocTypesMatcher.ParseDocMatchExpression(dateExpr, 0);

    bool searchedAtLeastOnce = false;
    string pendingTerm = "";
    double pendingFactor = 0;
    bool wantLatest = false;
    bool wantEarliest = false;

    foreach (ExprParseTerm term in terms)
    {
        switch (term.termType)
        {
            case ExprParseTerm.ExprParseTermType.exprTerm_Text:
                // A text term with no location of its own is searched over the whole page
                // as soon as the next text term arrives
                if (pendingTerm != "")
                {
                    SearchForDateItem(scanPages, pendingTerm, new DocRectangle(0, 0, 100, 100), pendingFactor, foundDates, ref wantLatest, ref wantEarliest);
                    searchedAtLeastOnce = true;
                }
                pendingTerm = dateExpr.Substring(term.stPos, term.termLen);
                // Reset matchFactor for next search term
                pendingFactor = 0;
                break;

            case ExprParseTerm.ExprParseTermType.exprTerm_Location:
                // A location consumes the pending text term (possibly empty)
                string locationText = dateExpr.Substring(term.stPos, term.termLen);
                DocRectangle searchRect = new DocRectangle(locationText);
                SearchForDateItem(scanPages, pendingTerm, searchRect, pendingFactor, foundDates, ref wantLatest, ref wantEarliest);
                pendingTerm = "";
                pendingFactor = 0;
                searchedAtLeastOnce = true;
                break;

            case ExprParseTerm.ExprParseTermType.exprTerm_MatchFactor:
                // Skip the first character of the term and parse the rest as the factor
                if (dateExpr.Length > term.stPos + 1)
                {
                    string factorText = dateExpr.Substring(term.stPos + 1, term.termLen - 1);
                    Double.TryParse(factorText, out pendingFactor);
                }
                break;
        }
    }

    // There may be one last expression still to find - but be sure that at least one is searched for
    if ((pendingTerm != "") || (!searchedAtLeastOnce))
    {
        SearchForDateItem(scanPages, pendingTerm, new DocRectangle(0, 0, 100, 100), pendingFactor, foundDates, ref wantLatest, ref wantEarliest);
    }

    // If required check for the earliest and/or latest dates and bump their factors
    DateTime minSeen = DateTime.MaxValue;
    DateTime maxSeen = DateTime.MinValue;
    int idxOfEarliest = -1;
    int idxOfLatest = -1;
    for (int i = 0; i < foundDates.Count; i++)
    {
        DateTime candidate = foundDates[i].dateTime;
        if (minSeen > candidate)
        {
            minSeen = candidate;
            idxOfEarliest = i;
        }
        if (maxSeen < candidate)
        {
            maxSeen = candidate;
            idxOfLatest = i;
        }
    }
    if (wantEarliest && (idxOfEarliest != -1))
    {
        foundDates[idxOfEarliest].matchFactor += MATCH_FACTOR_BUMP_FOR_EARLIEST_DATE;
    }
    if (wantLatest && (idxOfLatest != -1))
    {
        foundDates[idxOfLatest].matchFactor += MATCH_FACTOR_BUMP_FOR_LATEST_DATE;
    }

    // Find the best date index based on highest match factor (first one wins a tie)
    double topFactor = 0;
    for (int i = 0; i < foundDates.Count; i++)
    {
        if (topFactor < foundDates[i].matchFactor)
        {
            bestDateIdx = i;
            topFactor = foundDates[i].matchFactor;
        }
    }

    return foundDates;
}
/// <summary>
/// Shows the outcome of a document type match: the document date, a MATCHES/FAILED banner
/// with the match factor, and a highlight over each matched piece of text on the displayed page.
/// </summary>
/// <param name="matchRslt">Match result supplying the date, match factor and certainty.</param>
/// <param name="matchingTextLocs">Locations of matched text; only those on the displayed page are drawn.</param>
private void DisplayMatchResultForDoc(DocTypeMatchResult matchRslt, List<DocMatchingTextLoc> matchingTextLocs)
{
    // One brush per expression index, reused cyclically
    List<Brush> exprColrBrushes = new List<Brush>
    {
        Brushes.DarkMagenta, Brushes.Green, Brushes.Red, Brushes.Orange, Brushes.Purple, Brushes.Peru, Brushes.Purple
    };

    // DateTime.MinValue is treated as "no date"
    if (matchRslt.docDate != DateTime.MinValue)
        txtDateResult.Text = matchRslt.docDate.ToLongDateString();
    else
        txtDateResult.Text = "";

    // Display match status
    string matchFactorStr = String.Format("{0}", (int)matchRslt.matchFactor);
    if (matchRslt.matchCertaintyPercent == 100)
    {
        txtCheckResult.Text = "MATCHES (" + matchFactorStr + "%)";
        txtCheckResult.Foreground = Brushes.White;
        txtCheckResult.Background = Brushes.Green;
    }
    else
    {
        txtCheckResult.Text = "FAILED (" + matchFactorStr + "%)";
        txtCheckResult.Foreground = Brushes.White;
        txtCheckResult.Background = Brushes.Red;
    }

    // Display matching text locations
    locRectHandler.ClearTextMatchRect("txtMatch");
    foreach (DocMatchingTextLoc txtLoc in matchingTextLocs)
    {
        if (txtLoc.pageIdx + 1 != _curDocDisplay_pageNum)
            continue;

        DocRectangle inRect = _curDocDisplay_scanPages.scanPagesText[txtLoc.pageIdx][txtLoc.elemIdx].bounds;
        Brush colrBrush = exprColrBrushes[txtLoc.exprIdx % exprColrBrushes.Count];

        // Work on a copy so the element's own bounds are left untouched
        DocRectangle computedLocation = new DocRectangle(inRect.X, inRect.Y, inRect.Width, inRect.Height);

        // Narrow the highlight to the matched characters, assuming the text is spread evenly
        // across the element's width. With no text length the whole element is highlighted.
        if (txtLoc.foundInTxtLen > 0)
        {
            double wid = computedLocation.Width;
            double inx = computedLocation.X;
            computedLocation.X = inx + txtLoc.posInText * wid / txtLoc.foundInTxtLen;
            computedLocation.Width = txtLoc.matchLen * wid / txtLoc.foundInTxtLen;
        }

        locRectHandler.DrawTextMatchRect(computedLocation, colrBrush, "txtMatch");
    }
}
/// <summary>
/// Scans a string character by character for dates and adds every complete date found to
/// <paramref name="datesResult"/> via <c>AddCompletedDateToList</c>. Optionally also looks
/// for a "year end" phrase, which is recorded as 5 April of the year that follows it.
/// </summary>
/// <param name="inStr">Text to search.</param>
/// <param name="textBounds">Bounds recorded as the location of every date found in this text.</param>
/// <param name="dateSearchTerm">Original search term (not used directly here; the parsed form is <paramref name="dateSrchInfo"/>).</param>
/// <param name="dateSrchInfo">Parsed search options: expected date elements, US ordering, allowed separators, range handling.</param>
/// <param name="matchFactor">Base match factor for dates found by the character scan (the financial year match uses 100).</param>
/// <param name="pageIdx">0-based page index; results carry pageIdx + 1.</param>
/// <param name="datesResult">List that receives the dates.</param>
/// <param name="ignoreWhitespace">When true, spaces are removed from the text before the character scan.</param>
private static void SearchWithinString(string inStr, DocRectangle textBounds, string dateSearchTerm, DateSrchInfo dateSrchInfo, double matchFactor, int pageIdx, List<ExtractedDate> datesResult, bool ignoreWhitespace)
{
    int numDatesFoundInString = 0;
    int year = -1;

    // Use regex to find financial
    if (dateSrchInfo.bFinancialYearEnd)
    {
        const string finYearEndRegex = @"year end.{0,16}?\s?((19|20)?(\d\d))";
        Match fyMatch = Regex.Match(inStr, finYearEndRegex, RegexOptions.IgnoreCase);
        if (fyMatch.Success)
        {
            if (fyMatch.Groups.Count > 1)
            {
                // Add result
                // The last-position argument is the index of the final matched character
                // (inclusive), because the callee derives the length as last - first + 1
                year = Convert.ToInt32(fyMatch.Groups[1].Value);
                AddCompletedDateToList(inStr, textBounds, 100, year, 4, 5, false, false, fyMatch.Index, fyMatch.Index + fyMatch.Length - 1, dateSrchInfo, pageIdx+1, datesResult);
                numDatesFoundInString++;
            }
        }
    }

    // Start at the beginning of the string
    string s = inStr;
    if (ignoreWhitespace)
        s = s.Replace(" ", "");
    int dateSrchPos = 0;            // Which date element (1st, 2nd, 3rd ...) is being looked for
    int chIdx = 0;
    string curStr = "";             // Digits or letters accumulated for the current element
    int day = -1;
    int month = -1;
    bool bMonthFromChars = false;
    year = -1;
    s = s.ToLower();
    bool strIsDigits = false;       // True while curStr holds digits rather than letters
    int firstMatchPos = -1;
    int lastMatchPos = 0;
    int commaCount = 0;
    bool bRangeIndicatorFound = false;
    int numSepChars = 0;
    for (chIdx = 0; chIdx < s.Length; chIdx++)
    {
        char ch = s[chIdx];
        bool bResetNeeded = false;

        // Search element - taken from the search term's element list while there are
        // elements left; otherwise the element type is inferred from position and length
        DateElemSrch el = null;
        int minChars = 1;
        int maxChars = 9;
        if (dateSrchPos < dateSrchInfo.dateEls.Count)
        {
            el = dateSrchInfo.dateEls[dateSrchPos];
            minChars = el.minChars;
            maxChars = el.maxChars;
        }

        // Check if digits required
        if ((el == null) || (el.isDigits))
        {
            // Treat 'l' and 'o' as the digits 1 and 0 when they sit next to digits
            char testCh = ch;
            if ((testCh == 'l') || (testCh == 'o'))
            {
                if (((strIsDigits) && (curStr.Length > 0)) || ((chIdx+1 < s.Length) && (Char.IsDigit(s[chIdx+1]))))
                {
                    if (testCh == 'l')
                        testCh = '1';
                    else if (testCh == 'o')
                        testCh = '0';
                    // NOTE(review): this branch cannot be reached - the enclosing test only admits 'l' and 'o'
                    else if (testCh == 'i')
                        testCh = '1';
                }
            }
            if (Char.IsDigit(testCh))
            {
                numSepChars = 0;

                // Ignore if it's a zero and we're not allowed leading zeroes
                // if ((el != null) && (!el.allowLeadingZeroes) && (curStrPos == 0) && (ch == '0'))
                //     continue;
                if (!strIsDigits)
                    curStr = "";
                curStr += testCh;
                strIsDigits = true;
                if (curStr.Length < minChars)
                    continue;

                // Check max chars
                if (curStr.Length > maxChars)
                {
                    curStr = "";
                    continue;
                }

                // Check if the next char is also a digit - if not then we've found what we're looking for
                if (((chIdx + 1 >= s.Length) || (!Char.IsDigit(s[chIdx + 1]))) && (curStr != "0"))
                {
                    // Is this a day / month or year??
                    DateElemSrch.DateElType elType = DateElemSrch.DateElType.DE_NONE;
                    if (el != null)
                        elType = el.dateElType;
                    else
                    {
                        // Handle one and two digit numbers
                        if (curStr.Length <= 2)
                        {
                            // Already had a char based month?
                            if (bMonthFromChars)
                            {
                                if (!dateSrchInfo.bIsUsDate)
                                    elType = DateElemSrch.DateElType.DE_YEAR;
                                else
                                    elType = DateElemSrch.DateElType.DE_DAY;
                            }
                            else
                            {
                                // Position for standard month?
                                if ((dateSrchPos == 1) && (!dateSrchInfo.bIsUsDate))
                                    elType = DateElemSrch.DateElType.DE_MONTH;
                                // Position for US month?
                                else if ((dateSrchPos == 0) && (dateSrchInfo.bIsUsDate))
                                    elType = DateElemSrch.DateElType.DE_MONTH;
                                else if (dateSrchPos < 2)
                                    elType = DateElemSrch.DateElType.DE_DAY;
                                else if ((dateSrchPos > 0) && (curStr.Length == 2))
                                    elType = DateElemSrch.DateElType.DE_YEAR;
                            }
                        }
                        else if (curStr.Length == 4)
                        {
                            // Num digits == 4
                            if (dateSrchPos > 0)
                                elType = DateElemSrch.DateElType.DE_YEAR;
                        }
                    }

                    // Handle the value
                    if (elType == DateElemSrch.DateElType.DE_DAY)
                    {
                        Int32.TryParse(curStr, out day);
                        if ((day < 1) || (day > 31))
                            day = -1;
                    }
                    else if (elType == DateElemSrch.DateElType.DE_MONTH)
                    {
                        Int32.TryParse(curStr, out month);
                        if ((month < 1) || (month > 12))
                            month = -1;
                        bMonthFromChars = false;
                    }
                    else if (elType == DateElemSrch.DateElType.DE_YEAR)
                    {
                        Int32.TryParse(curStr, out year);
                        if (curStr.Length == 2)
                        {
                            if ((year < 0) || (year > 100))
                                year = -1;
                        }
                        else if (curStr.Length == 4)
                        {
                            if ((year < 1800) || (year > 2200))
                                year = -1;
                        }

                        // If no date formatting string is used then year must be the last item
                        if ((el == null) && (year != -1))
                            bResetNeeded = true;
                    }
                    else
                    {
                        // Number could not be classified as day, month or year
                        curStr = "";
                        continue;
                    }
                    // NOTE(review): chIdx - curStr.Length is one before the first digit of the number - confirm intended
                    if (firstMatchPos == -1)
                        firstMatchPos = chIdx - curStr.Length;
                    lastMatchPos = chIdx;
                    dateSrchPos++;
                    curStr = "";
                }
            }
        }

        // Check if letters (a month name) are acceptable at this position
        if ((el == null) || (!el.isDigits))
        {
            if (Char.IsLetter(ch))
            {
                if (strIsDigits)
                    curStr = "";
                strIsDigits = false;

                // Check we're still looking for a month value
                if (month != -1)
                {
                    numSepChars++;
                    continue;
                }

                // Form a sub-string to test for month names
                curStr += ch;

                // Check for range indicator
                if (numDatesFoundInString == 1)
                {
                    if (chIdx - curStr.Length - 1 > 0)
                    {
                        string testStr = s.Substring(chIdx - curStr.Length - 1);
                        // NOTE(review): both operands are the same test - one may originally have used a different whitespace character
                        if (testStr.Contains(" to") || testStr.Contains(" to"))
                            bRangeIndicatorFound = true;
                    }
                }

                // No point checking for month strings until 3 chars got
                if (curStr.Length < 3)
                    continue;

                // Check for a month name
                if (shortMonthStrings.Any(curStr.Contains))
                {
                    for (int monIdx = 0; monIdx < shortMonthStrings.Length; monIdx++)
                        if (curStr.Contains(shortMonthStrings[monIdx]))
                        {
                            month = monIdx + 1;
                            bMonthFromChars = true;
                            break;
                        }
                    if (firstMatchPos == -1)
                        firstMatchPos = chIdx - curStr.Length;
                    lastMatchPos = chIdx;
                    dateSrchPos++;
                    curStr = "";
                    numSepChars = 0;

                    // Move chIdx on to skip to next non letter
                    while ((chIdx < s.Length-1) && (Char.IsLetter(s[chIdx+1])))
                        chIdx++;

                    // Check for another valid month string in next few chars to detect ranges without a year
                    // e.g. should find ranges like 3 Jan - 4 Mar 2011 or 1st Jan to 31st May 2013
                    // but exlude ranges like 3 Jan 2012 - 4 Mar 2012 which would be seen as two separate dates
                    if (!dateSrchInfo.bNoDateRanges)
                    {
                        string strNextStr = "";
                        bool bStrRangeIndicatorFound = false;
                        int digitGroups = 0;
                        bool isInDigitGroup = false;
                        for (int chNext = chIdx+1; (chNext < s.Length) && (chNext < chIdx + 15); chNext++)
                        {
                            // Count the groups of digits
                            // (if we find two groups then break out as it's probably a range that contains separate years)
                            if (Char.IsDigit(s[chNext]))
                            {
                                if (!isInDigitGroup)
                                {
                                    isInDigitGroup = true;
                                    digitGroups++;
                                    if (digitGroups >= 2)
                                        break;
                                }
                            }
                            // Form a string from letters found
                            else if (Char.IsLetter(s[chNext]))
                            {
                                isInDigitGroup = false;
                                strNextStr += s[chNext];

                                // Check if the string contains "to"
                                if (strNextStr.Length >= 2)
                                    if (strNextStr.Contains("to"))
                                        bStrRangeIndicatorFound = true;

                                // Check if the string contains a short month name
                                if (bStrRangeIndicatorFound && (strNextStr.Length >= 3))
                                    if (shortMonthStrings.Any(strNextStr.Contains))
                                    {
                                        bResetNeeded = true;
                                        break;
                                    }
                            }
                            else
                            {
                                // Check punctuation - this assumes a - is a range seperator
                                isInDigitGroup = false;
                                if (s[chNext] == '-')
                                    bStrRangeIndicatorFound = true;
                                strNextStr = "";
                            }
                        }
                    }
                    else
                    {
                        bResetNeeded = true;
                    }
                }
            }
        }

        // Check for whitespace/punctuation/etc
        if (!Char.IsLetterOrDigit(ch))
        {
            // Too many separators after a partial date abandons that date
            if ((day != -1) || (month != -1) || (year != -1))
            {
                numSepChars++;
                if (numSepChars > MAX_SEP_CHARS_BETWEEN_DATE_ELEMS)
                {
                    bResetNeeded = true;
                    numSepChars = 0;
                }
            }
            curStr = "";
            switch (ch)
            {
                case ':':
                    {
                        if (!dateSrchInfo.bAllowColons)
                            bResetNeeded = true;
                        break;
                    }
                case ',':
                    {
                        commaCount++;
                        if ((!dateSrchInfo.bAllowTwoCommas) && (commaCount > 1))
                            bResetNeeded = true;
                        break;
                    }
                case '.':
                    {
                        if (!dateSrchInfo.bAllowDots)
                            bResetNeeded = true;
                        break;
                    }
                case '-':
                    {
                        if (numDatesFoundInString == 1)
                            bRangeIndicatorFound = true;
                        break;
                    }
            }
        }

        // Check for complete date (the day may be missing when the month came from a name)
        if ((year != -1) && (month != -1) && ((day != -1) || (bMonthFromChars)))
        {
            // Add result
            AddCompletedDateToList(s, textBounds, matchFactor, year, month, day, bMonthFromChars, bRangeIndicatorFound, firstMatchPos, lastMatchPos, dateSrchInfo, pageIdx+1, datesResult);
            numDatesFoundInString++;

            // Start again to see if another date can be found
            curStr = "";
            bResetNeeded = true;
        }

        // Restart the process of finding a date if required
        if (bResetNeeded)
        {
            dateSrchPos = 0;
            day = -1;
            month = -1;
            year = -1;
            bMonthFromChars = false;
            strIsDigits = false;
            firstMatchPos = -1;
            bResetNeeded = false;
            commaCount = 0;
            numSepChars = 0;
        }
    }
}
// Evaluates a boolean match expression (already tokenised into st) against the scanned pages.
//
// Token handling, as implemented below:
//   "(" ... ")"   - sub-expression, evaluated by a recursive call that returns when it reads ")"
//   "&" / "|"     - select whether the NEXT operand is ANDed or ORed into the running result
//   "!"           - inverts the next operand only (a term or a parenthesised group)
//   text          - terminal term passed to MatchString
//   ":" number    - optional match factor following a term; added to matchFactorTotal when the
//                   term (after any inversion) evaluates true
//   "{" a,b,c,d "}" - optional location following a term; values are stored positionally via
//                   DocRectangle.SetVal (presumably X, Y, Width, Height in percent - confirm)
//
// Operands are combined strictly left to right; there is no operator precedence.
//
// matchExpression  - the original expression text; only used by the optional perf log
// st               - tokeniser positioned at the start of the (sub-)expression
// scanPages        - document text handed through to MatchString
// matchFactorTotal - accumulates the match factors of the terms that evaluated true
// curExpressionIdx - incremented once per terminal term, whether or not it was evaluated
// matchingTextLocs - handed through to MatchString
// Returns the boolean value of the (sub-)expression.
private bool EvalMatch(string matchExpression, StringTok st, ScanPages scanPages, ref double matchFactorTotal, ref int curExpressionIdx, List<DocMatchingTextLoc> matchingTextLocs)
{
    bool result = false;
    string token = "";
    bool curOpIsOr = true;
    bool opIsInverse = false;
    DocRectangle docRectPercent = new DocRectangle(0, 0, 100, 100);
    int docRectValIdx = 0;
    double matchFactorForTerm = 0;

#if TEST_PERF_EVALMATCH
    Stopwatch stopWatch1 = new Stopwatch();
    stopWatch1.Start();
#endif

    while ((token = st.GetNextToken()) != null)
    {
        if (token.Trim() == "")
        {
            continue;
        }
        else if (token == ")")
        {
            // End of the sub-expression being evaluated by this (recursive) call
            return result;
        }
        else if (token == "(")
        {
            bool tmpRslt = EvalMatch(matchExpression, st, scanPages, ref matchFactorTotal, ref curExpressionIdx, matchingTextLocs);
            if (opIsInverse)
                tmpRslt = !tmpRslt;
            if (curOpIsOr)
                result |= tmpRslt;
            else
                result &= tmpRslt;

            // The inverse operator applies to this group only - clear it so that it
            // does not also invert the operand that follows the group
            opIsInverse = false;
        }
        else if (token == "&")
        {
            curOpIsOr = false;
        }
        else if (token == "|")
        {
            curOpIsOr = true;
        }
        else if (token == "!")
        {
            opIsInverse = true;
        }
        else
        {
            // We've reached a terminal token (string to match to text in the document)
            string stringToMatch = token;

            // Check for matchFactor - must have some text before it
            if (token == ":")
                return result;

            // See if there is a match factor defined by the next token
            while ((st.PeekNextToken() != null) && (st.PeekNextToken() == ""))
                st.GetNextToken();
            if ((st.PeekNextToken() != null) && (st.PeekNextToken() == ":"))
            {
                matchFactorForTerm = 0;
                st.GetNextToken();
                while ((st.PeekNextToken() != null) && (st.PeekNextToken() == ""))
                    st.GetNextToken();
                token = st.GetNextToken();
                if (token != null)
                    Double.TryParse(token, out matchFactorForTerm);
            }

            // Check for location on empty string
            if (token == "{")
                return result;

            // See if there is a location defined by the next token
            while ((st.PeekNextToken() != null) && (st.PeekNextToken() == ""))
                st.GetNextToken();
            if ((st.PeekNextToken() != null) && (st.PeekNextToken() == "{"))
            {
                while ((token = st.GetNextToken()) != null)
                {
                    if (token == "")
                        continue;
                    else if (token == "{")
                        docRectValIdx = 0;
                    else if (token == ",")
                        docRectValIdx++;
                    else if (token == "}")
                        break;
                    else
                    {
                        // An unparsable value is stored as 0
                        double rectVal = 0;
                        Double.TryParse(token, out rectVal);
                        docRectPercent.SetVal(docRectValIdx, rectVal);
                    }
                }
            }

            // Process the match string using the location rectangle.
            // Blank tokens were skipped at the top of the loop, so stringToMatch always has content here.
            // The check for curOpIsOr || result is to avoid unnecessary work if the expression is already false and we're doing an AND
            if (curOpIsOr || result)
            {
                bool tmpRslt = MatchString(stringToMatch, docRectPercent, scanPages, curExpressionIdx, matchingTextLocs);
                if (opIsInverse)
                    tmpRslt = !tmpRslt;
                if (curOpIsOr)
                    result |= tmpRslt;
                else
                    result &= tmpRslt;

                // Handle match factor
                if (tmpRslt)
                    matchFactorTotal += matchFactorForTerm;
            }

            // Clear the inverse operator after 1 use - this is done even when the term was
            // skipped by the short-circuit above, so that a pending "!" cannot carry over
            // and invert a later, unrelated term
            opIsInverse = false;

            // Set the docRect to the entire page (ready for next term)
            docRectPercent = new DocRectangle(0, 0, 100, 100);
            matchFactorForTerm = 0;
            curExpressionIdx++;
        }
    }

#if TEST_PERF_EVALMATCH
    stopWatch1.Stop();
    logger.Info("EvalMatch : {0:0.00} uS, expr {1}", stopWatch1.ElapsedTicks * 1000000.0 / Stopwatch.Frequency, matchExpression);
#endif

    return result;
}
// Creates a text element from a piece of text and its bounding rectangle.
// a_text   - value assigned to the text member
// a_bounds - value assigned to the bounds member
public ScanTextElem(string a_text, DocRectangle a_bounds)
{
    this.text = a_text;
    this.bounds = a_bounds;
}
// Converts a rectangle given by two corner vectors into a DocRectangle whose position and
// size are expressed as percentages of the page dimensions, then rotates it by pageRotation.
//
// topLeftCoord     - vector holding the top-left corner
// bottomRightCoord - vector holding the bottom-right corner
// pageRect         - page rectangle; its Width and Height are the 100% reference
// pageRotation     - rotation passed to DocRectangle.RotateAt together with the point (50, 50)
//                    (presumably degrees about the page centre - confirm against DocRectangle)
//
// The Y value is measured from the opposite edge of the page (pageRect.Height - top), and the
// height is computed as top minus bottom, i.e. the input Y axis runs the other way from the
// DocRectangle Y axis.
private DocRectangle ConvertToDocRect(iTextSharp.text.pdf.parser.Vector topLeftCoord, iTextSharp.text.pdf.parser.Vector bottomRightCoord, iTextSharp.text.Rectangle pageRect, int pageRotation)
{
    // Unit vectors used to pick out the X and Y components with a dot product
    iTextSharp.text.pdf.parser.Vector unitX = new iTextSharp.text.pdf.parser.Vector(1, 0, 0);
    iTextSharp.text.pdf.parser.Vector unitY = new iTextSharp.text.pdf.parser.Vector(0, 1, 0);

    double left = topLeftCoord.Dot(unitX);
    double top = topLeftCoord.Dot(unitY);
    double rectWidth = bottomRightCoord.Dot(unitX) - left;
    double rectHeight = top - bottomRightCoord.Dot(unitY);

    // Scale everything to percentages of the page size
    double percentX = left * 100 / pageRect.Width;
    double percentY = (pageRect.Height - top) * 100 / pageRect.Height;
    double percentWidth = rectWidth * 100 / pageRect.Width;
    double percentHeight = rectHeight * 100 / pageRect.Height;

    DocRectangle percentRect = new DocRectangle(percentX, percentY, percentWidth, percentHeight);
    percentRect.RotateAt(pageRotation, 50, 50);
    return percentRect;
}
// Converts a rectangle in overlay-canvas coordinates into a rectangle expressed as
// percentages of the master image's actual (rendered) width and height.
//
// canvasRect - rectangle in _uiOverlayCanvas coordinates
// Returns a DocRectangle (x, y, width, height) in percent of _masterImage's ActualWidth/ActualHeight.
private DocRectangle ConvertCanvasRectToDocPercent(DocRectangle canvasRect)
{
    // Map both corners from canvas space into the image's coordinate space
    Point canvasTopLeft = new Point(canvasRect.X, canvasRect.Y);
    Point imageTopLeft = _uiOverlayCanvas.TranslatePoint(canvasTopLeft, _masterImage);

    Point canvasBottomRight = new Point(canvasRect.BottomRightX, canvasRect.BottomRightY);
    Point imageBottomRight = _uiOverlayCanvas.TranslatePoint(canvasBottomRight, _masterImage);

    double imageWidth = _masterImage.ActualWidth;
    double imageHeight = _masterImage.ActualHeight;

    // Express each corner as a percentage of the rendered image size
    double leftPercent = 100 * imageTopLeft.X / imageWidth;
    double topPercent = 100 * imageTopLeft.Y / imageHeight;
    double rightPercent = 100 * imageBottomRight.X / imageWidth;
    double bottomPercent = 100 * imageBottomRight.Y / imageHeight;

    return new DocRectangle(leftPercent, topPercent, rightPercent - leftPercent, bottomPercent - topPercent);
}