/// <summary>
/// Checks the scanned pages against every enabled document type in the doc-types
/// collection and returns the best-scoring result.
/// </summary>
/// <param name="scanPages">Scanned document handed to the matcher and the date extractor.</param>
/// <param name="listOfPossibleMatches">
/// Optional. When non-null, every result with a non-zero certainty or a non-zero match
/// factor is appended to it, and the list is then re-ordered IN PLACE (best first) so the
/// caller sees the sorted order.
/// </param>
/// <returns>
/// The best result found. If no document type beat the initial (default-constructed)
/// result, that default result is returned. Whenever the returned result is not a 100%
/// match, its date fields are filled from a whole-document date extraction.
/// </returns>
public DocTypeMatchResult GetMatchingDocType(ScanPages scanPages, List<DocTypeMatchResult> listOfPossibleMatches = null)
{
    // Get list of types
    DocTypeMatchResult bestMatchResult = new DocTypeMatchResult();
    var collection_doctypes = GetDocTypesCollection();
    MongoCursor<DocType> foundSdf = collection_doctypes.Find(Query.EQ("isEnabled", true));
#if TEST_PERF_GETMATCHINGDOCTYPE
    Stopwatch stopWatch1 = new Stopwatch();
    Stopwatch stopWatch2 = new Stopwatch();
#endif
    foreach (DocType doctype in foundSdf)
    {
#if TEST_PERF_GETMATCHINGDOCTYPE
        stopWatch1.Start();
#endif
        // Check if document matches (first pass skips date extraction)
        DocTypeMatchResult matchResult = CheckIfDocMatches(scanPages, doctype, false, null);
#if TEST_PERF_GETMATCHINGDOCTYPE
        stopWatch1.Stop();
        stopWatch2.Start();
#endif
        // Find the best match: higher certainty wins, ties are broken by higher match factor
        bool bThisIsBestMatch = false;
        if (bestMatchResult.matchCertaintyPercent < matchResult.matchCertaintyPercent)
        {
            bThisIsBestMatch = true;
        }
        else if (bestMatchResult.matchCertaintyPercent == matchResult.matchCertaintyPercent)
        {
            if (bestMatchResult.matchFactor < matchResult.matchFactor)
            {
                bThisIsBestMatch = true;
            }
        }

        // Redo match to get date and time info
        if (bThisIsBestMatch)
        {
            matchResult = CheckIfDocMatches(scanPages, doctype, true, null);
            bestMatchResult = matchResult;
        }

        // Check if this should be returned in the list of best matches
        if (listOfPossibleMatches != null)
        {
            if ((matchResult.matchCertaintyPercent > 0) || (matchResult.matchFactor > 0))
            {
                listOfPossibleMatches.Add(matchResult);
            }
        }
#if TEST_PERF_GETMATCHINGDOCTYPE
        stopWatch2.Stop();
#endif
    }
#if TEST_PERF_GETMATCHINGDOCTYPE
    logger.Info("T1 : {0}ms, T2 : {1}ms", stopWatch1.ElapsedMilliseconds, stopWatch2.ElapsedMilliseconds);
#endif

    // If no exact match get date info from entire doc
    if (bestMatchResult.matchCertaintyPercent != 100)
    {
        int bestDateIdx = 0;
        List<ExtractedDate> extractedDates = DocTextAndDateExtractor.ExtractDatesFromDoc(scanPages, "", out bestDateIdx);
        bestMatchResult.datesFoundInDoc = extractedDates;
        if (extractedDates.Count > 0)
        {
            bestMatchResult.docDate = extractedDates[bestDateIdx].dateTime;
        }
    }

    // If list of best matches to be returned then sort that list now.
    // The parameter is passed by value, so assigning a new list to it would never reach
    // the caller; instead the sorted snapshot is written back into the caller's list.
    // Both keys are descending so the order agrees with the best-match rule used above.
    if (listOfPossibleMatches != null)
    {
        List<DocTypeMatchResult> rankedMatches = listOfPossibleMatches
            .OrderByDescending(o => o.matchCertaintyPercent)
            .ThenByDescending(o => o.matchFactor)
            .ToList();
        listOfPossibleMatches.Clear();
        listOfPossibleMatches.AddRange(rankedMatches);
    }
    return bestMatchResult;
}
/// <summary>
/// Evaluates a single document type against the scanned pages.
/// </summary>
/// <param name="scanPages">Scanned document passed to the expression matcher and date extractor.</param>
/// <param name="docType">Document type whose match expression (and date expression) is applied.</param>
/// <param name="extractDates">When true, dates are also extracted using the type's date expression.</param>
/// <param name="matchingTextLocs">Passed straight through to MatchAgainstDocText; may be null.</param>
/// <returns>
/// A result whose code is DISABLED or NO_EXPR when the type cannot be evaluated (these
/// return before the type name or match factor are set), FOUND_MATCH with 100% certainty
/// when the expression matches, and NOT_FOUND with 0% certainty otherwise.
/// </returns>
public DocTypeMatchResult CheckIfDocMatches(ScanPages scanPages, DocType docType, bool extractDates, List<DocMatchingTextLoc> matchingTextLocs)
{
    // Start from a "nothing found" outcome
    DocTypeMatchResult outcome = new DocTypeMatchResult
    {
        matchCertaintyPercent = 0,
        matchResultCode = DocTypeMatchResult.MatchResultCodes.NOT_FOUND
    };

    // Types that cannot be evaluated are reported without running the matcher
    if (!docType.isEnabled)
    {
        outcome.matchResultCode = DocTypeMatchResult.MatchResultCodes.DISABLED;
        return outcome;
    }
    if (docType.matchExpression == null)
    {
        outcome.matchResultCode = DocTypeMatchResult.MatchResultCodes.NO_EXPR;
        return outcome;
    }

    // Run the match expression; the factor is recorded whether or not it matched
    double accumulatedFactor = 0;
    bool expressionMatched = MatchAgainstDocText(docType.matchExpression, scanPages, ref accumulatedFactor, matchingTextLocs);
    if (expressionMatched)
    {
        outcome.matchCertaintyPercent = 100;
        outcome.matchResultCode = DocTypeMatchResult.MatchResultCodes.FOUND_MATCH;
    }
    outcome.docTypeName = docType.docTypeName;
    outcome.matchFactor = accumulatedFactor;

    // Date extraction is optional
    if (!extractDates)
    {
        return outcome;
    }

    int preferredDateIdx = 0;
    List<ExtractedDate> datesInDoc = DocTextAndDateExtractor.ExtractDatesFromDoc(scanPages, docType.dateExpression, out preferredDateIdx);
    outcome.datesFoundInDoc = datesInDoc;
    if (datesInDoc.Count > 0)
    {
        outcome.docDate = datesInDoc[preferredDateIdx].dateTime;
    }
    return outcome;
}
/// <summary>
/// Matches the currently displayed document against the document type described by the
/// form, remembers the result, and shows it. With no displayed document the remembered
/// result is cleared and nothing is shown.
/// </summary>
private void CheckDisplayedDocForMatchAndShowResult()
{
    if (_curDocDisplay_scanPages != null)
    {
        // Build the type from the form and force it enabled so the check is not short-circuited
        DocType formDocType = GetDocTypeFromForm(new DocType());
        formDocType.isEnabled = true;

        // Collect the locations of matching text so they can be shown
        List<DocMatchingTextLoc> hitLocations = new List<DocMatchingTextLoc>();
        DocTypeMatchResult checkOutcome = _docTypesMatcher.CheckIfDocMatches(_curDocDisplay_scanPages, formDocType, true, hitLocations);

        _curDocDisplay_lastMatchResult = checkOutcome;
        DisplayMatchResultForDoc(checkOutcome, hitLocations);
    }
    else
    {
        _curDocDisplay_lastMatchResult = null;
    }
}
/// <summary>
/// Shows a match result on the form: the document date, a MATCHES/FAILED banner with the
/// match factor, and a rectangle for each matching text location on the displayed page.
/// </summary>
/// <param name="matchRslt">Result whose date, certainty and match factor are displayed.</param>
/// <param name="matchingTextLocs">
/// Locations of matching text. Only entries on the currently displayed page are drawn;
/// a null list is treated as "nothing to draw".
/// </param>
private void DisplayMatchResultForDoc(DocTypeMatchResult matchRslt, List<DocMatchingTextLoc> matchingTextLocs)
{
    // One brush per expression index; indices beyond the list wrap around
    List<Brush> exprColrBrushes = new List<Brush>
    {
        Brushes.DarkMagenta, Brushes.Green, Brushes.Red, Brushes.Orange, Brushes.Purple, Brushes.Peru, Brushes.Purple
    };

    // Display the date (blank when no date was set)
    if (matchRslt.docDate != DateTime.MinValue)
    {
        txtDateResult.Text = matchRslt.docDate.ToLongDateString();
    }
    else
    {
        txtDateResult.Text = "";
    }

    // Display match status
    string matchFactorStr = String.Format("{0}", (int)matchRslt.matchFactor);
    if (matchRslt.matchCertaintyPercent == 100)
    {
        txtCheckResult.Text = "MATCHES (" + matchFactorStr + "%)";
        txtCheckResult.Foreground = Brushes.White;
        txtCheckResult.Background = Brushes.Green;
    }
    else
    {
        txtCheckResult.Text = "FAILED (" + matchFactorStr + "%)";
        txtCheckResult.Foreground = Brushes.White;
        txtCheckResult.Background = Brushes.Red;
    }

    // Display matching text locations
    locRectHandler.ClearTextMatchRect("txtMatch");
    if (matchingTextLocs == null)
    {
        return;
    }
    foreach (DocMatchingTextLoc txtLoc in matchingTextLocs)
    {
        // pageIdx is zero-based while the displayed page number is one-based
        if (txtLoc.pageIdx + 1 != _curDocDisplay_pageNum)
        {
            continue;
        }

        DocRectangle inRect = _curDocDisplay_scanPages.scanPagesText[txtLoc.pageIdx][txtLoc.elemIdx].bounds;
        Brush colrBrush = exprColrBrushes[txtLoc.exprIdx % exprColrBrushes.Count];

        // Work on a copy so the text element's own bounds are never modified
        DocRectangle computedLocation = new DocRectangle(inRect.X, inRect.Y, inRect.Width, inRect.Height);

        // Narrow the rectangle to the matched characters, in proportion to their position
        // and length within the element's text. Skipped when the text length is not
        // positive, in which case the whole element is outlined.
        // NOTE(review): the proportional split assumes text runs along X — confirm for rotated pages.
        if (txtLoc.foundInTxtLen > 0)
        {
            double wid = computedLocation.Width;
            double inx = computedLocation.X;
            computedLocation.X = inx + txtLoc.posInText * wid / txtLoc.foundInTxtLen;
            computedLocation.Width = txtLoc.matchLen * wid / txtLoc.foundInTxtLen;
        }

        // Draw the narrowed rectangle (previously it was computed and then ignored in
        // favour of the element's full bounds)
        locRectHandler.DrawTextMatchRect(computedLocation, colrBrush, "txtMatch");
    }
}
/// <summary>
/// Re-runs document type matching for the current document and pushes the outcome to the
/// UI: the list of possible matches (matcher results followed by the last 10 used document
/// types) and the matched type name. The work is done in stages; before each stage
/// _newCurDocProcessingCancel is read and, once it is seen set, all remaining stages are
/// skipped.
/// </summary>
private void HandleDocMatchingAndDisplay()
{
#if TEST_PERF_SHOWDOCFIRSTTIME
    Stopwatch stopWatch1 = new Stopwatch();
    Stopwatch stopWatch2 = new Stopwatch();
    Stopwatch stopWatch3 = new Stopwatch();
    Stopwatch stopWatch4 = new Stopwatch();
    Stopwatch stopWatch5 = new Stopwatch();
    Stopwatch stopWatch6 = new Stopwatch();
    stopWatch1.Start();
#endif

#if TEST_PERF_SHOWDOCFIRSTTIME
    stopWatch1.Stop();
    stopWatch2.Start();
#endif

    // Re-check the document (an empty result stands in when there is no current document)
    List<DocTypeMatchResult> matcherCandidates = new List<DocTypeMatchResult>();
    DocTypeMatchResult currentMatch = (_curDocScanPages != null)
        ? _docTypesMatcher.GetMatchingDocType(_curDocScanPages, matcherCandidates)
        : new DocTypeMatchResult();

    List<DocTypeMatchResult> popupEntries = null;

    // Stage 1: copy the matcher's candidates for the popup list
    bool keepGoing = !_newCurDocProcessingCancel;
    if (keepGoing)
    {
#if TEST_PERF_SHOWDOCFIRSTTIME
        stopWatch2.Stop();
        stopWatch3.Start();
#endif
        popupEntries = new List<DocTypeMatchResult>(matcherCandidates);
        keepGoing = !_newCurDocProcessingCancel;
    }

    // Stage 2: append the previously used doctypes
    if (keepGoing)
    {
#if TEST_PERF_SHOWDOCFIRSTTIME
        stopWatch3.Stop();
        stopWatch4.Start();
#endif
        foreach (string recentTypeName in _scanDocHandler.GetLastNDocTypesUsed(10))
        {
            popupEntries.Add(new DocTypeMatchResult { docTypeName = recentTypeName });
        }
        keepGoing = !_newCurDocProcessingCancel;
    }

    // Stage 3: hand the list to the dispatcher so the bound collection is refilled there
    if (keepGoing)
    {
#if TEST_PERF_SHOWDOCFIRSTTIME
        stopWatch4.Stop();
        stopWatch5.Start();
#endif
        this.Dispatcher.BeginInvoke((Action)delegate()
        {
            _listOfPossibleDocMatches.Clear();
            foreach (DocTypeMatchResult entry in popupEntries)
            {
                _listOfPossibleDocMatches.Add(entry);
            }
        });
        keepGoing = !_newCurDocProcessingCancel;
    }

    // Stage 4: show type and date
    if (keepGoing)
    {
#if TEST_PERF_SHOWDOCFIRSTTIME
        stopWatch5.Stop();
        stopWatch6.Start();
#endif
        ShowDocumentTypeAndDate(currentMatch.docTypeName);
    }

#if TEST_PERF_SHOWDOCFIRSTTIME
    stopWatch6.Stop();
    logger.Info("ShowDocFirstTime: {6} A {0:0.00}, B {1:0.00}, C {2:0.00}, D {3:0.00}, E {4:0.00}, F {5:0.00}",
        stopWatch1.ElapsedTicks * 1000.0 / Stopwatch.Frequency,
        stopWatch2.ElapsedTicks * 1000.0 / Stopwatch.Frequency,
        stopWatch3.ElapsedTicks * 1000.0 / Stopwatch.Frequency,
        stopWatch4.ElapsedTicks * 1000.0 / Stopwatch.Frequency,
        stopWatch5.ElapsedTicks * 1000.0 / Stopwatch.Frequency,
        stopWatch6.ElapsedTicks * 1000.0 / Stopwatch.Frequency,
        _newCurDocProcessingCancel ? "CANCELLED" : "");
#endif
}