Exemplo n.º 1
        /// <summary>
        /// Mouse-move handler for the document image: shows a tooltip with the scanned
        /// text element under the pointer, or hides the tooltip when there is none.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Mouse event data; supplies the pointer position and is marked handled.</param>
        private void imageDocToFile_MouseMove(object sender, MouseEventArgs e)
        {
            // Only show tooltips when there is no touch select going on
            if (_touchFromPageText != TouchFromPageText.TOUCH_NONE)
            {
                return;
            }

            // Check if window is in foreground - remove popup if it isn't.
            // Return here so the code below cannot immediately re-open the popup.
            if (!IsActive)
            {
                imageDocToFileToolTip.IsOpen = false;
                return;
            }

            // Express the pointer position as a zero-sized rectangle in document coordinates
            Point curMousePoint = e.GetPosition(imageDocToFile);
            Point docCoords = ConvertImagePointToDocPoint(imageDocToFile, curMousePoint.X, curMousePoint.Y);
            DocRectangle docRect = new DocRectangle(docCoords.X, docCoords.Y, 0, 0);

            // Show tool tip
            bool bToolTipSet = false;
            if ((_curDocScanDocInfo != null) && (_curDocScanPages != null))
            {
                if ((_curDocDisplay_pageNum > 0) && (_curDocDisplay_pageNum <= _curDocScanPages.scanPagesText.Count))
                {
                    if (!imageDocToFileToolTip.IsOpen)
                    {
                        imageDocToFileToolTip.IsOpen = true;
                    }
                    imageDocToFileToolTip.HorizontalOffset = curMousePoint.X - 50;
                    imageDocToFileToolTip.VerticalOffset = curMousePoint.Y;

                    // If several elements intersect the point, the last one in the list wins
                    List<ScanTextElem> scanTextElems = _curDocScanPages.scanPagesText[_curDocDisplay_pageNum - 1];
                    foreach (ScanTextElem el in scanTextElems)
                    {
                        if (el.bounds.Intersects(docRect))
                        {
                            imageDocToFileToolText.Text = el.text;
                            bToolTipSet = true;
                        }
                    }
                }
            }

            // Nothing under the pointer: clear and hide the tooltip
            if (!bToolTipSet)
            {
                imageDocToFileToolText.Text = "";
                imageDocToFileToolTip.IsOpen = false;
            }
            e.Handled = true;
        }
Exemplo n.º 2
 /// <summary>
 /// Formats a rectangle as the comma-separated string "X,Y,Width,Height", with each
 /// value rounded to a whole number by the "0" custom format.
 /// </summary>
 /// <param name="docRect">Rectangle whose X, Y, Width and Height are formatted.</param>
 /// <returns>The formatted location string.</returns>
 private string FormatLocationStr(DocRectangle docRect)
 {
     // The result is embedded in a match expression that is parsed again later, so format
     // with the invariant culture to keep the output independent of the user's locale.
     return String.Format(System.Globalization.CultureInfo.InvariantCulture,
         "{0:0},{1:0},{2:0},{3:0}", docRect.X, docRect.Y, docRect.Width, docRect.Height);
 }
 /// <summary>
 /// Converts a rectangle expressed in percent of the master image's actual size into
 /// a rectangle in the coordinate space of the overlay canvas.
 /// </summary>
 /// <param name="docPercentRect">Rectangle whose X/Y/Width/Height are percentages of the image size.</param>
 /// <returns>The equivalent rectangle relative to <c>_uiOverlayCanvas</c>.</returns>
 private DocRectangle ConvertDocPercentRectToCanvas(DocRectangle docPercentRect)
 {
     double imageWidth = _masterImage.ActualWidth;
     double imageHeight = _masterImage.ActualHeight;

     // Top-left corner and size in image pixels
     double left = imageWidth * docPercentRect.X / 100;
     double top = imageHeight * docPercentRect.Y / 100;
     double pixelWidth = imageWidth * docPercentRect.Width / 100;
     double pixelHeight = imageHeight * docPercentRect.Height / 100;

     // Translate both corners so the result accounts for any transform between image and canvas
     Point topLeftOnCanvas = _masterImage.TranslatePoint(new Point(left, top), _uiOverlayCanvas);
     Point bottomRightOnCanvas = _masterImage.TranslatePoint(new Point(left + pixelWidth, top + pixelHeight), _uiOverlayCanvas);

     return new DocRectangle(
         topLeftOnCanvas.X,
         topLeftOnCanvas.Y,
         bottomRightOnCanvas.X - topLeftOnCanvas.X,
         bottomRightOnCanvas.Y - topLeftOnCanvas.Y);
 }
Exemplo n.º 4
        /// <summary>
        /// Mouse-down handler for the document image. Depending on the pending touch mode in
        /// <c>_touchFromPageText</c>, it reads a date or a text element at the clicked point of
        /// the displayed page and writes it into the corresponding input control, then resets
        /// the mode to <c>TOUCH_NONE</c>.
        /// </summary>
        /// <param name="sender">Event source (not used in the visible code).</param>
        /// <param name="e">Mouse event data; marked handled and used for the click position.</param>
        // NOTE(review): this listing has no braces, and several guard bodies and `else` lines
        // appear to be missing - confirm the control flow against the original file.
        private void imageDocToFile_MouseDown(object sender, MouseButtonEventArgs e)
            e.Handled = true;
            // NOTE(review): no guarded statement is visible here (presumably an early return) - confirm.
            if (_curDocScanPages == null)

            // NOTE(review): no guarded statement is visible here either (presumably an early return
            // when no touch mode is pending) - confirm.
            if (_touchFromPageText == TouchFromPageText.TOUCH_NONE)

            // Check for touch point
            Point curMousePoint = e.GetPosition(imageDocToFile);
            Point docCoords = ConvertImagePointToDocPoint(imageDocToFile, curMousePoint.X, curMousePoint.Y);
            // Zero-sized rectangle at the clicked document coordinates
            DocRectangle docRect = new DocRectangle(docCoords.X, docCoords.Y, 0, 0);
            // Clamp the displayed page number into 1..page count
            int pgNum = _curDocDisplay_pageNum;
            if (_curDocDisplay_pageNum < 1)
                pgNum = 1;
            if (_curDocDisplay_pageNum >= _curDocScanPages.scanPagesText.Count)
                pgNum = _curDocScanPages.scanPagesText.Count;

            // Check for date
            if ((_touchFromPageText == TouchFromPageText.TOUCH_DATE) || (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_DATE))
                bool earliestDateReq = false;
                bool latestDateReq = false;
                List<ExtractedDate> extractedDates = new List<ExtractedDate>();
                // First pass with the final argument false; if nothing is found, retry with it true
                DocTextAndDateExtractor.SearchForDateItem(_curDocScanPages, "", docRect, 0, extractedDates, ref latestDateReq, ref earliestDateReq, pgNum, false);
                if (extractedDates.Count <= 0)
                    DocTextAndDateExtractor.SearchForDateItem(_curDocScanPages, "", docRect, 0, extractedDates, ref latestDateReq, ref earliestDateReq, pgNum, true);
                // Only the first extracted date is used
                if (extractedDates.Count > 0)
                    if (_touchFromPageText == TouchFromPageText.TOUCH_DATE)
                        SetDateRollers(extractedDates[0].dateTime.Year, extractedDates[0].dateTime.Month, extractedDates[0].dateTime.Day, dateRollerChange.none);
                        // NOTE(review): the date-picker assignment below is presumably the `else`
                        // branch (TOUCH_EVENT_DATE); the `else` line is not visible - confirm.
                        datePickerEventDate.SelectedDate = new DateTime(extractedDates[0].dateTime.Year, extractedDates[0].dateTime.Month, extractedDates[0].dateTime.Day);
            // Plain-text targets: append the extracted text to the matching text box,
            // separated by a space when the box already has non-blank content
            else if ((_touchFromPageText == TouchFromPageText.TOUCH_SUFFIX) || (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_NAME) || (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_DESC) || (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_LOCN))
                string extractedText = DocTextAndDateExtractor.ExtractTextFromPage(_curDocScanPages, docRect, pgNum);
                if (extractedText != "")
                    if (_touchFromPageText == TouchFromPageText.TOUCH_SUFFIX)
                        txtDestFileSuffix.Text = txtDestFileSuffix.Text + (txtDestFileSuffix.Text.Trim() == "" ? "" : " ") + extractedText;
                    else if (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_NAME)
                        txtEventName.Text = txtEventName.Text + (txtEventName.Text.Trim() == "" ? "" : " ") + extractedText;
                    else if (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_DESC)
                        txtEventDesc.Text = txtEventDesc.Text + (txtEventDesc.Text.Trim() == "" ? "" : " ") + extractedText;
                    else if (_touchFromPageText == TouchFromPageText.TOUCH_EVENT_LOCN)
                        txtEventLocn.Text = txtEventLocn.Text + (txtEventLocn.Text.Trim() == "" ? "" : " ") + extractedText;
            // Money target: look for a currency symbol and a number in a money format
            else if (_touchFromPageText == TouchFromPageText.TOUCH_MONEY)
                string extractedText = DocTextAndDateExtractor.ExtractTextFromPage(_curDocScanPages, docRect, pgNum);
                if (extractedText != "")
                    // Get currency symbol if available
                    // ($ is tried first, then the pound sign, then the euro sign; the symbol is one character long)
                    int currencyLen = 1;
                    int currencyPos = extractedText.IndexOf('$');
                    if (currencyPos < 0)
                        currencyPos = extractedText.IndexOf('£');
                    if (currencyPos < 0)
                        currencyPos = extractedText.IndexOf('€');

                    // Find number matching money format
                    // When a symbol is found and text follows it, only the text after the symbol
                    // up to the first space is tested against the pattern
                    string noCurrencyStr = extractedText;
                    if ((currencyPos >= 0) && (extractedText.Length > currencyPos + 1))
                        noCurrencyStr = extractedText.Substring(currencyPos + 1);
                        int whitespacePos = noCurrencyStr.IndexOf(' ');
                        if (whitespacePos > 0)
                            noCurrencyStr = noCurrencyStr.Substring(0, whitespacePos);
                    // Group 1: '.' as thousands separator with ',' decimals; group 2: ',' thousands with '.' decimals
                    Match match = Regex.Match(noCurrencyStr, @"((?:^\d{1,3}(?:\.?\d{3})*(?:,\d{2})?$))|((?:^\d{1,3}(?:,?\d{3})*(?:\.\d{2})?$)((\d+)?(\.\d{1,2})?))");
                    if ((match.Success) && (match.Groups.Count > 1))
                        // Found string may be ###,###.## or ###.###,##
                        string foundStr = match.Groups[1].Value;
                        if (foundStr.Trim() == "")
                            foundStr = match.Groups[2].Value;
                            // NOTE(review): as listed, both separators are stripped in the same branch;
                            // an `else` between the two Replace calls may be missing - confirm.
                            foundStr = foundStr.Replace(",", "");
                            foundStr = foundStr.Replace(".", "");

                        // Form string
                        // (currency symbol, if any, followed by the number; appended to the existing sum text)
                        string numberText = (currencyPos >= 0 ? extractedText.Substring(currencyPos, currencyLen) : "") + foundStr;
                        txtMoneySum.Text = txtMoneySum.Text + (txtMoneySum.Text.Trim() == "" ? "" : " ") + numberText;
                    // No number matched but a symbol was found: prefix the symbol to the existing sum text
                    else if (currencyPos >= 0)
                        txtMoneySum.Text = extractedText.Substring(currencyPos, currencyLen) + txtMoneySum.Text;

            // Cancel touch activity
            _touchFromPageText = TouchFromPageText.TOUCH_NONE;
Exemplo n.º 5
        /// <summary>
        /// Writes a changed or newly drawn location rectangle back into the text expression of
        /// the relevant rich text box, either replacing the existing location term with the
        /// given bracket index or inserting a new "{x,y,w,h}" term.
        /// </summary>
        /// <param name="nameOfMatchingTextBox">"Date" selects the date-locations box; "New" selects it
        /// only while it has keyboard focus; anything else selects the match-expression box.</param>
        /// <param name="docRectIdx">Bracket index of the location term to replace.</param>
        /// <param name="rectInDocPercent">The rectangle to format into the expression.</param>
        private void docRectChangesComplete(string nameOfMatchingTextBox, int docRectIdx, DocRectangle rectInDocPercent)
        {
            // Check which text box the rectangle is in
            RichTextBox rtb = txtMatchExpression;
            string cacheTermForRtb = "Match";
            if ((nameOfMatchingTextBox == "Date") || ((nameOfMatchingTextBox == "New") && txtDateLocations.IsKeyboardFocused))
            {
                rtb = txtDateLocations;
                cacheTermForRtb = "Date";
            }

            // Extract string
            string txtExpr = GetTextFromRichTextBox(rtb);

            // Parse using our grammar
            List<ExprParseTerm> exprParseTermList = GetParseResultForMatchText(txtExpr, cacheTermForRtb);

            // Get current caret position
            int curCaretPos = GetCaretPos(rtb);

            // Find where to change/insert the location
            bool bInserted = false;
            int bestNewRectPos = txtExpr.Length;
            string newTextExpr = txtExpr;
            foreach (ExprParseTerm parseTerm in exprParseTermList)
            {
                if (parseTerm.termType == ExprParseTerm.ExprParseTermType.exprTerm_Location)
                {
                    // Existing location with the requested index: replace it in place
                    if (parseTerm.locationBracketIdx == docRectIdx)
                    {
                        newTextExpr = txtExpr.Substring(0, parseTerm.stPos) + FormatLocationStr(rectInDocPercent) + txtExpr.Substring(parseTerm.stPos + parseTerm.termLen);
                        bInserted = true;
                    }
                }
                else if (parseTerm.termType == ExprParseTerm.ExprParseTermType.exprTerm_Text)
                {
                    // Remember the end of the last text term that starts at or before the caret
                    if (curCaretPos >= parseTerm.stPos)
                    {
                        bestNewRectPos = parseTerm.stPos + parseTerm.termLen;
                    }
                }
            }

            if (!bInserted)
            {
                newTextExpr = txtExpr.Substring(0, bestNewRectPos) + "{" + FormatLocationStr(rectInDocPercent) + "}";

                // If the remainder starts with an existing "{...}" term, drop it so the new one replaces it
                string endOfStr = txtExpr.Substring(bestNewRectPos);
                if (endOfStr.Trim().Length > 0)
                {
                    if (endOfStr.Trim().Substring(0, 1) == "{")
                    {
                        int closePos = endOfStr.IndexOf('}');
                        if (closePos > 0)
                        {
                            endOfStr = endOfStr.Substring(closePos + 1);
                        }
                    }
                }
                newTextExpr += endOfStr;
            }

            // All rectangles will get redrawn as text expression is changed and causes trigger to refresh
            SetTextInRichTextBox(rtb, newTextExpr);
        }
Exemplo n.º 6
 /// <summary>
 /// Looks for <paramref name="str"/> (trimmed, case-insensitive) in every scanned text
 /// element whose bounds intersect <paramref name="docRectPercent"/>.
 /// </summary>
 /// <param name="str">Text to search for; a null value yields no match.</param>
 /// <param name="docRectPercent">Region that a text element must intersect to be considered.</param>
 /// <param name="scanPages">Scanned pages to search; null yields no match.</param>
 /// <param name="exprIdx">Expression index recorded on each match location.</param>
 /// <param name="matchingTextLocs">Optional list that receives one entry per match. When null,
 /// the method returns as soon as the first match is found.</param>
 /// <returns>True when at least one match was found.</returns>
 private bool MatchString(string str, DocRectangle docRectPercent, ScanPages scanPages, int exprIdx, List<DocMatchingTextLoc> matchingTextLocs)
 {
     // Start the timer so the elapsed time logged below is meaningful
     Stopwatch stopWatch1 = Stopwatch.StartNew();
     bool result = false;
     if ((scanPages == null) || (str == null))
     {
         return result;
     }

     // Trim once instead of on every element
     string searchText = str.Trim();
     int elemCount = 0;
     for (int pageIdx = 0; pageIdx < scanPages.scanPagesText.Count; pageIdx++)
     {
         List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];
         for (int elemIdx = 0; elemIdx < scanPageText.Count; elemIdx++)
         {
             ScanTextElem textElem = scanPageText[elemIdx];
             elemCount++;

             // Check bounds
             if (!docRectPercent.Intersects(textElem.bounds))
             {
                 continue;
             }

             int mtchPos = textElem.text.IndexOf(searchText, StringComparison.OrdinalIgnoreCase);
             if (mtchPos < 0)
             {
                 continue;
             }

             result = true;
             if (matchingTextLocs == null)
             {
                 // If not compiling all text match locations then return immediately to save time
                 return true;
             }

             // Record where the match was found so the caller can highlight it
             DocMatchingTextLoc dtml = new DocMatchingTextLoc();
             dtml.pageIdx = pageIdx;
             dtml.elemIdx = elemIdx;
             dtml.exprIdx = exprIdx;
             dtml.posInText = mtchPos;
             dtml.matchLen = searchText.Length;
             dtml.foundInTxtLen = textElem.text.Length;
             matchingTextLocs.Add(dtml);
         }
     }

     // NOTE(review): the log label says "CheckForNewDocs" although this is MatchString; left as-is
     // in case log consumers depend on it.
     logger.Info("CheckForNewDocs : {0:0.00} uS, count {1}", stopWatch1.ElapsedTicks * 1000000.0 / Stopwatch.Frequency, elemCount);
     return result;
 }
        /// <summary>
        /// Searches scanned text elements inside <paramref name="dateDocRect"/> for dates using the
        /// class's date regular expressions and passes the matches to <c>CoerceMatchesToDates</c>,
        /// which receives <paramref name="datesResult"/>.
        /// </summary>
        /// <param name="scanPages">Scanned pages to search.</param>
        /// <param name="dateSearchTerm">Optional match text followed by "~" options. Recognised options
        /// are ~long, ~short, ~US, ~No0 and ~Spaces (case-insensitive). When neither ~long nor ~short
        /// is present, every format is tried.</param>
        /// <param name="dateDocRect">Region that a text element must intersect to be searched.</param>
        /// <param name="matchFactor">Factor passed on for elements that contain the match text; 0 is passed otherwise.</param>
        /// <param name="datesResult">List handed to <c>CoerceMatchesToDates</c> for the results.</param>
        /// <param name="limitToPageNumN">1-based page number to restrict the search to, or -1 for all pages.
        /// A page number outside the document results in no pages being searched.</param>
        public static void SearchForDateItem2(ScanPages scanPages, string dateSearchTerm, DocRectangle dateDocRect, double matchFactor, List<ExtractedDate> datesResult, int limitToPageNumN = -1)
        {
            // Work out the page range, clamped so an out-of-range page number cannot index outside the list
            int firstPageIdx = 0;
            int lastPageIdxPlusOne = scanPages.scanPagesText.Count;
            if (limitToPageNumN != -1)
            {
                firstPageIdx = Math.Max(limitToPageNumN - 1, 0);
                lastPageIdxPlusOne = Math.Min(limitToPageNumN, lastPageIdxPlusOne);
            }

            // See which date formats to try (depends only on the search term, so done once up front)
            bool bTryLong = dateSearchTerm.IndexOf("~long", StringComparison.OrdinalIgnoreCase) >= 0;
            bool bTryShort = dateSearchTerm.IndexOf("~short", StringComparison.OrdinalIgnoreCase) >= 0;
            bool bTryUS = dateSearchTerm.IndexOf("~US", StringComparison.OrdinalIgnoreCase) >= 0;
            bool bTryNoZeroes = dateSearchTerm.IndexOf("~No0", StringComparison.OrdinalIgnoreCase) >= 0;
            bool bTrySpaceSeparated = dateSearchTerm.IndexOf("~Spaces", StringComparison.OrdinalIgnoreCase) >= 0;
            if (!(bTryLong || bTryShort))
            {
                bTryLong = true;
                bTryShort = true;
                bTryUS = true;
                bTryNoZeroes = true;
                bTrySpaceSeparated = true;
            }

            // Get match text if any (everything before the first '~')
            string matchText = dateSearchTerm;
            int squigPos = dateSearchTerm.IndexOf('~');
            if (squigPos >= 0)
            {
                matchText = dateSearchTerm.Substring(0, squigPos);
            }

            for (int pageIdx = firstPageIdx; pageIdx < lastPageIdxPlusOne; pageIdx++)
            {
                List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];
                foreach (ScanTextElem textElem in scanPageText)
                {
                    // Check if there are at least two digits together in the text (any date format requires this at least)
                    if (!Regex.IsMatch(textElem.text, @"\d\d"))
                    {
                        continue;
                    }

                    // Check bounds
                    if (!dateDocRect.Intersects(textElem.bounds))
                    {
                        continue;
                    }

                    // Elements containing the match text carry the caller's match factor
                    double matchResultFactor = 0;
                    if (textElem.text.IndexOf(matchText, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        matchResultFactor = matchFactor;
                    }

                    // Try to find dates
                    if (bTryLong)
                    {
                        MatchCollection ldMatches = Regex.Matches(textElem.text, longDateRegex, RegexOptions.IgnoreCase);
                        CoerceMatchesToDates(datesResult, matchResultFactor, textElem, ldMatches, ExtractedDate.DateMatchType.LongDate, 13, 11, 1);
                        if (bTryUS)
                        {
                            MatchCollection usldMatches = Regex.Matches(textElem.text, USlongDateRegex, RegexOptions.IgnoreCase);
                            CoerceMatchesToDates(datesResult, matchResultFactor, textElem, usldMatches, ExtractedDate.DateMatchType.USLongDate, 14, 1, 4);
                        }
                    }

                    if (bTryShort)
                    {
                        MatchCollection sdlzMatches = Regex.Matches(textElem.text, shortDateLeadingZeroesRegex, RegexOptions.IgnoreCase);
                        CoerceMatchesToDates(datesResult, matchResultFactor, textElem, sdlzMatches, ExtractedDate.DateMatchType.ShortDateLeadingZeroes, 3, 2, 1);
                        if (bTryNoZeroes)
                        {
                            MatchCollection sdnlzMatches = Regex.Matches(textElem.text, shortDateNoLeadingZeroesRegex, RegexOptions.IgnoreCase);
                            CoerceMatchesToDates(datesResult, matchResultFactor, textElem, sdnlzMatches, ExtractedDate.DateMatchType.ShortDateNoLeadingZeroes, 3, 2, 1);
                        }
                        if (bTrySpaceSeparated)
                        {
                            // Space-separated dates are reported with the ShortDateNoLeadingZeroes match type
                            MatchCollection sdspMatches = Regex.Matches(textElem.text, shortDateSpacesRegex, RegexOptions.IgnoreCase);
                            CoerceMatchesToDates(datesResult, matchResultFactor, textElem, sdspMatches, ExtractedDate.DateMatchType.ShortDateNoLeadingZeroes, 3, 2, 1);
                        }
                    }
                }
            }
        }
Exemplo n.º 8
        /// <summary>
        /// Mouse-enter handler for the date result box: lists every date found in the last match
        /// result in a popup and highlights those located on the currently displayed page.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Mouse event data; marked handled.</param>
        private void txtDateResult_MouseEnter(object sender, MouseEventArgs e)
        {
            if (_curDocDisplay_lastMatchResult != null)
            {
                // One line per date: long date, match factor and page number
                string popupText = "";
                foreach (ExtractedDate found in _curDocDisplay_lastMatchResult.datesFoundInDoc)
                {
                    string separator = (popupText.Length != 0) ? "\n" : "";
                    popupText = popupText + separator + found.dateTime.ToLongDateString() + " (" + found.matchFactor.ToString() + "%) Page " + found.pageNum;

                    // Only dates on the displayed page are highlighted
                    if (found.pageNum != _curDocDisplay_pageNum)
                    {
                        continue;
                    }

                    // Narrow the element's rectangle to the share of its width taken by the matched characters
                    DocRectangle elementRect = found.locationOfDateOnPagePercent;
                    DocRectangle highlight = new DocRectangle(elementRect.X, elementRect.Y, elementRect.Width, elementRect.Height);
                    double fullWidth = highlight.Width;
                    double leftEdge = highlight.X;
                    highlight.X = leftEdge + found.posnInText * fullWidth / found.foundInText.Length;
                    highlight.Width = found.matchLength * fullWidth / found.foundInText.Length;
                    locRectHandler.DrawTextMatchRect(highlight, Brushes.Firebrick, "dateMatch");
                }

                popupDateResultText.Text = popupText;
                if (!popupDateResult.IsOpen)
                {
                    popupDateResult.IsOpen = true;
                }
            }
            e.Handled = true;
        }
        /// <summary>
        /// Returns the text of the first element on the given page whose bounds intersect
        /// <paramref name="docRect"/>.
        /// </summary>
        /// <param name="scanPages">Scanned pages; null yields an empty string.</param>
        /// <param name="docRect">Rectangle to test each element's bounds against.</param>
        /// <param name="pageNum">1-based page number; out-of-range values yield an empty string.</param>
        /// <returns>The first intersecting element's text, or "" when there is none.</returns>
        public static string ExtractTextFromPage(ScanPages scanPages, DocRectangle docRect, int pageNum)
        {
            if (scanPages == null)
            {
                return "";
            }

            int pageIdx = pageNum - 1;
            if ((pageIdx < 0) || (pageIdx >= scanPages.scanPagesText.Count))
            {
                return "";
            }

            // Get page to search
            List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];

            // Iterate text elements, skipping those outside the rectangle
            foreach (ScanTextElem textElem in scanPageText)
            {
                if (docRect.Intersects(textElem.bounds))
                {
                    // Return first match
                    return textElem.text;
                }
            }
            return "";
        }
Exemplo n.º 10
        /// <summary>
        /// Searches the text elements of the scanned pages that intersect
        /// <paramref name="dateDocRect"/> for dates by calling <c>SearchWithinString</c>,
        /// which receives <paramref name="datesResult"/>.
        /// </summary>
        /// <param name="scanPages">Scanned pages to search.</param>
        /// <param name="dateSearchTerm">Search term; parsed by <c>GetDateSearchInfo</c>.</param>
        /// <param name="dateDocRect">Region that a text element must intersect.</param>
        /// <param name="matchFactor">Base match factor; a per-page bump is added for the first two pages.</param>
        /// <param name="datesResult">List passed through to <c>SearchWithinString</c>.</param>
        /// <param name="latestDateRequested">Set to true when the search term asks for the latest date; never reset here.</param>
        /// <param name="earliestDateRequested">Set to true when the search term asks for the earliest date; never reset here.</param>
        /// <param name="limitToPageNumN">1-based page number to restrict the search to, or -1 for all pages.</param>
        /// <param name="ignoreWhitespace">Passed through unchanged to <c>SearchWithinString</c>.</param>
        // NOTE(review): this listing has no braces and appears to be missing several short
        // statements (see the notes below) - confirm against the original file.
        public static void SearchForDateItem(ScanPages scanPages, string dateSearchTerm, DocRectangle dateDocRect, double matchFactor, List<ExtractedDate> datesResult,
                                    ref bool latestDateRequested, ref bool earliestDateRequested, int limitToPageNumN = -1, bool ignoreWhitespace = false)
            // Get date search info
            DateSrchInfo dateSrchInfo = GetDateSearchInfo(dateSearchTerm);
            if (dateSrchInfo.bEarliestDate)
                earliestDateRequested = true;
            if (dateSrchInfo.bLatestDate)
                latestDateRequested = true;

            // Find first and last pages to search
            // (no range check is applied to limitToPageNumN here)
            int firstPageIdx = 0;
            int lastPageIdxPlusOne = scanPages.scanPagesText.Count;
            if (limitToPageNumN != -1)
                firstPageIdx = limitToPageNumN - 1;
                lastPageIdxPlusOne = limitToPageNumN;

            // Iterate pages
            for (int pageIdx = firstPageIdx; pageIdx < lastPageIdxPlusOne; pageIdx++)
                List<ScanTextElem> scanPageText = scanPages.scanPagesText[pageIdx];
                string joinedText = "";     // This maybe used if ~join macrocommand used
                int joinCount = 0;

                // Page 1 and page 2 each get their own bump on top of the base factor
                double matchFactorForThisPage = matchFactor + (pageIdx == 0 ? MATCH_FACTOR_BUMP_FOR_PAGE1 : (pageIdx == 1 ? MATCH_FACTOR_BUMP_FOR_PAGE2 : 0));

                // Iterate text elements
                foreach (ScanTextElem textElem in scanPageText)
                    // Check that the text contains at least two digits together to avoid wasting time looking for dates where there can be none
                    // NOTE(review): no guarded statement is visible (presumably a skip to the next element) - confirm.
                    if (!Regex.IsMatch(textElem.text, @"\d\d"))

                    // Check rectangle bounds
                    // NOTE(review): no guarded statement is visible here either - confirm.
                    if (!dateDocRect.Intersects(textElem.bounds))

                    // Check for join
                    // NOTE(review): joinCount is never incremented in this listing, and nothing visibly
                    // skips the per-element search below when joining - confirm.
                    if (dateSrchInfo.bJoinTextInRect)
                        if (joinCount < MAX_TEXT_ELEMS_TO_JOIN)
                            joinedText += textElem.text + " ";

                    // Search within the found text
                    SearchWithinString(textElem.text, textElem.bounds, dateSearchTerm, dateSrchInfo, matchFactorForThisPage, pageIdx, datesResult, ignoreWhitespace);


                // If joined then search just once
                // (the joined text is searched with the whole search rectangle as its bounds)
                if (dateSrchInfo.bJoinTextInRect)
                    SearchWithinString(joinedText, dateDocRect, dateSearchTerm, dateSrchInfo, matchFactorForThisPage, pageIdx, datesResult, ignoreWhitespace);

            // TEST TEST TEST
            // Compares the results above with SearchForDateItem2 and writes differences to the console.
            // NOTE(review): stp and stp2 are not declared anywhere in this listing, and the extra indent
            // suggests an enclosing block or conditional that is not visible - confirm.
                List<ExtractedDate> testDatesResult = new List<ExtractedDate>();
                SearchForDateItem2(scanPages, dateSearchTerm, dateDocRect, matchFactor, testDatesResult, limitToPageNumN);

                Console.WriteLine("File: " + scanPages.uniqName + " OldTime = " + stp2.ElapsedMilliseconds.ToString() + " NewTime = " + stp.ElapsedMilliseconds.ToString());

                // Report dates found by this method that SearchForDateItem2 did not find
                foreach (ExtractedDate newD in datesResult)
                    bool bFound = false;
                    foreach (ExtractedDate oldD in testDatesResult)
                        if (oldD.dateTime == newD.dateTime)
                            bFound = true;
                    if (!bFound)
                        Console.WriteLine("Date Mismatch New=" + newD.dateTime.ToLongDateString());
                // Report dates found by SearchForDateItem2 that this method did not find
                foreach (ExtractedDate oldD in testDatesResult)
                    bool bFound = false;
                    foreach (ExtractedDate newD in datesResult)
                        if (oldD.dateTime == newD.dateTime)
                            bFound = true;
                    if (!bFound)
                        Console.WriteLine("Date Mismatch Old=" + oldD.dateTime.ToLongDateString());
Exemplo n.º 11
 /// <summary>
 /// Tests whether this rectangle and <paramref name="rect"/> overlap. Rectangles that only
 /// touch along an edge count as intersecting.
 /// </summary>
 /// <param name="rect">The rectangle to test against.</param>
 /// <returns>False when one rectangle lies entirely to one side of the other; true otherwise.</returns>
 public bool Intersects(DocRectangle rect)
 {
     // The rectangles are disjoint exactly when one of these separations holds
     bool isSeparated = (X > rect.BottomRightX)
         || (BottomRightX < rect.X)
         || (Y > rect.BottomRightY)
         || (BottomRightY < rect.Y);
     return !isSeparated;
 }
Exemplo n.º 12
 /// <summary>
 /// Tests whether <paramref name="rect"/> lies entirely inside this rectangle
 /// (shared edges count as inside).
 /// </summary>
 /// <param name="rect">The rectangle that may be contained.</param>
 /// <returns>True when all four edges of <paramref name="rect"/> are within this rectangle.</returns>
 public bool Contains(DocRectangle rect)
 {
     // Left edge
     if (!(this.X <= rect.X))
     {
         return false;
     }
     // Right edge
     if (!((rect.X + rect.Width) <= (this.X + this.Width)))
     {
         return false;
     }
     // Top edge
     if (!(this.Y <= rect.Y))
     {
         return false;
     }
     // Bottom edge
     if (!((rect.Y + rect.Height) <= (this.Y + this.Height)))
     {
         return false;
     }
     return true;
 }
 /// <summary>
 /// Draws a half-transparent filled rectangle over the overlay canvas to highlight a
 /// region of the document.
 /// </summary>
 /// <param name="docRect">Region to highlight; converted with <c>ConvertDocPercentRectToCanvas</c>.</param>
 /// <param name="colr">Fill brush for the highlight.</param>
 /// <param name="txtMatchStr">Value stored in the rectangle's <c>Tag</c>.</param>
 public void DrawTextMatchRect(DocRectangle docRect, Brush colr, string txtMatchStr)
 {
     DocRectangle canvasRect = ConvertDocPercentRectToCanvas(docRect);

     Rectangle rect = new Rectangle();
     rect.Opacity = 0.5;
     rect.Fill = colr;
     rect.Width = canvasRect.Width;
     rect.Height = canvasRect.Height;
     rect.Tag = txtMatchStr;
     // The highlight must not intercept mouse input meant for the elements underneath
     rect.IsHitTestVisible = false;
     rect.SetValue(Canvas.LeftProperty, canvasRect.X);
     rect.SetValue(Canvas.TopProperty, canvasRect.Y);

     // Without this the rectangle is built and positioned but never shown.
     // NOTE(review): assumes _uiOverlayCanvas is the Canvas these coordinates refer to - confirm.
     _uiOverlayCanvas.Children.Add(rect);
 }
Exemplo n.º 14
 /// <summary>
 /// Returns the text of the first element on the currently displayed page whose bounds
 /// intersect <paramref name="ptInDocPercent"/>.
 /// </summary>
 /// <param name="ptInDocPercent">Rectangle to test each element's bounds against.</param>
 /// <returns>The first intersecting element's text, or "" when none intersects.</returns>
 private string GetTextFromPageIntersectWithRect(DocRectangle ptInDocPercent)
 {
     // The displayed page number is 1-based; no range check is done here
     string textUnderRect = "";
     foreach (ScanTextElem candidate in _curDocDisplay_scanPages.scanPagesText[_curDocDisplay_pageNum - 1])
     {
         if (candidate.bounds.Intersects(ptInDocPercent))
         {
             textUnderRect = candidate.text;
             break;
         }
     }
     return textUnderRect;
 }
Exemplo n.º 15
        /// <summary>
        /// Builds an <c>ExtractedDate</c> from parsed year/month/day components, adjusting the
        /// match factor according to how the date was written and where it sits on the page.
        /// </summary>
        /// <param name="srcStr">Text the date was found in; stored as <c>foundInText</c>.</param>
        /// <param name="textBounds">Bounds of that text; stored as the date's location.</param>
        /// <param name="matchFactor">Starting match factor before the bumps below.</param>
        /// <param name="year">Parsed year; values below 100 are expanded to four digits.</param>
        /// <param name="month">Parsed month.</param>
        /// <param name="day">Parsed day, or -1 when the day was missing.</param>
        /// <param name="bMonthFromChars">True adds the text-month bump.</param>
        /// <param name="bRangeIndicatorFound">True adds 10 to the match factor.</param>
        /// <param name="firstMatchPos">Position of the first matched character; stored as <c>posnInText</c>.</param>
        /// <param name="lastMatchPos">Position of the last matched character; used for <c>matchLength</c>.</param>
        /// <param name="dateSrchInfo">Search options; only <c>bPlusOneMonth</c> is read here.</param>
        /// <param name="pageNum">Stored as the date's <c>pageNum</c>.</param>
        /// <param name="datesResult">Not referenced in the visible lines (see the note at the end).</param>
        // NOTE(review): this listing has no braces and appears to be missing `else`, try/catch
        // and list-add lines - confirm against the original file.
        private static void AddCompletedDateToList(string srcStr, DocRectangle textBounds, double matchFactor, int year, int month, int day, bool bMonthFromChars, 
                                bool bRangeIndicatorFound, int firstMatchPos, int lastMatchPos, DateSrchInfo dateSrchInfo, int pageNum, List<ExtractedDate> datesResult)
            double finalMatchFactor = matchFactor;
            ExtractedDate fd = new ExtractedDate();
            if (bRangeIndicatorFound)
                finalMatchFactor += 10;

            // Bump the match factor for dates in the top 40% of page - letterhead dates
            if (textBounds.Y < 40)
                finalMatchFactor += MATCH_FACTOR_BUMP_FOR_TOP_40_PC_OF_PAGE;

            // Year
            // Two-digit years below 80 become 20xx; 80..99 become 19xx
            if (year < 80)
                year += 2000;
                fd.yearWas2Digit = true;
            else if (year < 100)
                year += 1900;
                fd.yearWas2Digit = true;
                // NOTE(review): the 4-digit-year bump below presumably belongs to a final `else`
                // that is not visible in this listing - confirm.
                finalMatchFactor += MATCH_FACTOR_BUMP_FOR_4_DIGIT_YEAR;

            // Month
            if (bMonthFromChars)
                finalMatchFactor += MATCH_FACTOR_BUMP_FOR_TEXT_MONTH;

            // Check for bump
            // NOTE(review): December wraps to month 1 but no year increment is visible - confirm.
            if (dateSrchInfo.bPlusOneMonth)
                month += 1;
                if (month > 12)
                    month = 1;

            // Day
            // A missing day (-1) defaults to 1; the day is then clamped to 1..days in month
            if (day == -1)
                day = 1;
                fd.dayWasMissing = true;
                finalMatchFactor += MATCH_FACTOR_BUMP_FOR_DAY_MISSING;
            if (day > DateTime.DaysInMonth(year, month))
                day = DateTime.DaysInMonth(year, month);
            if (day < 1)
                day = 1;

            // Create datetime
            // NOTE(review): the extra indent on the constructor call suggests an enclosing block
            // (possibly try/catch) that is not visible - confirm.
            DateTime dt = DateTime.MinValue;
                dt = new DateTime(year, month, day);


            // Add date to list
            // NOTE(review): fd is filled in below but no statement adding it to datesResult is visible - confirm.
            fd.foundInText = srcStr;
            fd.pageNum = pageNum;
            fd.posnInText = firstMatchPos;
            fd.matchLength = lastMatchPos-firstMatchPos+1;
            fd.dateTime = dt;
            // The match type is always recorded as LongDate here
            fd.dateMatchType = ExtractedDate.DateMatchType.LongDate;
            fd.locationOfDateOnPagePercent = textBounds;
            fd.matchFactor = finalMatchFactor;
Exemplo n.º 16
 /// <summary>
 /// Mouse-move callback for the example file image: shows a tooltip with the scanned text
 /// under the pointer, or hides the tooltip when there is none.
 /// </summary>
 /// <param name="ptOnImage">Pointer position used to place the tooltip.</param>
 /// <param name="ptInDocPercent">Pointer position as a document rectangle, used for the text lookup.</param>
 private void tooltipCallback_MouseMove(Point ptOnImage, DocRectangle ptInDocPercent)
 {
     // Close tooltip if window isn't active, and stop so the code below cannot re-open it
     if (!IsActive)
     {
         exampleFileImageToolTip.IsOpen = false;
         return;
     }

     bool bToolTipSet = false;
     if (_curDocDisplay_scanPages != null)
     {
         if ((_curDocDisplay_pageNum > 0) && (_curDocDisplay_pageNum <= _curDocDisplay_scanPages.scanPagesText.Count))
         {
             if (!exampleFileImageToolTip.IsOpen)
             {
                 exampleFileImageToolTip.IsOpen = true;
             }
             exampleFileImageToolTip.HorizontalOffset = ptOnImage.X - 100;
             exampleFileImageToolTip.VerticalOffset = ptOnImage.Y;

             string pgText = GetTextFromPageIntersectWithRect(ptInDocPercent);
             if (pgText != "")
             {
                 exampleFileImageToolText.Text = pgText;
                 bToolTipSet = true;
             }
         }
     }

     // Nothing under the pointer: clear and hide the tooltip
     if (!bToolTipSet)
     {
         exampleFileImageToolText.Text = "";
         exampleFileImageToolTip.IsOpen = false;
     }
 }
Exemplo n.º 17
        /// <summary>
        /// Runs every date search described by <paramref name="dateExpr"/> against the scanned
        /// pages, optionally bumps the earliest and latest dates, and reports which result has
        /// the highest match factor.
        /// </summary>
        /// <param name="scanPages">Scanned pages to search; null yields an empty list.</param>
        /// <param name="dateExpr">Date expression made of text, location and match-factor terms.</param>
        /// <param name="bestDateIdx">Index of the result with the highest match factor (0 when
        /// there are no results or none has a factor above 0).</param>
        /// <returns>All dates found, in the order the searches produced them.</returns>
        public static List<ExtractedDate> ExtractDatesFromDoc(ScanPages scanPages, string dateExpr, out int bestDateIdx)
        {
            bestDateIdx = 0;
            List<ExtractedDate> foundDates = new List<ExtractedDate>();
            if (scanPages == null)
            {
                return foundDates;
            }

            // Walk the parsed expression. A text term is held back until the next term shows
            // whether it comes with a location rectangle or must be searched on the whole page.
            bool wantLatest = false;
            bool wantEarliest = false;
            bool searchedAtLeastOnce = false;
            string pendingTerm = "";
            double pendingFactor = 0;
            List<ExprParseTerm> terms = DocTypesMatcher.ParseDocMatchExpression(dateExpr, 0);
            foreach (ExprParseTerm term in terms)
            {
                switch (term.termType)
                {
                    case ExprParseTerm.ExprParseTermType.exprTerm_Text:
                    {
                        // A previous text term had no location: search it over the full page
                        if (pendingTerm != "")
                        {
                            SearchForDateItem(scanPages, pendingTerm, new DocRectangle(0, 0, 100, 100), pendingFactor, foundDates, ref wantLatest, ref wantEarliest);
                            searchedAtLeastOnce = true;
                        }
                        pendingTerm = dateExpr.Substring(term.stPos, term.termLen);
                        // Reset matchFactor for next search term
                        pendingFactor = 0;
                        break;
                    }
                    case ExprParseTerm.ExprParseTermType.exprTerm_Location:
                    {
                        DocRectangle searchRect = new DocRectangle(dateExpr.Substring(term.stPos, term.termLen));
                        SearchForDateItem(scanPages, pendingTerm, searchRect, pendingFactor, foundDates, ref wantLatest, ref wantEarliest);
                        pendingTerm = "";
                        pendingFactor = 0;
                        searchedAtLeastOnce = true;
                        break;
                    }
                    case ExprParseTerm.ExprParseTermType.exprTerm_MatchFactor:
                    {
                        // The first character of the term is skipped; the rest is parsed as the factor
                        if (dateExpr.Length > term.stPos + 1)
                        {
                            Double.TryParse(dateExpr.Substring(term.stPos + 1, term.termLen - 1), out pendingFactor);
                        }
                        break;
                    }
                }
            }

            // There may be one last expression still to find - but be sure that at least one is searched for
            if ((pendingTerm != "") || !searchedAtLeastOnce)
            {
                SearchForDateItem(scanPages, pendingTerm, new DocRectangle(0, 0, 100, 100), pendingFactor, foundDates, ref wantLatest, ref wantEarliest);
            }

            // Locate the earliest and latest dates (the first occurrence wins on ties)
            int earliestIdx = -1;
            int latestIdx = -1;
            DateTime earliestDate = DateTime.MaxValue;
            DateTime latestDate = DateTime.MinValue;
            for (int i = 0; i < foundDates.Count; i++)
            {
                DateTime candidate = foundDates[i].dateTime;
                if (earliestDate > candidate)
                {
                    earliestDate = candidate;
                    earliestIdx = i;
                }
                if (latestDate < candidate)
                {
                    latestDate = candidate;
                    latestIdx = i;
                }
            }

            // Bump their factors only when the search terms asked for it
            if (wantEarliest && (earliestIdx != -1))
            {
                foundDates[earliestIdx].matchFactor += MATCH_FACTOR_BUMP_FOR_EARLIEST_DATE;
            }
            if (wantLatest && (latestIdx != -1))
            {
                foundDates[latestIdx].matchFactor += MATCH_FACTOR_BUMP_FOR_LATEST_DATE;
            }

            // Find the best date index based on highest match factor
            double bestFactor = 0;
            for (int i = 0; i < foundDates.Count; i++)
            {
                if (bestFactor < foundDates[i].matchFactor)
                {
                    bestDateIdx = i;
                    bestFactor = foundDates[i].matchFactor;
                }
            }

            return foundDates;
        }
Exemplo n.º 18
        /// <summary>
        /// Shows the outcome of a document-type match: the detected date, a MATCHES/FAILED
        /// banner, and a highlight for each matching text location on the displayed page.
        /// </summary>
        /// <param name="matchRslt">Match result supplying the date, match factor and certainty.</param>
        /// <param name="matchingTextLocs">Locations of matched text; may be null, in which case nothing is highlighted.</param>
        private void DisplayMatchResultForDoc(DocTypeMatchResult matchRslt, List<DocMatchingTextLoc> matchingTextLocs)
        {
            // Highlight colours, cycled by expression index
            List<Brush> exprColrBrushes = new List<Brush>
            {
                Brushes.DarkMagenta, Brushes.Green, Brushes.Red, Brushes.Orange, Brushes.Purple, Brushes.Peru, Brushes.Purple
            };

            // Display the document date, or nothing when no date was found
            if (matchRslt.docDate != DateTime.MinValue)
            {
                txtDateResult.Text = matchRslt.docDate.ToLongDateString();
            }
            else
            {
                txtDateResult.Text = "";
            }

            // Display match status
            string matchFactorStr = String.Format("{0}", (int)matchRslt.matchFactor);
            if (matchRslt.matchCertaintyPercent == 100)
            {
                txtCheckResult.Text = "MATCHES (" + matchFactorStr + "%)";
                txtCheckResult.Foreground = Brushes.White;
                txtCheckResult.Background = Brushes.Green;
            }
            else
            {
                txtCheckResult.Text = "FAILED (" + matchFactorStr + "%)";
                txtCheckResult.Foreground = Brushes.White;
                txtCheckResult.Background = Brushes.Red;
            }

            // Display matching text locations
            if (matchingTextLocs == null)
            {
                return;
            }
            foreach (DocMatchingTextLoc txtLoc in matchingTextLocs)
            {
                if (txtLoc.pageIdx + 1 != _curDocDisplay_pageNum)
                {
                    continue;
                }

                DocRectangle inRect = _curDocDisplay_scanPages.scanPagesText[txtLoc.pageIdx][txtLoc.elemIdx].bounds;
                Brush colrBrush = exprColrBrushes[txtLoc.exprIdx % exprColrBrushes.Count];
                DocRectangle computedLocation = new DocRectangle(inRect.X, inRect.Y, inRect.Width, inRect.Height);

                // Narrow the highlight to the share of the element's width taken by the matched
                // characters; fall back to the whole element when the text length is unknown.
                if (txtLoc.foundInTxtLen > 0)
                {
                    double wid = computedLocation.Width;
                    double inx = computedLocation.X;
                    computedLocation.X = inx + txtLoc.posInText * wid / txtLoc.foundInTxtLen;
                    computedLocation.Width = txtLoc.matchLen * wid / txtLoc.foundInTxtLen;
                }

                // Draw the narrowed rectangle (previously computed but never used)
                locRectHandler.DrawTextMatchRect(computedLocation, colrBrush, "txtMatch");
            }
        }
Exemplo n.º 19
        /// <summary>
        /// Scans one string character by character looking for dates, and hands every complete
        /// date it assembles to AddCompletedDateToList together with datesResult.
        /// An optional "year end" regex pass runs first when dateSrchInfo.bFinancialYearEnd is set.
        /// </summary>
        /// <param name="inStr">Text to scan.</param>
        /// <param name="textBounds">Bounds of the text; passed through to AddCompletedDateToList.</param>
        /// <param name="dateSearchTerm">Not referenced anywhere in the visible body.</param>
        /// <param name="dateSrchInfo">Search settings: the expected element sequence (dateEls) and the flags
        /// bFinancialYearEnd, bIsUsDate, bNoDateRanges, bAllowColons, bAllowTwoCommas and bAllowDots.</param>
        /// <param name="matchFactor">Match factor passed on with each date found by the character scan.</param>
        /// <param name="pageIdx">Zero-based page index; pageIdx+1 is passed on to AddCompletedDateToList.</param>
        /// <param name="datesResult">List passed to AddCompletedDateToList for every date found.</param>
        /// <param name="ignoreWhitespace">When true, space characters are removed before the character scan.</param>
        // NOTE(review): several conditions and loops below have no visible body in this listing (for example
        // the minimum-length check and the skip-letters loop) - presumably single-token statements were lost
        // when the listing was extracted; confirm against the original file before changing any logic here.
        private static void SearchWithinString(string inStr, DocRectangle textBounds, string dateSearchTerm, DateSrchInfo dateSrchInfo, double matchFactor, int pageIdx, List<ExtractedDate> datesResult, bool ignoreWhitespace)
            int numDatesFoundInString = 0;
            int year = -1;

            // Use regex to find financial year end
            if (dateSrchInfo.bFinancialYearEnd)
                const string finYearEndRegex = @"year end.{0,16}?\s?((19|20)?(\d\d))";
                Match fyMatch = Regex.Match(inStr, finYearEndRegex, RegexOptions.IgnoreCase);
                if (fyMatch.Success)
                    if (fyMatch.Groups.Count > 1)
                        // Add result
                        // Group 1 is the year; the century prefix is optional in the regex, so this may be a
                        // two-digit value, and it is passed on unmodified
                        year = Convert.ToInt32(fyMatch.Groups[1].Value);
                        // NOTE(review): compared with the call at the end of the scan loop, the fixed values here sit
                        // in the matchFactor (100), month (4) and day (5) slots, and fyMatch.Length sits where
                        // lastMatchPos is passed there - confirm a length rather than an end position is intended
                        AddCompletedDateToList(inStr, textBounds, 100, year, 4, 5, false, false, fyMatch.Index, fyMatch.Length, dateSrchInfo, pageIdx+1, datesResult);

            // Start at the beginning of the string
            string s = inStr;
            if (ignoreWhitespace)
                s = s.Replace(" ", "");
            int dateSrchPos = 0;
            int chIdx = 0;
            string curStr = "";
            int day = -1;
            int month = -1;
            bool bMonthFromChars = false;
            // Discard any year left over from the financial-year pass before the character scan
            year = -1;
            // Scan in lower case (the letter tests below compare against lower-case characters)
            s = s.ToLower();
            bool strIsDigits = false;
            int firstMatchPos = -1;
            int lastMatchPos = 0;
            int commaCount = 0;
            bool bRangeIndicatorFound = false;
            int numSepChars = 0;
            for (chIdx = 0; chIdx < s.Length; chIdx++)
                char ch = s[chIdx];
                bool bResetNeeded = false;

                // Search element
                // When no element is defined for the current position (el stays null) both digits and
                // letters are accepted, with a length window of 1 to 9 characters
                DateElemSrch el = null;
                int minChars = 1;
                int maxChars = 9;
                if (dateSrchPos < dateSrchInfo.dateEls.Count)
                    el = dateSrchInfo.dateEls[dateSrchPos];
                    minChars = el.minChars;
                    maxChars = el.maxChars;

                // Check if digits required
                if ((el == null) || (el.isDigits))
                    char testCh = ch;
                    // 'l' and 'o' are read as '1' and '0' when they continue a digit run or directly precede a digit
                    if ((testCh == 'l') || (testCh == 'o'))
                        if (((strIsDigits) && (curStr.Length > 0)) || ((chIdx+1 < s.Length) && (Char.IsDigit(s[chIdx+1]))))
                            if (testCh == 'l')
                                testCh = '1';
                            else if (testCh == 'o')
                                testCh = '0';
                            // NOTE(review): the 'i' case below looks unreachable - the enclosing test only admits 'l' and 'o'
                            else if (testCh == 'i')
                                testCh = '1';
                    if (Char.IsDigit(testCh))
                        numSepChars = 0;
                        // Ignore if it's a zero and we're not allowed leading zeroes
                        //                        if ((el != null) && (!el.allowLeadingZeroes) && (curStrPos == 0) && (ch == '0'))
                        //                            continue;

                        // A digit following letters starts a fresh digit string
                        if (!strIsDigits)
                            curStr = "";
                        curStr += testCh;
                        strIsDigits = true;
                        // NOTE(review): no statement is visible under this minimum-length check in this listing
                        if (curStr.Length < minChars)

                        // Check max chars
                        if (curStr.Length > maxChars)
                            curStr = "";

                        // Check if the next char is also a digit - if not then we've found what we're looking for
                        if (((chIdx + 1 >= s.Length) || (!Char.IsDigit(s[chIdx + 1]))) && (curStr != "0"))
                            // Is this a day / month or year??
                            DateElemSrch.DateElType elType = DateElemSrch.DateElType.DE_NONE;
                            if (el != null)
                                elType = el.dateElType;
                                // Handle one and two digit numbers
                                if (curStr.Length <= 2)
                                    // Already had a char based month?
                                    if (bMonthFromChars)
                                        if (!dateSrchInfo.bIsUsDate)
                                            elType = DateElemSrch.DateElType.DE_YEAR;
                                            elType = DateElemSrch.DateElType.DE_DAY;
                                        // Position for standard month?
                                        if ((dateSrchPos == 1) && (!dateSrchInfo.bIsUsDate))
                                            elType = DateElemSrch.DateElType.DE_MONTH;
                                        // Position for US month?
                                        else if ((dateSrchPos == 0) && (dateSrchInfo.bIsUsDate))
                                            elType = DateElemSrch.DateElType.DE_MONTH;
                                        else if (dateSrchPos < 2)
                                            elType = DateElemSrch.DateElType.DE_DAY;
                                        else if ((dateSrchPos > 0) && (curStr.Length == 2))
                                            elType = DateElemSrch.DateElType.DE_YEAR;
                                else if (curStr.Length == 4)
                                    // Num digits == 4
                                    if (dateSrchPos > 0)
                                        elType = DateElemSrch.DateElType.DE_YEAR;

                            // Handle the value
                            // Out-of-range values are stored as -1, the "not yet found" marker
                            if (elType == DateElemSrch.DateElType.DE_DAY)
                                Int32.TryParse(curStr, out day);
                                if ((day < 1) || (day > 31))
                                    day = -1;
                            else if (elType == DateElemSrch.DateElType.DE_MONTH)
                                Int32.TryParse(curStr, out month);
                                if ((month < 1) || (month > 12))
                                    month = -1;
                                bMonthFromChars = false;
                            else if (elType == DateElemSrch.DateElType.DE_YEAR)
                                Int32.TryParse(curStr, out year);
                                // Two-digit years must lie in 0..100, four-digit years in 1800..2200
                                if (curStr.Length == 2)
                                    if ((year < 0) || (year > 100))
                                        year = -1;
                                else if (curStr.Length == 4)
                                    if ((year < 1800) || (year > 2200))
                                        year = -1;

                                // If no date formatting string is used then year must be the last item
                                if ((el == null) && (year != -1))
                                    bResetNeeded = true;
                                curStr = "";
                            // Track the span of the string covered by the date elements found so far
                            if (firstMatchPos == -1)
                                firstMatchPos = chIdx - curStr.Length;
                            lastMatchPos = chIdx;
                            curStr = "";
                if ((el == null) || (!el.isDigits))
                    if (Char.IsLetter(ch))
                        // A letter following digits starts a fresh letter string
                        if (strIsDigits)
                            curStr = "";
                        strIsDigits = false;

                        // Check we're still looking for a month value
                        // NOTE(review): no statement is visible under this condition in this listing
                        if (month != -1)

                        // Form a sub-string to test for month names
                        curStr += ch;

                        // Check for range indicator
                        if (numDatesFoundInString == 1)
                            if (chIdx - curStr.Length - 1 > 0)
                                string testStr = s.Substring(chIdx - curStr.Length - 1);
                                // NOTE(review): both operands of this || are the same literal - one was presumably meant to differ
                                if (testStr.Contains(" to") || testStr.Contains(" to"))
                                    bRangeIndicatorFound = true;

                        // No point checking for month strings until 3 chars got
                        // NOTE(review): no statement is visible under this condition in this listing
                        if (curStr.Length < 3)

                        // Check for a month name
                        if (shortMonthStrings.Any(curStr.Contains))
                            // Month number is the 1-based index of the matching entry in shortMonthStrings
                            for (int monIdx = 0; monIdx < shortMonthStrings.Length; monIdx++)
                                if (curStr.Contains(shortMonthStrings[monIdx]))
                                    month = monIdx + 1;
                                    bMonthFromChars = true;
                            if (firstMatchPos == -1)
                                firstMatchPos = chIdx - curStr.Length;
                            lastMatchPos = chIdx;
                            curStr = "";
                            numSepChars = 0;

                            // Move chIdx on to skip to next non letter
                            // NOTE(review): the loop body is not visible in this listing
                            while ((chIdx < s.Length-1) && (Char.IsLetter(s[chIdx+1])))

                            // Check for another valid month string in next few chars to detect ranges without a year
                            // e.g. should find ranges like 3 Jan - 4 Mar 2011 or 1st Jan to 31st May 2013
                            // but exclude ranges like 3 Jan 2012 - 4 Mar 2012 which would be seen as two separate dates
                            if (!dateSrchInfo.bNoDateRanges)
                                string strNextStr = "";
                                bool bStrRangeIndicatorFound = false;
                                int digitGroups = 0;
                                bool isInDigitGroup = false;
                                // Look ahead over (at most) the next 14 characters
                                for (int chNext = chIdx+1; (chNext < s.Length) && (chNext < chIdx + 15); chNext++)
                                    // Count the groups of digits
                                    // (if we find two groups then break out as it's probably a range that contains separate years)
                                    // NOTE(review): no increment of digitGroups and no statement under the >= 2 test are visible here
                                    if (Char.IsDigit(s[chNext]))
                                        if (!isInDigitGroup)
                                            isInDigitGroup = true;
                                            if (digitGroups >= 2)

                                    // Form a string from letters found
                                    else if (Char.IsLetter(s[chNext]))
                                        isInDigitGroup = false;
                                        strNextStr += s[chNext];

                                        // Check if the string contains "to"
                                        if (strNextStr.Length >= 2)
                                            if (strNextStr.Contains("to"))
                                                bStrRangeIndicatorFound = true;

                                        // Check if the string contains a short month name
                                        if (bStrRangeIndicatorFound && (strNextStr.Length >= 3))
                                            if (shortMonthStrings.Any(strNextStr.Contains))
                                                bResetNeeded = true;
                                        // Check punctuation - this assumes a - is a range separator
                                        isInDigitGroup = false;
                                        if (s[chNext] == '-')
                                            bStrRangeIndicatorFound = true;
                                        strNextStr = "";
                                bResetNeeded = true;

                // Check for whitespace/punctuation/etc
                if (!Char.IsLetterOrDigit(ch))
                    // Only relevant once at least one date element has been found
                    if ((day != -1) || (month != -1) || (year != -1))
                        if (numSepChars > MAX_SEP_CHARS_BETWEEN_DATE_ELEMS)
                            bResetNeeded = true;
                            numSepChars = 0;

                    curStr = "";
                    switch (ch)
                        case ':':
                                if (!dateSrchInfo.bAllowColons)
                                    bResetNeeded = true;
                        case ',':
                                // NOTE(review): commaCount is never incremented in the visible code, only zeroed on reset
                                if ((!dateSrchInfo.bAllowTwoCommas) && (commaCount > 1))
                                    bResetNeeded = true;
                        case '.':
                                if (!dateSrchInfo.bAllowDots)
                                    bResetNeeded = true;
                        case '-':
                                // A dash after one complete date is taken as a range indicator
                                // NOTE(review): numDatesFoundInString is never changed from 0 in the visible code
                                if (numDatesFoundInString == 1)
                                    bRangeIndicatorFound = true;

                // Check for complete date
                // (the day may be missing when the month came from a month name)
                if ((year != -1) && (month != -1) && ((day != -1) || (bMonthFromChars)))
                    // Add result
                    AddCompletedDateToList(s, textBounds, matchFactor, year, month, day, bMonthFromChars, bRangeIndicatorFound, firstMatchPos,
                                            lastMatchPos, dateSrchInfo, pageIdx+1, datesResult);

                    // Start again to see if another date can be found
                    curStr = "";
                    bResetNeeded = true;

                // Restart the process of finding a date if required
                if (bResetNeeded)
                    dateSrchPos = 0;
                    day = -1;
                    month = -1;
                    year = -1;
                    bMonthFromChars = false;
                    strIsDigits = false;
                    firstMatchPos = -1;
                    bResetNeeded = false;
                    commaCount = 0;
                    numSepChars = 0;
Exemplo n.º 20
        /// <summary>
        /// Evaluates a boolean match expression by pulling tokens from <paramref name="st"/> until none remain
        /// or a ")" is reached. "(" recurses into a sub-expression, "&amp;" and "|" select how the next result
        /// is combined, "!" inverts the next result, and any other token is a string handed to MatchString.
        /// A term may be followed by ":" and a match factor and/or a "{...}" list of location rectangle values.
        /// </summary>
        /// <param name="matchExpression">Full expression text; used here for the recursive call and the perf log line.</param>
        /// <param name="st">Tokenizer shared with recursive calls.</param>
        /// <param name="scanPages">Scanned page text passed to MatchString.</param>
        /// <param name="matchFactorTotal">Incremented by a term's match factor when that term's (possibly inverted) result is true.</param>
        /// <param name="curExpressionIdx">Passed to MatchString and to recursive calls; not modified in the visible code.</param>
        /// <param name="matchingTextLocs">Passed to MatchString and to recursive calls.</param>
        /// <returns>The combined result of the terms consumed by this call.</returns>
        // NOTE(review): several branches and loops below have no visible body in this listing (the empty-token
        // branches and the skip-empty-token loops) - presumably single-token statements were lost when the
        // listing was extracted; confirm against the original file before changing any logic here.
        private bool EvalMatch(string matchExpression, StringTok st, ScanPages scanPages, ref double matchFactorTotal, ref int curExpressionIdx, List<DocMatchingTextLoc> matchingTextLocs)
            bool result = false;
            string token = "";
            // The first term is OR-ed into the initial false result
            bool curOpIsOr = true;
            bool opIsInverse = false;
            // Default search area: x=0, y=0, width=100, height=100
            DocRectangle docRectPercent = new DocRectangle(0, 0, 100, 100);
            int docRectValIdx = 0;
            double matchFactorForTerm = 0;

            #if TEST_PERF_EVALMATCH
            Stopwatch stopWatch1 = new Stopwatch();

            while((token = st.GetNextToken()) != null)
                if (token.Trim() == "")
                // End of a parenthesised sub-expression: hand the result back to the caller
                else if (token == ")")
                    return result;
                // Start of a parenthesised sub-expression: evaluate it recursively
                else if (token == "(")
                    bool tmpRslt = EvalMatch(matchExpression, st, scanPages, ref matchFactorTotal, ref curExpressionIdx, matchingTextLocs);
                    // NOTE(review): opIsInverse is not cleared after use in this branch, unlike the terminal-token branch below
                    if (opIsInverse)
                        tmpRslt = !tmpRslt;
                    if (curOpIsOr)
                        result |= tmpRslt;
                        result &= tmpRslt;
                else if (token == "&")
                    curOpIsOr = false;
                else if (token == "|")
                    curOpIsOr = true;
                else if (token == "!")
                    opIsInverse = true;
                    // We've reached a terminal token (string to match to text in the document)
                    string stringToMatch = token;

                    // Check for matchFactor - must have some text before it
                    if (token == ":")
                        return result;
                    // See if there is a match factor defined by the next token
                    while ((st.PeekNextToken() != null) && (st.PeekNextToken() == ""))
                    if ((st.PeekNextToken() != null) && (st.PeekNextToken() == ":"))
                        matchFactorForTerm = 0;
                        while ((st.PeekNextToken() != null) && (st.PeekNextToken() == ""))
                        token = st.GetNextToken();
                        // A token that does not parse as a number leaves the factor at 0 (TryParse result is ignored)
                        if (token != null)
                            Double.TryParse(token, out matchFactorForTerm);

                    // Check for location on empty string
                    if (token == "{")
                        return result;
                    // See if there is a location defined by the next token
                    while ((st.PeekNextToken() != null) && (st.PeekNextToken() == ""))
                    if ((st.PeekNextToken() != null) && (st.PeekNextToken() == "{"))
                        // Read the rectangle values; each numeric token is stored at index docRectValIdx
                        while ((token = st.GetNextToken()) != null)
                            if (token == "")
                            else if (token == "{")
                                docRectValIdx = 0;
                            else if (token == ",")
                            else if (token == "}")
                                double rectVal = 0;
                                Double.TryParse(token, out rectVal);
                                docRectPercent.SetVal(docRectValIdx, rectVal);

                    // Process the match string using the location rectangle
                    // The check for curOpIsOr || result is to avoid unnecessary work if the expression is already false and we're doing a AND
                    // NOTE(review): Length >= 0 is always true, so blank strings are not filtered out - "> 0" may have been intended
                    if ((stringToMatch.Trim().Length >= 0) && (curOpIsOr || result))
                        bool tmpRslt = MatchString(stringToMatch, docRectPercent, scanPages, curExpressionIdx, matchingTextLocs);
                        if (opIsInverse)
                            tmpRslt = !tmpRslt;
                        if (curOpIsOr)
                            result |= tmpRslt;
                            result &= tmpRslt;

                        // Clear the inverse operator after 1 use
                        opIsInverse = false;
                        // Handle match factor
                        // (tmpRslt has already been inverted at this point if "!" was in effect)
                        if (tmpRslt)
                            matchFactorTotal += matchFactorForTerm;

                    // Set the docRect to the entire page (ready for next term)
                    docRectPercent = new DocRectangle(0,0,100,100);
                    matchFactorForTerm = 0;

            #if TEST_PERF_EVALMATCH
            // Elapsed stopwatch ticks converted to microseconds
            logger.Info("EvalMatch : {0:0.00} uS, expr {1}", stopWatch1.ElapsedTicks * 1000000.0 / Stopwatch.Frequency, matchExpression);
            return result;
Exemplo n.º 21
 /// <summary>
 /// Creates a text element from a piece of text and the rectangle that bounds it.
 /// </summary>
 /// <param name="a_text">Text content stored in <c>text</c>.</param>
 /// <param name="a_bounds">Rectangle stored in <c>bounds</c>.</param>
 public ScanTextElem(string a_text, DocRectangle a_bounds)
 {
     this.text = a_text;
     this.bounds = a_bounds;
 }
Exemplo n.º 22
 /// <summary>
 /// Builds a DocRectangle, expressed as percentages of the page size, from the top-left and
 /// bottom-right corners of a text extent, then rotates it by the page rotation about (50, 50).
 /// </summary>
 /// <param name="topLeftCoord">Top-left corner in page coordinates.</param>
 /// <param name="bottomRightCoord">Bottom-right corner in page coordinates.</param>
 /// <param name="pageRect">Page rectangle supplying the width and height used for scaling.</param>
 /// <param name="pageRotation">Rotation passed to DocRectangle.RotateAt.</param>
 /// <returns>The rotated rectangle in percent-of-page units.</returns>
 private DocRectangle ConvertToDocRect(iTextSharp.text.pdf.parser.Vector topLeftCoord, iTextSharp.text.pdf.parser.Vector bottomRightCoord,
                     iTextSharp.text.Rectangle pageRect, int pageRotation)
 {
     // Dotting with a unit vector picks out a single component of the coordinate
     iTextSharp.text.pdf.parser.Vector unitX = new iTextSharp.text.pdf.parser.Vector(1, 0, 0);
     iTextSharp.text.pdf.parser.Vector unitY = new iTextSharp.text.pdf.parser.Vector(0, 1, 0);

     double left = topLeftCoord.Dot(unitX);
     double top = topLeftCoord.Dot(unitY);
     double right = bottomRightCoord.Dot(unitX);
     double bottom = bottomRightCoord.Dot(unitY);

     // Scale to percentages; the Y value is measured down from the page height (axis flipped)
     double percentX = left * 100 / pageRect.Width;
     double percentY = (pageRect.Height - top) * 100 / pageRect.Height;
     double percentWidth = (right - left) * 100 / pageRect.Width;
     double percentHeight = (top - bottom) * 100 / pageRect.Height;

     DocRectangle rotated = new DocRectangle(percentX, percentY, percentWidth, percentHeight);
     rotated.RotateAt(pageRotation, 50, 50);
     return rotated;
 }
 /// <summary>
 /// Converts a rectangle given in overlay-canvas coordinates into a rectangle expressed as
 /// percentages of the master image's actual width and height.
 /// </summary>
 /// <param name="canvasRect">Rectangle in the coordinate space of the overlay canvas.</param>
 /// <returns>The same rectangle in percent-of-image units.</returns>
 private DocRectangle ConvertCanvasRectToDocPercent(DocRectangle canvasRect)
 {
     // Map both corners from the canvas into the master image's coordinate space
     Point topLeft = _uiOverlayCanvas.TranslatePoint(new Point(canvasRect.X, canvasRect.Y), _masterImage);
     Point bottomRight = _uiOverlayCanvas.TranslatePoint(new Point(canvasRect.BottomRightX, canvasRect.BottomRightY), _masterImage);

     double imageWidth = _masterImage.ActualWidth;
     double imageHeight = _masterImage.ActualHeight;

     // Express each corner as a percentage of the image size
     double leftPercent = 100 * topLeft.X / imageWidth;
     double topPercent = 100 * topLeft.Y / imageHeight;
     double rightPercent = 100 * bottomRight.X / imageWidth;
     double bottomPercent = 100 * bottomRight.Y / imageHeight;

     return new DocRectangle(leftPercent, topPercent, rightPercent - leftPercent, bottomPercent - topPercent);
 }