コード例 #1
ファイル: Ocr.cs プロジェクト: alxwrd/pacs
        private bool IsRecursiveMatch(BinaryPixelBitmap binCell, ref int xPos,
                                      SortedDictionary <char, BinaryPixelBitmapChar> refChars, ref Stack <BinaryPixelBitmapChar> charsFound, int xPosPrevChar)
            // Recursive function to match up multiple characters until a space is found...
            // to help find characters when they might be adjacent pixels or overlapped by one pixel.
            // Must have a match for all characters up to the space in order to help avoid false
            // positives.
            //
            // Parameters:
            //   binCell      - bitmap being scanned; read through GetColumn() and Width.
            //   xPos         - column at which matching starts.  Passed by ref: it is advanced
            //                  by xInc for each candidate character and moved back by the same
            //                  amount when the columns that follow fail to match.
            //   refChars     - reference characters; every value is tried as a candidate.
            //   charsFound   - characters matched so far.  When non-empty, its top entry is
            //                  handed to IsMatchBinBitmapChar as the previous character.
            //   xPosPrevChar - forwarded to IsMatchBinBitmapChar.  The recursive call passes
            //                  xPos - xInc, i.e. the column where the candidate just matched
            //                  started; ExtractText passes -1 for the first character.
            //
            // Returns true as soon as a blank column or the end of the bitmap is reached.
            //
            // NOTE(review): this excerpt shows no braces, no else lines and no final return
            // statement.  Presumably the method ends by returning bAllMatchesFound, and
            // charsFound is pushed/popped around the recursive call -- TODO confirm against
            // the complete file.

            // NOTE(review): || evaluates left to right, so GetColumn(xPos) runs before the
            // xPos >= binCell.Width test.  Confirm GetColumn tolerates an out-of-range column.
            if (binCell.GetColumn(xPos) == 0 || xPos >= binCell.Width)
                return(true);    // Found a space or end of bitmap, so assume matched up to this point
            bool      bAllMatchesFound = false;
            int       xInc             = 0;
            MatchType matchType        = MatchType.OverlapBothSides;

            // Ensure that first character must always have an exact match on the left side
            if (charsFound.Count <= 0)
                matchType = MatchType.OverlapRightSideOnly;

            foreach (BinaryPixelBitmapChar guessChar in refChars.Values)
                // Check using overlapped matching
                // 'last' is the most recently matched character, or null for the first one
                BinaryPixelBitmapChar last = charsFound.Count <= 0 ? null : charsFound.Peek();
                if (IsMatchBinBitmapChar(guessChar, binCell, xPos, matchType, last, xPosPrevChar))
                    // Found a potential match... so now check if it exactly matches on the right side
                    // and if so then next character test will be for adjacent, otherwise next character
                    // test will be for overlap
                    // NOTE(review): the two xInc assignments below read as the if/else arms
                    // described above (full width = adjacent, width - 1 = one column of
                    // overlap); the else line is not present in this excerpt.
                    if (IsMatchBinBitmapChar(guessChar, binCell, xPos, MatchType.OverlapLeftSideOnly, last, xPosPrevChar))
                        xInc = guessChar.Width;
                        xInc = guessChar.Width - 1;
                    // Step past the candidate and try to match everything that follows it
                    xPos += xInc;
                    if (IsRecursiveMatch(binCell, ref xPos, refChars, ref charsFound, xPos - xInc))
                        bAllMatchesFound = true;
                        break;  // Exit foreach
                        // subsequent pixels don't match any characters so back out
                        // previous one and keep trying
                        xPos -= xInc;

コード例 #2
ファイル: Ocr.cs プロジェクト: alxwrd/pacs
        public string ExtractText(BinaryPixelBitmap binCell, FontType selectFont)
            // Assumes cell is composed of only white and black pixels and that black pixels
            // form characters.  Also assumes that characters can be overlapped by one column
            // as long as no black pixels of each character overlap (black pixel from one
            // character can overlap white space of character next to it).
            //
            // Scans binCell column by column from left to right, matching runs of non-blank
            // columns against the reference characters of the selected font and building
            // up the recognised text in 'words'.
            //
            // Parameters:
            //   binCell    - bitmap to read; accessed through GetColumn() and Width.
            //   selectFont - chooses the reference set: Font1Numbers uses
            //                font1.NumericalBinChars, Font2All uses font2.AllBinChars.
            //
            // After the scan, a second pass copies 'words' into 'wordsFixed' while swapping
            // 'I' and 'l' depending on whether the preceding character is a space.
            //
            // NOTE(review): this excerpt shows no braces, no else lines and no return
            // statement; presumably the method returns wordsFixed -- TODO confirm against
            // the complete file.
            string words = "";

            SortedDictionary <char, BinaryPixelBitmapChar> refChars;

            // NOTE(review): the final assignment (font1.AllBinChars) looks like the fallback
            // arm for any other font; its else line is not present in this excerpt.
            if (selectFont == FontType.Font1Numbers)
                refChars = font1.NumericalBinChars;
            else if (selectFont == FontType.Font2All)
                refChars = font2.AllBinChars;
                refChars = font1.AllBinChars;

            int xPos       = 0;   // column currently being examined
            int blankLines = 0;   // blank-column count used for space detection below

            while (xPos >= 0 && xPos < binCell.Width)
                if (binCell.GetColumn(xPos) == 0)
                    // Blank column
                    // NOTE(review): nothing visible here increments blankLines or advances
                    // xPos for a blank column; those statements are presumably missing from
                    // this excerpt -- confirm against the complete file.

                    if (words.Length > 0 && blankLines > 10)
                        break;  // Rest of cell is likely blank so don't bother searching the rest

                // Fresh stack for each attempt; IsRecursiveMatch receives it by ref
                Stack <BinaryPixelBitmapChar> charsFound = new Stack <BinaryPixelBitmapChar>();

                // Find character(s) that match.  Will recursively look for matching
                // characters until it finds a blank column of pixels (this allows matching
                // of overlapped or adjacent characters).
                // xPos is passed by ref and -1 is passed as the previous character's start
                // column because no character precedes this run.
                if (!IsRecursiveMatch(binCell, ref xPos, refChars, ref charsFound, -1))
                    xPos++; // If none found then increment xPos and try again
                if (charsFound.Count > 0)
                    // Found a space before the character?
                    if (blankLines > 0)
                        // Use Kerning and Tracking to figure out if there are spaces
                        // and if so how many spaces.

                        // Only check for spaces after we have found some characters
                        // (i.e. strip leading spaces)
                        if (words.Length > 0)
                            // Look up the reference entry for the last character emitted so far
                            BinaryPixelBitmapChar prevChar = refChars[words[words.Length - 1]];
                            blankLines -= prevChar.KerningMin;  // Assumes Tracking is greater than KerningMax-KerningMin
                            // Emit one space for every full Tracking width of remaining blank columns
                            while (blankLines >= prevChar.Tracking)
                                words      += " "; // space
                                blankLines -= prevChar.Tracking;

                    // NOTE(review): the block comment opened on the next line has no closing
                    // marker anywhere in this excerpt; as shown, everything after it is
                    // commented out.  Confirm against the complete file.
/*                    if (blankLines >= 2 && (selectFont != FontType.Font1Numbers))
 *                  {
 *                      char prevChar = ' ';
 *                      if (words.Length > 0)
 *                          prevChar = words[words.Length - 1];
 *                      if (prevChar.Equals('f'))
 *                          words = words + " ";    // space
 *                      else if (prevChar.Equals('1'))
 *                      {
 *                          if (blankLines > 4)
 *                              words = words + " ";    // space
 *                      }
 *                      else if (blankLines >= 3)
 *                          words = words + " ";    // space
 *                  }
                    blankLines = 0;
                    string s = "";
                    // Pop characters off stack and then add to word(s)
                    foreach (BinaryPixelBitmapChar c in charsFound)
                        s = c.Character + s;
                    words += s;

            string wordsFixed = "";

            // Hack: Upper-case 'i' and lower-case 'L' look the same.
            // If previous character was a space then this character is probably an
            // upper-case 'i', otherwise set it to lower-case 'L'
            for (int i = 0; i < words.Length; i++)
                if (i == 0)
                    if (words[0].Equals('l'))
                        wordsFixed += "I";
                        wordsFixed += words[i];
                    if (words[i].Equals('I') && !words[i - 1].Equals(' '))
                        wordsFixed += "l";
                    else if (words[i].Equals('l') && words[i - 1].Equals(' '))
                        if ((i + 1) < words.Length && words[i + 1].Equals('i'))
                            wordsFixed += "l";  // Not likely to have "Ii"  so it is probably "li" instead
                            wordsFixed += "I";
                        wordsFixed += words[i];

コード例 #3
ファイル: Ocr.cs プロジェクト: alxwrd/pacs
        private bool IsMatchBinBitmapChar(BinaryPixelBitmapChar charBmp, BinaryPixelBitmap unknownBmp, int xStart, MatchType matchType, BinaryPixelBitmapChar prevCharBmp, int xStartPrevChar)
            // Now check character matches, allow left most column and right most column
            // to overlap
            //
            // Compares the reference character charBmp against unknownBmp starting at column
            // xStart, in three stages: left-most column, middle columns, right-most column.
            //
            // Parameters:
            //   charBmp        - reference character being tested.
            //   unknownBmp     - bitmap being recognised.
            //   xStart         - column of unknownBmp lined up with column 0 of charBmp.
            //   matchType      - consulted only for the right-most column in the visible
            //                    code; the left-side check that used it is commented out.
            //   prevCharBmp    - previously matched character, or null when there is none.
            //   xStartPrevChar - start column of prevCharBmp; ignored when negative.
            //
            // NOTE(review): this excerpt shows no braces or else lines, and most of the
            // statements guarded by the tests below (presumably return(false)) plus the
            // final return (presumably return(true) after "Found match") are not visible
            // -- TODO confirm against the complete file.

            // Check left column
//            if (charBmp.CanOverlap && (matchType == MatchType.OverlapBothSides || matchType == MatchType.OverlapLeftSideOnly))
            if (charBmp.CanOverlap && prevCharBmp != null && prevCharBmp.KerningMin < 0)
                // Check for overlapped match on left column
                // The masked value is non-zero when charBmp's first column has a bit set
                // that is clear in the unknown column (tested here as > 0)
                if ((charBmp.GetColumn(0) & ~unknownBmp.GetColumn(xStart)) > 0)

                // If overlapped character matches then check that whatever is left (after subtracting out
                // the bits for the current character) is a perfect match for the right side of the
                // previous character
                // The position test holds only when the previous character's last column is xStart
                if (prevCharBmp != null && xStartPrevChar >= 0 && ((xStartPrevChar + prevCharBmp.Width - 1) == xStart))
                    if ((unknownBmp.GetColumn(xStart) & ~charBmp.GetColumn(0)) != prevCharBmp.GetColumn(prevCharBmp.Width - 1))
                // Exact comparison of the left column (no overlap allowed)
                if (charBmp.GetColumn(0) != unknownBmp.GetColumn(xStart))

                // If current character matches then check that whatever is left (after subtracting out
                // the bits for the current character) is a perfect match for the right side of the
                // previous character
                if (prevCharBmp != null && xStartPrevChar >= 0 && ((xStartPrevChar + prevCharBmp.Width - 1) == xStart))
                    if ((unknownBmp.GetColumn(xStart) & ~charBmp.GetColumn(0)) != prevCharBmp.GetColumn(prevCharBmp.Width - 1))

                // If single pixel wide character then check to ensure that there is nothing to
                // the right of it if it can't support adjacent characters
                // NOTE(review): xStart + 1 is read without a visible bounds check against
                // unknownBmp.Width; confirm GetColumn tolerates an out-of-range column.
                if (charBmp.Width == 1 && !charBmp.CanAdjacentRight)
                    if (unknownBmp.GetColumn(xStart + 1) != 0)
                        return(false);   // Something adjacent when it is not allowed so not a match

            // Check middle part of character
            // Columns 1 .. Width - 2 must match exactly; the loop also stops at the right
            // edge of unknownBmp
            for (int x = 1; (x < charBmp.Width - 1) && ((xStart + x) < unknownBmp.Width); x++)
                if (charBmp.GetColumn(x) != unknownBmp.GetColumn(xStart + x))

            // Check right part of character
            // NOTE(review): the reads at xStart + charBmp.Width - 1 and xStart + charBmp.Width
            // below have no visible bounds check against unknownBmp.Width.
            if (charBmp.Width > 1)
                if (charBmp.CanOverlap && charBmp.CanAdjacentRight && (matchType == MatchType.OverlapBothSides || matchType == MatchType.OverlapRightSideOnly))
                    // Check for overlapped match on right side
                    // Same masking as on the left: non-zero when charBmp's last column has a
                    // bit set that is clear in the unknown column
                    if ((charBmp.GetColumn(charBmp.Width - 1) & ~unknownBmp.GetColumn(xStart + charBmp.Width - 1)) > 0)
                    // Exact comparison of the right column (no overlap allowed)
                    if (charBmp.GetColumn(charBmp.Width - 1) != unknownBmp.GetColumn(xStart + charBmp.Width - 1))

                    if (!charBmp.CanAdjacentRight && !(unknownBmp.GetColumn(xStart + charBmp.Width) == 0))
                        return(false);   // Must have blank column to right of this character, so not a match

            // Found match