Example #1
0
File: Ocr.cs Project: alxwrd/pacs
        /// <summary>
        /// Recursively matches consecutive reference characters against <paramref name="binCell"/>,
        /// starting at <paramref name="xPos"/>, until a blank column or the end of the bitmap is
        /// reached.  This helps find characters that are adjacent to, or overlapped by one pixel
        /// column with, their neighbours.  Every character up to the blank column must match, which
        /// helps avoid false positives.
        /// </summary>
        /// <param name="binCell">Bitmap being scanned.</param>
        /// <param name="xPos">Column to start matching at.  On success it is left at the column
        /// where matching stopped; on failure it is restored to its value on entry.</param>
        /// <param name="refChars">Reference characters to try, in the order of its Values collection.</param>
        /// <param name="charsFound">Stack of characters matched so far.  A candidate is pushed before
        /// recursing and popped again if the rest of the run fails to match.</param>
        /// <param name="xPosPrevChar">Start column of the previously matched character, forwarded to
        /// IsMatchBinBitmapChar.</param>
        /// <returns><c>true</c> if every column up to the next blank column (or the end of the
        /// bitmap) was covered by matching characters; otherwise <c>false</c>.</returns>
        private bool IsRecursiveMatch(BinaryPixelBitmap binCell, ref int xPos,
                                      SortedDictionary <char, BinaryPixelBitmapChar> refChars, ref Stack <BinaryPixelBitmapChar> charsFound, int xPosPrevChar)
        {
            // Found the end of the bitmap or a blank column, so assume matched up to this point.
            // The bounds test comes first so that GetColumn is never asked for a column at or
            // beyond the bitmap width.
            if (xPos >= binCell.Width || binCell.GetColumn(xPos) == 0)
            {
                return(true);
            }

            // The first character of a run has no predecessor, so it is matched with
            // OverlapRightSideOnly instead of OverlapBothSides.
            bool      isFirstChar = charsFound.Count <= 0;
            MatchType matchType   = isFirstChar ? MatchType.OverlapRightSideOnly : MatchType.OverlapBothSides;

            // The stack is back in its entry state at the top of every iteration (each failed
            // candidate is popped below), so the previous character only needs looking up once.
            BinaryPixelBitmapChar last = isFirstChar ? null : charsFound.Peek();

            foreach (BinaryPixelBitmapChar guessChar in refChars.Values)
            {
                // Check using overlapped matching
                if (!IsMatchBinBitmapChar(guessChar, binCell, xPos, matchType, last, xPosPrevChar))
                {
                    continue;
                }

                // Potential match.  If it also matches exactly on the right side, the next
                // character starts in the adjacent column; otherwise the next character is
                // tested one column earlier, i.e. overlapping this one.
                bool exactOnRight = IsMatchBinBitmapChar(guessChar, binCell, xPos, MatchType.OverlapLeftSideOnly, last, xPosPrevChar);
                int  xInc         = exactOnRight ? guessChar.Width : guessChar.Width - 1;

                xPos += xInc;
                charsFound.Push(guessChar);

                // xPos - xInc is the column where guessChar started.
                if (IsRecursiveMatch(binCell, ref xPos, refChars, ref charsFound, xPos - xInc))
                {
                    return(true);
                }

                // Subsequent pixels don't match any characters, so back out this candidate
                // and keep trying.
                charsFound.Pop();
                xPos -= xInc;
            }

            return(false);
        }
Example #2
0
        /// <summary>
        /// Runs OCR over successive 15-pixel-high rows of the "Test\IslandNames.tif" bitmap and
        /// compares each extracted name with the expected island name for that row.
        /// </summary>
        /// <returns>One "IslandNames" result per mismatching row, or a single "PASS" result when
        /// every checked row matched.</returns>
        public List <TestResult> CheckIslandNames()
        {
            const int rowHeight = 15;

            string[] expectedNames =
            {
                "Wensleydale", "Ventress", "Terjit", "Squibnocket", "Spaniel", "Rowes",
                "Penobscot", "Morannon", "Mirage", "Lincoln", "Isle of Kent", "Jack's Last Gift",
                "Halley", "Greenwich", "Fluke", "Descartes Isle", "Caravanserai", "Blackthorpe",
                "Barbary", "Frond", "Islay of Luthien", "Epsilon", "Eta", "Alpha", "Namath",
                "Oyster", "Vernal Equinox", "Xi", "Zeta", "Uxmal", "Quetzal", "Yax Mutal",
                "Swampfen", "Spectre", "Harmattan", "Kirin", "Typhoon"
            };

            UnsafeBitmap      bm       = new UnsafeBitmap("Test\\IslandNames.tif");
            List <TestResult> failures = new List <TestResult>();

            // Row n of the bitmap starts at y = n * rowHeight and is compared with expectedNames[n].
            for (int row = 0; row * rowHeight < bm.Height && row < expectedNames.Length; row++)
            {
                int       top     = row * rowHeight;
                Rectangle rowArea = new Rectangle(0, top, bm.Width, rowHeight);

                // The pixel in the row's first column is passed as the colour for ColorIsZero.
                BinaryPixelBitmap binRow = new BinaryPixelBitmap(bm.CloneAsBin(rowArea, BinPixelConvertType.ColorIsZero, bm.GetPixel(0, top)), rowHeight);
                string            name   = ocr.ExtractText(binRow, FontType.Font2All);

                // Cut the name at the word "Island" and then at ":" (only when found past the
                // first character), trimming surrounding white space after each step.
                int cut = name.IndexOf("Island");
                if (cut > 0)
                {
                    name = name.Substring(0, cut);
                }
                name = name.Trim();

                cut = name.IndexOf(":");
                if (cut > 0)
                {
                    name = name.Substring(0, cut);
                }
                name = name.Trim();

                string expected = expectedNames[row];
                if (name != expected)
                {
                    failures.Add(new TestResult("IslandNames", "Expected '" + expected + "', got '" + name + "'"));
                }
            }

            if (failures.Count <= 0)
            {
                failures.Add(new TestResult("IslandNames", "PASS"));
            }
            return(failures);
        }
Example #3
0
File: Ocr.cs Project: alxwrd/pacs
        /// <summary>
        /// Reads the text in a binary cell bitmap by matching its pixel columns against the
        /// reference characters of the selected font.
        /// </summary>
        /// <remarks>
        /// Assumes the cell is composed of only white and black pixels and that black pixels form
        /// characters.  Also assumes that characters can be overlapped by one column as long as no
        /// black pixels of each character overlap (a black pixel from one character can overlap
        /// white space of the character next to it).
        /// </remarks>
        /// <param name="binCell">Cell bitmap to read.</param>
        /// <param name="selectFont">Selects the reference character set: Font1Numbers uses font1's
        /// numerical characters, Font2All uses font2's full set, anything else uses font1's full set.</param>
        /// <returns>The recognised text with leading blank columns ignored; an empty string when
        /// nothing matched.</returns>
        public string ExtractText(BinaryPixelBitmap binCell, FontType selectFont)
        {
            SortedDictionary <char, BinaryPixelBitmapChar> refChars;

            switch (selectFont)
            {
                case FontType.Font1Numbers:
                    refChars = font1.NumericalBinChars;
                    break;

                case FontType.Font2All:
                    refChars = font2.AllBinChars;
                    break;

                default:
                    refChars = font1.AllBinChars;
                    break;
            }

            string words      = "";
            int    xPos       = 0;
            int    blankLines = 0;   // Blank columns seen since the last recognised character

            while (xPos >= 0 && xPos < binCell.Width)
            {
                if (binCell.GetColumn(xPos) == 0)
                {
                    // Blank column
                    blankLines++;
                    xPos++;

                    if (words.Length > 0 && blankLines > 10)
                    {
                        break;  // Rest of cell is likely blank so don't bother searching the rest
                    }
                    continue;
                }

                Stack <BinaryPixelBitmapChar> charsFound = new Stack <BinaryPixelBitmapChar>();

                // Find character(s) that match.  Will recursively look for matching
                // characters until it finds a blank column of pixels (this allows matching
                // of overlapped or adjacent characters).
                if (!IsRecursiveMatch(binCell, ref xPos, refChars, ref charsFound, -1))
                {
                    xPos++; // If none found then increment xPos and try again
                }
                if (charsFound.Count <= 0)
                {
                    continue;
                }

                // Use Kerning and Tracking to figure out whether the blank columns before this
                // run are spaces and, if so, how many.  Only done once some characters have been
                // found (i.e. leading spaces are stripped).
                if (blankLines > 0 && words.Length > 0)
                {
                    BinaryPixelBitmapChar prevChar = refChars[words[words.Length - 1]];
                    blankLines -= prevChar.KerningMin;  // Assumes Tracking is greater than KerningMax-KerningMin

                    // A Tracking of zero or less would never reduce blankLines below itself,
                    // so it is treated as "no spaces" rather than looping forever.
                    while (prevChar.Tracking > 0 && blankLines >= prevChar.Tracking)
                    {
                        words      += " "; // space
                        blankLines -= prevChar.Tracking;
                    }
                }
                blankLines = 0;

                // The stack enumerates newest-first, so prepend each character to restore
                // left-to-right order.
                string run = "";
                foreach (BinaryPixelBitmapChar c in charsFound)
                {
                    run = c.Character + run;
                }
                words += run;
            }

            return(ResolveAmbiguousCapitalI(words));
        }

        // Hack: upper-case 'i' and lower-case 'L' look the same.  Rewrites each 'I'/'l' according to
        // whether it starts a word: at the start of a word it is probably an upper-case 'i',
        // otherwise a lower-case 'L'.
        private static string ResolveAmbiguousCapitalI(string words)
        {
            char[] fixedChars = words.ToCharArray();

            // Decisions always look at the unmodified input (words), never at fixedChars.
            for (int i = 0; i < words.Length; i++)
            {
                char current = words[i];

                if (i == 0)
                {
                    if (current == 'l')
                    {
                        fixedChars[0] = 'I';
                    }
                    continue;
                }

                bool afterSpace = words[i - 1] == ' ';

                if (current == 'I' && !afterSpace)
                {
                    fixedChars[i] = 'l';
                }
                else if (current == 'l' && afterSpace)
                {
                    // Not likely to have "Ii" so it is probably "li" instead
                    bool followedByLowerI = (i + 1) < words.Length && words[i + 1] == 'i';
                    if (!followedByLowerI)
                    {
                        fixedChars[i] = 'I';
                    }
                }
            }

            return(new string(fixedChars));
        }
Example #4
0
File: Ocr.cs Project: alxwrd/pacs
        /// <summary>
        /// Tests whether the reference character <paramref name="charBmp"/> matches the pixels of
        /// <paramref name="unknownBmp"/> starting at column <paramref name="xStart"/>.  The left-most
        /// and right-most columns may be matched as overlapped (the character's pixels need only
        /// be present in the bitmap column); the middle columns must be identical.
        /// </summary>
        /// <param name="charBmp">Reference character to test.</param>
        /// <param name="unknownBmp">Bitmap being scanned.</param>
        /// <param name="xStart">Column of <paramref name="unknownBmp"/> where the character would start.</param>
        /// <param name="matchType">Only consulted for the right edge: OverlapBothSides or
        /// OverlapRightSideOnly permits an overlapped match there.</param>
        /// <param name="prevCharBmp">Previously matched character, or null if there is none.</param>
        /// <param name="xStartPrevChar">Start column of the previous character; negative values
        /// disable the shared-column check.</param>
        /// <returns><c>true</c> when every check passes; otherwise <c>false</c>.</returns>
        private bool IsMatchBinBitmapChar(BinaryPixelBitmapChar charBmp, BinaryPixelBitmap unknownBmp, int xStart, MatchType matchType, BinaryPixelBitmapChar prevCharBmp, int xStartPrevChar)
        {
            // ---- Left column ----
            // An overlapped left edge is only considered when there is a previous character
            // whose KerningMin is negative.
            bool overlapLeft = charBmp.CanOverlap && prevCharBmp != null && prevCharBmp.KerningMin < 0;

            var charFirstCol = charBmp.GetColumn(0);
            var cellFirstCol = unknownBmp.GetColumn(xStart);

            if (overlapLeft)
            {
                // Overlapped: every pixel of the character's first column must be set in the cell
                if ((charFirstCol & ~cellFirstCol) > 0)
                {
                    return(false);
                }
            }
            else if (charFirstCol != cellFirstCol)
            {
                // Exact: the columns must be identical
                return(false);
            }

            // When the previous character's last column is this same cell column, whatever is
            // left after subtracting the current character's bits must be exactly the previous
            // character's last column.
            bool sharesColumnWithPrev = prevCharBmp != null &&
                                        xStartPrevChar >= 0 &&
                                        (xStartPrevChar + prevCharBmp.Width - 1) == xStart;
            if (sharesColumnWithPrev &&
                (cellFirstCol & ~charFirstCol) != prevCharBmp.GetColumn(prevCharBmp.Width - 1))
            {
                return(false);
            }

            // A single pixel wide character that can't have an adjacent neighbour must have
            // nothing in the column to its right (exact left-edge matches only).
            if (!overlapLeft && charBmp.Width == 1 && !charBmp.CanAdjacentRight &&
                unknownBmp.GetColumn(xStart + 1) != 0)
            {
                return(false);
            }

            // ---- Middle columns ----
            // Must be identical; stops early at the right edge of the cell bitmap.
            int x = 1;
            while ((x < charBmp.Width - 1) && ((xStart + x) < unknownBmp.Width))
            {
                if (charBmp.GetColumn(x) != unknownBmp.GetColumn(xStart + x))
                {
                    return(false);
                }
                x++;
            }

            // ---- Right column ----
            if (charBmp.Width <= 1)
            {
                return(true);   // Single column characters have no separate right edge
            }

            bool overlapRight = charBmp.CanOverlap && charBmp.CanAdjacentRight &&
                                (matchType == MatchType.OverlapBothSides || matchType == MatchType.OverlapRightSideOnly);

            var charLastCol = charBmp.GetColumn(charBmp.Width - 1);
            var cellLastCol = unknownBmp.GetColumn(xStart + charBmp.Width - 1);

            if (overlapRight)
            {
                // Overlapped: every pixel of the character's last column must be set in the cell
                return(!((charLastCol & ~cellLastCol) > 0));
            }

            if (charLastCol != cellLastCol)
            {
                return(false);
            }

            // Must have a blank column to the right of this character if it can't have an
            // adjacent neighbour
            if (!charBmp.CanAdjacentRight && unknownBmp.GetColumn(xStart + charBmp.Width) != 0)
            {
                return(false);
            }

            // Found match
            return(true);
        }