/// <summary>
/// Recursively matches a run of touching characters starting at <paramref name="xPos"/>
/// and continuing until a blank column or the end of the bitmap is reached.
/// Every column of the run must be explained by some reference character; otherwise the
/// whole run is rejected, which helps avoid false positives when characters are adjacent
/// or overlap by one pixel column.
/// </summary>
/// <param name="binCell">Bitmap being scanned.</param>
/// <param name="xPos">
/// Column to start matching at. On success it is advanced past the matched run;
/// on failure it is left at its original value.
/// </param>
/// <param name="refChars">Reference characters to try, in dictionary order.</param>
/// <param name="charsFound">
/// Stack of characters matched so far (most recent on top). On failure everything pushed
/// by this call has been popped again.
/// </param>
/// <param name="xPosPrevChar">Start column of the previously matched character, or -1 if none.</param>
/// <returns><c>true</c> if the whole run up to the next blank column / bitmap end was matched.</returns>
private bool IsRecursiveMatch(BinaryPixelBitmap binCell, ref int xPos, SortedDictionary<char, BinaryPixelBitmapChar> refChars, ref Stack<BinaryPixelBitmapChar> charsFound, int xPosPrevChar)
{
    // The bounds test must come first: short-circuit evaluation then guarantees that
    // GetColumn is never asked for a column beyond the right edge of the bitmap.
    if (xPos >= binCell.Width || binCell.GetColumn(xPos) == 0)
    {
        return true; // Found a space or end of bitmap, so assume matched up to this point
    }

    // Both values are invariant across the loop: every failed attempt pops what it pushed,
    // so the stack looks the same at the start of each iteration.
    bool isFirstChar = charsFound.Count <= 0;
    BinaryPixelBitmapChar last = isFirstChar ? null : charsFound.Peek();

    // The first character of a run gets OverlapRightSideOnly instead of OverlapBothSides.
    MatchType matchType = isFirstChar ? MatchType.OverlapRightSideOnly : MatchType.OverlapBothSides;

    foreach (BinaryPixelBitmapChar guessChar in refChars.Values)
    {
        // Check using overlapped matching
        if (!IsMatchBinBitmapChar(guessChar, binCell, xPos, matchType, last, xPosPrevChar))
        {
            continue;
        }

        // Potential match. If it also matches exactly on the right side, the next character
        // starts in the adjacent column; otherwise the next character shares (overlaps)
        // this character's last column.
        bool exactOnRight = IsMatchBinBitmapChar(guessChar, binCell, xPos, MatchType.OverlapLeftSideOnly, last, xPosPrevChar);
        int xInc = exactOnRight ? guessChar.Width : guessChar.Width - 1;

        int xStartOfGuess = xPos;
        xPos += xInc;
        charsFound.Push(guessChar);

        if (IsRecursiveMatch(binCell, ref xPos, refChars, ref charsFound, xStartOfGuess))
        {
            return true;
        }

        // Subsequent pixels don't match any characters, so back out this guess and keep trying.
        charsFound.Pop();
        xPos -= xInc;
    }

    return false;
}
/// <summary>
/// Extracts the text contained in a black-and-white cell bitmap by matching its pixel
/// columns against the reference characters of the selected font.
/// </summary>
/// <remarks>
/// Assumes the cell is composed only of white and black pixels and that black pixels form
/// characters. Characters may overlap by one column as long as no black pixels of the two
/// characters coincide (a black pixel of one character may sit in the white space of its
/// neighbour).
/// </remarks>
/// <param name="binCell">Bitmap of the cell to read.</param>
/// <param name="selectFont">
/// Selects the reference character set: <c>Font1Numbers</c> uses font1's numerical set,
/// <c>Font2All</c> uses font2's full set, anything else uses font1's full set.
/// </param>
/// <returns>The recognised text, with leading spaces stripped and the 'I'/'l' ambiguity resolved heuristically.</returns>
public string ExtractText(BinaryPixelBitmap binCell, FontType selectFont)
{
    SortedDictionary<char, BinaryPixelBitmapChar> refChars;
    if (selectFont == FontType.Font1Numbers)
    {
        refChars = font1.NumericalBinChars;
    }
    else if (selectFont == FontType.Font2All)
    {
        refChars = font2.AllBinChars;
    }
    else
    {
        refChars = font1.AllBinChars;
    }

    // StringBuilder avoids re-allocating the whole string on every appended character.
    System.Text.StringBuilder words = new System.Text.StringBuilder();
    int xPos = 0;
    int blankLines = 0;

    while (xPos >= 0 && xPos < binCell.Width)
    {
        if (binCell.GetColumn(xPos) == 0)
        {
            // Blank column
            blankLines++;
            xPos++;
            if (words.Length > 0 && blankLines > 10)
            {
                break; // Rest of cell is likely blank so don't bother searching the rest
            }
            continue;
        }

        // Find character(s) that match. The search recurses until it reaches a blank column,
        // which allows matching of overlapped or adjacent characters.
        Stack<BinaryPixelBitmapChar> charsFound = new Stack<BinaryPixelBitmapChar>();
        if (!IsRecursiveMatch(binCell, ref xPos, refChars, ref charsFound, -1))
        {
            xPos++; // Nothing matched here, so step one column and try again
        }

        if (charsFound.Count <= 0)
        {
            continue;
        }

        // Blank columns preceded this run: use kerning and tracking of the previous character
        // to decide how many spaces they represent. Only done once some text exists, which
        // strips leading spaces.
        if (blankLines > 0 && words.Length > 0)
        {
            BinaryPixelBitmapChar prevChar = refChars[words[words.Length - 1]];
            blankLines -= prevChar.KerningMin;

            // Assumes Tracking is greater than KerningMax - KerningMin.
            // A non-positive Tracking would never let the loop below terminate, so it is
            // treated as "no spaces can be derived".
            if (prevChar.Tracking > 0)
            {
                while (blankLines >= prevChar.Tracking)
                {
                    words.Append(' ');
                    blankLines -= prevChar.Tracking;
                }
            }
        }

        blankLines = 0;

        // Stack<T>.ToArray returns items in pop order (most recent first); walk it backwards
        // to append the characters in the order they were matched.
        BinaryPixelBitmapChar[] matched = charsFound.ToArray();
        for (int i = matched.Length - 1; i >= 0; i--)
        {
            words.Append(matched[i].Character);
        }
    }

    return FixAmbiguousUpperILowerL(words.ToString());
}

/// <summary>
/// Resolves the upper-case 'I' / lower-case 'l' ambiguity in recognised text.
/// </summary>
/// <remarks>
/// Hack: the two glyphs look the same. At the start of a word (start of text or after a
/// space) the glyph is taken to be upper-case 'I', unless it is followed by 'i', since "Ii"
/// is unlikely and "li" is assumed instead. Anywhere else it is taken to be lower-case 'l'.
/// Neighbouring characters are always read from the unmodified input.
/// </remarks>
/// <param name="words">Raw recognised text.</param>
/// <returns>The text with 'I' and 'l' swapped where the heuristic applies.</returns>
private static string FixAmbiguousUpperILowerL(string words)
{
    System.Text.StringBuilder fixedWords = new System.Text.StringBuilder(words.Length);

    for (int i = 0; i < words.Length; i++)
    {
        char current = words[i];
        bool startsWord = i == 0 || words[i - 1] == ' ';

        if (current == 'I' && !startsWord)
        {
            fixedWords.Append('l');
        }
        else if (current == 'l' && startsWord)
        {
            bool followedByLowerI = (i + 1) < words.Length && words[i + 1] == 'i';
            fixedWords.Append(followedByLowerI ? 'l' : 'I');
        }
        else
        {
            fixedWords.Append(current);
        }
    }

    return fixedWords.ToString();
}
/// <summary>
/// Tests whether the reference character <paramref name="charBmp"/> matches the pixels of
/// <paramref name="unknownBmp"/> starting at column <paramref name="xStart"/>.
/// Middle columns must match exactly; the left-most and right-most columns may, under the
/// conditions below, contain extra pixels belonging to a neighbouring character.
/// </summary>
/// <param name="charBmp">Reference character to test.</param>
/// <param name="unknownBmp">Bitmap being recognised.</param>
/// <param name="xStart">Column of <paramref name="unknownBmp"/> where the character would start.</param>
/// <param name="matchType">
/// Controls the right-column test only: overlap is tolerated there for
/// <c>OverlapBothSides</c> and <c>OverlapRightSideOnly</c>. The left-column test is driven by
/// the previous character's kerning, not by this value.
/// </param>
/// <param name="prevCharBmp">Previously matched character, or <c>null</c> if there is none.</param>
/// <param name="xStartPrevChar">Start column of the previous character; negative if there is none.</param>
/// <returns><c>true</c> if the character matches at <paramref name="xStart"/>.</returns>
private bool IsMatchBinBitmapChar(BinaryPixelBitmapChar charBmp, BinaryPixelBitmap unknownBmp, int xStart, MatchType matchType, BinaryPixelBitmapChar prevCharBmp, int xStartPrevChar)
{
    // ---- Left column ----
    // Overlap on the left is only tolerated when this character can overlap and the previous
    // character has a negative minimum kerning.
    bool overlapLeft = charBmp.CanOverlap && prevCharBmp != null && prevCharBmp.KerningMin < 0;
    if (overlapLeft)
    {
        // Every pixel of the character must be present; extra pixels are allowed.
        // Compared with != 0 rather than > 0 so a set top bit in a signed mask is not missed.
        if ((charBmp.GetColumn(0) & ~unknownBmp.GetColumn(xStart)) != 0)
        {
            return false;
        }
    }
    else if (charBmp.GetColumn(0) != unknownBmp.GetColumn(xStart))
    {
        return false;
    }

    // If the previous character's last column coincides with this character's first column,
    // whatever is left after removing this character's pixels must be exactly the right-most
    // column of the previous character.
    if (prevCharBmp != null && xStartPrevChar >= 0 && ((xStartPrevChar + prevCharBmp.Width - 1) == xStart))
    {
        if ((unknownBmp.GetColumn(xStart) & ~charBmp.GetColumn(0)) != prevCharBmp.GetColumn(prevCharBmp.Width - 1))
        {
            return false;
        }
    }

    // A single-pixel-wide character that cannot have an adjacent neighbour must be followed
    // by a blank column. Columns beyond the bitmap edge count as blank.
    if (!overlapLeft && charBmp.Width == 1 && !charBmp.CanAdjacentRight)
    {
        if ((xStart + 1) < unknownBmp.Width && unknownBmp.GetColumn(xStart + 1) != 0)
        {
            return false; // Something adjacent when it is not allowed so not a match
        }
    }

    // ---- Middle columns: must match exactly ----
    for (int x = 1; (x < charBmp.Width - 1) && ((xStart + x) < unknownBmp.Width); x++)
    {
        if (charBmp.GetColumn(x) != unknownBmp.GetColumn(xStart + x))
        {
            return false;
        }
    }

    // ---- Right column ----
    if (charBmp.Width > 1)
    {
        int xRight = xStart + charBmp.Width - 1;
        if (xRight >= unknownBmp.Width)
        {
            return false; // Character would run past the right edge of the bitmap
        }

        bool overlapRight = charBmp.CanOverlap && charBmp.CanAdjacentRight
            && (matchType == MatchType.OverlapBothSides || matchType == MatchType.OverlapRightSideOnly);

        if (overlapRight)
        {
            // Check for overlapped match on right side: extra pixels are allowed.
            if ((charBmp.GetColumn(charBmp.Width - 1) & ~unknownBmp.GetColumn(xRight)) != 0)
            {
                return false;
            }
        }
        else
        {
            if (charBmp.GetColumn(charBmp.Width - 1) != unknownBmp.GetColumn(xRight))
            {
                return false;
            }

            // Must have a blank column to the right of this character; the bitmap edge
            // counts as blank.
            if (!charBmp.CanAdjacentRight && (xRight + 1) < unknownBmp.Width && unknownBmp.GetColumn(xRight + 1) != 0)
            {
                return false;
            }
        }
    }

    // Found match
    return true;
}