/// <summary>
/// Recognizes the text of one subtitle image via nOCR and then runs the result through
/// the hard-coded nOCR fixes and (when a dictionary is loaded) the OCR fix engine.
/// </summary>
/// <param name="bitmap">Subtitle image to recognize.</param>
/// <param name="listViewIndex">
/// Index of the subtitle line. Used to look up <c>_nocrThreadResults</c>, to tag new
/// compare additions and to color the matching row in the subtitle list view.
/// </param>
/// <returns>
/// The recognized (and possibly corrected) text, or an empty string when the user aborts
/// from the character dialog or the OCR fix engine reports an abort.
/// </returns>
private string OcrViaNOCR(Bitmap bitmap, int listViewIndex)
{
    if (_ocrFixEngine == null)
    {
        comboBoxDictionaries_SelectedIndexChanged(null, null);
    }

    // Use the entry from _nocrThreadResults when one exists; otherwise recognize interactively below.
    string line = string.Empty;
    if (_nocrThreadResults != null)
    {
        line = _nocrThreadResults[listViewIndex];
    }

    if (string.IsNullOrEmpty(line))
    {
        var nbmpInput = new NikseBitmap(bitmap);
        var matches = new List<CompareMatch>();

        // Minimum line height: last known lowercase height minus 3, never below 5.
        int minLineHeight = _binOcrLastLowercaseHeight - 3;
        if (minLineHeight < 5)
        {
            minLineHeight = _nocrLastLowercaseHeight - 3;
        }

        if (minLineHeight < 5)
        {
            minLineHeight = 5;
        }

        List<ImageSplitterItem> list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmpInput, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);

        foreach (ImageSplitterItem item in list)
        {
            if (item.NikseBitmap != null)
            {
                item.NikseBitmap.ReplaceTransparentWith(Color.Black);
            }
        }

        int index = 0;
        bool expandSelection = false;
        bool shrinkSelection = false;
        var expandSelectionList = new List<ImageSplitterItem>();
        while (index < list.Count)
        {
            ImageSplitterItem item = list[index];
            if (expandSelection || shrinkSelection)
            {
                // The user asked (in the previous pass) to grow or shrink the selection shown in the dialog.
                expandSelection = false;
                if (shrinkSelection && index > 0)
                {
                    shrinkSelection = false;
                }
                else if (index + 1 < list.Count && list[index + 1].NikseBitmap != null)
                {
                    // only allow expand to EndOfLine or space
                    index++;
                    expandSelectionList.Add(list[index]);
                }

                item = GetExpandedSelection(nbmpInput, expandSelectionList, checkBoxRightToLeft.Checked);
                if (item.NikseBitmap != null)
                {
                    item.NikseBitmap.ReplaceTransparentWith(Color.Black);
                }

                _vobSubOcrNOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, expandSelectionList.Count > 1, null, _lastAdditions, this);
                DialogResult result = _vobSubOcrNOcrCharacter.ShowDialog(this);
                _manualOcrDialogPosition = _vobSubOcrNOcrCharacter.FormPosition;
                if (result == DialogResult.OK && _vobSubOcrNOcrCharacter.ShrinkSelection)
                {
                    shrinkSelection = true;
                    index--;
                    if (expandSelectionList.Count > 0)
                    {
                        expandSelectionList.RemoveAt(expandSelectionList.Count - 1);
                    }
                }
                else if (result == DialogResult.OK && _vobSubOcrNOcrCharacter.ExpandSelection)
                {
                    expandSelection = true;
                }
                else if (result == DialogResult.OK)
                {
                    // Store the new nOCR character and remember it as an addition for this line.
                    var c = _vobSubOcrNOcrCharacter.NOcrChar;
                    if (expandSelectionList.Count > 1)
                    {
                        c.ExpandCount = expandSelectionList.Count;
                    }

                    _nOcrDb.Add(c);
                    SaveNOcrWithCurrentLanguage();
                    string text = _vobSubOcrNOcrCharacter.NOcrChar.Text;
                    string name = SaveCompareItem(item.NikseBitmap, text, _vobSubOcrNOcrCharacter.IsItalic, expandSelectionList.Count);
                    var addition = new ImageCompareAddition(name, text, item.NikseBitmap, _vobSubOcrNOcrCharacter.IsItalic, listViewIndex);
                    _lastAdditions.Add(addition);
                    matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, expandSelectionList.Count, null));
                    expandSelectionList = new List<ImageSplitterItem>();
                }
                else if (result == DialogResult.Abort)
                {
                    _abort = true;
                }
                else
                {
                    // Any other dialog result: record a "*" placeholder for this item.
                    matches.Add(new CompareMatch("*", false, 0, null));
                }

                _italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic;
            }
            else if (item.NikseBitmap == null)
            {
                // Items without a bitmap carry a special character instead of an image.
                matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null));
            }
            else
            {
                CompareMatch match = GetNOcrCompareMatchNew(item, nbmpInput, _nOcrDb, _unItalicFactor, checkBoxNOcrItalic.Checked, !checkBoxNOcrCorrect.Checked);
                if (match == null)
                {
                    // No nOCR match: ask the user via the character dialog.
                    _vobSubOcrNOcrCharacter.Initialize(bitmap, item, _manualOcrDialogPosition, _italicCheckedLast, false, null, _lastAdditions, this);
                    DialogResult result = _vobSubOcrNOcrCharacter.ShowDialog(this);
                    _manualOcrDialogPosition = _vobSubOcrNOcrCharacter.FormPosition;
                    if (result == DialogResult.OK && _vobSubOcrNOcrCharacter.ExpandSelection)
                    {
                        expandSelectionList.Add(item);
                        expandSelection = true;
                    }
                    else if (result == DialogResult.OK)
                    {
                        _nOcrDb.Add(_vobSubOcrNOcrCharacter.NOcrChar);
                        SaveNOcrWithCurrentLanguage();
                        string text = _vobSubOcrNOcrCharacter.NOcrChar.Text;
                        string name = SaveCompareItem(item.NikseBitmap, text, _vobSubOcrNOcrCharacter.IsItalic, 0);
                        var addition = new ImageCompareAddition(name, text, item.NikseBitmap, _vobSubOcrNOcrCharacter.IsItalic, listViewIndex);
                        _lastAdditions.Add(addition);
                        matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null));
                    }
                    else if (result == DialogResult.Abort)
                    {
                        _abort = true;
                    }
                    else
                    {
                        matches.Add(new CompareMatch("*", false, 0, null));
                    }

                    _italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic;
                }
                else
                {
                    // found image match
                    matches.Add(new CompareMatch(match.Text, match.Italic, 0, null));
                    if (match.ExpandCount > 0)
                    {
                        // A match with ExpandCount covers several split items; skip the extra ones.
                        index += match.ExpandCount - 1;
                    }
                }
            }

            if (_abort)
            {
                return string.Empty;
            }

            // Stay on the current position while an expand/shrink request is pending.
            if (!expandSelection && !shrinkSelection)
            {
                index++;
            }

            if (shrinkSelection && expandSelectionList.Count < 2)
            {
                // Fewer than two items left in the selection: drop back to normal processing.
                shrinkSelection = false;
                expandSelectionList = new List<ImageSplitterItem>();
            }
        }

        line = GetStringWithItalicTags(matches);
    }

    line = FixNocrHardcodedStuff(line);

    // OCR fix engine
    string textWithOutFixes = line;

    // Guard against a missing engine, as SplitAndOcrBitmapNormal does, instead of dereferencing null.
    if (_ocrFixEngine != null && _ocrFixEngine.IsDictionaryLoaded)
    {
        if (checkBoxAutoFixCommonErrors.Checked)
        {
            line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, checkBoxGuessUnknownWords.Checked);
        }

        int correctWords;
        int wordsNotFound = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords);

        if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes != null && textWithOutFixes.Replace("~", string.Empty).Trim().Length == 0)
        {
            _ocrFixEngine.AutoGuessesUsed.Clear();
            _ocrFixEngine.UnknownWordsFound.Clear();
            line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, checkBoxGuessUnknownWords.Checked);
        }

        if (_ocrFixEngine.Abort)
        {
            ButtonStopClick(null, null);
            _ocrFixEngine.Abort = false;
            return string.Empty;
        }

        // Log used word guesses (via word replace list)
        foreach (string guess in _ocrFixEngine.AutoGuessesUsed)
        {
            listBoxLogSuggestions.Items.Add(guess);
        }

        _ocrFixEngine.AutoGuessesUsed.Clear();

        // Log unknown words guess (found via spelling dictionaries)
        LogUnknownWords();

        // Color the row by number of unknown words. This must be a single if/else chain:
        // previously the ">= 3" case was a separate "if", so Red was immediately overwritten.
        if (wordsNotFound >= 3)
        {
            subtitleListView1.SetBackgroundColor(listViewIndex, Color.Red);
        }
        else if (wordsNotFound == 2)
        {
            subtitleListView1.SetBackgroundColor(listViewIndex, Color.Orange);
        }
        else if (wordsNotFound == 1)
        {
            subtitleListView1.SetBackgroundColor(listViewIndex, Color.Yellow);
        }
        else if (line.Trim().Length == 0)
        {
            subtitleListView1.SetBackgroundColor(listViewIndex, Color.Orange);
        }
        else
        {
            subtitleListView1.SetBackgroundColor(listViewIndex, Color.LightGreen);
        }
    }

    // Count and log the line when the fix engine changed the text.
    if (textWithOutFixes.Trim() != line.Trim())
    {
        _tesseractOcrAutoFixes++;
        labelFixesMade.Text = string.Format(" - {0}", _tesseractOcrAutoFixes);
        LogOcrFix(listViewIndex, textWithOutFixes, line);
    }

    return line;
}
/// <summary>
/// OCR via image compare: splits the subtitle image into letters, resolves each letter
/// against the compare database (asking the user when no match is found) and then
/// post-processes the text with the OCR fix engine.
/// </summary>
/// <param name="bitmap">Subtitle image to recognize.</param>
/// <param name="listViewIndex">
/// Index of the subtitle line. Used to look up <c>_icThreadResults</c>, to tag new
/// compare additions and when logging/coloring the line.
/// </param>
/// <returns>
/// The recognized (and possibly corrected) text, or an empty string when the user aborts
/// from the character dialog or the OCR fix engine reports an abort.
/// </returns>
private string SplitAndOcrBitmapNormal(Bitmap bitmap, int listViewIndex)
{
    if (_ocrFixEngine == null)
    {
        LoadOcrFixEngine(null, LanguageString);
    }

    // A non-empty entry in _icThreadResults is used as-is instead of recognizing the image here.
    string precomputed = null;
    if (_icThreadResults != null)
    {
        string cached = _icThreadResults[listViewIndex];
        if (!string.IsNullOrEmpty(cached))
        {
            precomputed = cached;
        }
    }

    string line;
    if (precomputed != null)
    {
        line = precomputed;
    }
    else
    {
        var recognized = new List<CompareMatch>();
        var source = new NikseBitmap(bitmap);
        List<ImageSplitterItem> letters = NikseBitmapImageSplitter.SplitBitmapToLetters(source, (int)numericUpDownPixelsIsSpace.Value, checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom);

        var selection = new List<ImageSplitterItem>();
        bool expanding = false;
        bool shrinking = false;
        int cursor = 0;

        while (cursor < letters.Count)
        {
            ImageSplitterItem current = letters[cursor];

            if (expanding || shrinking)
            {
                // A grow/shrink request from the previous pass is pending: rebuild the selection and ask again.
                expanding = false;
                if (shrinking && cursor > 0)
                {
                    shrinking = false;
                }
                else if (cursor + 1 < letters.Count && letters[cursor + 1].NikseBitmap != null)
                {
                    // only allow expand to EndOfLine or space
                    cursor++;
                    selection.Add(letters[cursor]);
                }

                current = GetExpandedSelection(source, selection, checkBoxRightToLeft.Checked);

                _vobSubOcrCharacter.Initialize(bitmap, current, _manualOcrDialogPosition, _italicCheckedLast, selection.Count > 1, null, _lastAdditions, this);
                DialogResult answer = _vobSubOcrCharacter.ShowDialog(this);
                _manualOcrDialogPosition = _vobSubOcrCharacter.FormPosition;

                if (answer == DialogResult.OK)
                {
                    if (_vobSubOcrCharacter.ShrinkSelection)
                    {
                        shrinking = true;
                        cursor--;
                        if (selection.Count > 0)
                        {
                            selection.RemoveAt(selection.Count - 1);
                        }
                    }
                    else if (_vobSubOcrCharacter.ExpandSelection)
                    {
                        expanding = true;
                    }
                    else
                    {
                        // Accepted: save the compare image and remember it as an addition for this line.
                        string typed = _vobSubOcrCharacter.ManualRecognizedCharacters;
                        string savedName = SaveCompareItem(current.NikseBitmap, typed, _vobSubOcrCharacter.IsItalic, selection.Count);
                        _lastAdditions.Add(new ImageCompareAddition(savedName, typed, current.NikseBitmap, _vobSubOcrCharacter.IsItalic, listViewIndex));
                        recognized.Add(new CompareMatch(typed, _vobSubOcrCharacter.IsItalic, selection.Count, null));
                        selection = new List<ImageSplitterItem>();
                    }
                }
                else if (answer == DialogResult.Abort)
                {
                    _abort = true;
                }
                else
                {
                    // Any other dialog result: record a "*" placeholder.
                    recognized.Add(new CompareMatch("*", false, 0, null));
                }

                _italicCheckedLast = _vobSubOcrCharacter.IsItalic;
            }
            else if (current.NikseBitmap == null)
            {
                // Items without a bitmap carry a special character instead of an image.
                recognized.Add(new CompareMatch(current.SpecialCharacter, false, 0, null));
            }
            else
            {
                CompareMatch closest;
                CompareMatch hit = GetCompareMatch(current, source, out closest, letters, cursor);

                if (hit != null)
                {
                    // found image match
                    recognized.Add(new CompareMatch(hit.Text, hit.Italic, 0, null));
                    if (hit.ExpandCount > 0)
                    {
                        // A match with ExpandCount covers several split items; skip the extra ones.
                        cursor += hit.ExpandCount - 1;
                    }
                }
                else
                {
                    // No match: ask the user, offering the best guess from the compare step.
                    _vobSubOcrCharacter.Initialize(bitmap, current, _manualOcrDialogPosition, _italicCheckedLast, false, closest, _lastAdditions, this);
                    DialogResult answer = _vobSubOcrCharacter.ShowDialog(this);
                    _manualOcrDialogPosition = _vobSubOcrCharacter.FormPosition;

                    if (answer == DialogResult.OK)
                    {
                        if (_vobSubOcrCharacter.ExpandSelection)
                        {
                            selection.Add(current);
                            expanding = true;
                        }
                        else
                        {
                            string typed = _vobSubOcrCharacter.ManualRecognizedCharacters;
                            string savedName = SaveCompareItem(current.NikseBitmap, typed, _vobSubOcrCharacter.IsItalic, 0);
                            _lastAdditions.Add(new ImageCompareAddition(savedName, typed, current.NikseBitmap, _vobSubOcrCharacter.IsItalic, listViewIndex));
                            recognized.Add(new CompareMatch(typed, _vobSubOcrCharacter.IsItalic, 0, null));
                        }
                    }
                    else if (answer == DialogResult.Abort)
                    {
                        _abort = true;
                    }
                    else
                    {
                        recognized.Add(new CompareMatch("*", false, 0, null));
                    }

                    _italicCheckedLast = _vobSubOcrCharacter.IsItalic;
                }
            }

            if (_abort)
            {
                return string.Empty;
            }

            // Only advance when no expand/shrink request is pending.
            if (!(expanding || shrinking))
            {
                cursor++;
            }

            if (shrinking && selection.Count < 2)
            {
                // Fewer than two items left in the selection: drop back to normal processing.
                shrinking = false;
                selection = new List<ImageSplitterItem>();
            }
        }

        line = GetStringWithItalicTags(recognized);
    }

    if (checkBoxAutoFixCommonErrors.Checked && _ocrFixEngine != null)
    {
        line = _ocrFixEngine.FixOcrErrorsViaHardcodedRules(line, _lastLine, null); // TODO: Add abbreviations list
    }

    if (checkBoxRightToLeft.Checked)
    {
        line = ReverseNumberStrings(line);
    }

    // Snapshot of the text before the dictionary-based fixes, used to detect changes afterwards.
    string beforeFixes = line;

    // The dictionary-based fixes only run when an engine exists and has a dictionary loaded.
    if (_ocrFixEngine != null && _ocrFixEngine.IsDictionaryLoaded)
    {
        if (checkBoxAutoFixCommonErrors.Checked)
        {
            line = _ocrFixEngine.FixOcrErrors(line, listViewIndex, _lastLine, true, GetAutoGuessLevel());
        }

        int knownWords;
        int unknownWords = _ocrFixEngine.CountUnknownWordsViaDictionary(line, out knownWords);

        bool needsGuessOrPrompt = unknownWords > 0
                                  || knownWords == 0
                                  || (beforeFixes != null && string.IsNullOrWhiteSpace(beforeFixes.Replace("~", string.Empty)));
        if (needsGuessOrPrompt)
        {
            _ocrFixEngine.AutoGuessesUsed.Clear();
            _ocrFixEngine.UnknownWordsFound.Clear();
            line = _ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out unknownWords, line, listViewIndex, bitmap, checkBoxAutoFixCommonErrors.Checked, checkBoxPromptForUnknownWords.Checked, true, GetAutoGuessLevel());
        }

        if (_ocrFixEngine.Abort)
        {
            ButtonStopClick(null, null);
            _ocrFixEngine.Abort = false;
            return string.Empty;
        }

        // Log used word guesses (via word replace list)
        foreach (string usedGuess in _ocrFixEngine.AutoGuessesUsed)
        {
            listBoxLogSuggestions.Items.Add(usedGuess);
        }

        _ocrFixEngine.AutoGuessesUsed.Clear();

        // Log unknown words guess (found via spelling dictionaries)
        LogUnknownWords();

        ColorLineByNumberOfUnknownWords(listViewIndex, unknownWords, line);
    }

    // Count and log the line when the fix engine changed the text.
    if (beforeFixes.Trim() != line.Trim())
    {
        _tesseractOcrAutoFixes++;
        labelFixesMade.Text = string.Format(" - {0}", _tesseractOcrAutoFixes);
        LogOcrFix(listViewIndex, beforeFixes, line);
    }

    return line;
}
/// <summary>
/// OCR via image compare: splits the subtitle image into letters, resolves each letter
/// via <c>GetCompareMatchNew</c> (falling back to nOCR when the nOCR database has
/// characters, and finally to the manual character dialog), then post-processes the text
/// with the OCR fix engine.
/// </summary>
/// <param name="bitmap">Subtitle image to recognize.</param>
/// <param name="listViewIndex">
/// Index of the subtitle line. Used to tag new compare additions, when logging fixes and
/// to color the matching row in the subtitle list view.
/// </param>
/// <returns>
/// The recognized (and possibly corrected) text, or an empty string when the user aborts
/// from the character dialog or the OCR fix engine reports an abort.
/// </returns>
private string SplitAndOcrBitmapNormalNew(Bitmap bitmap, int listViewIndex)
{
    if (this._ocrFixEngine == null)
    {
        this.LoadOcrFixEngine(null, this.LanguageString);
    }

    var matches = new List<CompareMatch>();
    var parentBitmap = new NikseBitmap(bitmap);

    // Minimum line height: derived from the last known lowercase heights, with 6 as the final fallback.
    int minLineHeight = this._binOcrLastLowercaseHeight - 3;
    if (minLineHeight < 5)
    {
        minLineHeight = this._nocrLastLowercaseHeight;
    }

    if (minLineHeight < 5)
    {
        minLineHeight = 6;
    }

    List<ImageSplitterItem> list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(parentBitmap, (int)this.numericUpDownPixelsIsSpace.Value, this.checkBoxRightToLeft.Checked, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);

    int index = 0;
    bool expandSelection = false;
    bool shrinkSelection = false;
    var expandSelectionList = new List<ImageSplitterItem>();
    while (index < list.Count)
    {
        ImageSplitterItem item = list[index];
        if (expandSelection || shrinkSelection)
        {
            // The user asked (in the previous pass) to grow or shrink the selection shown in the dialog.
            expandSelection = false;
            if (shrinkSelection && index > 0)
            {
                shrinkSelection = false;
            }
            else if (index + 1 < list.Count && list[index + 1].NikseBitmap != null)
            {
                // only allow expand to EndOfLine or space
                index++;
                expandSelectionList.Add(list[index]);
            }

            item = GetExpandedSelectionNew(parentBitmap, expandSelectionList);

            this._vobSubOcrCharacter.Initialize(bitmap, item, this._manualOcrDialogPosition, this._italicCheckedLast, expandSelectionList.Count > 1, null, this._lastAdditions, this);
            DialogResult result = this._vobSubOcrCharacter.ShowDialog(this);
            this._manualOcrDialogPosition = this._vobSubOcrCharacter.FormPosition;
            if (result == DialogResult.OK && this._vobSubOcrCharacter.ShrinkSelection)
            {
                shrinkSelection = true;
                index--;
                if (expandSelectionList.Count > 0)
                {
                    expandSelectionList.RemoveAt(expandSelectionList.Count - 1);
                }
            }
            else if (result == DialogResult.OK && this._vobSubOcrCharacter.ExpandSelection)
            {
                expandSelection = true;
            }
            else if (result == DialogResult.OK)
            {
                // Accepted: save the compare image and remember it as an addition for this line.
                string text = this._vobSubOcrCharacter.ManualRecognizedCharacters;
                string name = this.SaveCompareItemNew(item, text, this._vobSubOcrCharacter.IsItalic, expandSelectionList);
                var addition = new ImageCompareAddition(name, text, item.NikseBitmap, this._vobSubOcrCharacter.IsItalic, listViewIndex);
                this._lastAdditions.Add(addition);
                matches.Add(new CompareMatch(text, this._vobSubOcrCharacter.IsItalic, expandSelectionList.Count, null));
                expandSelectionList = new List<ImageSplitterItem>();
            }
            else if (result == DialogResult.Abort)
            {
                this._abort = true;
            }
            else
            {
                // Any other dialog result: record a "*" placeholder for this item.
                matches.Add(new CompareMatch("*", false, 0, null));
            }

            this._italicCheckedLast = this._vobSubOcrCharacter.IsItalic;
        }
        else if (item.NikseBitmap == null)
        {
            // Items without a bitmap carry a special character instead of an image.
            matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null));
        }
        else
        {
            CompareMatch bestGuess;
            CompareMatch match = this.GetCompareMatchNew(item, out bestGuess, list, index);
            if (match == null)
            {
                // Try line OCR if no image compare match
                if (this._nOcrDb != null && this._nOcrDb.OcrCharacters.Count > 0)
                {
                    match = this.GetNOcrCompareMatchNew(item, parentBitmap, this._nOcrDb, true, true);
                }
            }

            if (match == null)
            {
                // Still nothing: ask the user, offering the best guess from the compare step.
                this._vobSubOcrCharacter.Initialize(bitmap, item, this._manualOcrDialogPosition, this._italicCheckedLast, false, bestGuess, this._lastAdditions, this);
                DialogResult result = this._vobSubOcrCharacter.ShowDialog(this);
                this._manualOcrDialogPosition = this._vobSubOcrCharacter.FormPosition;
                if (result == DialogResult.OK && this._vobSubOcrCharacter.ExpandSelection)
                {
                    expandSelectionList.Add(item);
                    expandSelection = true;
                }
                else if (result == DialogResult.OK)
                {
                    string text = this._vobSubOcrCharacter.ManualRecognizedCharacters;
                    string name = this.SaveCompareItemNew(item, text, this._vobSubOcrCharacter.IsItalic, null);
                    var addition = new ImageCompareAddition(name, text, item.NikseBitmap, this._vobSubOcrCharacter.IsItalic, listViewIndex);
                    this._lastAdditions.Add(addition);
                    matches.Add(new CompareMatch(text, this._vobSubOcrCharacter.IsItalic, 0, null));
                    this.SetBinOcrLowercaseUppercase(item.NikseBitmap.Height, text);
                }
                else if (result == DialogResult.Abort)
                {
                    this._abort = true;
                }
                else
                {
                    matches.Add(new CompareMatch("*", false, 0, null));
                }

                this._italicCheckedLast = this._vobSubOcrCharacter.IsItalic;
            }
            else
            {
                // found image match
                matches.Add(new CompareMatch(match.Text, match.Italic, 0, null));
                if (match.ExpandCount > 0)
                {
                    // A match with ExpandCount covers several split items; skip the extra ones.
                    index += match.ExpandCount - 1;
                }
            }
        }

        if (this._abort)
        {
            return string.Empty;
        }

        // Stay on the current position while an expand/shrink request is pending.
        if (!expandSelection && !shrinkSelection)
        {
            index++;
        }

        if (shrinkSelection && expandSelectionList.Count < 2)
        {
            // Fewer than two items left in the selection: drop back to normal processing.
            shrinkSelection = false;
            expandSelectionList = new List<ImageSplitterItem>();
        }
    }

    string line = GetStringWithItalicTags(matches);
    if (this.checkBoxAutoFixCommonErrors.Checked && this._ocrFixEngine != null)
    {
        line = this._ocrFixEngine.FixOcrErrorsViaHardcodedRules(line, this._lastLine, null); // TODO: Add abbreviations list
    }

    if (this.checkBoxRightToLeft.Checked)
    {
        line = ReverseNumberStrings(line);
    }

    // OCR fix engine
    string textWithOutFixes = line;

    // The engine is null-checked above for the hard-coded rules; apply the same guard here
    // instead of dereferencing a possibly missing engine.
    if (this._ocrFixEngine != null && this._ocrFixEngine.IsDictionaryLoaded)
    {
        var autoGuessLevel = OcrFixEngine.AutoGuessLevel.None;
        if (this.checkBoxGuessUnknownWords.Checked)
        {
            autoGuessLevel = OcrFixEngine.AutoGuessLevel.Aggressive;
        }

        if (this.checkBoxAutoFixCommonErrors.Checked)
        {
            line = this._ocrFixEngine.FixOcrErrors(line, listViewIndex, this._lastLine, true, autoGuessLevel);
        }

        int correctWords;
        int wordsNotFound = this._ocrFixEngine.CountUnknownWordsViaDictionary(line, out correctWords);

        if (wordsNotFound > 0 || correctWords == 0 || textWithOutFixes != null && string.IsNullOrWhiteSpace(textWithOutFixes.Replace("~", string.Empty)))
        {
            this._ocrFixEngine.AutoGuessesUsed.Clear();
            this._ocrFixEngine.UnknownWordsFound.Clear();
            line = this._ocrFixEngine.FixUnknownWordsViaGuessOrPrompt(out wordsNotFound, line, listViewIndex, bitmap, this.checkBoxAutoFixCommonErrors.Checked, this.checkBoxPromptForUnknownWords.Checked, true, autoGuessLevel);
        }

        if (this._ocrFixEngine.Abort)
        {
            this.ButtonStopClick(null, null);
            this._ocrFixEngine.Abort = false;
            return string.Empty;
        }

        // Log used word guesses (via word replace list)
        foreach (string guess in this._ocrFixEngine.AutoGuessesUsed)
        {
            this.listBoxLogSuggestions.Items.Add(guess);
        }

        this._ocrFixEngine.AutoGuessesUsed.Clear();

        // Log unknown words guess (found via spelling dictionaries)
        this.LogUnknownWords();

        // Color the row by number of unknown words. This must be a single if/else chain:
        // previously the ">= 3" case was a separate "if", so Red was immediately overwritten.
        if (wordsNotFound >= 3)
        {
            this.subtitleListView1.SetBackgroundColor(listViewIndex, Color.Red);
        }
        else if (wordsNotFound == 2)
        {
            this.subtitleListView1.SetBackgroundColor(listViewIndex, Color.Orange);
        }
        else if (wordsNotFound == 1)
        {
            this.subtitleListView1.SetBackgroundColor(listViewIndex, Color.Yellow);
        }
        else if (string.IsNullOrWhiteSpace(line))
        {
            this.subtitleListView1.SetBackgroundColor(listViewIndex, Color.Orange);
        }
        else
        {
            this.subtitleListView1.SetBackgroundColor(listViewIndex, Color.LightGreen);
        }
    }

    // Count and log the line when the fix engine changed the text.
    if (textWithOutFixes.Trim() != line.Trim())
    {
        this._tesseractOcrAutoFixes++;
        this.labelFixesMade.Text = string.Format(" - {0}", this._tesseractOcrAutoFixes);
        this.LogOcrFix(listViewIndex, textWithOutFixes, line);
    }

    return line;
}