/// <summary>
/// Click handler for the "start OCR" button. Stores the right-to-left setting, locks the
/// dialog buttons (only the stop button stays enabled), prepares the resources needed by
/// the selected OCR method, runs the first main OCR tick synchronously and, for image
/// compare, launches background workers afterwards.
/// </summary>
/// <param name="sender">Event source; not read by this handler.</param>
/// <param name="e">Event data; not read by this handler.</param>
private void ButtonStartOcrClick(object sender, EventArgs e)
{
    Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked;
    _lastLine = null;

    // Lock the dialog while OCR runs; "stop" is the only button left enabled.
    buttonOK.Enabled = false;
    buttonCancel.Enabled = false;
    buttonStartOcr.Enabled = false;
    buttonStop.Enabled = true;
    buttonNewCharacterDatabase.Enabled = false;
    buttonEditCharacterDatabase.Enabled = false;

    _fromMenuItem = false;
    _abort = false;
    _autoBreakLines = checkBoxAutoBreakLines.Checked;
    listBoxUnknownWords.Items.Clear();

    int subtitleCount = GetSubtitleCount();

    if (_ocrMethodIndex == _ocrMethodTesseract && _tesseractAsyncStrings == null)
    {
        _nOcrDb = null;
        _tesseractAsyncStrings = new string[subtitleCount];

        // The async Tesseract worker starts five subtitles after the chosen start number.
        _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5;

        _tesseractThread = new BackgroundWorker();
        _tesseractThread.DoWork += TesseractThreadDoWork;
        _tesseractThread.RunWorkerCompleted += TesseractThreadRunWorkerCompleted;
        _tesseractThread.WorkerSupportsCancellation = true;

        bool asyncIndexInRange = _tesseractAsyncIndex >= 0 && _tesseractAsyncIndex < subtitleCount;
        if (asyncIndexInRange)
        {
            _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex));
        }
    }
    else if (_ocrMethodIndex == _ocrMethodImageCompare)
    {
        if (_compareBitmaps == null)
        {
            LoadImageCompareBitmaps();
        }

        _numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value;
    }
    else if (_ocrMethodIndex == _ocrMethodNocr)
    {
        if (_nOcrDb == null)
        {
            LoadNOcrWithCurrentLanguage();
        }

        // Still nothing after the load attempt: report it, re-enable the buttons and give up.
        if (_nOcrDb == null)
        {
            MessageBox.Show("Fatal - No NOCR dictionary loaded!");
            SetButtonsEnabledAfterOcrDone();
            return;
        }

        _nocrThreadsStop = false;
        _nocrThreadResults = new string[_subtitle.Paragraphs.Count];

        // One worker per processor minus one, but always fewer workers than subtitles.
        int nocrWorkerCount = Math.Min(Environment.ProcessorCount - 1, subtitleCount - 1);
        int nocrFirstIndex = (int)numericUpDownStartNumber.Value + 5;

        if (nocrWorkerCount >= 1 && subtitleCount > 5)
        {
            // Find letter size (uppercase/lowercase): probe at most the first six subtitles
            // until both heights are known.
            for (int probeIndex = 0;
                 probeIndex < 6 && (_nocrLastLowercaseHeight == -1 || _nocrLastUppercaseHeight == -1);
                 probeIndex++)
            {
                NOCRIntialize(GetSubtitleBitmap(probeIndex));
            }

            for (int offset = 0; offset < nocrWorkerCount; offset++)
            {
                int subtitleIndex = nocrFirstIndex + offset;
                if (subtitleIndex >= subtitleCount)
                {
                    continue;
                }

                var worker = new BackgroundWorker();
                var parameter = new NOcrThreadParameter(
                    GetSubtitleBitmap(subtitleIndex),
                    subtitleIndex,
                    _nOcrDb.OcrCharacters,
                    worker,
                    nocrWorkerCount,
                    _unItalicFactor,
                    checkBoxNOcrItalic.Checked,
                    (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value,
                    checkBoxRightToLeft.Checked);
                parameter.NOcrLastLowercaseHeight = _nocrLastLowercaseHeight;
                parameter.NOcrLastUppercaseHeight = _nocrLastUppercaseHeight;

                worker.DoWork += NOcrThreadDoWork;
                worker.RunWorkerCompleted += NOcrThreadRunWorkerCompleted;
                worker.RunWorkerAsync(parameter);
            }
        }
    }
    else if (_ocrMethodIndex == _ocrMethodBinaryImageCompare)
    {
        if (_binaryOcrDb == null)
        {
            _binaryOcrDbFileName = Configuration.OcrFolder + "Latin.db";
            _binaryOcrDb = new BinaryOcrDb(_binaryOcrDbFileName, true);
        }

        // The nOCR database is opened from the file next to the binary one (".db" -> ".nocr").
        _nOcrDb = new NOcrDb(_binaryOcrDb.FileName.Replace(".db", ".nocr"));
        checkBoxNOcrCorrect.Checked = true;
        _numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value;
    }

    progressBar1.Maximum = subtitleCount;
    progressBar1.Value = 0;
    progressBar1.Visible = true;

    _mainOcrTimerMax = subtitleCount;
    _mainOcrIndex = (int)numericUpDownStartNumber.Value - 1;
    _mainOcrTimer = new Timer();
    _mainOcrTimer.Tick += mainOcrTimer_Tick;
    _mainOcrTimer.Interval = 5;
    _mainOcrRunning = true;
    subtitleListView1.MultiSelect = false;

    // The first tick is invoked directly instead of waiting for the timer.
    mainOcrTimer_Tick(null, null);

    if (_ocrMethodIndex != _ocrMethodImageCompare)
    {
        return;
    }

    _icThreadsStop = false;
    _icThreadResults = new string[_subtitle.Paragraphs.Count];

    // Start from processor count minus two (-1 or -2?), keep it below the subtitle count,
    // then cap at two workers.
    // Threading is not really good - subtitle picture creation should probably be threaded also/instead
    int compareWorkerCount = Math.Min(Environment.ProcessorCount - 2, subtitleCount - 1);
    int compareFirstIndex = (int)numericUpDownStartNumber.Value + 5;
    compareWorkerCount = Math.Min(compareWorkerCount, 2);

    for (int offset = 0; offset < compareWorkerCount; offset++)
    {
        int subtitleIndex = compareFirstIndex + offset;
        if (subtitleIndex >= subtitleCount)
        {
            continue;
        }

        Application.DoEvents();

        var worker = new BackgroundWorker();
        var parameter = new ImageCompareThreadParameter(
            GetSubtitleBitmap(subtitleIndex),
            subtitleIndex,
            _compareBitmaps,
            worker,
            compareWorkerCount,
            (int)numericUpDownPixelsIsSpace.Value,
            checkBoxRightToLeft.Checked,
            (float)numericUpDownMaxErrorPct.Value);

        worker.DoWork += ImageCompareThreadDoWork;
        worker.RunWorkerCompleted += ImageCompareThreadRunWorkerCompleted;
        worker.RunWorkerAsync(parameter);
    }
}
/// <summary>
/// Runs nOCR on an un-italicized copy of the picture in <paramref name="p"/> and returns
/// the recognized text wrapped in italic tags.
/// </summary>
/// <param name="p">
/// Thread parameter supplying the picture, the un-italic factor, the last known
/// lowercase height, the space width in pixels and the right-to-left flag.
/// <c>AdvancedItalicDetection</c> is switched off for each letter lookup and restored afterwards.
/// </param>
/// <returns>
/// The recognized text with inner HTML tags removed, enclosed in <c>&lt;i&gt;</c> and
/// <c>&lt;/i&gt;</c>; an empty string as soon as one letter image has no match.
/// </returns>
static string NocrThreadDoItalicWork(NOcrThreadParameter p)
{
    NikseBitmap nbmp;
    var unItalicedBmp = UnItalic(p.Picture, p.UnItalicFactor);
    try
    {
        nbmp = new NikseBitmap(unItalicedBmp);
    }
    finally
    {
        // Release the intermediate bitmap even if wrapping it fails.
        unItalicedBmp.Dispose();
    }

    // Use a default when no plausible lowercase height has been measured yet.
    // NOTE: the previous code also read NOcrLastUppercaseHeight into a "max line height",
    // but its fallback assigned 80 to minLineHeight by mistake and the maximum itself was
    // never used - so an unknown uppercase height no longer inflates the minimum to 80.
    int minLineHeight = p.NOcrLastLowercaseHeight;
    if (minLineHeight < 10)
    {
        minLineHeight = 22;
    }

    List<ImageSplitterItem> lines = NikseBitmapImageSplitter.SplitVertical(nbmp, minLineHeight);
    List<ImageSplitterItem> list = NikseBitmapImageSplitter.SplitBitmapToLetters(lines, p.NumberOfPixelsIsSpace, p.RightToLeft, Configuration.Settings.VobSubOcr.TopToBottom);

    foreach (ImageSplitterItem item in list)
    {
        if (item.NikseBitmap != null)
        {
            item.NikseBitmap.ReplaceTransparentWith(Color.Black);
        }
    }

    var matches = new List<CompareMatch>();
    int index = 0;
    while (index < list.Count)
    {
        ImageSplitterItem item = list[index];
        if (item.NikseBitmap == null)
        {
            // Items without a bitmap carry a special character instead of a letter image.
            matches.Add(new CompareMatch(item.SpecialCharacter, false, 0, null));
        }
        else
        {
            // Advanced italic detection is disabled for this lookup only (presumably because
            // the picture has already been un-italicized - confirm); try/finally guarantees
            // the caller's setting is restored even when matching throws.
            CompareMatch match;
            bool old = p.AdvancedItalicDetection;
            p.AdvancedItalicDetection = false;
            try
            {
                match = GetNOcrCompareMatch(item, nbmp, p);
            }
            finally
            {
                p.AdvancedItalicDetection = old;
            }

            if (match == null)
            {
                return string.Empty;
            }

            // Found image match.
            matches.Add(new CompareMatch(match.Text, match.Italic, 0, null));
            if (match.ExpandCount > 0)
            {
                // An expanded match covers several items; skip the ones it consumed.
                index += match.ExpandCount - 1;
            }
        }

        index++;
    }

    return "<i>" + Utilities.RemoveHtmlTags(GetStringWithItalicTags(matches)) + "</i>";
}
/// <summary>
/// Looks up the nOCR match for one letter image. An expanded match is tried first; if
/// there is none, the best single match is used, the letter heights stored on
/// <paramref name="p"/> are updated, and the case of some letters is corrected
/// using those heights.
/// </summary>
/// <param name="targetItem">The letter image to recognize.</param>
/// <param name="parentBitmap">The bitmap handed to the expanded-match search.</param>
/// <param name="p">
/// Supplies the nOCR characters, un-italic factor and advanced-italic flag;
/// its <c>NOcrLastLowercaseHeight</c> / <c>NOcrLastUppercaseHeight</c> may be updated.
/// </param>
/// <returns>The match found, or <c>null</c> when neither search yields a result.</returns>
internal static CompareMatch GetNOcrCompareMatch(ImageSplitterItem targetItem, NikseBitmap parentBitmap, NOcrThreadParameter p)
{
    var expanded = NOcrFindExpandedMatch(parentBitmap, targetItem, p.NOcrChars);
    if (expanded != null)
    {
        return new CompareMatch(expanded.Text, expanded.Italic, expanded.ExpandCount, null, expanded);
    }

    bool detectedItalic;
    var best = NOcrFindBestMatch(targetItem, targetItem.Y - targetItem.ParentY, out detectedItalic, p.NOcrChars, p.UnItalicFactor, p.AdvancedItalicDetection, true);
    if (best == null)
    {
        return null;
    }

    // Fix uppercase/lowercase issues (not I/l)
    switch (best.Text)
    {
        // "e" always refreshes the lowercase height; "a" only seeds it while unknown.
        case "e":
            p.NOcrLastLowercaseHeight = targetItem.NikseBitmap.Height;
            break;
        case "a":
            if (p.NOcrLastLowercaseHeight == -1)
            {
                p.NOcrLastLowercaseHeight = targetItem.NikseBitmap.Height;
            }

            break;

        // These capitals always refresh the uppercase height; "M" only seeds it while unknown.
        case "E":
        case "H":
        case "R":
        case "D":
        case "T":
            p.NOcrLastUppercaseHeight = targetItem.NikseBitmap.Height;
            break;
        case "M":
            if (p.NOcrLastUppercaseHeight == -1)
            {
                p.NOcrLastUppercaseHeight = targetItem.NikseBitmap.Height;
            }

            break;

        // Matched as a capital: lower-case it when the image is less than two pixels
        // taller than the known lowercase height.
        case "V":
        case "W":
        case "U":
        case "S":
        case "Z":
        case "O":
        case "X":
        case "Ø":
        case "C":
            if (p.NOcrLastLowercaseHeight > 3 && targetItem.NikseBitmap.Height - p.NOcrLastLowercaseHeight < 2)
            {
                best.Text = best.Text.ToLower();
            }

            break;

        // Matched as a small letter: upper-case it when the image is less than two pixels
        // shorter than the known uppercase height.
        case "v":
        case "w":
        case "u":
        case "s":
        case "z":
        case "o":
        case "x":
        case "ø":
        case "c":
            if (p.NOcrLastUppercaseHeight > 3 && p.NOcrLastUppercaseHeight - targetItem.NikseBitmap.Height < 2)
            {
                best.Text = best.Text.ToUpper();
            }

            break;
    }

    // Italic when the search flagged it, otherwise whatever the matched character says.
    bool isItalic = detectedItalic || best.Italic;
    return new CompareMatch(best.Text, isItalic, 0, null, best);
}
/// <summary>
/// Click handler for the "start OCR" button. Stores the right-to-left setting, locks the
/// dialog buttons (only the stop button stays enabled), prepares the resources needed by
/// the OCR method selected in the combo box, runs the first main OCR tick synchronously
/// and, for method index 1, launches image compare background workers afterwards.
/// </summary>
/// <param name="sender">Event source; not read by this handler.</param>
/// <param name="e">Event data; not read by this handler.</param>
private void ButtonStartOcrClick(object sender, EventArgs e)
{
    Configuration.Settings.VobSubOcr.RightToLeft = this.checkBoxRightToLeft.Checked;
    this._lastLine = null;

    // Lock the dialog while OCR runs; "stop" is the only button left enabled.
    this.buttonOK.Enabled = false;
    this.buttonCancel.Enabled = false;
    this.buttonStartOcr.Enabled = false;
    this.buttonStop.Enabled = true;
    this.buttonNewCharacterDatabase.Enabled = false;
    this.buttonEditCharacterDatabase.Enabled = false;
    this._abort = false;

    int subtitleCount = this.GetSubtitleCount();

    switch (this.comboBoxOcrMethod.SelectedIndex)
    {
        case 0:
        {
            // Set up the async Tesseract worker only when no async results exist yet.
            if (this._tesseractAsyncStrings == null)
            {
                this._nOcrDb = null;
                this._tesseractAsyncStrings = new string[subtitleCount];

                // The worker starts five subtitles after the chosen start number.
                this._tesseractAsyncIndex = (int)this.numericUpDownStartNumber.Value + 5;

                this._tesseractThread = new BackgroundWorker();
                this._tesseractThread.DoWork += this.TesseractThreadDoWork;
                this._tesseractThread.RunWorkerCompleted += this.TesseractThreadRunWorkerCompleted;
                this._tesseractThread.WorkerSupportsCancellation = true;

                bool asyncIndexInRange = this._tesseractAsyncIndex >= 0 && this._tesseractAsyncIndex < subtitleCount;
                if (asyncIndexInRange)
                {
                    this._tesseractThread.RunWorkerAsync(this.GetSubtitleBitmap(this._tesseractAsyncIndex));
                }
            }

            break;
        }

        case 1:
        {
            if (this._compareBitmaps == null)
            {
                this.LoadImageCompareBitmaps();
            }

            break;
        }

        case 3:
        {
            if (this._nOcrDb == null)
            {
                this.LoadNOcrWithCurrentLanguage();
            }

            // Still nothing after the load attempt: report it, re-enable the buttons and give up.
            if (this._nOcrDb == null)
            {
                MessageBox.Show("Fatal - No NOCR dictionary loaded!");
                this.SetButtonsEnabledAfterOcrDone();
                return;
            }

            this._nocrThreadsStop = false;
            this._nocrThreadResults = new string[this._subtitle.Paragraphs.Count];

            // One worker per processor minus one, but always fewer workers than subtitles.
            int nocrWorkerCount = Math.Min(Environment.ProcessorCount - 1, subtitleCount - 1);
            int nocrFirstIndex = (int)this.numericUpDownStartNumber.Value + 5;

            if (nocrWorkerCount >= 1 && subtitleCount > 5)
            {
                // Find letter size (uppercase/lowercase): probe at most the first six
                // subtitles until both heights are known.
                for (int probeIndex = 0;
                     probeIndex < 6 && (this._nocrLastLowercaseHeight == -1 || this._nocrLastUppercaseHeight == -1);
                     probeIndex++)
                {
                    this.NOCRIntialize(this.GetSubtitleBitmap(probeIndex));
                }

                for (int offset = 0; offset < nocrWorkerCount; offset++)
                {
                    int subtitleIndex = nocrFirstIndex + offset;
                    if (subtitleIndex >= subtitleCount)
                    {
                        continue;
                    }

                    var worker = new BackgroundWorker();
                    var parameter = new NOcrThreadParameter(
                        this.GetSubtitleBitmap(subtitleIndex),
                        subtitleIndex,
                        this._nOcrDb.OcrCharacters,
                        worker,
                        nocrWorkerCount,
                        this._unItalicFactor,
                        this.checkBoxNOcrItalic.Checked,
                        (int)this.numericUpDownNumberOfPixelsIsSpaceNOCR.Value,
                        this.checkBoxRightToLeft.Checked);
                    parameter.NOcrLastLowercaseHeight = this._nocrLastLowercaseHeight;
                    parameter.NOcrLastUppercaseHeight = this._nocrLastUppercaseHeight;

                    worker.DoWork += NOcrThreadDoWork;
                    worker.RunWorkerCompleted += this.NOcrThreadRunWorkerCompleted;
                    worker.RunWorkerAsync(parameter);
                }
            }

            break;
        }

        case 4:
        {
            if (this._binaryOcrDb == null)
            {
                this._binaryOcrDbFileName = Configuration.OcrFolder + "Latin.db";
                this._binaryOcrDb = new BinaryOcrDb(this._binaryOcrDbFileName, true);
            }

            // The nOCR database is opened from the file next to the binary one (".db" -> ".nocr").
            this._nOcrDb = new NOcrDb(this._binaryOcrDb.FileName.Replace(".db", ".nocr"));
            break;
        }
    }

    this.progressBar1.Maximum = subtitleCount;
    this.progressBar1.Value = 0;
    this.progressBar1.Visible = true;

    this._mainOcrTimerMax = subtitleCount;
    this._mainOcrIndex = (int)this.numericUpDownStartNumber.Value - 1;
    this._mainOcrTimer = new Timer();
    this._mainOcrTimer.Tick += this.mainOcrTimer_Tick;
    this._mainOcrTimer.Interval = 5;
    this._mainOcrRunning = true;
    this.subtitleListView1.MultiSelect = false;

    // The first tick is invoked directly instead of waiting for the timer.
    this.mainOcrTimer_Tick(null, null);

    if (this.comboBoxOcrMethod.SelectedIndex != 1)
    {
        return;
    }

    this._icThreadsStop = false;
    this._icThreadResults = new string[this._subtitle.Paragraphs.Count];

    // Start from processor count minus two (-1 or -2?), keep it below the subtitle count,
    // then cap at two workers.
    // Threading is not really good - subtitle picture creation should probably be threaded also/instead
    int compareWorkerCount = Math.Min(Environment.ProcessorCount - 2, subtitleCount - 1);
    int compareFirstIndex = (int)this.numericUpDownStartNumber.Value + 5;
    compareWorkerCount = Math.Min(compareWorkerCount, 2);

    for (int offset = 0; offset < compareWorkerCount; offset++)
    {
        int subtitleIndex = compareFirstIndex + offset;
        if (subtitleIndex >= subtitleCount)
        {
            continue;
        }

        Application.DoEvents();

        var worker = new BackgroundWorker();
        var parameter = new ImageCompareThreadParameter(
            this.GetSubtitleBitmap(subtitleIndex),
            subtitleIndex,
            this._compareBitmaps,
            worker,
            compareWorkerCount,
            (int)this.numericUpDownPixelsIsSpace.Value,
            this.checkBoxRightToLeft.Checked,
            (float)this.numericUpDownMaxErrorPct.Value);

        worker.DoWork += ImageCompareThreadDoWork;
        worker.RunWorkerCompleted += this.ImageCompareThreadRunWorkerCompleted;
        worker.RunWorkerAsync(parameter);
    }
}