/// <summary>
/// Creates a database bound to <paramref name="fileName"/> whose two character lists
/// are new lists filled from the lists of <paramref name="db"/>.
/// </summary>
/// <param name="db">Source database; its list elements are reused, only the lists themselves are new.</param>
/// <param name="fileName">Value stored in <c>FileName</c>.</param>
public NOcrDb(NOcrDb db, string fileName)
{
    FileName = fileName;

    // Copy the lists so that adding/removing entries here does not touch the source lists.
    var characters = new List<NOcrChar>(db.OcrCharacters);
    OcrCharacters = characters;

    var expandedCharacters = new List<NOcrChar>(db.OcrCharactersExpanded);
    OcrCharactersExpanded = expandedCharacters;
}
/// <summary>
/// Adds two characters to a database backed by a temp file, saves it, reloads it from the
/// same file and checks that the reloaded characters carry the same values.
/// The assertions expect the character added second ("u") at index 0 and the one added
/// first ("t") at index 1.
/// </summary>
public void TestNOcrSaveLoad()
{
    string tempFileName = Path.GetTempFileName();
    try
    {
        var db = new NOcrDb(tempFileName);

        var nOcrChar = new NOcrChar("t");
        nOcrChar.ExpandCount = 0;
        nOcrChar.Italic = false;
        nOcrChar.MarginTop = 2;
        nOcrChar.Width = 10;
        nOcrChar.Height = 10;
        nOcrChar.LinesForeground.Add(new NOcrPoint(new Point(1, 1), new Point(2, 2)));
        nOcrChar.LinesBackground.Add(new NOcrPoint(new Point(3, 4), new Point(5, 6)));
        db.Add(nOcrChar);

        var nOcrChar2 = new NOcrChar("u");
        nOcrChar2.ExpandCount = 0;
        nOcrChar2.Italic = false;
        nOcrChar2.MarginTop = 3;
        nOcrChar2.Width = 12;
        nOcrChar2.Height = 12;
        nOcrChar2.LinesForeground.Add(new NOcrPoint(new Point(1, 1), new Point(2, 2)));
        nOcrChar2.LinesBackground.Add(new NOcrPoint(new Point(3, 4), new Point(5, 6)));
        db.Add(nOcrChar2);

        db.Save();

        // Reload from disk so the assertions exercise the persisted data, not the in-memory lists.
        db = new NOcrDb(tempFileName);

        // AreEqual (instead of IsTrue(a == b)) reports expected/actual values on failure.
        Assert.AreEqual(2, db.OcrCharacters.Count);

        Assert.AreEqual(nOcrChar2.Text, db.OcrCharacters[0].Text);
        Assert.AreEqual(nOcrChar2.Italic, db.OcrCharacters[0].Italic);
        Assert.AreEqual(nOcrChar2.MarginTop, db.OcrCharacters[0].MarginTop);
        Assert.AreEqual(nOcrChar2.LinesForeground.Count, db.OcrCharacters[0].LinesForeground.Count);
        Assert.AreEqual(nOcrChar2.LinesForeground[0].Start.X, db.OcrCharacters[0].LinesForeground[0].Start.X);
        Assert.AreEqual(nOcrChar2.LinesForeground[0].Start.Y, db.OcrCharacters[0].LinesForeground[0].Start.Y);
        Assert.AreEqual(nOcrChar2.LinesBackground.Count, db.OcrCharacters[0].LinesBackground.Count);
        Assert.AreEqual(nOcrChar2.LinesBackground[0].Start.X, db.OcrCharacters[0].LinesBackground[0].Start.X);
        Assert.AreEqual(nOcrChar2.LinesBackground[0].Start.Y, db.OcrCharacters[0].LinesBackground[0].Start.Y);

        Assert.AreEqual(nOcrChar.Text, db.OcrCharacters[1].Text);
    }
    finally
    {
        // Clean up even when an assertion above fails. Deletion stays best-effort on purpose:
        // throwing from here would hide the real test failure.
        try
        {
            File.Delete(tempFileName);
        }
        catch
        {
        }
    }
}
/// <summary>
/// Stores the inspected bitmap and database characters, splits the bitmap into letter items
/// and fills <c>listBoxInspectItems</c> / <c>_matchList</c> with one entry per split item.
/// </summary>
/// <param name="bitmap">Subtitle image to inspect; kept in <c>_bitmap</c>.</param>
/// <param name="pixelsIsSpace">Passed through to the letter splitter.</param>
/// <param name="rightToLeft">Passed through to the letter splitter.</param>
/// <param name="nOcrDb">Database whose <c>OcrCharacters</c> are stored and used for matching.</param>
/// <param name="vobSubOcr">Form that performs the per-letter nOCR match.</param>
internal void Initialize(Bitmap bitmap, int pixelsIsSpace, bool rightToLeft, NOcrDb nOcrDb, VobSubOcr vobSubOcr)
{
    _bitmap = bitmap;

    var wholeImage = new NikseBitmap(bitmap);
    wholeImage.ReplaceNonWhiteWithTransparent();
    _bitmap2 = wholeImage.GetBitmap();

    _nocrChars = nOcrDb.OcrCharacters;
    _matchList = new List<VobSubOcr.CompareMatch>();
    _vobSubOcr = vobSubOcr;

    const int minLineHeight = 6;
    _imageList = NikseBitmapImageSplitter.SplitBitmapToLettersNew(wholeImage, pixelsIsSpace, rightToLeft, Configuration.Settings.VobSubOcr.TopToBottom, minLineHeight);

    for (int i = 0; i < _imageList.Count; i++)
    {
        ImageSplitterItem item = _imageList[i];
        NikseBitmap letter = item.NikseBitmap;

        // Items without a bitmap are listed by their special character and have no match.
        if (letter == null)
        {
            listBoxInspectItems.Items.Add(item.SpecialCharacter);
            _matchList.Add(null);
            continue;
        }

        // Crop the letter and paint the transparent area black before matching.
        letter.ReplaceNonWhiteWithTransparent();
        item.Y += letter.CropTopTransparent(0);
        letter.CropTransparentSidesAndBottom(0, true);
        letter.ReplaceTransparentWith(Color.Black);

        // Get nOCR match; unmatched letters are shown as "?" and stored as null.
        var match = vobSubOcr.GetNOcrCompareMatchNew(item, letter, nOcrDb, false, false);
        listBoxInspectItems.Items.Add(match == null ? "?" : match.Text);
        _matchList.Add(match);
    }
}
/// <summary>
/// Loads the SubRip file named in <c>textBoxInputFile</c> and, for every checked font in
/// <c>listViewFonts</c>, trains each distinct non-whitespace character of the subtitle text
/// (plus a bold variant when <c>checkBoxBold</c> is checked). The database is saved at the end.
/// Does nothing when the input file does not exist.
/// </summary>
private void buttonTrain_Click(object sender, EventArgs e)
{
    string inputFileName = textBoxInputFile.Text;
    if (!System.IO.File.Exists(inputFileName))
    {
        return;
    }

    int learnedCount = 0;
    int skippedCount = 0;
    var nOcrD = new NOcrDb(textBoxNOcrDb.Text);

    var lines = new List<string>(System.IO.File.ReadAllLines(inputFileName));
    var format = new SubRip();
    var sub = new Subtitle();
    format.LoadSubtitle(sub, lines, inputFileName);

    foreach (ListViewItem fontItem in listViewFonts.Items)
    {
        if (!fontItem.Checked)
        {
            continue;
        }

        _subtitleFontName = fontItem.Text;
        _subtitleFontSize = Convert.ToInt32(comboBoxSubtitleFontSize.Items[comboBoxSubtitleFontSize.SelectedIndex].ToString());

        // Learned characters are tracked per font, so each font gets its own pass over the text.
        var charactersLearned = new List<string>();
        foreach (Paragraph p in sub.Paragraphs)
        {
            foreach (char ch in p.Text)
            {
                string s = ch.ToString();
                if (s.Trim().Length == 0 || charactersLearned.Contains(s))
                {
                    continue;
                }

                TrainLetter(ref learnedCount, ref skippedCount, nOcrD, charactersLearned, s, false);
                if (checkBoxBold.Checked)
                {
                    TrainLetter(ref learnedCount, ref skippedCount, nOcrD, charactersLearned, s, true);
                }
            }
        }
    }

    nOcrD.Save();
}
/// <summary>
/// Replaces <c>_nOcrDb</c> with a database loaded from the file name returned by
/// <c>GetNOcrLanguageFileName</c>. Leaves <c>_nOcrDb</c> untouched when that name is null or empty.
/// </summary>
private void LoadNOcrWithCurrentLanguage()
{
    string fileName = GetNOcrLanguageFileName();
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    _nOcrDb = new NOcrDb(fileName);
}
/// <summary>
/// Reacts to a change of the OCR method: stops image-compare threads, drops the loaded
/// databases, shows the group box of the selected method, records the method name in the
/// settings and performs the method-specific setup. Finally refreshes the subtitle selection.
/// </summary>
private void ComboBoxOcrMethodSelectedIndexChanged(object sender, EventArgs e)
{
    _icThreadsStop = true;
    _binaryOcrDb = null;
    _nOcrDb = null;
    _ocrMethodIndex = comboBoxOcrMethod.SelectedIndex;

    if (_ocrMethodIndex == _ocrMethodTesseract)
    {
        ShowOcrMethodGroupBox(GroupBoxTesseractMethod);
        Configuration.Settings.VobSubOcr.LastOcrMethod = "Tesseract";
    }
    else if (_ocrMethodIndex == _ocrMethodImageCompare)
    {
        ShowOcrMethodGroupBox(groupBoxImageCompareMethod);
        Configuration.Settings.VobSubOcr.LastOcrMethod = "BitmapCompare";
        checkBoxPromptForUnknownWords.Checked = false;
        LoadImageCompareCharacterDatabaseList();
    }
    else if (_ocrMethodIndex == _ocrMethodNocr)
    {
        ShowOcrMethodGroupBox(groupBoxNOCR);
        Configuration.Settings.VobSubOcr.LastOcrMethod = "nOCR";
        SetSpellCheckLanguage(Configuration.Settings.VobSubOcr.LineOcrLastSpellCheck);

        // Rebuild the language list from the *.nocr files and re-select the last used one
        // (the first entry when the last used language is not among them).
        comboBoxNOcrLanguage.Items.Clear();
        int lastUsedPosition = 0;
        string[] nocrFiles = Directory.GetFiles(Configuration.OcrFolder, "*.nocr");
        for (int position = 0; position < nocrFiles.Length; position++)
        {
            string language = Path.GetFileNameWithoutExtension(nocrFiles[position]);
            if (language == Configuration.Settings.VobSubOcr.LineOcrLastLanguages)
            {
                lastUsedPosition = position;
            }

            comboBoxNOcrLanguage.Items.Add(language);
        }

        if (comboBoxNOcrLanguage.Items.Count > 0)
        {
            comboBoxNOcrLanguage.SelectedIndex = lastUsedPosition;
        }
    }
    else if (_ocrMethodIndex == _ocrMethodBinaryImageCompare)
    {
        ShowOcrMethodGroupBox(groupBoxImageCompareMethod);
        Configuration.Settings.VobSubOcr.LastOcrMethod = "BinaryImageCompare";
        checkBoxPromptForUnknownWords.Checked = false;
        numericUpDownMaxErrorPct.Minimum = 0;
        _binaryOcrDb = new BinaryOcrDb(_binaryOcrDbFileName, true);
        LoadImageCompareCharacterDatabaseList();
    }
    else if (_ocrMethodIndex == _ocrMethodModi)
    {
        ShowOcrMethodGroupBox(groupBoxModiMethod);
        Configuration.Settings.VobSubOcr.LastOcrMethod = "MODI";
    }

    SubtitleListView1SelectedIndexChanged(null, null);
}
/// <summary>
/// Discards the loaded nOCR database when another language is selected.
/// </summary>
private void comboBoxNOcrLanguage_SelectedIndexChanged(object sender, EventArgs e)
{
    // Only the reference is cleared here; nothing is reloaded in this handler.
    _nOcrDb = null;
}
/// <summary>
/// Starts an OCR run: switches the buttons to "running" state, prepares the state needed by
/// the selected OCR method (starting background workers for Tesseract, nOCR and image compare),
/// then sets up the progress bar and the main OCR timer and runs the first tick directly.
/// Returns early, after re-enabling the buttons, when nOCR is selected but no database could be loaded.
/// </summary>
private void ButtonStartOcrClick(object sender, EventArgs e)
{
    Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked;
    _lastLine = null;

    // Disable OK/Cancel/Start and the database buttons; enable Stop.
    buttonOK.Enabled = false;
    buttonCancel.Enabled = false;
    buttonStartOcr.Enabled = false;
    buttonStop.Enabled = true;
    buttonNewCharacterDatabase.Enabled = false;
    buttonEditCharacterDatabase.Enabled = false;

    _fromMenuItem = false;
    _abort = false;
    _autoBreakLines = checkBoxAutoBreakLines.Checked;
    listBoxUnknownWords.Items.Clear();

    int max = GetSubtitleCount();

    if (_ocrMethodIndex == _ocrMethodTesseract && _tesseractAsyncStrings == null)
    {
        _nOcrDb = null;
        _tesseractAsyncStrings = new string[max];
        _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5;

        _tesseractThread = new BackgroundWorker();
        _tesseractThread.DoWork += TesseractThreadDoWork;
        _tesseractThread.RunWorkerCompleted += TesseractThreadRunWorkerCompleted;
        _tesseractThread.WorkerSupportsCancellation = true;

        bool indexInRange = _tesseractAsyncIndex >= 0 && _tesseractAsyncIndex < max;
        if (indexInRange)
        {
            _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex));
        }
    }
    else if (_ocrMethodIndex == _ocrMethodImageCompare)
    {
        if (_compareBitmaps == null)
        {
            LoadImageCompareBitmaps();
        }

        _numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value;
    }
    else if (_ocrMethodIndex == _ocrMethodNocr)
    {
        if (_nOcrDb == null)
        {
            LoadNOcrWithCurrentLanguage();
        }

        if (_nOcrDb == null)
        {
            MessageBox.Show("Fatal - No NOCR dictionary loaded!");
            SetButtonsEnabledAfterOcrDone();
            return;
        }

        _nocrThreadsStop = false;
        _nocrThreadResults = new string[_subtitle.Paragraphs.Count];

        int nocrWorkerCount = Math.Min(Environment.ProcessorCount - 1, max - 1);
        int nocrFirstIndex = (int)numericUpDownStartNumber.Value + 5;
        if (nocrWorkerCount >= 1 && max > 5)
        {
            // Find letter size (uppercase/lowercase) from up to the first six subtitles.
            for (int testIndex = 0;
                 testIndex < 6 && (_nocrLastLowercaseHeight == -1 || _nocrLastUppercaseHeight == -1);
                 testIndex++)
            {
                NOCRIntialize(GetSubtitleBitmap(testIndex));
            }

            // One worker per consecutive subtitle index, as long as the index is below max.
            for (int i = 0; i < nocrWorkerCount && nocrFirstIndex + i < max; i++)
            {
                int subtitleIndex = nocrFirstIndex + i;
                var bw = new BackgroundWorker();
                var p = new NOcrThreadParameter(GetSubtitleBitmap(subtitleIndex), subtitleIndex, _nOcrDb.OcrCharacters, bw, nocrWorkerCount, _unItalicFactor, checkBoxNOcrItalic.Checked, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked)
                {
                    NOcrLastLowercaseHeight = _nocrLastLowercaseHeight,
                    NOcrLastUppercaseHeight = _nocrLastUppercaseHeight
                };
                bw.DoWork += NOcrThreadDoWork;
                bw.RunWorkerCompleted += NOcrThreadRunWorkerCompleted;
                bw.RunWorkerAsync(p);
            }
        }
    }
    else if (_ocrMethodIndex == _ocrMethodBinaryImageCompare)
    {
        if (_binaryOcrDb == null)
        {
            _binaryOcrDbFileName = Configuration.OcrFolder + "Latin.db";
            _binaryOcrDb = new BinaryOcrDb(_binaryOcrDbFileName, true);
        }

        // The nOCR database is loaded from the binary database's file name with ".db" replaced by ".nocr".
        _nOcrDb = new NOcrDb(_binaryOcrDb.FileName.Replace(".db", ".nocr"));
        checkBoxNOcrCorrect.Checked = true;
        _numericUpDownMaxErrorPct = (double)numericUpDownMaxErrorPct.Value;
    }

    progressBar1.Maximum = max;
    progressBar1.Value = 0;
    progressBar1.Visible = true;

    _mainOcrTimerMax = max;
    _mainOcrIndex = (int)numericUpDownStartNumber.Value - 1;
    _mainOcrTimer = new Timer();
    _mainOcrTimer.Tick += mainOcrTimer_Tick;
    _mainOcrTimer.Interval = 5;
    _mainOcrRunning = true;
    subtitleListView1.MultiSelect = false;
    mainOcrTimer_Tick(null, null);

    if (_ocrMethodIndex != _ocrMethodImageCompare)
    {
        return;
    }

    _icThreadsStop = false;
    _icThreadResults = new string[_subtitle.Paragraphs.Count];

    int icWorkerCount = Math.Min(Environment.ProcessorCount - 2, max - 1); // -1 or -2?
    int icFirstIndex = (int)numericUpDownStartNumber.Value + 5;

    // Threading is not really good - subtitle picture creation should probably be threaded also/instead
    icWorkerCount = Math.Min(icWorkerCount, 2);

    for (int i = 0; i < icWorkerCount && icFirstIndex + i < max; i++)
    {
        Application.DoEvents();
        int subtitleIndex = icFirstIndex + i;
        var bw = new BackgroundWorker();
        var p = new ImageCompareThreadParameter(GetSubtitleBitmap(subtitleIndex), subtitleIndex, _compareBitmaps, bw, icWorkerCount, (int)numericUpDownPixelsIsSpace.Value, checkBoxRightToLeft.Checked, (float)numericUpDownMaxErrorPct.Value);
        bw.DoWork += ImageCompareThreadDoWork;
        bw.RunWorkerCompleted += ImageCompareThreadRunWorkerCompleted;
        bw.RunWorkerAsync(p);
    }
}
/// <summary>
/// Asks the user for a new line-OCR language name, strips the characters ? / * \ from it and,
/// unless a language file for that name already exists, adds the name to
/// <c>comboBoxNOcrLanguage</c> and selects it.
/// </summary>
private void buttonLineOcrNewLanguage_Click(object sender, EventArgs e)
{
    using (var newFolder = new VobSubOcrNewFolder(_ocrMethodIndex == _ocrMethodImageCompare))
    {
        if (newFolder.ShowDialog(this) != DialogResult.OK)
        {
            return;
        }

        string s = newFolder.FolderName;
        if (string.IsNullOrEmpty(s))
        {
            return;
        }

        s = s.Replace("?", string.Empty).Replace("/", string.Empty).Replace("*", string.Empty).Replace("\\", string.Empty);
        if (string.IsNullOrEmpty(s))
        {
            return;
        }

        // Check the sanitized name - that is the name added to the list below. Checking the raw
        // input would miss an existing file whenever characters were stripped from it.
        if (File.Exists(Configuration.DictionariesFolder + "nOCR_" + s + ".xml"))
        {
            MessageBox.Show("Line OCR language file already exists!");
            return;
        }

        _nOcrDb = null;
        comboBoxNOcrLanguage.Items.Add(s);
        comboBoxNOcrLanguage.SelectedIndex = comboBoxNOcrLanguage.Items.Count - 1;
    }
}
/// <summary>
/// Returns the first character in <paramref name="nOcrDb"/> whose foreground/background line
/// segments fit the glyph bitmap of <paramref name="targetItem"/>. Candidates are tried in
/// several passes, from strict size/margin requirements to progressively looser ones.
/// </summary>
/// <param name="targetItem">Split item; its <c>NikseBitmap</c> is the glyph being matched.</param>
/// <param name="topMargin">Top margin of the glyph, compared with each candidate's <c>MarginTop</c>.</param>
/// <param name="italic">Always set to false by this method.</param>
/// <param name="nOcrDb">Database to search; when null the method returns null.</param>
/// <param name="tryItalicScaling">Currently without effect - italic un-slanting is not implemented here.</param>
/// <param name="deepSeek">When true, three additional looser passes run after the regular ones.</param>
/// <returns>The matching database character, or null when no pass finds one.</returns>
private static NOcrChar NOcrFindBestMatchNew(ImageSplitterItem targetItem, int topMargin, out bool italic, NOcrDb nOcrDb, bool tryItalicScaling, bool deepSeek)
{
    italic = false;
    if (nOcrDb == null)
    {
        return null;
    }

    var nbmp = targetItem.NikseBitmap;

    // Pass 1: only very accurate matches (size and top margin each within 2), with a
    // one-pixel horizontal tolerance when sampling.
    foreach (NOcrChar oc in nOcrDb.OcrCharacters)
    {
        if (Math.Abs(oc.Width - nbmp.Width) < 3 &&
            Math.Abs(oc.Height - nbmp.Height) < 3 &&
            Math.Abs(oc.MarginTop - topMargin) < 3 &&
            NOcrLinesMatchWithNeighborTolerance(oc, nbmp))
        {
            return oc;
        }
    }

    // Pass 2: only very accurate matches - but not for margin top (difference of 5..8).
    foreach (NOcrChar oc in nOcrDb.OcrCharacters)
    {
        int marginTopDiff = Math.Abs(oc.MarginTop - topMargin);
        if (Math.Abs(oc.Width - nbmp.Width) < 4 &&
            Math.Abs(oc.Height - nbmp.Height) < 4 &&
            marginTopDiff > 4 && marginTopDiff < 9 &&
            NOcrLinesMatch(oc, nbmp, nbmp.Width, nbmp.Height, 0))
        {
            return oc;
        }
    }

    // Pass 3: try some resize if aspect ratio is about the same.
    // Note: despite its name this value is height relative to width, in percent.
    double widthPercent = nbmp.Height * 100.0 / nbmp.Width;
    foreach (NOcrChar oc in nOcrDb.OcrCharacters)
    {
        if (!oc.IsSensitive &&
            Math.Abs(oc.WidthPercent - widthPercent) < 15 &&
            oc.Width > 12 && oc.Height > 19 &&
            nbmp.Width > 19 && nbmp.Height > 12 &&
            Math.Abs(oc.MarginTop - topMargin) < nbmp.Height / 4 &&
            NOcrLinesMatch(oc, nbmp, nbmp.Width, nbmp.Height, 0))
        {
            return oc;
        }
    }

    if (deepSeek) // just try anything...
    {
        // Deep pass A: loose aspect ratio, no size or margin requirement on the candidate.
        foreach (NOcrChar oc in nOcrDb.OcrCharacters)
        {
            if (!oc.IsSensitive &&
                Math.Abs(oc.WidthPercent - widthPercent) < 40 &&
                nbmp.Height > 11 &&
                NOcrLinesMatch(oc, nbmp, nbmp.Width, nbmp.Height, 0))
            {
                return oc;
            }
        }

        // Deep pass B: candidate lines scaled to a width 3 pixels narrower than the glyph.
        foreach (NOcrChar oc in nOcrDb.OcrCharacters)
        {
            if (Math.Abs(oc.WidthPercent - widthPercent) < 40 &&
                oc.Height > 12 && oc.Width > 19 &&
                nbmp.Width > 19 && nbmp.Height > 12 &&
                Math.Abs(oc.MarginTop - topMargin) < 15 &&
                NOcrLinesMatch(oc, nbmp, nbmp.Width - 3, nbmp.Height, 0))
            {
                return oc;
            }
        }

        // Deep pass C: candidate lines scaled to a height 4 pixels shorter and sampled 4 pixels lower.
        foreach (NOcrChar oc in nOcrDb.OcrCharacters)
        {
            if (Math.Abs(oc.WidthPercent - widthPercent) < 40 &&
                oc.Height > 12 && oc.Width > 19 &&
                nbmp.Width > 19 && nbmp.Height > 12 &&
                Math.Abs(oc.MarginTop - topMargin) < 15 &&
                NOcrLinesMatch(oc, nbmp, nbmp.Width, nbmp.Height - 4, 4))
            {
                return oc;
            }
        }
    }

    // tryItalicScaling: no italic handling is implemented in this method; the parameter is
    // kept so existing callers keep compiling.
    return null;
}

/// <summary>
/// Tells whether the pixel at (x, y) counts as "foreground" for nOCR matching:
/// alpha above 150 and R+G+B above <c>NocrMinColor</c>.
/// </summary>
private static bool IsNOcrForegroundPixel(NikseBitmap nbmp, int x, int y)
{
    Color c = nbmp.GetPixel(x, y);
    return c.A > 150 && c.R + c.G + c.B > NocrMinColor;
}

/// <summary>
/// Strict line check: every in-bounds point of every foreground line must hit a foreground
/// pixel and every in-bounds point of every background line must not. Points are produced by
/// scaling the candidate's lines to <paramref name="scaledWidth"/> x <paramref name="scaledHeight"/>
/// and are sampled <paramref name="yOffset"/> pixels lower; out-of-bounds points are ignored.
/// </summary>
private static bool NOcrLinesMatch(NOcrChar oc, NikseBitmap nbmp, int scaledWidth, int scaledHeight, int yOffset)
{
    foreach (NOcrPoint op in oc.LinesForeground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, scaledWidth, scaledHeight))
        {
            int y = point.Y + yOffset;
            if (point.X >= 0 && y >= 0 && point.X < nbmp.Width && y < nbmp.Height &&
                !IsNOcrForegroundPixel(nbmp, point.X, y))
            {
                return false;
            }
        }
    }

    foreach (NOcrPoint op in oc.LinesBackground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, scaledWidth, scaledHeight))
        {
            int y = point.Y + yOffset;
            if (point.X >= 0 && y >= 0 && point.X < nbmp.Width && y < nbmp.Height &&
                IsNOcrForegroundPixel(nbmp, point.X, y))
            {
                return false;
            }
        }
    }

    return true;
}

/// <summary>
/// Line check used by the first pass. Like the strict check, but a foreground miss is forgiven
/// on bitmaps wider than 20 pixels when the horizontal neighbour is foreground, and a
/// background hit only counts when the (possibly left-shifted) second sample is foreground too.
/// </summary>
private static bool NOcrLinesMatchWithNeighborTolerance(NOcrChar oc, NikseBitmap nbmp)
{
    foreach (NOcrPoint op in oc.LinesForeground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
        {
            if (point.X < 0 || point.Y < 0 || point.X >= nbmp.Width || point.Y >= nbmp.Height)
            {
                continue;
            }

            if (IsNOcrForegroundPixel(nbmp, point.X, point.Y))
            {
                continue;
            }

            // The neighbour only ever rescued a miss on bitmaps wider than 20 pixels, so test
            // the width first. This also avoids sampling x = 1 on bitmaps that are too narrow
            // to have that column (the neighbour of x = 0 is taken to the right, at x = 1).
            if (nbmp.Width <= 20)
            {
                return false;
            }

            int neighbourX = point.X > 0 ? point.X - 1 : 1;
            if (!IsNOcrForegroundPixel(nbmp, neighbourX, point.Y))
            {
                return false;
            }
        }
    }

    foreach (NOcrPoint op in oc.LinesBackground)
    {
        foreach (Point point in op.ScaledGetPoints(oc, nbmp.Width, nbmp.Height))
        {
            if (point.X < 0 || point.Y < 0 || point.X >= nbmp.Width || point.Y >= nbmp.Height)
            {
                continue;
            }

            if (!IsNOcrForegroundPixel(nbmp, point.X, point.Y))
            {
                continue;
            }

            // For candidates wider than 19 the confirming sample is taken one pixel to the
            // left (when there is one); otherwise the same pixel is sampled again.
            int confirmX = (oc.Width > 19 && point.X > 0) ? point.X - 1 : point.X;
            if (IsNOcrForegroundPixel(nbmp, confirmX, point.Y))
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Matches one split item against the nOCR database: first against the expanded characters,
/// then via <c>NOcrFindBestMatchNew</c>. The matched text of a few letters whose upper- and
/// lowercase shapes look alike is case-corrected using the last seen lowercase/uppercase heights.
/// </summary>
/// <param name="targetItem">Item to match; its bitmap height drives the case correction.</param>
/// <param name="parentBitmap">Bitmap passed to the expanded-character matcher.</param>
/// <param name="nOcrDb">Database to search; null is treated as "nothing matches".</param>
/// <param name="tryItalicScaling">Forwarded to <c>NOcrFindBestMatchNew</c>.</param>
/// <param name="deepSeek">Forwarded to <c>NOcrFindBestMatchNew</c>.</param>
/// <returns>
/// The match; when nothing matches, null if <c>checkBoxNOcrCorrect</c> is checked, otherwise a "*" match.
/// </returns>
internal CompareMatch GetNOcrCompareMatchNew(ImageSplitterItem targetItem, NikseBitmap parentBitmap, NOcrDb nOcrDb, bool tryItalicScaling, bool deepSeek)
{
    // NOcrFindBestMatchNew accepts a null database, so do not dereference it here either.
    if (nOcrDb != null)
    {
        var expandedResult = NOcrFindExpandedMatch(parentBitmap, targetItem, nOcrDb.OcrCharactersExpanded);
        if (expandedResult != null)
        {
            return new CompareMatch(expandedResult.Text, expandedResult.Italic, expandedResult.ExpandCount, null, expandedResult);
        }
    }

    bool italic;
    var result = NOcrFindBestMatchNew(targetItem, targetItem.Y - targetItem.ParentY, out italic, nOcrDb, tryItalicScaling, deepSeek);
    if (result == null)
    {
        if (checkBoxNOcrCorrect.Checked)
        {
            return null;
        }

        return new CompareMatch("*", false, 0, null);
    }

    // Work on a copy of the text: 'result' is the entry stored in the database, and writing the
    // case-corrected text back into it would permanently change that entry for later matches.
    string text = result.Text;
    int height = targetItem.NikseBitmap.Height;

    // Fix uppercase/lowercase issues (not I/l): remember reference heights first.
    if (text == "e")
    {
        _nocrLastLowercaseHeight = height;
    }
    else if (_nocrLastLowercaseHeight == -1 && text == "a")
    {
        _nocrLastLowercaseHeight = height;
    }

    if (text == "E" || text == "H" || text == "R" || text == "D" || text == "T")
    {
        _nocrLastUppercaseHeight = height;
    }
    else if (_nocrLastUppercaseHeight == -1 && text == "M")
    {
        _nocrLastUppercaseHeight = height;
    }

    if (text == "V" || text == "W" || text == "U" || text == "S" || text == "Z" || text == "O" || text == "X" || text == "Ø" || text == "C")
    {
        // Matched as uppercase but no taller than (about) the last lowercase letter.
        if (_nocrLastLowercaseHeight > 3 && height - _nocrLastLowercaseHeight < 2)
        {
            text = text.ToLowerInvariant();
        }
    }
    else if (text == "v" || text == "w" || text == "u" || text == "s" || text == "z" || text == "o" || text == "x" || text == "ø" || text == "c")
    {
        // Matched as lowercase but (about) as tall as the last uppercase letter.
        if (_nocrLastUppercaseHeight > 3 && _nocrLastUppercaseHeight - height < 2)
        {
            text = text.ToUpperInvariant();
        }
    }

    return new CompareMatch(text, italic || result.Italic, 0, null, result);
}
/// <summary>
/// Lets the user pick an nOCR file and adds every character from it that is not already
/// present in <c>_nocrChars</c>, then reports how many were imported and how many were skipped.
/// </summary>
private void buttonImport_Click(object sender, EventArgs e)
{
    openFileDialog1.Filter = "nOCR files|*.nocr";
    openFileDialog1.InitialDirectory = Configuration.DataDirectory;
    openFileDialog1.FileName = string.Empty;
    openFileDialog1.Title = "Import existing nOCR database into current";
    if (openFileDialog1.ShowDialog(this) != DialogResult.OK)
    {
        return;
    }

    int importedCount = 0;
    int notImportedCount = 0;
    NOcrDb newDb = new NOcrDb(openFileDialog1.FileName);
    foreach (NOcrChar newChar in newDb.OcrCharacters)
    {
        // Stop at the first identical character. Letting later candidates overwrite the flag
        // would let a near-duplicate (same header, different lines) hide an earlier exact match.
        bool found = false;
        foreach (NOcrChar oldChar in _nocrChars)
        {
            if (IsSameNOcrChar(oldChar, newChar))
            {
                found = true;
                break;
            }
        }

        if (found)
        {
            notImportedCount++;
        }
        else
        {
            _nocrChars.Add(newChar);
            importedCount++;
        }
    }

    MessageBox.Show(string.Format("Number of characters imported: {0}\r\nNumber of characters not imported (already present): {1}", importedCount, notImportedCount));
}

/// <summary>
/// Two characters are the same when text, size, top margin, expand count and every
/// foreground/background line (start and end point) are equal.
/// </summary>
private static bool IsSameNOcrChar(NOcrChar a, NOcrChar b)
{
    if (a.Text != b.Text ||
        a.Width != b.Width ||
        a.Height != b.Height ||
        a.MarginTop != b.MarginTop ||
        a.ExpandCount != b.ExpandCount ||
        a.LinesForeground.Count != b.LinesForeground.Count ||
        a.LinesBackground.Count != b.LinesBackground.Count)
    {
        return false;
    }

    for (int i = 0; i < a.LinesForeground.Count; i++)
    {
        if (!IsSameNOcrLine(a.LinesForeground[i], b.LinesForeground[i]))
        {
            return false;
        }
    }

    for (int i = 0; i < a.LinesBackground.Count; i++)
    {
        if (!IsSameNOcrLine(a.LinesBackground[i], b.LinesBackground[i]))
        {
            return false;
        }
    }

    return true;
}

/// <summary>
/// Compares the start and end coordinates of two line segments.
/// </summary>
private static bool IsSameNOcrLine(NOcrPoint a, NOcrPoint b)
{
    return a.Start.X == b.Start.X &&
           a.Start.Y == b.Start.Y &&
           a.End.X == b.End.X &&
           a.End.Y == b.End.Y;
}
/// <summary>
/// Placeholder initializer: the database argument is only null-checked and nothing is done with it.
/// </summary>
/// <param name="_nOcrDb">Database handed in by the caller; currently unused.</param>
internal void Initialize(NOcrDb _nOcrDb)
{
    if (_nOcrDb == null)
    {
        return;
    }

    // Intentionally empty - no initialization is performed for a non-null database either.
}
/// <summary>
/// Renders <paramref name="s"/> with the current font (optionally bold), and when the rendering
/// splits into exactly one letter that the database does not match yet, generates line segments
/// for it and adds it to the database.
/// </summary>
/// <param name="numberOfCharactersLeaned">Incremented when a new character is added.</param>
/// <param name="numberOfCharactersSkipped">Incremented when the database already matches the letter.</param>
/// <param name="nOcrD">Database that is queried and extended.</param>
/// <param name="charactersLearned">Receives <paramref name="s"/> when a new character is added.</param>
/// <param name="s">Text to render and train.</param>
/// <param name="bold">Passed to the image generator.</param>
private void TrainLetter(ref int numberOfCharactersLeaned, ref int numberOfCharactersSkipped, NOcrDb nOcrD, List<string> charactersLearned, string s, bool bold)
{
    // 'using' releases the rendered bitmap on every path; previously it was only disposed
    // when a new character was learned and leaked otherwise.
    using (Bitmap bmp = GenerateImageFromTextWithStyle(s, bold))
    {
        var nbmp = new NikseBitmap(bmp);
        nbmp.MakeTwoColor(280);
        var list = NikseBitmapImageSplitter.SplitBitmapToLettersNew(nbmp, 10, false, false, 25);

        // Only renderings that split into exactly one item are trained; others are neither
        // learned nor counted as skipped.
        if (list.Count != 1)
        {
            return;
        }

        NikseBitmap letter = list[0].NikseBitmap;
        NOcrChar match = nOcrD.GetMatch(letter);
        if (match != null)
        {
            numberOfCharactersSkipped++;
            return;
        }

        // Show the letter being trained and give the UI a moment to repaint.
        pictureBox1.Image = letter.GetBitmap();
        this.Refresh();
        Application.DoEvents();
        System.Threading.Thread.Sleep(100);

        NOcrChar nOcrChar = new NOcrChar(s);
        nOcrChar.Width = letter.Width;
        nOcrChar.Height = letter.Height;
        VobSubOcrNOcrCharacter.GenerateLineSegments((int)numericUpDownSegmentsPerCharacter.Value, checkBoxVeryAccurate.Checked, nOcrChar, letter);
        nOcrD.Add(nOcrChar);

        charactersLearned.Add(s);
        numberOfCharactersLeaned++;
        labelInfo.Text = string.Format("Now training font '{1}', total characters leaned is {0}, {2} skipped", numberOfCharactersLeaned, _subtitleFontName, numberOfCharactersSkipped);
    }
}
/// <summary>
/// Starts an OCR run: switches the buttons to "running" state, prepares the state needed by
/// the OCR method selected in <c>comboBoxOcrMethod</c> (index 0, 1, 3 or 4), then sets up the
/// progress bar and the main OCR timer and runs the first tick directly. For index 1, image
/// compare workers are started afterwards.
/// </summary>
/// <param name="sender">
/// Event source; not used.
/// </param>
/// <param name="e">
/// Event data; not used.
/// </param>
private void ButtonStartOcrClick(object sender, EventArgs e)
{
    Configuration.Settings.VobSubOcr.RightToLeft = checkBoxRightToLeft.Checked;
    _lastLine = null;

    // Disable OK/Cancel/Start and the database buttons; enable Stop.
    buttonOK.Enabled = false;
    buttonCancel.Enabled = false;
    buttonStartOcr.Enabled = false;
    buttonStop.Enabled = true;
    buttonNewCharacterDatabase.Enabled = false;
    buttonEditCharacterDatabase.Enabled = false;
    _abort = false;

    int max = GetSubtitleCount();
    int selectedMethod = comboBoxOcrMethod.SelectedIndex;

    if (selectedMethod == 0 && _tesseractAsyncStrings == null)
    {
        _nOcrDb = null;
        _tesseractAsyncStrings = new string[max];
        _tesseractAsyncIndex = (int)numericUpDownStartNumber.Value + 5;

        _tesseractThread = new BackgroundWorker();
        _tesseractThread.DoWork += TesseractThreadDoWork;
        _tesseractThread.RunWorkerCompleted += TesseractThreadRunWorkerCompleted;
        _tesseractThread.WorkerSupportsCancellation = true;

        bool indexInRange = _tesseractAsyncIndex >= 0 && _tesseractAsyncIndex < max;
        if (indexInRange)
        {
            _tesseractThread.RunWorkerAsync(GetSubtitleBitmap(_tesseractAsyncIndex));
        }
    }
    else if (selectedMethod == 1)
    {
        if (_compareBitmaps == null)
        {
            LoadImageCompareBitmaps();
        }
    }
    else if (selectedMethod == 3)
    {
        if (_nOcrDb == null)
        {
            LoadNOcrWithCurrentLanguage();
        }

        if (_nOcrDb == null)
        {
            MessageBox.Show("Fatal - No NOCR dictionary loaded!");
            SetButtonsEnabledAfterOcrDone();
            return;
        }

        _nocrThreadsStop = false;
        _nocrThreadResults = new string[_subtitle.Paragraphs.Count];

        int nocrWorkerCount = Math.Min(Environment.ProcessorCount - 1, max - 1);
        int nocrFirstIndex = (int)numericUpDownStartNumber.Value + 5;
        if (nocrWorkerCount >= 1 && max > 5)
        {
            // Find letter size (uppercase/lowercase) from up to the first six subtitles.
            for (int testIndex = 0;
                 testIndex < 6 && (_nocrLastLowercaseHeight == -1 || _nocrLastUppercaseHeight == -1);
                 testIndex++)
            {
                NOCRIntialize(GetSubtitleBitmap(testIndex));
            }

            // One worker per consecutive subtitle index, as long as the index is below max.
            for (int i = 0; i < nocrWorkerCount && nocrFirstIndex + i < max; i++)
            {
                int subtitleIndex = nocrFirstIndex + i;
                var bw = new BackgroundWorker();
                var p = new NOcrThreadParameter(GetSubtitleBitmap(subtitleIndex), subtitleIndex, _nOcrDb.OcrCharacters, bw, nocrWorkerCount, _unItalicFactor, checkBoxNOcrItalic.Checked, (int)numericUpDownNumberOfPixelsIsSpaceNOCR.Value, checkBoxRightToLeft.Checked)
                {
                    NOcrLastLowercaseHeight = _nocrLastLowercaseHeight,
                    NOcrLastUppercaseHeight = _nocrLastUppercaseHeight
                };
                bw.DoWork += NOcrThreadDoWork;
                bw.RunWorkerCompleted += NOcrThreadRunWorkerCompleted;
                bw.RunWorkerAsync(p);
            }
        }
    }
    else if (selectedMethod == 4)
    {
        if (_binaryOcrDb == null)
        {
            _binaryOcrDbFileName = Configuration.OcrFolder + "Latin.db";
            _binaryOcrDb = new BinaryOcrDb(_binaryOcrDbFileName, true);
        }

        // The nOCR database is loaded from the binary database's file name with ".db" replaced by ".nocr".
        _nOcrDb = new NOcrDb(_binaryOcrDb.FileName.Replace(".db", ".nocr"));
    }

    progressBar1.Maximum = max;
    progressBar1.Value = 0;
    progressBar1.Visible = true;

    _mainOcrTimerMax = max;
    _mainOcrIndex = (int)numericUpDownStartNumber.Value - 1;
    _mainOcrTimer = new Timer();
    _mainOcrTimer.Tick += mainOcrTimer_Tick;
    _mainOcrTimer.Interval = 5;
    _mainOcrRunning = true;
    subtitleListView1.MultiSelect = false;
    mainOcrTimer_Tick(null, null);

    // Read the selection again here: the first tick above has already run by this point.
    if (comboBoxOcrMethod.SelectedIndex != 1)
    {
        return;
    }

    _icThreadsStop = false;
    _icThreadResults = new string[_subtitle.Paragraphs.Count];

    int icWorkerCount = Math.Min(Environment.ProcessorCount - 2, max - 1); // -1 or -2?
    int icFirstIndex = (int)numericUpDownStartNumber.Value + 5;

    // Threading is not really good - subtitle picture creation should probably be threaded also/instead
    icWorkerCount = Math.Min(icWorkerCount, 2);

    for (int i = 0; i < icWorkerCount && icFirstIndex + i < max; i++)
    {
        Application.DoEvents();
        int subtitleIndex = icFirstIndex + i;
        var bw = new BackgroundWorker();
        var p = new ImageCompareThreadParameter(GetSubtitleBitmap(subtitleIndex), subtitleIndex, _compareBitmaps, bw, icWorkerCount, (int)numericUpDownPixelsIsSpace.Value, checkBoxRightToLeft.Checked, (float)numericUpDownMaxErrorPct.Value);
        bw.DoWork += ImageCompareThreadDoWork;
        bw.RunWorkerCompleted += ImageCompareThreadRunWorkerCompleted;
        bw.RunWorkerAsync(p);
    }
}