/// <summary>
        /// Advanced OCR fixing via replace/spelling dictionaries + some hardcoded rules
        /// </summary>
        /// <param name="threeLetterIsoLanguageName">E.g. eng for English. The code "per" is replaced by "fas" before it is used.</param>
        /// <param name="hunspellName">Name of hunspell dictionary</param>
        /// <param name="parentForm">Used for centering/show spell check dialog. May be null, in which case the dialog is centered on the screen.</param>
        public OcrFixEngine(string threeLetterIsoLanguageName, string hunspellName, Form parentForm)
        {
            // Normalize "per" to "fas" so the replace list and the dictionaries are looked up under one code.
            if (threeLetterIsoLanguageName == "per")
            {
                threeLetterIsoLanguageName = "fas";
            }

            _threeLetterIsoLanguageName = threeLetterIsoLanguageName;
            _parentForm = parentForm;

            _spellCheck = new OcrSpellCheck { StartPosition = FormStartPosition.Manual };
            if (parentForm != null)
            {
                // Manually center the spell check dialog over the parent form.
                _spellCheck.Location = new Point(parentForm.Left + (parentForm.Width / 2 - _spellCheck.Width / 2),
                                                 parentForm.Top + (parentForm.Height / 2 - _spellCheck.Height / 2));
            }
            else
            {
                // No parent to center on: let WinForms center the dialog on the screen instead of
                // failing with a NullReferenceException.
                _spellCheck.StartPosition = FormStartPosition.CenterScreen;
            }

            _ocrFixReplaceList = OcrFixReplaceList.FromLanguageId(threeLetterIsoLanguageName);
            LoadSpellingDictionaries(threeLetterIsoLanguageName, hunspellName); // Hunspell etc.

            AutoGuessesUsed = new List<string>();
            UnknownWordsFound = new List<string>();
        }
        /// <summary>
        /// Verifies that a word pair added through AddWordOrPartial can be read back from WordReplaceList.
        /// </summary>
        public void OcrFixReplaceListAddWord()
        {
            // Arrange - a GUID-based name keeps this run's file separate from any other run
            string fileName = Path.Combine(Directory.GetCurrentDirectory(), Guid.NewGuid() + ".xml");
            try
            {
                var fixList = new OcrFixReplaceList(fileName);
                fixList.WordReplaceList.Clear();

                // Act
                fixList.AddWordOrPartial("from", "to");

                // Assert - AreEqual reports expected/actual on failure, unlike IsTrue(a == b)
                Assert.AreEqual("to", fixList.WordReplaceList["from"]);
            }
            finally
            {
                // Clean up - in finally so the file is removed even when the assertion fails.
                // Deleting is best effort: a locked or protected file must not fail the test.
                try
                {
                    File.Delete(fileName);
                }
                catch (IOException)
                {
                }
                catch (UnauthorizedAccessException)
                {
                }
            }
        }
        /// <summary>
        /// Verifies that a multi-word entry which was added and then removed stays removed
        /// after the replace list is re-created from the same file.
        /// </summary>
        public void OcrFixReplaceListRemovePartialLineReload()
        {
            // Arrange - a GUID-based name keeps this run's file separate from any other run
            string fileName = Path.Combine(Directory.GetCurrentDirectory(), Guid.NewGuid() + ".xml");
            try
            {
                var fixList = new OcrFixReplaceList(fileName);
                fixList.PartialLineWordBoundaryReplaceList.Clear();
                fixList.AddWordOrPartial("from me", "to you");
                fixList = new OcrFixReplaceList(fileName);
                fixList.RemoveWordOrPartial("from me");

                // Act - re-create the list from the same file
                fixList = new OcrFixReplaceList(fileName);

                // Assert - the entry must be gone from both lists. "from me" is presumably kept in the
                // partial-line list (the test name and the list cleared above suggest so - confirm
                // against AddWordOrPartial), so checking only WordReplaceList would pass even if the
                // removal had not worked.
                Assert.IsTrue(!fixList.WordReplaceList.ContainsKey("from me"));
                Assert.IsTrue(!fixList.PartialLineWordBoundaryReplaceList.ContainsKey("from me"));
            }
            finally
            {
                // Clean up - in finally so the file is removed even when an assertion fails.
                // Deleting is best effort: a locked or protected file must not fail the test.
                try
                {
                    File.Delete(fileName);
                }
                catch (IOException)
                {
                }
                catch (UnauthorizedAccessException)
                {
                }
            }
        }
Example #4
0
        /// <summary>
        /// Loads the OCR fix replace list for the language currently selected in
        /// comboBoxWordListLanguage and optionally refills listBoxOcrFixList with its entries.
        /// Does nothing when no language item is selected.
        /// </summary>
        /// <param name="reloadListBox">
        /// When true, the list box is cleared and repopulated with one "key --> value" line per
        /// entry of the word list and of the partial-line list; when false only the replace list
        /// field is refreshed.
        /// </param>
        private void LoadOcrFixList(bool reloadListBox)
        {
            // SelectedItem is null when nothing is selected, whereas Items[SelectedIndex]
            // throws ArgumentOutOfRangeException for SelectedIndex == -1.
            var cb = comboBoxWordListLanguage.SelectedItem as ComboBoxLanguage;
            if (cb == null)
            {
                return;
            }

            if (reloadListBox)
            {
                listBoxOcrFixList.Items.Clear();
            }

            _ocrFixReplaceList = OcrFixReplaceList.FromLanguageId(cb.CultureInfo.ThreeLetterISOLanguageName);
            if (!reloadListBox)
            {
                return;
            }

            listBoxOcrFixList.BeginUpdate();
            try
            {
                foreach (var pair in _ocrFixReplaceList.WordReplaceList)
                {
                    listBoxOcrFixList.Items.Add(pair.Key + " --> " + pair.Value);
                }
                foreach (var pair in _ocrFixReplaceList.PartialLineWordBoundaryReplaceList)
                {
                    listBoxOcrFixList.Items.Add(pair.Key + " --> " + pair.Value);
                }
                listBoxOcrFixList.Sorted = true;
            }
            finally
            {
                // Always resume painting, even if adding an item throws.
                listBoxOcrFixList.EndUpdate();
            }
        }