/// <summary>
/// Advances the OCR correction cursor to the next cell that needs manual confirmation.
/// The table is walked column by column (all rows of a column, then the next column).
/// Commodity names (column 0) and economy levels (columns 5 and 7) are auto-corrected
/// on the way. Cells with a confidence above 0.9, a confidence of exactly 0, or an
/// empty commodity name in their row are skipped.
/// When a cell needing confirmation is found it is shown in the correction controls.
/// When the cursor runs past the last column, the CSV rows of the current screenshot
/// are appended to <c>_csvOutputSoFar</c>, the preview images are released, the
/// screenshot file is optionally deleted and <c>Acquisition()</c> is called.
/// Any exception is forwarded to <c>CErr.processError</c>.
/// </summary>
private void ContinueDisplayingResults()
{
    try
    {
        List<string> knownCommodityNames = Program.Data.getCommodityNames();
        Dictionary<String, String> economyLevels = Program.Data.getEconomyLevels();

        do
        {
            // move to the next row; after the last row wrap around into the next column
            _correctionRow++;

            if (_correctionRow > _commodityTexts.GetLength(0) - 1)
            {
                _correctionRow = 0;
                _correctionColumn++;
            }

            if (_commodityTexts.GetLength(0) == 0)
            {
                // no rows at all - nothing to correct
                return;
            }

            if (_correctionColumn < _commodityTexts.GetLength(1))
            {
                pbOcrCurrent.Image = _originalBitmaps[_correctionRow, _correctionColumn];

                // the corrections index the tables with the current cursor, so they are
                // only attempted while the cursor is still inside the table
                if (_correctionColumn == 0)
                {
                    // hacks for commodity name
                    CorrectCommodityNameCell(knownCommodityNames);
                }
                else if (_correctionColumn == 5 || _correctionColumn == 7)
                {
                    // hacks for LOW/MED/HIGH
                    CorrectEconomyLevelCell(economyLevels);
                }
            }
        }
        // Don't pause for cells which have a high confidence, or have no commodity name
        // ReSharper disable once CompareOfFloatsByEqualityOperator
        while (_correctionColumn < _commodityTexts.GetLength(1)
               && (_originalBitmapConfidences[_correctionRow, _correctionColumn] > .9f
                   || _originalBitmapConfidences[_correctionRow, _correctionColumn] == 0
                   || _commodityTexts[_correctionRow, 0] == ""));

        if (_correctionColumn < _commodityTexts.GetLength(1))
        {
            // a cell needs confirmation: show it and enable the correction buttons
            tbCommoditiesOcrOutput.Text = _commodityTexts[_correctionRow, _correctionColumn];
            tbConfidence.Text = _originalBitmapConfidences[_correctionRow, _correctionColumn].ToString(CultureInfo.InvariantCulture);
            bContinueOcr.Enabled = true;
            bIgnoreTrash.Enabled = true;
        }
        else
        {
            // every cell of this screenshot has been handled
            bContinueOcr.Enabled = false;
            bIgnoreTrash.Enabled = false;

            // Append ONLY the rows of this screenshot. The previous implementation seeded
            // the new text with _csvOutputSoFar and then appended it to _csvOutputSoFar
            // again, which duplicated all rows collected so far.
            _csvOutputSoFar += BuildCsvRowsForCurrentScreenshot();

            if (pbOriginalImage.Image != null)
            {
                pbOriginalImage.Image.Dispose();
            }

            UpdateOriginalImage(null);
            UpdateTrimmedImage(null, null);

            if (Program.DBCon.getIniValue<Boolean>(IBE.IBESettingsView.DB_GROUPNAME, "DeleteScreenshotOnImport"))
            {
                File.Delete(_screenshotName);
            }

            Acquisition();
        }
    }
    catch (Exception ex)
    {
        CErr.processError(ex);
    }
}

/// <summary>
/// Auto-corrects the commodity name in the cell at the current correction cursor.
/// Exact matches against <paramref name="knownCommodityNames"/> get confidence 1.
/// Otherwise the closest known name (by Levenshtein distance on the punctuation-stripped
/// texts) replaces the scanned text if it is close enough. Commodities already
/// registered in <c>_commoditiesSoFar</c> are blanked out.
/// </summary>
/// <param name="knownCommodityNames">The commodity names to match the scanned text against.</param>
private void CorrectCommodityNameCell(List<string> knownCommodityNames)
{
    int row = _correctionRow;
    int col = _correctionColumn;

    var currentTextCamelCase = _parent._textInfo.ToTitleCase(_commodityTexts[row, col].ToLower());

    // if the ocr has found no char we don't need to ask Mr. Levenshtein
    if (currentTextCamelCase.Trim().Length == 0)
    {
        // that was nothing
        _originalBitmapConfidences[row, col] = 1;
        _commodityTexts[row, col] = "";
        return;
    }

    if (knownCommodityNames.Contains(currentTextCamelCase))
    {
        _originalBitmapConfidences[row, col] = 1;
    }
    else
    {
        // ignore punctuation when using levenshtein to find commodity names
        var replacedCamelCase = StripPunctuationFromScannedText(currentTextCamelCase);

        var lowestLevenshteinNumber = 10000;
        var nextLowestLevenshteinNumber = 10000;
        var lowestMatchingCommodity = "";
        var lowestMatchingCommodityRef = "";

        foreach (var reference in knownCommodityNames)
        {
            var upperRef = StripPunctuationFromScannedText(reference);
            var levenshteinNumber = _parent._levenshtein.LD2(upperRef, replacedCamelCase);

            // skip references that strip down to the text of the current best match
            if (upperRef != lowestMatchingCommodityRef)
            {
                if (levenshteinNumber < lowestLevenshteinNumber)
                {
                    nextLowestLevenshteinNumber = lowestLevenshteinNumber;
                    lowestLevenshteinNumber = levenshteinNumber;
                    lowestMatchingCommodityRef = upperRef;
                    lowestMatchingCommodity = reference.ToUpper();
                }
                else if (levenshteinNumber < nextLowestLevenshteinNumber)
                {
                    nextLowestLevenshteinNumber = levenshteinNumber;
                }
            }
        }

        // it's better if this depends on the length of the word - this factor works pretty good
        double levenshteinLimit = Math.Round((currentTextCamelCase.Length * 0.7), 0);

        if (lowestLevenshteinNumber <= levenshteinLimit)
        {
            _originalBitmapConfidences[row, col] = .9f;
            _commodityTexts[row, col] = lowestMatchingCommodity;

            // only fully trust the match if the runner-up is clearly worse
            // (INDIUM versus INDITE... could factor length in here)
            if (lowestLevenshteinNumber + 3 < nextLowestLevenshteinNumber)
            {
                _originalBitmapConfidences[row, col] = 1;
            }
        }
    }

    // If we're doing a batch of screenshots, don't keep doing the same commodity when we keep finding it
    if (_commoditiesSoFar.Contains(_commodityTexts[row, col].ToUpper()))
    {
        _commodityTexts[row, col] = "";
        _originalBitmapConfidences[row, col] = 1;
    }

    // but only if it's sure - otherwise it will be registered later
    // ReSharper disable once CompareOfFloatsByEqualityOperator
    if (_originalBitmapConfidences[row, col] == 1)
    {
        _commoditiesSoFar.Add(_commodityTexts[row, col].ToUpper());
    }
}

/// <summary>
/// Snaps the economy level in the cell at the current correction cursor to whichever of
/// the LOW / MED / HIGH texts from <paramref name="economyLevels"/>, or the empty string,
/// has the smallest Levenshtein distance to the scanned text. On a tie the earlier
/// candidate in that order wins. The cell's confidence is then set to 1.
/// </summary>
/// <param name="economyLevels">Level texts, read with the keys "LOW", "MED" and "HIGH".</param>
private void CorrectEconomyLevelCell(Dictionary<String, String> economyLevels)
{
    int row = _correctionRow;
    int col = _correctionColumn;

    var commodityLevelUpperCase = StripPunctuationFromScannedText(_commodityTexts[row, col]);

    var levenshteinLow = _parent._levenshtein.LD2(economyLevels["LOW"].ToUpper(), commodityLevelUpperCase);
    var levenshteinMed = _parent._levenshtein.LD2(economyLevels["MED"].ToUpper(), commodityLevelUpperCase);
    var levenshteinHigh = _parent._levenshtein.LD2(economyLevels["HIGH"].ToUpper(), commodityLevelUpperCase);
    var levenshteinBlank = _parent._levenshtein.LD2("", commodityLevelUpperCase);

    // Pick the lowest levenshtein number
    var lowestLevenshtein = Math.Min(Math.Min(levenshteinLow, levenshteinMed), Math.Min(levenshteinHigh, levenshteinBlank));

    if (lowestLevenshtein == levenshteinLow)
    {
        _commodityTexts[row, col] = economyLevels["LOW"];
    }
    else if (lowestLevenshtein == levenshteinMed)
    {
        _commodityTexts[row, col] = economyLevels["MED"];
    }
    else if (lowestLevenshtein == levenshteinHigh)
    {
        _commodityTexts[row, col] = economyLevels["HIGH"];
    }
    else // lowestLevenshtein == levenshteinBlank
    {
        _commodityTexts[row, col] = "";
    }

    // we will never be challenged on low/med/high again
    _originalBitmapConfidences[row, col] = 1;
}

/// <summary>
/// Builds the CSV lines for every row of the current screenshot that has a commodity name.
/// Each line consists of system name, station name, all cells except column 3,
/// the screenshot timestamp cut to 16 characters, the screenshot file name and the row id,
/// separated by ';' and terminated by CR/LF.
/// As a side effect, CR and LF characters are removed from the cells of exported rows.
/// </summary>
/// <returns>The CSV text of this screenshot only; empty if no row has a commodity name.</returns>
private string BuildCsvRowsForCurrentScreenshot()
{
    var csvRows = new System.Text.StringBuilder();

    for (int row = 0; row < _commodityTexts.GetLength(0); row++)
    {
        if (_commodityTexts[row, 0] == "")
        {
            // don't create CSV if there's no commodity name
            continue;
        }

        csvRows.Append(tbOcrSystemName.Text + ";" + tbOcrStationName.Text + ";");

        for (int col = 0; col < _commodityTexts.GetLength(1); col++)
        {
            _commodityTexts[row, col] = _commodityTexts[row, col].Replace("\r", "").Replace("\n", "");

            if (col == 3)
            {
                continue; // don't export cargo levels
            }

            csvRows.Append(_commodityTexts[row, col] + ";");
        }

        csvRows.Append(ocr.CurrentScreenshotDateTime.ToString("s").Substring(0, 16) + ";");
        csvRows.Append(Path.GetFileName(_screenshotName) + ";");
        csvRows.Append(_rowIds[row] + "\r\n");
    }

    return csvRows.ToString();
}
/// <summary>
/// Click handler of the "continue OCR" button. Reads the (possibly user-corrected) text
/// from <c>tbCommoditiesOcrOutput</c> and decides how to go on:
/// the text "Implausible Results!" opens the result editor and restarts the acquisition;
/// the texts "Imported!", "Finished!" and "No rows found..." are treated as the end of
/// the run; an empty or known commodity name is accepted; for an unknown name the user
/// is asked whether it should be added to the known commodities.
/// An accepted text is either written into the current correction cell (followed by
/// <c>ContinueDisplayingResults()</c>) or, at the end of the run, the user is asked
/// whether the collected output should be imported.
/// Any exception is forwarded to <c>CErr.processError</c>, as in
/// <c>ContinueDisplayingResults()</c>.
/// </summary>
/// <param name="sender">The event source (not used).</param>
/// <param name="e">The event data (not used).</param>
private void bContinueOcr_Click(object sender, EventArgs e)
{
    try
    {
        Boolean isOK = false;
        Boolean finished = false;

        string commodity = _parent._textInfo.ToTitleCase(tbCommoditiesOcrOutput.Text.ToLower().Trim());
        string commodityUpper = commodity.ToUpper();
        List<string> knownCommodityNames = Program.Data.getCommodityNames();

        if (commodityUpper == "Implausible Results!".ToUpper())
        {
            // check results
            // a form shown with ShowDialog() is not disposed automatically when it closes
            using (var f = new EditOcrResults(tbFinalOcrOutput.Text))
            {
                f.onlyImplausible = true;

                if (f.ShowDialog() == DialogResult.OK)
                {
                    tbFinalOcrOutput.Text = f.ReturnValue;
                }
            }

            Acquisition(true);
        }
        else if (commodityUpper == "Imported!".ToUpper()
                 || commodityUpper == "Finished!".ToUpper()
                 || commodityUpper == "No rows found...".ToUpper())
        {
            // its the end
            isOK = true;
            finished = true;
        }
        else if (commodity.Length == 0 || knownCommodityNames.Contains(commodity))
        {
            // ok, no typing error
            isOK = true;
        }
        else
        {
            // unknown commodity, is it a new one or a typing error ?
            DialogResult answer = MsgBox.Show(String.Format("Do you want to add '{0}' to the known commodities ?", commodity),
                                              "Unknown commodity !",
                                              MessageBoxButtons.OKCancel,
                                              MessageBoxIcon.Question,
                                              MessageBoxDefaultButton.Button1);

            if (answer == System.Windows.Forms.DialogResult.OK)
            {
                // yes, it's really new - but adding a commodity is not implemented yet:
                //Program.Data.ImportCommodity(commodity);
                //_Milkyway.addLocalized2RN(_commodities.Names);
                // The exception is reported through the catch block below.
                throw new NotImplementedException();
            }
        }

        if (!isOK)
        {
            return;
        }

        if (_commodityTexts == null || _correctionColumn >= _commodityTexts.GetLength(1) || finished)
        {
            // nothing left to correct: offer to import what has been collected
            if (MsgBox.Show("Import this?", "Import?", MessageBoxButtons.YesNo) == DialogResult.Yes)
            {
                ImportFinalOcrOutput();

                tbFinalOcrOutput.Text = "";
                bContinueOcr.Enabled = false;
                bIgnoreTrash.Enabled = false;
                _commoditiesSoFar = new List<string>();
                bClearOcrOutput.Enabled = false;
                bEditResults.Enabled = false;
            }
        }
        else
        {
            // take over the confirmed text and go on with the next doubtful cell
            _commodityTexts[_correctionRow, _correctionColumn] = commodityUpper;
            _commoditiesSoFar.Add(commodityUpper);

            ContinueDisplayingResults();
        }
    }
    catch (Exception ex)
    {
        CErr.processError(ex);
    }
}