/// <summary>
/// Tries to determine the right X boundary of a column.
/// When <c>SetRightXByExistingColumn</c> reports that it did not set the boundary, the text
/// following the found key is searched with the header dictionary. If that search succeeds,
/// the first word on the line that resembles the beginning of the column text decides whether
/// the column ends at the middle of the page or at its right edge. Otherwise the client
/// dictionary is consulted and the boundary is chosen from the column's left position.
/// If the boundary is still 0 afterwards, the full page width is used.
/// </summary>
/// <param name="col">Column whose <c>Right</c> value is set.</param>
/// <param name="a">Text line currently being processed.</param>
/// <param name="line">Text of the current line; replaced by the text following <paramref name="stringKey"/> (and passed on by reference to <c>GetDataFromLine</c>).</param>
/// <param name="stringKey">Key text that was found in the line.</param>
/// <param name="n">Client object handed to the client-dictionary lookup.</param>
private void TryGetRightXOfColumn(Column col, TextLine a, ref string line, string stringKey, Client n)
{
    if (!SetRightXByExistingColumn(col))
    {
        // Continue with the text that follows the found key.
        line = line.Substring(line.IndexOf(stringKey) + stringKey.Length);

        if (GetDataFromLine(a, ref line, _dic.header, _eud, false, null, true))
        {
            // Length of the first word of the key stored in _pair.
            int len = _pair.Key.IndexOf(" ");
            if (len == -1)
            {
                len = _pair.Key.Length;
            }

            // The length comes from the key but is applied to the column text; clamp it so a
            // column text shorter than the key's first word does not throw.
            if (len > col.Text.Length)
            {
                len = col.Text.Length;
            }

            string word = col.Text.Substring(0, len);
            if (!string.IsNullOrEmpty(word))
            {
                foreach (Word w in a.Words)
                {
                    int sim = SimilarityService.GetSimilarity(word.ToLower(), w.Text.ToLower());
                    if (sim > CONSTANTS.SIMILARITY)
                    {
                        // The matching word lies in the right half -> column spans to the page edge,
                        // otherwise it ends at the middle of the page.
                        if (w.Bounds.Right > _p.Img.Width / 2)
                        {
                            col.Right = _p.Img.Width;
                        }
                        else
                        {
                            col.Right = _p.Img.Width / 2;
                        }

                        break;
                    }
                }
            }
        }
        else
        {
            // Check whether the current text belongs to a client (the result is not used here).
            GetDataFromLine(a, ref line, _dic.clients, n, false, null, true);

            if (col.Left < _p.Img.Width / 2)
            {
                col.Right = _p.Img.Width / 2;
            }
            else
            {
                col.Right = _p.Img.Width;
            }
        }
    }

    // Fallback: no boundary determined, use the whole page width.
    if (col.Right == 0)
    {
        col.Right = _p.Img.Width;
    }
}
/// <summary>
/// Processes an image against a list of known key/value positions.
/// With <paramref name="checkPattern"/> false, Tesseract is run on the whole image and the value
/// text for every position is read and stored in the preview object.
/// With <paramref name="checkPattern"/> true, only the (slightly enlarged and optionally scaled)
/// key areas are recognised and compared with the expected keys; the first key that is not
/// recognised ends the check.
/// </summary>
/// <param name="image">Image to analyse.</param>
/// <param name="pos">Known positions of keys and values.</param>
/// <param name="progress">Progress sink; used only when <paramref name="checkPattern"/> is false.</param>
/// <param name="prew">Receives the preview object (always assigned, possibly only partially filled).</param>
/// <param name="ratioX">Horizontal scale applied to key bounds in pattern mode; ignored when either ratio is 0.</param>
/// <param name="ratioY">Vertical scale applied to key bounds in pattern mode; ignored when either ratio is 0.</param>
/// <param name="checkPattern">True to only verify that the keys are present at their expected positions.</param>
/// <returns>
/// False when <paramref name="pos"/> is empty or, in pattern mode, when a key was not recognised;
/// true otherwise.
/// </returns>
/// <exception cref="Exception">Any failure during processing, wrapped with the original exception as <c>InnerException</c>.</exception>
public bool CheckImageForPatternAndGetDataFromIt(Mat image, List<PossitionOfWord> pos, IProgress<int> progress, out PreviewObject prew, double ratioX, double ratioY, bool checkPattern = false)
{
    PreviewObject p = new PreviewObject();
    if (pos.Count > 0)
    {
        int step = 1;
        bool isPattern = true;
        p.ListOfKeyColumn = new List<Column>();
        try
        {
            int conf = 0;
            p.ListOfKeyPossitions = new List<PossitionOfWord>();
            p.Lines = new List<TextLine>();

            if (!checkPattern)
            {
                conf = RunTesseract(image);
                progress.Report(20);

                foreach (PossitionOfWord w in pos)
                {
                    // Identical key and value bounds mark a column entry.
                    if (w.KeyBounds.Equals(w.ValueBounds))
                    {
                        p.ListOfKeyColumn.Add(new Column(w.Value, w.ValueBounds.Left, w.ValueBounds.Right, w.ValueBounds.Bottom, w.ValueBounds.Top));
                    }

                    var line = GetLineForValueBounds(w);
                    string text = Common.GetWordsForColumn(new Column("", w.ValueBounds.Left, w.ValueBounds.Right, 0, 0), line);
                    w.Value = text.Trim(CONSTANTS.charsToTrimLineForpossition);

                    var key = Common.RemoveDiacritism(w.Key);
                    SaveDataToPreviewObject(key, w, p);
                    progress.Report(step);
                    p.ListOfKeyPossitions.Add(w);
                }

                p.Lines.AddRange(_textLines);
            }
            else
            {
                foreach (PossitionOfWord w in pos)
                {
                    // When checking a pattern only the key areas are inspected; the bounding box
                    // should stay as small as possible.
                    OpenCvSharp.Rect rec = new Rect(w.KeyBounds.X, w.KeyBounds.Y, w.KeyBounds.Width, w.KeyBounds.Height);
                    rec.X -= CONSTANTS.PATTERN_CHECK_XY_PROXIMITY;
                    rec.Y -= CONSTANTS.PATTERN_CHECK_XY_PROXIMITY;
                    rec.Width += CONSTANTS.PATTERN_CHECK_WIDTHHEIGHT_PROXIMITY;
                    rec.Height += CONSTANTS.PATTERN_CHECK_WIDTHHEIGHT_PROXIMITY;

                    // Scale the positions to the document size, because the same document can
                    // arrive in a different size.
                    if (ratioX != 0 && ratioY != 0)
                    {
                        rec.X = (int)(rec.X * ratioX);
                        rec.Y = (int)(rec.Y * ratioY);
                        rec.Width = (int)(rec.Width * ratioX);
                        rec.Height = (int)(rec.Height * ratioY);
                    }

                    // Clamp the rectangle to the image: the proximity margin may push the origin
                    // below zero, and the far edges may exceed the image size.
                    if (rec.X < 0)
                    {
                        rec.Width += rec.X;
                        rec.X = 0;
                    }

                    if (rec.Y < 0)
                    {
                        rec.Height += rec.Y;
                        rec.Y = 0;
                    }

                    if (image.Cols < rec.X + rec.Width)
                    {
                        rec.Width = image.Cols - rec.X;
                    }

                    if (image.Rows < rec.Y + rec.Height)
                    {
                        rec.Height = image.Rows - rec.Y;
                    }

                    // The ROI header is only needed for the recognition call; release it afterwards.
                    // NOTE(review): assumes RunTesseract does not keep a reference to the Mat - confirm.
                    using (Mat im = new Mat(image, rec))
                    {
                        conf = RunTesseract(im);
                    }

                    bool pat = false;
                    foreach (TextLine line in _textLines)
                    {
                        var s = Common.RemoveDiacritism(line.Text.Trim(CONSTANTS.charsToTrimLineForpossition));
                        var k = Common.RemoveDiacritism(w.Key);
                        if (SimilarityService.GetSimilarity(s, k) > 90 || s.Contains(k) || k.Contains(s))
                        {
                            pat = true;
                        }
                    }

                    if (!pat)
                    {
                        // One missing key is enough to reject the pattern.
                        isPattern = false;
                        break;
                    }

                    w.Confidence = GetConfForLine(_textLines[0]);
                    p.ListOfKeyPossitions.Add(w);
                    p.Lines.AddRange(_textLines);
                    _textLines.Clear();
                }
            }

            // Floating-point division keeps the fractional part that the N2 format displays.
            p.Confidence = string.Format("{0:N2}%", conf / 100.0);
            p.Img = OpenCvSharp.Extensions.BitmapConverter.ToBitmap(image);
            p.Lang = _lang;
        }
        catch (Exception e)
        {
            // Keep the thrown type and message, but preserve the original exception (and its
            // stack trace) as the inner exception instead of discarding it.
            throw new Exception(e.Message, e);
        }

        prew = p;
        return isPattern;
    }

    prew = p;
    return false;
}
/// <summary>
/// Searches the text of a line for keys of the given dictionary. Every candidate position
/// (an occurrence of the key's first character) is compared with the key by similarity; a
/// sufficiently similar match that is not in the middle of a word is handed to
/// <c>PrepareToSave</c>, whose result decides whether the search continues, stops, or returns.
/// When the line belongs to a column and no key was found, the method calls itself with the
/// other dictionaries and finally stores the line as the next client address field
/// (name, street, postal code/city, state) of the column.
/// </summary>
/// <param name="line">Text line object the text comes from.</param>
/// <param name="lineText">Text to search; passed by reference to <c>PrepareToSave</c> and to the recursive calls, which may change it.</param>
/// <param name="dictionary">Dictionary whose keys are searched for in the text.</param>
/// <param name="data">Object that receives the found data.</param>
/// <param name="isColumn">True when the line is processed as part of a column.</param>
/// <param name="col">Column the line belongs to, or null.</param>
/// <param name="lookingForRight">True when the call is made to find the right boundary of a column.</param>
/// <returns>
/// The result requested by <c>PrepareToSave</c> when it returns <c>Result.True</c> or
/// <c>Result.False</c>; otherwise false when <c>_keysInRow</c> is at least 2, true when
/// <paramref name="lookingForRight"/> is set and <c>_pair.Key</c> is not null, and else whether a key was found.
/// </returns>
private bool GetDataFromLine(TextLine line, ref string lineText, Dictionary<string, string> dictionary, Object data, bool isColumn = false, Column col = null, bool lookingForRight = false)
{
    int firstCharindex;
    int keyLength;
    bool keyFound = false;
    int similarity = 0;
    bool IndexIsNull = false;
    string stringKey = string.Empty;
    CONSTANTS.Result res = CONSTANTS.Result.Continue;

    foreach (KeyValuePair<string, string> key in dictionary)
    {
        // An empty key has no first character to search for; skip it instead of throwing.
        if (key.Key.Length == 0)
        {
            continue;
        }

        // Index of the first occurrence of the key's first character.
        firstCharindex = lineText.ToLower().IndexOf(key.Key.Substring(0, 1).ToLower());
        keyLength = key.Key.Length;

        // If the key is longer than the remaining text it cannot be the text we are looking for.
        while ((keyLength + firstCharindex) <= lineText.Length && firstCharindex != -1)
        {
            // Candidate text taken from the OCR output.
            stringKey = lineText.Substring(firstCharindex, keyLength);
            similarity = SimilarityService.GetSimilarity(key.Key.ToLower(), stringKey.ToLower());

            if (similarity > CONSTANTS.SIMILARITY && !IsInMiddleOfWord(lineText, firstCharindex, keyLength, key.Key[0], key.Key[key.Key.Length - 1], stringKey))
            {
                if (col != null && data.GetType() == _eud.GetType())
                {
                    // The current column ended.
                    col.Completed = true;
                    col.Bottom = line.Words[0].Bounds.Top;
                }

                res = PrepareToSave(line, key, stringKey, ref lineText, firstCharindex, dictionary, data, ref keyFound, lookingForRight, ref isColumn, col);

                if (res == CONSTANTS.Result.Continue || res == CONSTANTS.Result.Break)
                {
                    break;
                }

                if (res == CONSTANTS.Result.True)
                {
                    return true;
                }

                if (res == CONSTANTS.Result.False)
                {
                    return false;
                }
            }
            else
            {
                // Move on to the next occurrence of the key's first character.
                // NOTE(review): the offset is taken from the first occurrence of stringKey in the
                // text, which may lie before firstCharindex - confirm this is intended.
                string s = lineText.Substring(lineText.IndexOf(stringKey) + 1).ToLower();
                int index = s.IndexOf(key.Key.Substring(0, 1).ToLower());
                if (index == 0 && IndexIsNull || index == -1)
                {
                    // Nothing has been found, move to the next key.
                    break;
                }

                if (index == 0)
                {
                    IndexIsNull = true;
                }

                firstCharindex += index + 1;
            }
        }

        // Stop looking at further keys only when asked to break and little text is left;
        // in every other case the next key is tried.
        if (res == CONSTANTS.Result.Break && lineText.Length < 10)
        {
            break;
        }
    }

    if (isColumn && !keyFound && col != null && col.FirstLineInColumn > 4)
    {
        // Try a different dictionary.
        if (data.GetType() == _eud.GetType())
        {
            foreach (Column c in _listOfColumns)
            {
                GetDataFromLine(line, ref lineText, _dic.clients, _listOfClients[c.Id - 1], false, col, false);
            }
        }
        else
        {
            GetDataFromLine(line, ref lineText, _dic.header, _eud, false, col, false);
        }
    }

    if (isColumn && col != null && !keyFound)
    {
        GetDataFromLine(line, ref lineText, _dic.header, _eud, false, col, false);

        if (_keysInRow == 0)
        {
            // Only client data can take the address fields; other data objects are left untouched
            // instead of failing with an invalid cast.
            Client client = data as Client;
            string s = lineText.Trim(CONSTANTS.charsToTrim);

            if (client != null && !string.IsNullOrEmpty(s) && s.Length >= 5)
            {
                // The position of the line within the column selects the address field.
                switch (col.FirstLineInColumn)
                {
                    case 1:
                        client.Name = lineText;
                        col.FirstLineInColumn++;
                        SavePossitionToLists("Name", string.Empty, client.Name, line, line, col.Text + " Meno");
                        break;
                    case 2:
                        client.Street = lineText;
                        col.FirstLineInColumn++;
                        SavePossitionToLists("Street", string.Empty, client.Street, line, line, col.Text + " Ulica");
                        break;
                    case 3:
                        client.PSCCity = lineText;
                        col.FirstLineInColumn++;
                        SavePossitionToLists("PSCCity", string.Empty, client.PSCCity, line, line, col.Text + " Psč");
                        break;
                    case 4:
                        client.State = lineText;
                        col.FirstLineInColumn++;
                        SavePossitionToLists("State", string.Empty, client.State, line, line, col.Text + " Štát");
                        break;
                }
            }
        }
    }

    if (_keysInRow >= 2)
    {
        return false;
    }

    if (lookingForRight && _pair.Key != null)
    {
        return true;
    }

    return keyFound;
}