/// <summary>
/// Fills in the neighbour links (ClosestToRight, ClosestToLeft, ClosestToBottom,
/// ClosestToTop) of every word in <paramref name="data"/>.
/// </summary>
/// <remarks>
/// Horizontal neighbours are taken from the word's own cluster line. Vertical
/// neighbours are taken from the cluster lines whose ID is one greater / one less
/// than the ID of the word's line; when no such line exists the corresponding
/// link is left untouched. The horizontal lookups rely on each line's Fields
/// being ordered left to right (CreateLines sorts them by Rectangle.Left).
/// </remarks>
/// <param name="data">Words to link. Each word's Cluster and Clusterline are dereferenced, so both must be set.</param>
/// <param name="lines">Not read by this method; kept so existing callers keep compiling.</param>
static void setMapping(ICollection<MappedWord> data, ICollection<OcrLine> lines)
{
    foreach (MappedWord word in data)
    {
        var sameLineFields = word.Clusterline.Fields;

        // First field that starts to the right of this word.
        word.ClosestToRight = sameLineFields.FirstOrDefault(a => a.Rectangle.X > word.Rectangle.X);

        // Last field that starts to the left of this word. With fields ordered
        // left to right, taking the FIRST match (as the code used to) returned
        // the farthest-left word on the line rather than the nearest one.
        word.ClosestToLeft = sameLineFields.LastOrDefault(a => a.Rectangle.X < word.Rectangle.X);

        // Line with the next ID (presumably the line directly below - CreateLines
        // hands out IDs in creation order; confirm if lines are built elsewhere).
        FieldClusterLine lineBelow = word.Cluster.lines.FirstOrDefault(a => a.ID == word.Clusterline.ID + 1);
        if (lineBelow != null)
        {
            // First field that starts no further left than half a word-width
            // before this word's X and whose right edge lies past this word's X.
            word.ClosestToBottom = lineBelow.Fields.FirstOrDefault(
                a => (a.Rectangle.X > word.Rectangle.X - word.Rectangle.Width / 2)
                     && (a.Rectangle.Right > word.Rectangle.X));
        }

        // Line with the previous ID (presumably the line directly above).
        FieldClusterLine lineAbove = word.Cluster.lines.FirstOrDefault(a => a.ID == word.Clusterline.ID - 1);
        if (lineAbove != null)
        {
            // First field whose horizontal centre falls strictly inside this word's span.
            word.ClosestToTop = lineAbove.Fields.FirstOrDefault(
                a => a.Center.X > word.Rectangle.Left && a.Center.X < word.Rectangle.Right);
        }
    }
}
/// <summary>
/// Groups the words of <paramref name="cluster"/> into lines and assigns each
/// word's Clusterline to the line it ended up in.
/// </summary>
/// <remarks>
/// For every word, the document navigator is queried with a rectangle spanning
/// the cluster's width at the word's vertical position; the unassigned hits from
/// the same cluster form that word's candidate list. Lines are then built one at
/// a time from the remaining words (ordered by Rectangle.Top), preferring a seed
/// word whose height is less than twice the value returned by CalcWordHeight.
/// </remarks>
/// <param name="doc">Document whose WordsNavigator is used for the rectangle queries.</param>
/// <param name="cluster">Cluster supplying the words (Fields) and the horizontal extent (Area).</param>
/// <returns>
/// The created lines in creation order, with IDs 0, 1, 2, ... and each line's
/// Fields ordered by Rectangle.Left.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A word has no candidates on its own line ("A) ..."), or a seed word's
/// candidates have all been assigned already ("B) ...").
/// </exception>
public static List<FieldClusterLine> CreateLines(DocumentData doc, fieldClusterModel cluster)
{
    // Factor applied to half of a candidate's height in the same-line test below.
    const double HalfHeightScale = 1.2;

    DocumentDataNavigation navigator = doc.WordsNavigator;
    var wordHeight = CalcWordHeight(cluster.Fields);
    var maxSeedHeight = wordHeight * 2;

    // Words not yet placed on a line, top-most first; shrinks as lines are built.
    List<MappedWord> pending = new List<MappedWord>(cluster.Fields.OrderBy(a => a.Rectangle.Top));

    // For each word: the unassigned same-cluster words found in its horizontal band.
    Dictionary<MappedWord, List<MappedWord>> candidatesByWord = new Dictionary<MappedWord, List<MappedWord>>();
    foreach (var word in pending)
    {
        var band = new System.Windows.Rect(cluster.Area.Left, word.Rectangle.Top, cluster.Area.Width, word.Rectangle.Height);
        var sameLine = navigator.GetWords(band)
            .Where(x => x.Cluster.ID == word.Cluster.ID && x.Clusterline == null)
            .Where(x => x == word || x.Rectangle.Top + (x.Rectangle.Height / 2) * HalfHeightScale < word.Rectangle.Bottom)
            .ToList();

        if (sameLine.Count == 0)
        {
            throw new InvalidOperationException("A) No words in word line");
        }

        candidatesByWord.Add(word, sameLine);
    }

    List<FieldClusterLine> result = new List<FieldClusterLine>();
    int index = 0;
    while (pending.Count > 0)
    {
        // Prefer the first remaining word of ordinary height as the seed;
        // fall back to the first remaining word when every one is oversized.
        MappedWord seed = pending.FirstOrDefault(x => x.Rectangle.Height < maxSeedHeight) ?? pending.First();

        // Hash set so the membership test below is O(1) per dictionary entry.
        HashSet<MappedWord> baseWords = new HashSet<MappedWord>(candidatesByWord[seed].Where(x => x.Clusterline == null));
        if (baseWords.Count == 0)
        {
            // Bad word
            throw new InvalidOperationException("B) No words in word line");
        }

        FieldClusterLine line = new FieldClusterLine();
        line.ID = index++;

        // The line is the union of the candidate lists of every base word,
        // restricted to words that are still unassigned, ordered left to right.
        line.Fields = candidatesByWord
            .Where(entry => baseWords.Contains(entry.Key))
            .SelectMany(entry => entry.Value)
            .Where(x => x.Clusterline == null)
            .Distinct()
            .OrderBy(x => x.Rectangle.Left)
            .ToList();

        HashSet<MappedWord> placed = new HashSet<MappedWord>(line.Fields);
        pending.RemoveAll(a => placed.Contains(a));

        foreach (MappedWord member in line.Fields)
        {
            member.Clusterline = line;
        }

        result.Add(line);
    }

    return result;
}