/// <summary>
/// Asks the line search helper whether a line lies inside the rectangle
/// spanned by two units.
/// </summary>
/// <param name="lineSearchHelper">Helper queried through <c>ExistLineOnTheRegion</c>.</param>
/// <param name="unit1">One of the two units; the order of the two arguments does not matter.</param>
/// <param name="unit2">The other unit.</param>
/// <returns>
/// <c>true</c> when <c>ExistLineOnTheRegion</c> reports a line in the region;
/// otherwise <c>false</c>.
/// </returns>
private bool ExistLineBetweenUnits(LineSearchHelper lineSearchHelper, MinimumUnit unit1, MinimumUnit unit2)
{
    // The unit whose upper-right corner has the smaller X is treated as the left one.
    bool unit1IsLeft = unit1.GetRealUpRight().X < unit2.GetRealUpRight().X;
    MinimumUnit left = unit1IsLeft ? unit1 : unit2;
    MinimumUnit right = unit1IsLeft ? unit2 : unit1;

    var leftUpLeft = left.GetRealUpLeft();
    var rightUpLeft = right.GetRealUpLeft();

    // Vertical extent: from the higher of the two top edges to the lower of the two bottom edges.
    int top = Math.Min(rightUpLeft.Y, leftUpLeft.Y);
    int bottom = Math.Max(right.GetRealDownLeft().Y, left.GetRealDownLeft().Y);

    // Horizontal extent: from the left unit's left edge to the right unit's right edge.
    int width = Math.Abs(right.GetRealUpRight().X - leftUpLeft.X);
    int height = Math.Abs(bottom - top);

    // Anchor the rectangle at the same top edge the height was measured from.
    // Anchoring at the left unit's own top shifts the region downwards whenever
    // the right unit sits higher than the left one.
    Rectangle rect = new Rectangle(leftUpLeft.X, top, width, height);

    return lineSearchHelper.ExistLineOnTheRegion(rect);
}
/// <summary>
/// Walks the minimum units in list order and merges neighbouring units into
/// grouped regions. Punctuation units are attached to the previous or the
/// following unit; other units are merged with the current group when
/// <c>CanCombine</c> accepts the horizontal distances and angle and no line is
/// found between the two units.
/// </summary>
/// <param name="minimumUnitList">Units to group. They are processed in the given order (no sorting is done here).</param>
/// <param name="lines">
/// Line segments handed to <c>LineSearchHelper</c>. When <c>null</c>, the line
/// check that follows the distance test is skipped; the punctuation checks
/// still query the helper.
/// </param>
/// <param name="Type">Selects the distance function obtained from <c>DistanceFunctions.GetDistanceFunc</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="minimumUnitList"/> is <c>null</c>; otherwise a
/// <c>GroupedResult</c> whose two lists were filled by <c>MoveToResultLists</c>.
/// </returns>
public GroupedResult Group(List<MinimumUnit> minimumUnitList, LineSegment2DF[] lines, DistanceFuncTpye Type = DistanceFuncTpye.Fixed)
{
    if (minimumUnitList == null)
    {
        return null;
    }

    LineSearchHelper lineSearchHelper = new LineSearchHelper(lines);
    var combined = new List<GroupedRegion>();
    var lowConfidence = new List<GroupedRegion>();
    var allRegions = new List<GroupedRegion>();
    var units = new List<MinimumUnit>();
    var lastXDistance = 0;

    // TO-DO, sort minimum units based on y and x here
    // TO-DO, use table line to enhance grouping
    for (int index = 0; index < minimumUnitList.Count; index++)
    {
        var current = minimumUnitList[index];

        // Nothing collected yet: the current unit simply starts a new group.
        if (units.Count == 0)
        {
            units.Add(current);
            continue;
        }

        // previous minimum unit
        var previous = units[units.Count - 1];

        // A stop unit closes the group; the current unit starts the next one.
        if (previous.IsStop())
        {
            allRegions.Add(CombineUnits(units));
            units.Clear();
            lastXDistance = 0;
            units.Add(current);
            continue;
        }

        // Look ahead by position. Comparing the current unit with the last list
        // element via Equals would misfire when two units compare equal.
        MinimumUnit next = null;
        var xBackDistance = 0;
        if (index + 1 < minimumUnitList.Count)
        {
            // next minimum unit
            next = minimumUnitList[index + 1];
            xBackDistance = next.GetRealUpLeft().X - current.GetRealUpRight().X;
        }

        // check syntax
        if (current.IsPunctuations())
        {
            current.Type = UnitType.Punctuation;

            if (!IsSameLine(previous, current))
            {
                // Not on the previous unit's line: the following unit is the only candidate.
                PrependToNextIfJoinable(lineSearchHelper, current, next);
            }
            else if (current.IsRightBracket())
            {
                // A right bracket is only ever attached to the previous unit.
                if (!ExistLineBetweenUnits(lineSearchHelper, previous, current))
                {
                    previous.Append(current);
                }
            }
            else if (ExistLineBetweenUnits(lineSearchHelper, previous, current))
            {
                // A line separates it from the previous unit, so try the following one.
                PrependToNextIfJoinable(lineSearchHelper, current, next);
            }
            else if (current.IsStop())
            {
                // TO-DO, review
                current.UpRight = previous.UpRight;
                current.DownRight = previous.DownRight;
                previous.Break = true;
                current.Break = true;
                previous.Append(current);

                // The stop closes the group that is being collected.
                allRegions.Add(CombineUnits(units));
                units.Clear();
                lastXDistance = 0;
            }
            else if (current.IsJoin())
            {
                previous.Append(current);
                previous.Type = UnitType.CharWithJoin;
            }
            else if (current.IsSplit())
            {
                previous.Append(current);
                previous.Break = true;
            }
            else
            {
                PrependToNextIfJoinable(lineSearchHelper, current, next);
            }

            // Punctuation is never added to the group as a unit of its own.
            continue;
        }

        // check distance
        var xFrontDistance = current.GetRealUpLeft().X - previous.GetRealUpRight().X;
        var angle = GetAngle(previous.GetRealUpRight(), current.GetRealUpLeft());
        var isOneWord = CanCombine(
            xFrontDistance,
            xBackDistance,
            lastXDistance,
            angle,
            DistanceFunctions.GetDistanceFunc(Type).Calculate(minimumUnitList, index));
        lastXDistance = xFrontDistance;

        // additional checking by table line support
        if (isOneWord && lines != null)
        {
            isOneWord = !ExistLineBetweenUnits(lineSearchHelper, previous, current);
        }

        if (!isOneWord)
        {
            // create a word and add it to word regions.
            allRegions.Add(CombineUnits(units));
            units.Clear();
            lastXDistance = 0;

            // line break so separate
            if (current.Break)
            {
                MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);
                allRegions.Clear();
            }
        }

        units.Add(current);
    }

    // Flush the trailing group. It is empty when the input was empty or when the
    // last unit was a stop punctuation that already closed its group, and an
    // empty group must not be combined into a region.
    if (units.Count != 0)
    {
        allRegions.Add(CombineUnits(units));
    }

    MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);

    return new GroupedResult
    {
        GroupedRegions = combined,
        LowConfidenceRegions = lowConfidence
    };
}

/// <summary>
/// Prepends a punctuation unit to the following unit when there is a following
/// unit, both are on the same line, and no line is found between them.
/// Does nothing otherwise.
/// </summary>
/// <param name="lineSearchHelper">Helper used for the line check.</param>
/// <param name="current">The punctuation unit to attach.</param>
/// <param name="next">The following unit, or <c>null</c> when <paramref name="current"/> is the last unit.</param>
private void PrependToNextIfJoinable(LineSearchHelper lineSearchHelper, MinimumUnit current, MinimumUnit next)
{
    // The last unit of the list has no successor to attach to.
    if (next == null)
    {
        return;
    }

    if (IsSameLine(next, current) && !ExistLineBetweenUnits(lineSearchHelper, current, next))
    {
        next.Prepend(current);
    }
}