/// <summary>
/// Computes the center of a unit as the integer average of its four real corner points.
/// </summary>
/// <param name="unit">Unit whose corners are averaged. Callers must not pass null.</param>
/// <returns>
/// A point whose X (and Y) is the sum of the four corners' X (and Y) values divided by 4
/// using integer division.
/// </returns>
private Point GetCenterPoint(MinimumUnit unit)
{
    // Fail fast in debug builds so that any caller passing null is found early.
    // Debug.Assert is itself compiled out of non-DEBUG builds, so no #if DEBUG wrapper is needed.
    System.Diagnostics.Debug.Assert(unit != null);

    // Fetch each corner once instead of once per coordinate.
    var upLeft = unit.GetRealUpLeft();
    var upRight = unit.GetRealUpRight();
    var downRight = unit.GetRealDownRight();
    var downLeft = unit.GetRealDownLeft();

    int sumX = upLeft.X + upRight.X + downRight.X + downLeft.X;
    int sumY = upLeft.Y + upRight.Y + downRight.Y + downLeft.Y;

    return new Point(sumX / 4, sumY / 4);
}
/// <summary>
/// Returns the Y range spanned by a unit's real corner points.
/// </summary>
/// <param name="unit">Unit to measure. Callers must not pass null.</param>
/// <returns>
/// A tuple whose Item1 is the smaller Y of the two upper corners and whose Item2 is the
/// larger Y of the two lower corners.
/// </returns>
private Tuple<int, int> GetMinMaxY(MinimumUnit unit)
{
    // Debug-only guard: surfaces callers that pass null while developing.
    System.Diagnostics.Debug.Assert(unit != null);

    int upRightY = unit.GetRealUpRight().Y;
    int upLeftY = unit.GetRealUpLeft().Y;
    int smallestUpperY = Math.Min(upRightY, upLeftY);

    int downRightY = unit.GetRealDownRight().Y;
    int downLeftY = unit.GetRealDownLeft().Y;
    int largestLowerY = Math.Max(downRightY, downLeftY);

    return Tuple.Create(smallestUpperY, largestLowerY);
}
/// <summary>
/// Walks the minimum units in list order and groups consecutive units into regions.
/// Punctuation units are merged into the previous or the next unit; other units are
/// kept in the running group or start a new one depending on <c>CanCombine</c> and on
/// whether a line lies between them and the previous unit.
/// </summary>
/// <param name="minimumUnitList">Units to group, processed in the order given.</param>
/// <param name="lines">
/// Line segments handed to <c>LineSearchHelper</c>. When null, the extra line check that
/// follows the distance test is skipped.
/// </param>
/// <param name="Type">
/// Selects the distance function obtained from <c>DistanceFunctions.GetDistanceFunc</c>;
/// defaults to <c>DistanceFuncTpye.Fixed</c>.
/// </param>
/// <returns>
/// A <c>GroupedResult</c> holding the two lists filled by <c>MoveToResultLists</c>, or
/// null when <paramref name="minimumUnitList"/> is null.
/// </returns>
public GroupedResult Group(List<MinimumUnit> minimumUnitList, LineSegment2DF[] lines, DistanceFuncTpye Type = DistanceFuncTpye.Fixed)
{
    if (minimumUnitList == null)
    {
        return null;
    }

    LineSearchHelper lineSearchHelper = new LineSearchHelper(lines);
    var combined = new List<GroupedRegion>();
    var lowConfidence = new List<GroupedRegion>();
    var allRegions = new List<GroupedRegion>();
    var units = new List<MinimumUnit>();
    var lastXDistance = 0;

    // TO-DO, sort minimum units based on y and x here
    // TO-DO, use table line to enhance grouping
    for (int index = 0; index < minimumUnitList.Count; index++)
    {
        var current = minimumUnitList[index];

        if (units.Count != 0)
        {
            // Previous minimum unit: the last one added to the running group.
            var previous = units[units.Count - 1];
            MinimumUnit next = null;
            var xBackDistance = 0;

            if (previous.IsStop())
            {
                // The running group ended on a stop: flush it and start a new group with current.
                allRegions.Add(CombineUnits(units));
                units.Clear();
                lastXDistance = 0;
            }
            else
            {
                // Decide by position whether a next unit exists. Comparing current with the
                // last element via Equals mis-detects when an earlier element equals the last one.
                if (index + 1 < minimumUnitList.Count)
                {
                    next = minimumUnitList[index + 1];
                    xBackDistance = next.GetRealUpLeft().X - current.GetRealUpRight().X;
                }

                // check syntax
                if (current.IsPunctuations())
                {
                    current.Type = UnitType.Punctuation;
                    bool oneLine = IsSameLine(previous, current);

                    if (!oneLine)
                    {
                        PrependToNextIfSameLine(lineSearchHelper, current, next);
                    }
                    else if (current.IsRightBracket())
                    {
                        if (!ExistLineBetweenUnits(lineSearchHelper, previous, current))
                        {
                            previous.Append(current);
                        }
                    }
                    else if (ExistLineBetweenUnits(lineSearchHelper, previous, current))
                    {
                        PrependToNextIfSameLine(lineSearchHelper, current, next);
                    }
                    else if (current.IsStop())
                    {
                        // TO-DO, review
                        current.UpRight = previous.UpRight;
                        current.DownRight = previous.DownRight;
                        previous.Break = true;
                        current.Break = true;
                        previous.Append(current);
                        allRegions.Add(CombineUnits(units));
                        units.Clear();
                        lastXDistance = 0;
                    }
                    else if (current.IsJoin())
                    {
                        previous.Append(current);
                        previous.Type = UnitType.CharWithJoin;
                    }
                    else if (current.IsSplit())
                    {
                        previous.Append(current);
                        previous.Break = true;
                    }
                    else
                    {
                        PrependToNextIfSameLine(lineSearchHelper, current, next);
                    }

                    // A punctuation unit is never added to the running group itself; if none of
                    // the branches above merged it into a neighbour it is left out of the result.
                    continue;
                }

                // check distance
                var xFrontDistance = current.GetRealUpLeft().X - previous.GetRealUpRight().X;
                var angle = GetAngle(previous.GetRealUpRight(), current.GetRealUpLeft());
                var isOneWord = CanCombine(
                    xFrontDistance,
                    xBackDistance,
                    lastXDistance,
                    angle,
                    DistanceFunctions.GetDistanceFunc(Type).Calculate(minimumUnitList, index));
                lastXDistance = xFrontDistance;

                // additional checking by table line support
                if (isOneWord && lines != null)
                {
                    isOneWord = !ExistLineBetweenUnits(lineSearchHelper, previous, current);
                }

                if (!isOneWord)
                {
                    // Close the running group and add it to the pending regions.
                    allRegions.Add(CombineUnits(units));
                    units.Clear();
                    lastXDistance = 0;

                    // line break so separate
                    if (current.Break)
                    {
                        MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);
                        allRegions.Clear();
                    }
                }
            }
        }

        units.Add(current);
    }

    // NOTE(review): units can be empty here (empty input, or the list ended on a merged stop
    // punctuation). CombineUnits is still called as before - confirm it tolerates an empty list.
    allRegions.Add(CombineUnits(units));
    MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);

    return new GroupedResult
    {
        GroupedRegions = combined,
        LowConfidenceRegions = lowConfidence
    };
}

/// <summary>
/// Prepends <paramref name="current"/> to <paramref name="next"/> when a next unit exists,
/// <c>IsSameLine</c> reports both on the same line, and no line lies between them.
/// Does nothing when <paramref name="next"/> is null (current is the last unit in the list).
/// </summary>
private void PrependToNextIfSameLine(LineSearchHelper lineSearchHelper, MinimumUnit current, MinimumUnit next)
{
    if (next != null
        && IsSameLine(next, current)
        && !ExistLineBetweenUnits(lineSearchHelper, current, next))
    {
        next.Prepend(current);
    }
}