/// <summary>
/// Looks up the distance function stored in <c>Funcs</c> under the given key.
/// </summary>
/// <param name="type">Key of the distance function to retrieve.</param>
/// <returns>
/// The function stored for <paramref name="type"/>, or <c>null</c> when
/// <c>Funcs</c> has no entry for that key.
/// </returns>
public static IDistanceFunc GetDistanceFunc(DistanceFuncTpye type)
{
    // A missing key is an expected outcome here, so probe with TryGetValue
    // instead of paying for a thrown and caught KeyNotFoundException.
    IDistanceFunc func;
    return Funcs.TryGetValue(type, out func) ? func : null;
}
/// <summary>
/// Walks <paramref name="minimumUnitList"/> in list order, merges consecutive units into
/// regions via <c>CombineUnits</c>, and distributes the regions into the combined and
/// low-confidence lists via <c>MoveToResultLists</c>.
/// </summary>
/// <remarks>
/// The units in <paramref name="minimumUnitList"/> are mutated while grouping: punctuation
/// units get their <c>Type</c> set and are appended to the previous unit or prepended to
/// the next one, and <c>Break</c> / corner properties may be overwritten.
/// </remarks>
/// <param name="minimumUnitList">Units to group. When <c>null</c>, the method returns <c>null</c>.</param>
/// <param name="lines">
/// Line segments handed to <see cref="LineSearchHelper"/>; the helper is consulted through
/// <c>ExistLineBetweenUnits</c> before two units are merged.
/// </param>
/// <param name="Type">Key used to obtain the distance function from <c>DistanceFunctions.GetDistanceFunc</c>.</param>
/// <returns>
/// A <see cref="GroupedResult"/> holding the combined and low-confidence regions, or
/// <c>null</c> when <paramref name="minimumUnitList"/> is <c>null</c>.
/// </returns>
/// <exception cref="System.ArgumentException">
/// A distance check is required but no distance function is available for <paramref name="Type"/>.
/// </exception>
public GroupedResult Group(List<MinimumUnit> minimumUnitList, LineSegment2DF[] lines, DistanceFuncTpye Type = DistanceFuncTpye.Fixed)
{
    if (minimumUnitList == null)
    {
        return null;
    }

    LineSearchHelper lineSearchHelper = new LineSearchHelper(lines);
    var combined = new List<GroupedRegion>();
    var lowConfidence = new List<GroupedRegion>();
    var allRegions = new List<GroupedRegion>();
    var units = new List<MinimumUnit>();
    var lastXDistance = 0;

    // Resolved lazily on the first distance check and then reused, so the lookup
    // is not repeated per unit and inputs that never need it are unaffected.
    IDistanceFunc distanceFunc = null;

    // TO-DO, sort minimum units based on y and x here
    // TO-DO, use table line to enhance grouping
    for (int index = 0; index < minimumUnitList.Count; index++)
    {
        var current = minimumUnitList[index];

        // Nothing pending: the current unit simply starts a new group.
        if (units.Count == 0)
        {
            units.Add(current);
            continue;
        }

        // previous minimum unit
        var previous = units[units.Count - 1];

        if (previous.IsStop())
        {
            // The pending group ended with a stop: flush it and start over with current.
            allRegions.Add(CombineUnits(units));
            units.Clear();
            lastXDistance = 0;
            units.Add(current);
            continue;
        }

        // next minimum unit; stays null for the last element. Decided by position
        // rather than Equals so duplicate / value-equal units cannot be mistaken
        // for the end of the list.
        MinimumUnit next = null;
        var xBackDistance = 0;
        if (index < minimumUnitList.Count - 1)
        {
            next = minimumUnitList[index + 1];
            xBackDistance = next.GetRealUpLeft().X - current.GetRealUpRight().X;
        }

        // check syntax
        if (current.IsPunctuations())
        {
            // Punctuation never becomes a pending unit of its own: it is merged into
            // the previous unit, merged into the next unit, or left unattached.
            current.Type = UnitType.Punctuation;

            if (!IsSameLine(previous, current))
            {
                PrependToNextIfAdjacent(lineSearchHelper, current, next);
                continue;
            }

            if (current.IsRightBracket())
            {
                if (!ExistLineBetweenUnits(lineSearchHelper, previous, current))
                {
                    previous.Append(current);
                }

                continue;
            }

            if (ExistLineBetweenUnits(lineSearchHelper, previous, current))
            {
                PrependToNextIfAdjacent(lineSearchHelper, current, next);
                continue;
            }

            if (current.IsStop())
            {
                // TO-DO, review
                current.UpRight = previous.UpRight;
                current.DownRight = previous.DownRight;
                previous.Break = true;
                current.Break = true;
                previous.Append(current);
                allRegions.Add(CombineUnits(units));
                units.Clear();
                lastXDistance = 0;
            }
            else if (current.IsJoin())
            {
                previous.Append(current);
                previous.Type = UnitType.CharWithJoin;
            }
            else if (current.IsSplit())
            {
                previous.Append(current);
                previous.Break = true;
            }
            else
            {
                PrependToNextIfAdjacent(lineSearchHelper, current, next);
            }

            continue;
        }

        // check distance
        var xFrontDistance = current.GetRealUpLeft().X - previous.GetRealUpRight().X;
        var angle = GetAngle(previous.GetRealUpRight(), current.GetRealUpLeft());

        if (distanceFunc == null)
        {
            distanceFunc = DistanceFunctions.GetDistanceFunc(Type);
            if (distanceFunc == null)
            {
                // Fail with a descriptive error instead of dereferencing null.
                throw new System.ArgumentException("No distance function is available for type '" + Type + "'.", "Type");
            }
        }

        var isOneWord = CanCombine(xFrontDistance, xBackDistance, lastXDistance, angle, distanceFunc.Calculate(minimumUnitList, index));
        lastXDistance = xFrontDistance;

        // additional checking by table line support
        // NOTE(review): only this check is skipped for null lines; the punctuation
        // branch above queries the helper unconditionally — confirm that is intended.
        if (isOneWord && lines != null)
        {
            isOneWord = !ExistLineBetweenUnits(lineSearchHelper, previous, current);
        }

        if (!isOneWord)
        {
            // create a word and add it to word regions.
            allRegions.Add(CombineUnits(units));
            units.Clear();
            lastXDistance = 0;

            // line break so separate
            if (current.Break)
            {
                MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);
                allRegions.Clear();
            }
        }

        units.Add(current);
    }

    // Flush whatever is still pending.
    // NOTE(review): units can be empty here (empty input, or the last unit was a stop
    // punctuation that already flushed) — confirm CombineUnits tolerates an empty list.
    allRegions.Add(CombineUnits(units));
    MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);

    return new GroupedResult
    {
        GroupedRegions = combined,
        LowConfidenceRegions = lowConfidence
    };
}

/// <summary>
/// Prepends <paramref name="current"/> to <paramref name="next"/> when a next unit exists,
/// <c>IsSameLine</c> accepts the pair, and <c>ExistLineBetweenUnits</c> reports no line
/// between them; otherwise does nothing.
/// </summary>
private void PrependToNextIfAdjacent(LineSearchHelper lineSearchHelper, MinimumUnit current, MinimumUnit next)
{
    // next is null for the last unit of the list; there is nothing to attach to then.
    if (next != null && IsSameLine(next, current) && !ExistLineBetweenUnits(lineSearchHelper, current, next))
    {
        next.Prepend(current);
    }
}