/// <summary>
/// Replaces the appended punctuation with a newly created unit that carries
/// the given text and right-hand corner points.
/// </summary>
/// <param name="text">Text assigned to the new unit.</param>
/// <param name="UpRight">Point stored as the new unit's <c>UpRight</c>.</param>
/// <param name="DownRight">Point stored as the new unit's <c>DownRight</c>.</param>
public void Append(String text, Point UpRight, Point DownRight)
{
    var appended = new MinimumUnit();
    appended.Text = text;
    appended.UpRight = UpRight;
    appended.DownRight = DownRight;

    // Any previously appended punctuation is overwritten.
    this.AppendedPunctuation = appended;
}
/// <summary>
/// Replaces the prepended punctuation with a newly created unit that carries
/// the given text and left-hand corner points.
/// </summary>
/// <param name="text">Text assigned to the new unit.</param>
/// <param name="UpLeft">Point stored as the new unit's <c>UpLeft</c>.</param>
/// <param name="DownLeft">Point stored as the new unit's <c>DownLeft</c>.</param>
public void Prepend(String text, Point UpLeft, Point DownLeft)
{
    var prepended = new MinimumUnit();
    prepended.Text = text;
    prepended.UpLeft = UpLeft;
    prepended.DownLeft = DownLeft;

    // Any previously prepended punctuation is overwritten.
    this.PrependedPunctuation = prepended;
}
/// <summary>
/// Computes the centre of a unit as the integer average of its four
/// "real" corner points.
/// </summary>
/// <param name="unit">Unit whose corners are averaged; must not be null.</param>
/// <returns>A point whose X and Y are the corner sums divided by four (integer division).</returns>
private Point GetCenterPoint(MinimumUnit unit)
{
#if DEBUG
    // Fail fast in debug builds so that any caller passing null is found early.
    System.Diagnostics.Debug.Assert(unit != null);
#endif
    var corners = new[]
    {
        unit.GetRealUpLeft(),
        unit.GetRealUpRight(),
        unit.GetRealDownRight(),
        unit.GetRealDownLeft()
    };

    int sumX = 0;
    int sumY = 0;
    foreach (var corner in corners)
    {
        sumX += corner.X;
        sumY += corner.Y;
    }

    return new Point(sumX / 4, sumY / 4);
}
/// <summary>
/// Returns the vertical extent of a unit.
/// </summary>
/// <param name="unit">Unit to measure; must not be null.</param>
/// <returns>
/// A tuple whose <c>Item1</c> is the smaller Y of the two upper corners and
/// whose <c>Item2</c> is the larger Y of the two lower corners.
/// </returns>
private Tuple <int, int> GetMinMaxY(MinimumUnit unit)
{
#if DEBUG
    // Fail fast in debug builds so that any caller passing null is found early.
    System.Diagnostics.Debug.Assert(unit != null);
#endif
    int topY = Math.Min(unit.GetRealUpRight().Y, unit.GetRealUpLeft().Y);
    int bottomY = Math.Max(unit.GetRealDownRight().Y, unit.GetRealDownLeft().Y);

    return Tuple.Create(topY, bottomY);
}
/// <summary>
/// Decides whether two units sit on the same text line by comparing their
/// vertical positions.
/// </summary>
/// <param name="source">First unit; a null value yields <c>false</c>.</param>
/// <param name="target">Second unit; a null value yields <c>false</c>.</param>
/// <returns>
/// <c>true</c> when the centre Y, the top Y and the bottom Y of both units
/// each differ by at most 5; otherwise <c>false</c>.
/// </returns>
private bool IsSameLine(MinimumUnit source, MinimumUnit target)
{
    if (source == null || target == null)
    {
        return false;
    }

    // Maximum vertical offset still treated as "same line".
    const int tolerance = 5;

    var sourceExtent = GetMinMaxY(source);
    var targetExtent = GetMinMaxY(target);
    var sourceCenter = GetCenterPoint(source);
    var targetCenter = GetCenterPoint(target);

    bool centersAligned = Math.Abs(sourceCenter.Y - targetCenter.Y) <= tolerance;
    bool topsAligned = Math.Abs(sourceExtent.Item1 - targetExtent.Item1) <= tolerance;
    bool bottomsAligned = Math.Abs(sourceExtent.Item2 - targetExtent.Item2) <= tolerance;

    return centersAligned && topsAligned && bottomsAligned;
}
/// <summary>
/// Checks whether the line search helper reports a line inside the
/// rectangle that spans two units.
/// </summary>
/// <param name="lineSearchHelper">Helper queried through <c>ExistLineOnTheRegion</c>.</param>
/// <param name="unit1">One of the two units; must not be null.</param>
/// <param name="unit2">The other unit; must not be null.</param>
/// <returns><c>true</c> when the helper finds a line in the spanned region; otherwise <c>false</c>.</returns>
private bool ExistLineBetweenUnits(LineSearchHelper lineSearchHelper, MinimumUnit unit1, MinimumUnit unit2)
{
    // Order the units horizontally by the X of their upper-right corner.
    MinimumUnit left = unit2;
    MinimumUnit right = unit1;
    if (unit1.GetRealUpRight().X < unit2.GetRealUpRight().X)
    {
        left = unit1;
        right = unit2;
    }

    var leftUpLeft = left.GetRealUpLeft();
    var rightUpLeft = right.GetRealUpLeft();

    // The region runs from the higher of the two top edges to the lower of
    // the two bottom edges. The rectangle must therefore be anchored at that
    // same top edge; anchoring it at the left unit's top (as before) shifted
    // the region downwards whenever the right unit started higher.
    int top = Math.Min(rightUpLeft.Y, leftUpLeft.Y);
    int bottom = Math.Max(right.GetRealDownLeft().Y, left.GetRealDownLeft().Y);

    int width = Math.Abs(right.GetRealUpRight().X - leftUpLeft.X);
    int height = Math.Abs(bottom - top);

    Rectangle rect = new Rectangle(leftUpLeft.X, top, width, height);
    return lineSearchHelper.ExistLineOnTheRegion(rect);
}
/// <summary>
/// Walks the minimum units in list order and groups consecutive units into
/// regions. Punctuation units are attached to a neighbouring unit instead of
/// being grouped on their own; other units are split into separate regions
/// according to horizontal distance, angle and (when available) detected lines.
/// </summary>
/// <param name="minimumUnitList">Units to group, processed in the order given. A null list yields a null result.</param>
/// <param name="lines">
/// Line segments handed to the <c>LineSearchHelper</c>. When null, the extra
/// line check on the distance-based path is skipped.
/// </param>
/// <param name="Type">Selects the distance function obtained from <c>DistanceFunctions.GetDistanceFunc</c>.</param>
/// <returns>
/// A <c>GroupedResult</c> holding the combined regions and the low-confidence
/// regions, or <c>null</c> when <paramref name="minimumUnitList"/> is null.
/// </returns>
public GroupedResult Group(List <MinimumUnit> minimumUnitList, LineSegment2DF[] lines, DistanceFuncTpye Type = DistanceFuncTpye.Fixed)
{
    if (minimumUnitList == null)
    {
        return null;
    }

    LineSearchHelper lineSearchHelper = new LineSearchHelper(lines);
    var combined = new List<GroupedRegion>();
    var lowConfidence = new List<GroupedRegion>();
    var allRegions = new List<GroupedRegion>();
    var units = new List<MinimumUnit>();
    var lastXDistance = 0;

    // TO-DO, sort minimum units based on y and x here
    // TO-DO, use table line to enhance grouping
    for (int index = 0; index < minimumUnitList.Count; index++)
    {
        var current = minimumUnitList[index];

        if (units.Count != 0)
        {
            // previous minimum unit
            var previous = units[units.Count - 1];

            if (previous.IsStop())
            {
                // The pending units end at a stop: close the region and start over.
                allRegions.Add(CombineUnits(units));
                units.Clear();
                lastXDistance = 0;
            }
            else
            {
                MinimumUnit next = null;
                var xBackDistance = 0;

                // Look ahead by position. Comparing against the last element with
                // Equals would skip the look-ahead for any earlier unit that is
                // equal to (or the same instance as) the final one.
                if (index + 1 < minimumUnitList.Count)
                {
                    // next minimum unit
                    next = minimumUnitList[index + 1];
                    xBackDistance = next.GetRealUpLeft().X - current.GetRealUpRight().X;
                }

                // check syntax
                if (current.IsPunctuations())
                {
                    current.Type = UnitType.Punctuation;

                    if (!IsSameLine(previous, current))
                    {
                        PrependToNextIfAdjacent(lineSearchHelper, current, next);
                    }
                    else if (current.IsRightBracket())
                    {
                        // A right bracket only ever attaches to the unit before it.
                        if (!ExistLineBetweenUnits(lineSearchHelper, previous, current))
                        {
                            previous.Append(current);
                        }
                    }
                    else if (ExistLineBetweenUnits(lineSearchHelper, previous, current))
                    {
                        // A line separates it from the previous unit, so try the next one.
                        PrependToNextIfAdjacent(lineSearchHelper, current, next);
                    }
                    else if (current.IsStop())
                    {
                        // TO-DO, review
                        current.UpRight = previous.UpRight;
                        current.DownRight = previous.DownRight;
                        previous.Break = true;
                        current.Break = true;
                        previous.Append(current);
                        allRegions.Add(CombineUnits(units));
                        units.Clear();
                        lastXDistance = 0;
                    }
                    else if (current.IsJoin())
                    {
                        previous.Append(current);
                        previous.Type = UnitType.CharWithJoin;
                    }
                    else if (current.IsSplit())
                    {
                        previous.Append(current);
                        previous.Break = true;
                    }
                    else
                    {
                        PrependToNextIfAdjacent(lineSearchHelper, current, next);
                    }

                    // Punctuation is never added to the pending units itself.
                    continue;
                }

                // check distance
                var xFrontDistance = current.GetRealUpLeft().X - previous.GetRealUpRight().X;
                var angle = GetAngle(previous.GetRealUpRight(), current.GetRealUpLeft());
                var isOneWord = CanCombine(
                    xFrontDistance,
                    xBackDistance,
                    lastXDistance,
                    angle,
                    DistanceFunctions.GetDistanceFunc(Type).Calculate(minimumUnitList, index));
                lastXDistance = xFrontDistance;

                // additional checking by table line support
                if (isOneWord && lines != null)
                {
                    isOneWord = !ExistLineBetweenUnits(lineSearchHelper, previous, current);
                }

                if (!isOneWord)
                {
                    // create a word and add it to word regions.
                    allRegions.Add(CombineUnits(units));
                    units.Clear();
                    lastXDistance = 0;

                    // line break so separate
                    if (current.Break)
                    {
                        MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);
                        allRegions.Clear();
                    }
                }
            }
        }

        units.Add(current);
    }

    // Close whatever is still pending and hand the remaining regions over.
    allRegions.Add(CombineUnits(units));
    MoveToResultLists(combined, lowConfidence, allRegions, ConfidenceScoreThreshold * 100);

    return new GroupedResult { GroupedRegions = combined, LowConfidenceRegions = lowConfidence };
}

/// <summary>
/// Prepends a punctuation unit to the following unit when both are on the
/// same line and no line is detected between them; otherwise does nothing.
/// </summary>
/// <param name="lineSearchHelper">Helper used for the line check.</param>
/// <param name="punctuation">Punctuation unit to attach.</param>
/// <param name="next">Following unit; may be null, in which case nothing happens.</param>
private void PrependToNextIfAdjacent(LineSearchHelper lineSearchHelper, MinimumUnit punctuation, MinimumUnit next)
{
    // IsSameLine returns false for a null unit, so the line check never sees null.
    if (IsSameLine(next, punctuation) && !ExistLineBetweenUnits(lineSearchHelper, punctuation, next))
    {
        next.Prepend(punctuation);
    }
}
/// <summary>
/// Clears the appended punctuation by setting it to null.
/// </summary>
public void DiscardAppend()
{
    AppendedPunctuation = null;
}
/// <summary>
/// Clears the prepended punctuation by setting it to null.
/// </summary>
public void DiscardPrepend()
{
    PrependedPunctuation = null;
}
/// <summary>
/// Stores the given unit as the appended punctuation, replacing any
/// previously stored one.
/// </summary>
/// <param name="unit">Unit to keep as the appended punctuation.</param>
public void Append(MinimumUnit unit)
{
    this.AppendedPunctuation = unit;
}
/// <summary>
/// Stores the given unit as the prepended punctuation, replacing any
/// previously stored one.
/// </summary>
/// <param name="unit">Unit to keep as the prepended punctuation.</param>
public void Prepend(MinimumUnit unit)
{
    this.PrependedPunctuation = unit;
}