/// <summary>
/// Walks downward from <paramref name="index"/> and collects, for each visited row,
/// the page unit of the requested text type that lies horizontally closest to the
/// traverser's current position.
/// </summary>
/// <param name="page">Page handed to the <c>PageTraverser</c>.</param>
/// <param name="index">Starting index handed to the <c>PageTraverser</c>.</param>
/// <param name="type">Text type a page unit must have (<c>TextType == type</c>) to be collected.</param>
/// <returns>
/// The collected page units in visiting order; an empty array when nothing matched.
/// </returns>
/// <remarks>
/// The scan runs until the traverser sets <c>ErrorOccurred</c>; rows without a match are
/// skipped and do not end the scan.
/// NOTE(review): confirm whether the scan should instead stop at the first empty row
/// after at least one match has been found.
/// </remarks>
private PageUnit[] GetTableColumn(Page page, PageIndex index, string type)
{
    var columnMatches = new List<PageUnit>();
    var traverser = new PageTraverser(page, index);

    // Take one step down before scanning (presumably to move off the header cell at `index`).
    traverser.Traverse(Direction.Down, 1);

    while (!traverser.ErrorOccurred)
    {
        // Candidate groups around the current position. The second Peek argument runs
        // from 2 to 4 (presumably the distance). Array order matters: OrderBy is stable,
        // so on a distance tie the earlier entry wins.
        var surroundingPageUnits = new PageUnit[][]
        {
            traverser.Peek(Direction.Left),
            traverser.GetCurrentPageUnits(),
            traverser.Peek(Direction.Right),
            traverser.Peek(Direction.Left, 2),
            traverser.Peek(Direction.Right, 2),
            traverser.Peek(Direction.Left, 3),
            traverser.Peek(Direction.Right, 3),
            traverser.Peek(Direction.Left, 4),
            traverser.Peek(Direction.Right, 4)
        };

        var candidates = surroundingPageUnits
            .Where(group => group != null && group.Any(unit => unit.TextType == type))
            .ToList();

        if (candidates.Count > 0)
        {
            // Read the current position once per row instead of once per sort key,
            // and only when there is something to compare against.
            var currentX = traverser.GetCurrentPosition().Coordinate.X;

            // Distance is measured against the first unit of each group,
            // not against the unit that actually matched the type.
            var nearestGroup = candidates
                .OrderBy(group => Math.Abs(currentX - group.First().Coordinate.X))
                .First();

            columnMatches.Add(nearestGroup.First(unit => unit.TextType == type));
        }

        traverser.Traverse(Direction.Down, 1);
    }

    return columnMatches.ToArray();
}
/// <summary>
/// Moves step by step in <paramref name="direction"/> until <c>CurrentPageUnit</c> is
/// non-null or the step budget is used up.
/// </summary>
/// <param name="direction">
/// Direction to move in. <c>Direction.NotSet</c> sets <c>ErrorOccurred</c>.
/// </param>
/// <param name="max">
/// Maximum number of steps. <c>null</c> is treated as <c>int.MaxValue</c>; the default
/// of 0 (or any non-positive value) performs no step at all.
/// </param>
/// <returns>
/// A <c>TraversalResult</c> holding the page unit found (or <c>null</c> when the budget
/// ran out first) and the number of steps taken; <c>null</c> when <c>ErrorOccurred</c>
/// is set after a step.
/// </returns>
public TraversalResult Traverse(Direction direction, int? max = 0)
{
    int remaining = max ?? int.MaxValue;
    int taken = 0;
    PageUnit found = null;

    for (; found == null && remaining > 0; remaining--)
    {
        if (direction == Direction.Up)
        {
            TraverseUp();
        }
        else if (direction == Direction.Down)
        {
            TraverseDown();
        }
        else if (direction == Direction.Left)
        {
            TraverseLeft();
        }
        else if (direction == Direction.Right)
        {
            TraverseRight();
        }
        else if (direction == Direction.NotSet)
        {
            ErrorOccurred = true;
        }

        // The flag is checked after every step, so an error that was already set
        // before the call also aborts here.
        if (ErrorOccurred)
        {
            return null;
        }

        found = CurrentPageUnit;
        taken++;
    }

    var result = new TraversalResult();
    result.PageUnit = found;
    result.Steps = taken;
    return result;
}
/// <summary>
/// Checks that a page unit exists, has the expected text type and, when match content
/// is supplied, that its value fuzzily equals at least one of the candidates.
/// </summary>
/// <param name="pageUnit">Page unit to validate; <c>null</c> yields <c>false</c>.</param>
/// <param name="textType">Text type the page unit must have.</param>
/// <param name="matchContent">
/// Candidate strings compared via <c>FuzzyTextComparer.FuzzyEquals</c>;
/// <c>null</c> disables the content check.
/// </param>
/// <returns><c>true</c> when every applicable check passes; otherwise <c>false</c>.</returns>
private bool ValidateTextTypeMatch(PageUnit pageUnit, string textType, string[] matchContent)
{
    if (pageUnit == null)
    {
        return false;
    }

    bool typeMatches = pageUnit.TextType == textType;
    if (!typeMatches)
    {
        return false;
    }

    // No content filter supplied: the type match alone is sufficient.
    if (matchContent == null)
    {
        return true;
    }

    return matchContent.Any(candidate => FuzzyTextComparer.FuzzyEquals(candidate, pageUnit.Value));
}