private static IEnumerable<TextBlock> GetLinesGroups(TextLine[] lines, double maxDist, int maxDegreeOfParallelism)
{
    /**************************************************************************************************
     * Distance between two lines, as used by the clustering below:
     *  1. Work out the horizontal span shared by both lines.
     *  2. If there is no shared span, the lines do not overlap and the distance is the max distance.
     *  3. Otherwise take the X coordinate at the middle of the shared span and return the Euclidean
     *     distance between the two points sitting at that X on each line (using each line's Y).
     **************************************************************************************************/
    double overlapMidpointDistance(PdfLine first, PdfLine second)
    {
        var overlapStart = Math.Max(first.Point1.X, second.Point1.X);
        var overlapWidth = Math.Min(first.Point2.X, second.Point2.X) - overlapStart;

        if (overlapWidth < 0)
        {
            // No horizontal overlap -> max distance
            return double.MaxValue;
        }

        var middleX = overlapStart + overlapWidth / 2;
        var onFirst = new PdfPoint(middleX, first.Point1.Y);
        var onSecond = new PdfPoint(middleX, second.Point1.Y);
        return Distances.Euclidean(onFirst, onSecond);
    }

    // The pivot is represented by the bottom edge of its bounding box, the candidate by its top edge.
    // Every pair is allowed the same maximum distance and no pivot or pair is filtered out.
    var clusters = ClusteringAlgorithms.ClusterNearestNeighbours(
        lines,
        overlapMidpointDistance,
        (pivotLine, candidateLine) => maxDist,
        pivotLine => new PdfLine(pivotLine.BoundingBox.BottomLeft, pivotLine.BoundingBox.BottomRight),
        candidateLine => new PdfLine(candidateLine.BoundingBox.TopLeft, candidateLine.BoundingBox.TopRight),
        pivotLine => true,
        (pivotLine, candidateLine) => true,
        maxDegreeOfParallelism).ToList();

    // Each cluster is a set of indexes into 'lines'; turn each one into a text block.
    foreach (var cluster in clusters)
    {
        var blockLines = cluster.Select(index => lines[index]).ToList();
        yield return new TextBlock(blockLines);
    }
}
// Checks Distances.Euclidean on a 3x4 integer input against a fixed 3x3 expected matrix.
public void TestEuclidean()
{
    int[,] input =
    {
        { 0, 1, 2, 3 },
        { 4, 5, 6, 7 },
        { 8, 9, 10, 11 }
    };

    float[,] expectedDistances =
    {
        { 0, 0, 0 },
        { 8, 0, 0 },
        { 16, 8, 0 }
    };

    // Both arrays are disposed when the blocks exit, the distance array first.
    using (KhivaArray source = KhivaArray.Create(input))
    using (KhivaArray distances = Distances.Euclidean(source))
    {
        var actualDistances = distances.GetData2D<float>();
        Assert.AreEqual(expectedDistances, actualDistances);
    }
}
// .NET Framework 3.5 project that implements the library targeting .NET Standard 2.0 - This will NOT work
// Prints the Euclidean distance between two sample vectors, then waits for a key press.
static void Main(string[] args)
{
    var p = new List<double> { 2.5, 2.7, 3.5, 7.4 };
    var q = new List<double> { 27.5, 12.7, 41.5, 35.4 };

    var distance = Distances.Euclidean(p, q);
    Console.WriteLine($"Euclidean distance between p and q is {distance}");

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}
/// <summary>
/// Estimates the within-line and the between-line spacing of a set of words.
/// <para>This is the first step of the Docstrum algorithm.</para>
/// </summary>
/// <param name="words">The words to analyse.</param>
/// <param name="wlBounds">Angle bounds a neighbour must satisfy to count as being on the same line.</param>
/// <param name="wlBinSize">Bin size used when building the within-line distances distribution.</param>
/// <param name="blBounds">Angle bounds a neighbour must satisfy to count as being on a separate line.</param>
/// <param name="blBinSize">Bin size used when building the between-line distances distribution.</param>
/// <param name="maxDegreeOfParallelism">Maximum number of concurrent tasks enabled.
/// <para>A positive value limits the number of concurrent operations to that value.
/// If it is -1, there is no limit on the number of concurrently running operations.</para></param>
/// <param name="withinLineDistance">The estimated within-line distance, computed as the average peak value
/// of the distribution; <see cref="double.NaN"/> when no peak is available.</param>
/// <param name="betweenLineDistance">The estimated between-line distance, computed as the average peak value
/// of the distribution; <see cref="double.NaN"/> when no peak is available.</param>
/// <returns>True when both estimates are available; false if either 'withinLineDistance' or
/// 'betweenLineDistance' is <see cref="double.NaN"/>.</returns>
public static bool GetSpacingEstimation(IReadOnlyList<Word> words,
    AngleBounds wlBounds, int wlBinSize,
    AngleBounds blBounds, int blBinSize,
    int maxDegreeOfParallelism,
    out double withinLineDistance, out double betweenLineDistance)
{
    var options = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };

    // Samples are collected concurrently from the parallel loop below.
    var withinLineSamples = new ConcurrentBag<double>();
    var betweenLineSamples = new ConcurrentBag<double>();

    // 1. Estimate within line and between line spacing
    var bottomLeftTree = new KdTree<Word>(words, w => w.BoundingBox.BottomLeft);

    Parallel.For(0, words.Count, options, index =>
    {
        var current = words[index];

        // Within-line distance
        // 1.1.1 Query the tree for the 2 nearest words, measured (euclidean) from the current word's bottom-right corner.
        foreach (var hit in bottomLeftTree.FindNearestNeighbours(current, 2, w => w.BoundingBox.BottomRight, Distances.Euclidean))
        {
            var neighbour = hit.Item1;

            // 1.1.2 Skip neighbours whose angle to the current word is outside the within-line bounds.
            if (!wlBounds.Contains(AngleWL(current, neighbour)))
            {
                continue;
            }

            withinLineSamples.Add(Distances.Euclidean(current.BoundingBox.BottomRight, neighbour.BoundingBox.BottomLeft));
        }

        // Between-line distance
        // 1.2.1 Query the tree for the 2 nearest words, measured (euclidean) from the current word's top-left corner.
        foreach (var hit in bottomLeftTree.FindNearestNeighbours(current, 2, w => w.BoundingBox.TopLeft, Distances.Euclidean))
        {
            var neighbour = hit.Item1;

            // 1.2.2 Skip neighbours whose angle to the current word is outside the between-line bounds.
            var angle = AngleBL(current, neighbour);
            if (!blBounds.Contains(angle))
            {
                continue;
            }

            // 1.2.3 Compute the vertical (between-line) distance between the two words:
            // project the centroid-to-centroid distance, then remove half of each word's height.
            double centroidDistance = Distances.Euclidean(current.BoundingBox.Centroid, neighbour.BoundingBox.Centroid);

            // Angle is kept within [-90, 90]
            if (angle > 90)
            {
                angle -= 180;
            }

            double projected = Math.Abs(centroidDistance * Math.Cos((90 - angle) * Math.PI / 180));
            var gap = projected - current.BoundingBox.Height / 2.0 - neighbour.BoundingBox.Height / 2.0;

            // The subtractions can make the gap negative (could occur when words are overlapping);
            // such samples are ignored.
            if (gap >= 0)
            {
                betweenLineSamples.Add(gap);
            }
        }
    });

    // Compute average peak value of each distribution
    double? withinLinePeak = GetPeakAverageDistance(withinLineSamples, wlBinSize);
    double? betweenLinePeak = GetPeakAverageDistance(betweenLineSamples, blBinSize);

    withinLineDistance = withinLinePeak.HasValue ? withinLinePeak.Value : double.NaN;
    betweenLineDistance = betweenLinePeak.HasValue ? betweenLinePeak.Value : double.NaN;

    bool bothAvailable = withinLinePeak.HasValue && betweenLinePeak.HasValue;
    return bothAvailable;
}
// Groups cells whose corners lie close together and returns one bounding rectangle per
// group that holds at least REQUIRED_CELLS_FOR_TABLE cells.
private List<TableRectangle> getTableAreasFromCells(List<TableRectangle> cells)
{
    var groups = new List<List<TableRectangle>>();

    foreach (TableRectangle cell in cells)
    {
        // A cell joins the first group containing a member with a corner closer than
        // CELL_CORNER_DISTANCE_MAXIMUM to one of its own corners. The 'placed' flag
        // stops every nested loop as soon as that happens.
        bool placed = false;

        for (int g = 0; g < groups.Count && !placed; g++)
        {
            List<TableRectangle> group = groups[g];

            for (int m = 0; m < group.Count && !placed; m++)
            {
                PdfPoint[] memberCorners = group[m].Points;
                PdfPoint[] cellCorners = cell.Points;

                for (int c = 0; c < cellCorners.Length && !placed; c++)
                {
                    for (int k = 0; k < memberCorners.Length && !placed; k++)
                    {
                        if (Distances.Euclidean(cellCorners[c], memberCorners[k]) < CELL_CORNER_DISTANCE_MAXIMUM)
                        {
                            group.Add(cell);
                            placed = true;
                        }
                    }
                }
            }
        }

        // No existing group is close enough: the cell starts a group of its own.
        if (!placed)
        {
            groups.Add(new List<TableRectangle> { cell });
        }
    }

    // create table areas based on cell group
    var areas = new List<TableRectangle>();

    foreach (List<TableRectangle> group in groups)
    {
        // groups with fewer than REQUIRED_CELLS_FOR_TABLE cells should not make a table
        if (group.Count >= REQUIRED_CELLS_FOR_TABLE)
        {
            // The area keeps the largest Top/Right and the smallest Bottom/Left of its cells.
            // NOTE(review): earlier inline notes ("bobld") recorded the opposite initial values and
            // comparisons for top and bottom, presumably from a variant with a flipped Y axis - confirm.
            double maxTop = double.MinValue;
            double minLeft = double.MaxValue;
            double minBottom = double.MaxValue;
            double maxRight = double.MinValue;

            foreach (TableRectangle member in group)
            {
                maxTop = member.Top > maxTop ? member.Top : maxTop;
                minLeft = member.Left < minLeft ? member.Left : minLeft;
                minBottom = member.Bottom < minBottom ? member.Bottom : minBottom;
                maxRight = member.Right > maxRight ? member.Right : maxRight;
            }

            var bounds = new PdfRectangle(minLeft, minBottom, maxRight, maxTop);
            areas.Add(new TableRectangle(bounds));
        }
    }

    return areas;
}