/// <summary>
/// Get the words using options values.
/// <para>
/// When <see cref="NearestNeighbourWordExtractorOptions.GroupByOrientation"/> is set, the letters are
/// split by <see cref="TextOrientation"/> and each group is processed on its own. The results are
/// concatenated in the order Horizontal, Rotate270, Rotate180, Rotate90, Other.
/// </para>
/// </summary>
/// <param name="letters">The page's letters to group into <see cref="Word"/>s.</param>
/// <param name="options">The <see cref="NearestNeighbourWordExtractorOptions"/> to use.</param>
/// <returns>
/// The <see cref="Word"/>s generated by the nearest neighbour method, or an empty collection
/// when <paramref name="letters"/> is null or empty.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="options"/> is not a <see cref="NearestNeighbourWordExtractorOptions"/>.
/// </exception>
public IEnumerable<Word> GetWords(IReadOnlyList<Letter> letters, DlaOptions options)
{
    // The options type is validated before the letters are inspected.
    if (!(options is NearestNeighbourWordExtractorOptions nnOptions))
    {
        throw new ArgumentException("Options provided must be of type " + nameof(NearestNeighbourWordExtractorOptions) + ".", nameof(options));
    }

    if (letters == null || letters.Count == 0)
    {
        return EmptyArray<Word>.Instance;
    }

    if (!nnOptions.GroupByOrientation)
    {
        // No grouping requested: every letter is processed together with the general distance measure.
        return GetWords(
            letters,
            nnOptions.MaximumDistance,
            nnOptions.DistanceMeasure,
            nnOptions.FilterPivot,
            nnOptions.Filter,
            nnOptions.MaxDegreeOfParallelism);
    }

    // Builds the words for one axis-aligned orientation using the axis-aligned distance measure.
    List<Word> ExtractAxisAligned(TextOrientation orientation)
    {
        return GetWords(
            letters.Where(l => l.TextOrientation == orientation).ToList(),
            nnOptions.MaximumDistance,
            nnOptions.DistanceMeasureAA,
            nnOptions.FilterPivot,
            nnOptions.Filter,
            nnOptions.MaxDegreeOfParallelism);
    }

    // Axis aligned groups; the horizontal result list is the one the other groups are appended to.
    List<Word> result = ExtractAxisAligned(TextOrientation.Horizontal);
    result.AddRange(ExtractAxisAligned(TextOrientation.Rotate270));
    result.AddRange(ExtractAxisAligned(TextOrientation.Rotate180));
    result.AddRange(ExtractAxisAligned(TextOrientation.Rotate90));

    // Letters that are not axis aligned use the general distance measure.
    result.AddRange(GetWords(
        letters.Where(l => l.TextOrientation == TextOrientation.Other).ToList(),
        nnOptions.MaximumDistance,
        nnOptions.DistanceMeasure,
        nnOptions.FilterPivot,
        nnOptions.Filter,
        nnOptions.MaxDegreeOfParallelism));

    return result;
}
/// <summary>
/// Get the text blocks using options.
/// <para>All the words are placed in a single <see cref="TextBlock"/>.</para>
/// </summary>
/// <param name="words">The page's words to generate text blocks for.</param>
/// <param name="options">The <see cref="DefaultPageSegmenterOptions"/> to use.</param>
/// <returns>
/// The <see cref="TextBlock"/>s generated by the default method, or an empty collection
/// when <paramref name="words"/> is null or contains no element.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="options"/> is not a <see cref="DefaultPageSegmenterOptions"/>.
/// </exception>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> words, DlaOptions options)
{
    // The options type is validated before the words are inspected.
    if (!(options is DefaultPageSegmenterOptions dOptions))
    {
        throw new ArgumentException("Options provided must be of type " + nameof(DefaultPageSegmenterOptions) + ".", nameof(options));
    }

    if (words == null || !words.Any())
    {
        return EmptyArray<TextBlock>.Instance;
    }

    // One leaf holding every word provides the lines of the single block.
    var lines = new XYLeaf(words).GetLines(dOptions.WordSeparator);
    var singleBlock = new TextBlock(lines, dOptions.LineSeparator);

    return new List<TextBlock>() { singleBlock };
}
/// <summary>
/// Get the blocks using options values.
/// </summary>
/// <param name="words">The page's words to segment into <see cref="TextBlock"/>s.</param>
/// <param name="options">The <see cref="RecursiveXYCutOptions"/> to use.</param>
/// <returns>
/// The <see cref="TextBlock"/>s generated by the Recursive X-Y cut method, or an empty collection
/// when <paramref name="words"/> is null or contains no element.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="options"/> is not a <see cref="RecursiveXYCutOptions"/>.
/// </exception>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> words, DlaOptions options)
{
    // The options type is validated before the words are inspected.
    if (!(options is RecursiveXYCutOptions ryxcOptions))
    {
        throw new ArgumentException("Options provided must be of type " + nameof(RecursiveXYCutOptions) + ".", nameof(options));
    }

    if (words == null || !words.Any())
    {
        return EmptyArray<TextBlock>.Instance;
    }

    // Forward every option value to the parameterised overload.
    return GetBlocks(
        words,
        ryxcOptions.MinimumWidth,
        ryxcOptions.DominantFontWidthFunc,
        ryxcOptions.DominantFontHeightFunc,
        ryxcOptions.WordSeparator,
        ryxcOptions.LineSeparator);
}
/// <summary>
/// Get the blocks using options values.
/// </summary>
/// <param name="words">The page's words to segment into <see cref="TextBlock"/>s.</param>
/// <param name="options">The <see cref="DocstrumBoundingBoxesOptions"/> to use.</param>
/// <returns>
/// The <see cref="TextBlock"/>s generated by the document spectrum method, or an empty collection
/// when <paramref name="words"/> is null or contains no element.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="options"/> is not a <see cref="DocstrumBoundingBoxesOptions"/>.
/// </exception>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> words, DlaOptions options)
{
    // The options type is validated before the words are inspected.
    if (!(options is DocstrumBoundingBoxesOptions dbbOptions))
    {
        throw new ArgumentException("Options provided must be of type " + nameof(DocstrumBoundingBoxesOptions) + ".", nameof(options));
    }

    if (words == null || !words.Any())
    {
        return EmptyArray<TextBlock>.Instance;
    }

    // The parameterised overload is handed a materialised list of the words.
    var wordList = words.ToList();

    return GetBlocks(
        wordList,
        dbbOptions.WithinLineBounds,
        dbbOptions.WithinLineMultiplier,
        dbbOptions.WithinLineBinSize,
        dbbOptions.BetweenLineBounds,
        dbbOptions.BetweenLineMultiplier,
        dbbOptions.BetweenLineBinSize,
        dbbOptions.AngularDifferenceBounds,
        dbbOptions.Epsilon,
        dbbOptions.WordSeparator,
        dbbOptions.LineSeparator,
        dbbOptions.MaxDegreeOfParallelism);
}