/// <summary>
/// Loads page <paramref name="pageNo"/> from the current document model,
/// applies the current extraction settings to it, and rebuilds the letter
/// size histograms and the page plot (axes, background image and the
/// enabled overlay layers).
/// </summary>
/// <param name="pageNo">Page number handed to the document model and to the image converter.</param>
/// <returns>
/// <c>false</c> when no document model is set or the document model returns
/// no page for <paramref name="pageNo"/>; otherwise <c>true</c>.
/// </returns>
/// <remarks>
/// Exceptions raised while rendering the background image are not handled
/// here and propagate to the caller.
/// </remarks>
private bool LoadPage(int pageNo)
{
    if (_pdfDocumentModel == null)
    {
        return false;
    }

    // Note: the field is overwritten even when the lookup yields null.
    _pdfPageModel = _pdfDocumentModel.GetPage(pageNo);
    if (_pdfPageModel == null)
    {
        return false;
    }

    // Push the current extraction settings into the page model before querying it.
    _pdfPageModel.SetRemoveDuplicateLetters(_removeDuplicateLetters);
    _pdfPageModel.SetWordExtractor(WordExtractor);
    _pdfPageModel.SetPageSegmenter(PageSegmenter);

    var pageInfoModel = _pdfPageModel.GetPageInfo();

    // Letter size histograms; each becomes null when its distribution is null.
    HeightHistoPlotModel = pageInfoModel.HeightDistribution?.GetPlotModel("Letters height distribution");
    WidthHistoPlotModel = pageInfoModel.WidthDistribution?.GetPlotModel("Letters width distribution");

    // Page plot: axes run from 0 to the page height / width.
    var pagePlotModel = new PlotModel { IsLegendVisible = false };
    pagePlotModel.Axes.Add(new LinearAxis
    {
        Position = AxisPosition.Left,
        Minimum = 0,
        Maximum = _pdfPageModel.Height
    });
    pagePlotModel.Axes.Add(new LinearAxis
    {
        Position = AxisPosition.Bottom,
        Minimum = 0,
        Maximum = _pdfPageModel.Width
    });

    // Background image. The stream is only needed while the OxyImage is built.
    // NOTE(review): the second argument (2) is presumably a render scale - confirm.
    using (var stream = _pdfImageConverter.GetPageStream(pageNo, 2))
    {
        PageImage = new OxyImage(stream);
    }

    // Anchor the image's bottom-left corner on the crop box, in data units.
    var cropBounds = _pdfPageModel.CropBox.Bounds;
    pagePlotModel.Annotations.Add(new ImageAnnotation
    {
        ImageSource = PageImage,
        Opacity = 0.5,
        X = new PlotLength(cropBounds.BottomLeft.X, PlotLengthUnit.Data),
        Y = new PlotLength(cropBounds.BottomLeft.Y, PlotLengthUnit.Data),
        Width = new PlotLength(cropBounds.Width, PlotLengthUnit.Data),
        Height = new PlotLength(cropBounds.Height, PlotLengthUnit.Data),
        HorizontalAlignment = HorizontalAlignment.Left,
        VerticalAlignment = VerticalAlignment.Bottom
    });

    this.PagePlotModel = pagePlotModel;

    // Overlay layers run only after PagePlotModel has been assigned;
    // presumably they draw onto it, so keep this ordering.
    if (IsDisplayLetters)
    {
        DisplayLetters();
    }

    if (IsDisplayWords)
    {
        DisplayWords();
    }

    if (IsDisplayTextLines)
    {
        DisplayTextLines();
    }

    if (IsDisplayTextBlocks)
    {
        DisplayTextBlocks();
    }

    if (IsDisplayPaths)
    {
        DisplayPaths();
    }

    if (IsDisplayImages)
    {
        DisplayImages();
    }

    return true;
}
/// <summary>
/// Builds a plot for page <paramref name="pageNo"/>: one closed red outline
/// per bounding box at the granularity selected by <c>BboxLevel</c>, drawn
/// over a half-transparent rendering of the page, and assigns the result to
/// <c>PlotModel</c>.
/// </summary>
/// <param name="pageNo">Page number handed to the document model and to the image converter.</param>
/// <remarks>
/// Does nothing when no document model is set or when the document model
/// returns no page for <paramref name="pageNo"/>. Exceptions raised while
/// rendering the background image are not handled here and propagate to the
/// caller.
/// </remarks>
private void DisplayPage(int pageNo)
{
    if (_pdfDocumentModel == null)
    {
        return;
    }

    var page = _pdfDocumentModel.GetPage(pageNo);
    if (page == null)
    {
        // LoadPage treats a null page as an expected outcome; do the same
        // here instead of failing on the first dereference below.
        return;
    }

    // Create the plot model; axes run from 0 to the page height / width.
    var tmp = new PlotModel { IsLegendVisible = false };
    tmp.Axes.Add(new LinearAxis
    {
        Position = AxisPosition.Left,
        Minimum = 0,
        Maximum = page.Height
    });
    tmp.Axes.Add(new LinearAxis
    {
        Position = AxisPosition.Bottom,
        Minimum = 0,
        Maximum = page.Width
    });

    // Each outline is traced corner to corner and ends on its starting
    // corner so that the rectangle is drawn closed.
    switch (BboxLevel)
    {
        case "Words":
            foreach (var word in page.GetWords())
            {
                var bbox = word.BoundingBox;
                tmp.Series.Add(new LineSeries
                {
                    Title = GetShorterText(word.Text),
                    LineStyle = LineStyle.Solid,
                    Color = OxyColors.Red,
                    Points =
                    {
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft)
                    }
                });
            }

            break;

        case "Lines":
            foreach (var line in page.GetTextBlocks().SelectMany(b => b.TextLines))
            {
                var bbox = line.BoundingBox;
                tmp.Series.Add(new LineSeries
                {
                    Title = GetShorterText(line.Text),
                    LineStyle = LineStyle.Solid,
                    Color = OxyColors.Red,
                    Points =
                    {
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft)
                    }
                });
            }

            break;

        case "Paragraphs":
            foreach (var block in page.GetTextBlocks())
            {
                var bbox = block.BoundingBox;
                tmp.Series.Add(new LineSeries
                {
                    Title = GetShorterText(block.Text),
                    LineStyle = LineStyle.Solid,
                    Color = OxyColors.Red,
                    Points =
                    {
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft)
                    }
                });
            }

            break;

        default:
            // Any other BboxLevel value falls back to per-letter glyph rectangles.
            foreach (var letter in page.GetLetters())
            {
                var bbox = letter.GlyphRectangle;
                tmp.Series.Add(new LineSeries
                {
                    Title = letter.Value,
                    LineStyle = LineStyle.Solid,
                    Color = OxyColors.Red,
                    Points =
                    {
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopRight),
                        PdfDocumentModel.ToDataPoint(bbox.TopLeft),
                        PdfDocumentModel.ToDataPoint(bbox.BottomLeft)
                    }
                });
            }

            break;
    }

    // Background image. The stream is only needed while the OxyImage is built.
    // NOTE(review): the second argument (2) is presumably a render scale - confirm.
    using (var stream = _pdfImageConverter.GetPageStream(pageNo, 2))
    {
        Image = new OxyImage(stream);
    }

    // Stretch the image over the whole page, anchored at the data origin.
    tmp.Annotations.Add(new ImageAnnotation
    {
        ImageSource = Image,
        Opacity = 0.5,
        X = new PlotLength(0, PlotLengthUnit.Data),
        Y = new PlotLength(0, PlotLengthUnit.Data),
        Width = new PlotLength(page.Width, PlotLengthUnit.Data),
        Height = new PlotLength(page.Height, PlotLengthUnit.Data),
        HorizontalAlignment = HorizontalAlignment.Left,
        VerticalAlignment = VerticalAlignment.Bottom
    });

    this.PlotModel = tmp;
}