/// <summary>
/// Called when the digest item is selected. Renders the page that contains the selected
/// search result at 72x72 resolution, paints a translucent yellow polygon over every block
/// of the result region and shows the image in <c>pictureBox1</c>.
/// </summary>
/// <param name="sender"> The sender. </param>
/// <param name="e"> The <see cref="System.Windows.Forms.ListViewItemSelectionChangedEventArgs" /> instance containing the event data. </param>
private void OnSelectResult(object sender, ListViewItemSelectionChangedEventArgs e)
{
    lock (selectionLock)
    {
        if (resultsView.SelectedItems.Count == 0)
        {
            return;
        }

        ResultItem item = (ResultItem)resultsView.SelectedItems[0].Tag;

        // (Re)open the document only when a different file is selected or nothing is loaded yet.
        if (item.FileName != currentWorkingFileName || document == null)
        {
            if (document != null)
            {
                document.Dispose();

                // Drop the reference immediately: if opening the new file fails below, the next
                // selection must not try to reuse an already disposed document.
                document = null;
            }

            // NOTE(review): the stream has to outlive this method because the document keeps reading
            // from it; whether Document.Dispose() also closes it is not visible here - confirm.
            FileStream stream = new FileStream(item.FileName, FileMode.Open, FileAccess.Read, FileShare.Read);
            try
            {
                // create document used for search and rendering
                document = new Document(stream);
            }
            catch
            {
                // The document never took the stream over, so release the file handle ourselves.
                stream.Dispose();
                throw;
            }

            currentWorkingFileName = item.FileName;
        }

        Page page = document.Pages[item.SearchResult.PageIndex];
        RenderingSettings renderingSettings = new RenderingSettings();
        Resolution resolution = new Resolution(72, 72);
        Bitmap bmp = page.Render(resolution, renderingSettings);

        // Highlights the search result
        SearchResultRegion searchResultRegion = page.TransformRegion(item.SearchResult.Region, resolution, renderingSettings);

        // Graphics and the brush wrap GDI handles: create them once and release them deterministically.
        using (Graphics gr = Graphics.FromImage(bmp))
        using (SolidBrush highlightBrush = new SolidBrush(Color.FromArgb(50, Color.Yellow)))
        {
            foreach (double[] block in searchResultRegion.Blocks)
            {
                // Each block is a flat list of coordinates: x0, y0, x1, y1, ...
                PointF[] points = new PointF[block.Length / 2];
                for (int i = 0; i < points.Length; i++)
                {
                    points[i] = new PointF((float)block[i * 2], (float)block[i * 2 + 1]);
                }

                gr.FillPolygon(highlightBrush, points);
            }
        }

        // NOTE(review): the image previously shown by pictureBox1 is not disposed here - confirm
        // whether this handler owns it before adding a Dispose call.
        pictureBox1.Image = bmp;
    }
}
/// <summary>
/// Highlights the search result by filling every block of the result region, transformed to
/// the bitmap's dimensions, with the shared highlight brush.
/// </summary>
/// <param name="bitmap"> The bitmap to draw on; its width and height are used for the region transform. </param>
/// <param name="searchResultItem"> The search result item whose region is highlighted. </param>
/// <param name="page"> The page used to transform the region into bitmap coordinates. </param>
private static void HighlightSearchResult(Image bitmap, SearchResultItem searchResultItem, Page page)
{
    using (Graphics gr = Graphics.FromImage(bitmap))
    {
        SearchResultRegion region = page.TransformRegion(searchResultItem.Region, bitmap.Width, bitmap.Height, renderingSettings);

        foreach (double[] block in region.Blocks)
        {
            // Each block is a flat list of coordinates: x0, y0, x1, y1, ...
            PointF[] points = new PointF[block.Length / 2];

            // Iterate over the actual number of points instead of assuming exactly four, so that
            // shorter blocks do not overrun the array and longer ones are converted completely.
            for (int i = 0; i < points.Length; i++)
            {
                points[i] = new PointF((float)block[i * 2], (float)block[(i * 2) + 1]);
            }

            gr.FillPolygon(hightlightBrush, points);
        }
    }
}
/// <summary>
/// Renders the first page of the test PDF, collects highlight rectangles for its URI links and
/// for every text match of a URL regular expression on that page, paints them over the rendered
/// page, saves the result as "renderedPage.png" and opens it.
/// </summary>
/// <param name="args"> Command line arguments (not used). </param>
static void Main(string[] args)
{
    // store rendering settings
    Resolution renderingResolution = new Resolution(144, 144);
    RenderingSettings renderingSettings = new RenderingSettings();

    // a list of rects to highlight
    IList<RectangleF> highlightRects = new List<RectangleF>();

    // The brush, both file streams and the rendered bitmap hold native resources, so every one
    // of them is released deterministically. The document stays open until the search has
    // finished because the search callback still uses its first page.
    using (Brush highlightBrush = new SolidBrush(Color.FromArgb(126, 255, 255, 0)))
    using (FileStream fs = new FileStream("../../files/test.pdf", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (Document doc = new Document(fs))
    {
        Page firstPage = doc.Pages[0];

        using (Bitmap renderedPage = firstPage.Render(renderingResolution, renderingSettings))
        {
            // parse links and store highlight rects
            foreach (Link link in firstPage.Links)
            {
                if (link.IsUriLink)
                {
                    Apitron.PDF.Rasterizer.Rectangle locationRect = link.GetLocationRectangle(renderingResolution, renderingSettings);
                    highlightRects.Add(TransformToGDIRect(locationRect, renderedPage.Height));
                    Console.WriteLine(link.DestinationUri);
                }
            }

            // search text in the same document using regular expression matching URLs
            using (FileStream searchStream = new FileStream("../../files/test.pdf", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            using (SearchIndex search = new SearchIndex(searchStream))
            {
                search.Search(
                    handlerArgs =>
                    {
                        // first page only
                        if (handlerArgs.PageIndex > 0)
                        {
                            handlerArgs.CancelSearch = true;
                            return;
                        }

                        // add highlight rects by processing found items
                        foreach (SearchResultItem item in handlerArgs.ResultItems)
                        {
                            SearchResultRegion searchResultRegion = firstPage.TransformRegion(item.Region, renderingResolution, renderingSettings);

                            foreach (double[] block in searchResultRegion.Blocks)
                            {
                                // A block is a flat x0, y0, x1, y1, ... list; without at least
                                // one complete point there is nothing to bound.
                                if (block.Length < 2)
                                {
                                    continue;
                                }

                                float xMin = float.MaxValue;
                                float yMin = float.MaxValue;
                                float xMax = float.MinValue;
                                float yMax = float.MinValue;

                                // Step over complete (x, y) pairs only, so an odd-length block
                                // cannot read past the end of the array.
                                for (int i = 0; i + 1 < block.Length; i += 2)
                                {
                                    float x = (float)block[i];
                                    float y = (float)block[i + 1];

                                    xMin = Math.Min(xMin, x);
                                    xMax = Math.Max(xMax, x);
                                    yMin = Math.Min(yMin, y);
                                    yMax = Math.Max(yMax, y);
                                }

                                // axis-aligned bounding box of the block
                                highlightRects.Add(new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin));
                            }

                            Console.WriteLine(item.Title);
                        }
                    },
                    new Regex(@"(http|ftp|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?"));
            }

            // render highlight rects
            HighlightRects(renderedPage, highlightRects, highlightBrush);
            renderedPage.Save("renderedPage.png");
        }
    }

    Process.Start("renderedPage.png");
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Searches the printed-text OCR result stored for a user session and returns one result entry
/// for every recognized word that contains <paramref name="searchText"/>, ignoring case.
/// </summary>
/// <param name="userSessionId">Identifier of the user session; it is parsed with <c>ObjectId.Parse</c>.</param>
/// <param name="searchText">Text to look for inside each recognized word.</param>
/// <returns>
/// The search results. <c>ResultDetails</c> receives one entry per matching word; nothing is added
/// when no session detail or no OCR result is available for the session.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="searchText"/> is <c>null</c>.</exception>
/// <remarks>
/// TODO: change the parameter of GetSessionDetailByUserSessionId to a string to decouple this code from the MongoDb ObjectId model.
/// TODO: introduce our own "OcrResult" type to decouple this code from the Microsoft Azure models.
/// </remarks>
public async Task<SearchResultsDto> SearchUserSession(string userSessionId, string searchText)
{
    if (searchText == null)
    {
        throw new ArgumentNullException(nameof(searchText));
    }

    var result = new SearchResultsDto();
    var sessionDetailsFound = await _sessionDetailRepo.GetSessionDetailByUserSessionId(ObjectId.Parse(userSessionId)).ConfigureAwait(false);

    // Nothing stored for this session means nothing can match.
    var detail = sessionDetailsFound?.PrintedTextResult;
    if (detail?.Regions == null)
    {
        return result;
    }

    // Running counters replace the per-match IndexOf/SkipLast/Sum passes over the collections.
    var linesInPriorRegions = 0;

    foreach (OcrRegion region in detail.Regions)
    {
        var lineIndexInRegion = 0;

        foreach (OcrLine line in region.Lines)
        {
            var words = line.Words;
            var wordIndexInLine = 0;
            var charsInPriorWords = 0;

            foreach (OcrWord word in words)
            {
                // Case-insensitive match without allocating lower-cased copies and without
                // depending on the current culture.
                if (word.Text.IndexOf(searchText, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    // 1-based line number counted across all regions
                    var lineNumber = linesInPriorRegions + lineIndexInRegion + 1;

                    // Characters of all preceding words in the line plus one separator per
                    // preceding word; special characters are not accounted for.
                    var intPosition = charsInPriorWords + wordIndexInLine;

                    // Map from Ocr classes to custom classes (to reduce dependency on Ocr classes)
                    var resultText = new SearchResultText()
                    {
                        BoundingBox = word.BoundingBox,
                        Text = word.Text
                    };

                    var resultLine = new SearchResultLine()
                    {
                        BoundingBox = line.BoundingBox,
                        Text = words.Select(w => new SearchResultText() { BoundingBox = w.BoundingBox, Text = w.Text }).ToList()
                    };

                    var resultRegion = new SearchResultRegion()
                    {
                        BoundingBox = region.BoundingBox,
                        Lines = new List<SearchResultLine>() { resultLine }
                    };

                    var resultDetail = new SearchResultDetail()
                    {
                        Language = detail.Language,
                        TextAngle = detail.TextAngle,
                        Orientation = detail.Orientation,
                        Regions = new List<SearchResultRegion>() { resultRegion }
                    };

                    // build result details
                    var resultDetails = new SearchResultDetails(resultText, resultLine, resultRegion, resultDetail, lineNumber, intPosition);

                    // add to results
                    result.ResultDetails.Add(resultDetails);
                }

                charsInPriorWords += word.Text.Length;
                wordIndexInLine++;
            }

            lineIndexInRegion++;
        }

        // every line of this region precedes the lines of the next one
        linesInPriorRegions += lineIndexInRegion;
    }

    return result;
}