/// <summary>
/// Extracts the text fragments of one page through the native viewer and wraps each
/// native record in a managed <see cref="PdfTextFragment"/>.
/// </summary>
/// <param name="pageNo">Page number handed unchanged to the native extractor and to every created fragment.</param>
/// <returns>
/// The fragments in the order of the native array, or <c>null</c> when the native
/// extraction call reports failure.
/// </returns>
/// <exception cref="PdfNoFileOpenedException">Thrown when no document is open.</exception>
public IList<PdfTextFragment> LoadTextFragments(int pageNo)
{
    if (!isOpen)
    {
        throw new PdfNoFileOpenedException();
    }

    Logger.LogInfo("Loading textFragments of page " + pageNo);

    IntPtr pArray = IntPtr.Zero;
    int count = 0;
    if (!PdfViewerExtractTextFragments(documentHandle, pageNo, ref pArray, ref count))
    {
        return null;
    }

    IList<PdfTextFragment> textFragments = new List<PdfTextFragment>();
    try
    {
        // Size of one native record; used to step through the contiguous array.
        long textFragmentSize = Marshal.SizeOf(typeof(NativeTextFragment));
        IntPtr pTextFragment = pArray;
        PdfTextFragment lastFrag = null;

        for (int i = 0; i < count; i++)
        {
            NativeTextFragment textFragment =
                (NativeTextFragment)Marshal.PtrToStructure(pTextFragment, typeof(NativeTextFragment));

            // Each fragment receives its predecessor (null for the first one).
            lastFrag = new PdfTextFragment(textFragment, pageNo, lastFrag);
            textFragments.Add(lastFrag);

            pTextFragment = new IntPtr(pTextFragment.ToInt64() + textFragmentSize);
        }
    }
    finally
    {
        // Hand the array back to the native side even if marshalling or fragment
        // construction threw; otherwise the native allocation would be leaked.
        PdfViewerDisposeTextFragments(pArray, count);
    }

    Logger.LogInfo("Loaded textFragments of page " + pageNo);
    return textFragments;
}
/// <summary>
/// Extracts the text fragments of the page at index 1 (the 2nd page) and outlines every
/// glyph of the fragment at index 1 with a randomly coloured quadrilateral.
/// </summary>
/// <param name="document">Document whose page at index 1 is read and drawn on.</param>
private static void ExtractTextAndHighlightGlyphs(PdfFixedDocument document)
{
    PdfRgbColor strokeColor = new PdfRgbColor();
    PdfPen outlinePen = new PdfPen(strokeColor, 0.5);
    Random random = new Random();
    byte[] channels = new byte[3];

    var targetPage = document.Pages[1];
    PdfContentExtractor extractor = new PdfContentExtractor(targetPage);
    PdfTextFragmentCollection fragments = extractor.ExtractTextFragments();
    PdfTextFragment fragment = fragments[1];

    for (int glyphIndex = 0; glyphIndex < fragment.Glyphs.Count; glyphIndex++)
    {
        // Pick a fresh random colour for this glyph. The pen was built from strokeColor,
        // so presumably it picks up these channel changes - confirm against the PDF library.
        random.NextBytes(channels);
        strokeColor.R = channels[0];
        strokeColor.G = channels[1];
        strokeColor.B = channels[2];

        // Trace the four glyph corners in order and close the outline.
        var corners = fragment.Glyphs[glyphIndex].GlyphCorners;
        PdfPath outline = new PdfPath();
        outline.StartSubpath(corners[0].X, corners[0].Y);
        for (int corner = 1; corner <= 3; corner++)
        {
            outline.AddLineTo(corners[corner].X, corners[corner].Y);
        }
        outline.CloseSubpath();

        targetPage.Graphics.DrawPath(outlinePen, outline);
    }
}
/// <summary>
/// Collects the text fragments lying between two points across a range of pages.
/// </summary>
/// <remarks>
/// On <paramref name="firstPage"/> the range starts at the fragment nearest to
/// <paramref name="start"/>; on <paramref name="lastPage"/> it ends at the fragment nearest
/// to <paramref name="end"/>. Pages in between contribute all of their fragments. Pages
/// whose fragments cannot be loaded (request cancelled, or no list returned) or that have
/// no fragments are skipped.
/// </remarks>
/// <param name="start">Point at which the selection starts.</param>
/// <param name="end">Point at which the selection ends.</param>
/// <param name="firstPage">First page of the range (inclusive).</param>
/// <param name="lastPage">Last page of the range (inclusive).</param>
/// <param name="swap">
/// Set to <c>true</c> when, on a partially selected page, the start fragment comes after
/// the end fragment, and to <c>false</c> when it does not. Left untouched for pages that
/// are selected completely.
/// </param>
/// <returns>The selected fragments in page order, then fragment order.</returns>
public IList<PdfTextFragment> GetTextWithinSelection(PdfSourcePoint start, PdfSourcePoint end, int firstPage, int lastPage, ref bool swap)
{
    List<PdfTextFragment> containedFragments = new List<PdfTextFragment>();

    foreach (int page in Enumerable.Range(firstPage, lastPage - firstPage + 1))
    {
        List<PdfTextFragment> frags;
        try
        {
            frags = (List<PdfTextFragment>)documentManager.GetTextFragments(page);
        }
        catch (PdfRequestCanceledException)
        {
            Logger.LogWarning("Request to load textfragments was canceled somehow (that is not supposed to happen)");
            continue;
        }

        // A missing list is treated like an empty page instead of crashing on frags.Count.
        if (frags == null || frags.Count == 0)
        {
            continue;
        }

        PdfTextFragment pageFirst = frags[0];
        PdfTextFragment pageLast = frags[frags.Count - 1];
        PdfTextFragment firstFrag = pageFirst;
        PdfTextFragment lastFrag = pageLast;

        if (page == firstPage)
        {
            firstFrag = FindNearestTextFragment(start, frags, GetPageRect(page));
        }
        if (page == lastPage)
        {
            lastFrag = FindNearestTextFragment(end, frags, GetPageRect(page));
        }

        if (firstFrag == pageFirst && lastFrag == pageLast)
        {
            // The whole page is selected.
            containedFragments.AddRange(frags);
            continue;
        }

        int firstIndex = frags.IndexOf(firstFrag);
        int lastIndex = frags.IndexOf(lastFrag);
        if (firstIndex > lastIndex)
        {
            // The start fragment lies behind the end fragment: report it and take the reversed span.
            swap = true;
            containedFragments.AddRange(frags.GetRange(lastIndex, firstIndex - lastIndex + 1));
        }
        else
        {
            swap = false;
            containedFragments.AddRange(frags.GetRange(firstIndex, lastIndex - firstIndex + 1));
        }
    }

    return containedFragments;
}
/// <summary>
/// Returns the fragment whose rectangle on the unrotated page is closest to a point.
/// </summary>
/// <param name="location">Point to measure from.</param>
/// <param name="haystack">Candidate fragments; every one of them is examined.</param>
/// <param name="pageRect">Not used by the current implementation.</param>
/// <returns>
/// The first fragment with the smallest squared distance, or <c>null</c> when
/// <paramref name="haystack"/> is empty or no distance is below <c>double.MaxValue</c>.
/// </returns>
private PdfTextFragment FindNearestTextFragment(PdfSourcePoint location, List<PdfTextFragment> haystack, PdfSourceRect pageRect)
{
    // NOTE: a disabled draft used to sit here that narrowed the haystack to one or two text
    // lines with a binary search over FirstOnLine/LastOnLine before the scan below. Its own
    // header said the y-axis comparisons would have to be inverted before it could be used.

    PdfTextFragment nearest = null;
    double nearestDistance = double.MaxValue;

    for (int i = 0; i < haystack.Count; i++)
    {
        PdfTextFragment candidate = haystack[i];
        double distance = candidate.RectOnUnrotatedPage.ShortestDistanceSquared(location);

        // Strict comparison: on a tie the earlier fragment wins.
        if (distance < nearestDistance)
        {
            nearestDistance = distance;
            nearest = candidate;
        }
    }

    return nearest;
}
/// <summary>
/// Reacts to mouse movement according to the interaction currently in progress:
/// drag scrolling, middle-button scrolling (cursor feedback only), rectangle marking,
/// text selection, annotation creation/moving, or hover feedback in mark mode.
/// </summary>
/// <remarks>
/// Nothing happens while no document is open or while <c>OverrideMouseModeToWait</c> is set.
/// The event is marked as handled only when the end of the method is reached; the early
/// returns inside the text-selection branch leave <c>e.Handled</c> untouched.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Mouse event data; positions are taken relative to this control.</param>
private void MouseMoveEventHandler(Object sender, MouseEventArgs e)
{
    if (!controller.IsOpen)
    {
        return;
    }
    if (OverrideMouseModeToWait)
    {
        return;
    }

    if (mouseScrolling)
    {
        // Scroll by the distance moved since the last event.
        Vector delta = System.Windows.Point.Subtract(e.GetPosition(this), lastMousePosition);
        controller.Scroll(delta);
        lastMousePosition = e.GetPosition(this);
    }
    else if (middleMouseScrolling)
    {
        // Only the cursor is updated here; it indicates the direction from the anchor point.
        Vector delta = System.Windows.Point.Subtract(e.GetPosition(this), lastMousePosition);

        // Ratio of horizontal movement to -delta.Y. It is only consulted once
        // |delta.Y| > 1, so a zero divisor never reaches the comparisons below.
        double fac = delta.X / -delta.Y;

        if (delta.Length < 10)
        {
            Cursor = Cursors.ScrollAll;
        }
        else if (Math.Abs(-delta.Y) <= 1.0)
        {
            // (Almost) purely horizontal movement.
            if (delta.X > 0)
            {
                Cursor = Cursors.ScrollE;
            }
            else
            {
                Cursor = Cursors.ScrollW;
            }
        }
        else if (-delta.Y > 0)
        {
            if (fac > 4)
            {
                Cursor = Cursors.ScrollE;
            }
            else if (fac > 0.25)
            {
                Cursor = Cursors.ScrollNE;
            }
            else if (fac > -0.25)
            {
                Cursor = Cursors.ScrollN;
            }
            else if (fac > -4)
            {
                Cursor = Cursors.ScrollNW;
            }
            else
            {
                Cursor = Cursors.ScrollW;
            }
        }
        else
        {
            if (fac > 4)
            {
                Cursor = Cursors.ScrollW;
            }
            else if (fac > 0.25)
            {
                Cursor = Cursors.ScrollSW;
            }
            else if (fac > -0.25)
            {
                Cursor = Cursors.ScrollS;
            }
            else if (fac > -4)
            {
                Cursor = Cursors.ScrollSE;
            }
            else
            {
                Cursor = Cursors.ScrollE;
            }
        }
    }
    else if (markingRectangle)
    {
        selectedRect = new Rect(lastMousePosition, e.GetPosition(this));
        InvalidateVisual();
    }
    else if (selectingText)
    {
        PdfTargetPoint p2 = new PdfTargetPoint(e.GetPosition(this));
        double s1 = 0.0, s2 = 0.0;

        // The start point is passed as null; presumably the controller keeps the
        // selection start itself - confirm against the controller implementation.
        IList<PdfTextFragment> frags = null;
        try
        {
            frags = controller.GetTextWithinSelection(null, p2, ref s1, ref s2);
        }
        catch (PdfNoFileOpenedException)
        {
            selectingText = false;
            return;
        }
        if (frags == null)
        {
            return;
        }

        selectedRects.Clear();
        StringBuilder textBuilder = new StringBuilder();

        // Nothing selected: leave the (now cleared) rectangles as they are.
        if (!frags.Any())
        {
            return;
        }

        // The first and last fragment are only partially selected and need special treatment.
        PdfTextFragment first = frags[0];
        PdfTextFragment last = frags[frags.Count - 1];
        int firstIndex = first.GetIndexOfClosestGlyph(s1);
        int lastIndex = last.GetIndexOfClosestGlyph(s2);
        int firstPageNo = controller.InversePageOrder[first.PageNo - 1];
        int lastPageNo = controller.InversePageOrder[last.PageNo - 1];

        if (frags.Count == 1)
        {
            // Start and end are only ordered per fragment, so within a single fragment
            // the two glyph indices may arrive in the wrong order.
            if (lastIndex < firstIndex)
            {
                int tmp = lastIndex;
                lastIndex = firstIndex;
                firstIndex = tmp;
            }
            selectedRects.Add(firstPageNo, new List<PdfSourceRect>() { first.GetRectOnUnrotatedPage(firstIndex, lastIndex) });
            textBuilder.Append(first.Text.Substring(firstIndex, lastIndex - firstIndex)).Append(" ");
        }
        else
        {
            selectedRects.Add(firstPageNo, new List<PdfSourceRect>() { first.GetRectOnUnrotatedPage(firstIndex, int.MaxValue) });
            textBuilder.Append(first.Text.Substring(firstIndex, first.Text.Length - firstIndex)).Append(" ");

            if (firstPageNo != lastPageNo)
            {
                selectedRects.Add(lastPageNo, new List<PdfSourceRect>());
            }
            selectedRects[lastPageNo].Add(last.GetRectOnUnrotatedPage(0, lastIndex));

            // Fragments strictly between first and last are selected completely. They are
            // walked by index so the list handed out by the controller is not modified.
            for (int i = 1; i < frags.Count - 1; i++)
            {
                PdfTextFragment frag = frags[i];
                int fragPageNo = controller.InversePageOrder[frag.PageNo - 1];
                if (!selectedRects.ContainsKey(fragPageNo))
                {
                    selectedRects.Add(fragPageNo, new List<PdfSourceRect>());
                }
                selectedRects[fragPageNo].Add(frag.RectOnUnrotatedPage);
                textBuilder.Append(frag.Text).Append(" ");
            }

            textBuilder.Append(last.Text.Substring(0, lastIndex));
        }

        _selectedText = textBuilder.ToString();
        this.InvalidateVisual();
    }
    else if (creatingDrawingAnnotation || creatingTextRecognitionNote) //[InkingForPDF]
    {
        if (annotationPoints == null)
        {
            annotationPoints = new List<Point>();
        }
        annotationPoints.Add(e.GetPosition(this));
        InvalidateVisual();
    }
    else if (creatingClickAnnotation) //[InkingForPDF]
    {
        lastMousePosition = e.GetPosition(this);
        InvalidateVisual();
    }
    else if (movingAnnotations) //[InkingForPDF]
    {
        lastMousePosition = e.GetPosition(this);
        InvalidateVisual();
    }
    else if (MouseMode == TMouseMode.eMouseMarkMode && selectedAnnotations != null && selectedAnnotations.Count > 0) //[InkingForPDF]
    {
        // Hover feedback: show the move cursor while the pointer is over a selected annotation.
        int page = 0;
        try
        {
            PdfSourcePoint point = controller.TransformOnScreenToOnNearestPage(new PdfTargetPoint(e.GetPosition(this)), ref page);
            if (page > 0 && selectedAnnotations.Any(annot => annot.ContainsPoint(point) && annot.PageNr == page))
            {
                this.Cursor = Cursors.SizeAll;
                this.mouseOverAnnotationInMarkMode = true;
            }
            else
            {
                this.Cursor = Cursors.Cross;
                this.mouseOverAnnotationInMarkMode = false;
            }
        }
        catch (ArgumentOutOfRangeException ex)
        {
            Logger.LogError("Could not transform the point into page coordinates");
            Logger.LogException(ex);
            this.Cursor = Cursors.Cross;
            // Keep the flag in step with the cursor: without a page position the pointer
            // cannot be over an annotation, so a stale 'true' must not survive.
            this.mouseOverAnnotationInMarkMode = false;
        }
    }

    e.Handled = true;
}
/// <summary>
/// Searches the document for the next (or previous) occurrence of a text and reports the
/// outcome through <c>SearchCompleted</c>.
/// </summary>
/// <remarks>
/// Direction, wrap-around, regex mode and case sensitivity are taken from the
/// <c>previous</c>, <c>wrap</c>, <c>useRegex</c> and <c>matchCase</c> members.
/// When the same string is searched again, the cached regex and per-page matches are
/// reused and the start index is moved one position on so that the previous hit is not
/// returned again. <c>SearchCompleted(null)</c> signals "nothing found" (or that the file
/// was closed meanwhile).
/// </remarks>
/// <param name="toSearch">Text or regular expression to look for; null/empty ends the search at once.</param>
/// <param name="startPage">Page on which the search starts.</param>
/// <param name="startIndex">
/// Character index on <paramref name="startPage"/> to start from. Only honoured for a
/// repeated search; a new search string starts at the beginning (or end) of the page.
/// </param>
public void Search(string toSearch, int startPage, int startIndex)
{
    if (string.IsNullOrEmpty(toSearch))
    {
        SearchCompleted(null);
        return;
    }

    // Ordinal comparison: a culture-sensitive compare treats some different strings as equal
    // (e.g. those differing only in ignorable characters) and would reuse a stale regex.
    bool repeatedSearch = string.Equals(lastSearchString, toSearch, StringComparison.Ordinal);

    if (repeatedSearch)
    {
        // Offset the start index to not get the same result again.
        startIndex += previous ? -1 : 1;
    }
    else
    {
        string pattern = toSearch;
        if (!useRegex)
        {
            pattern = Regex.Replace(pattern, "\\\\ ", " "); // turn literal "\ " into " "
            pattern = Regex.Escape(pattern);                // escape regex metacharacters ("." to "\.")
        }
        pattern = Regex.Replace(pattern, "\\\\ ", " ");     // undo the escaping of blanks ("\ " to " ")
        pattern = Regex.Replace(pattern, "\\s+", "\\s+");   // any run of whitespace matches any other run of whitespace

        RegexOptions regexOptions = RegexOptions.Singleline;
        if (!matchCase)
        {
            regexOptions = regexOptions | RegexOptions.IgnoreCase;
        }
        regex = new Regex(pattern, regexOptions);

        // Remember the string only after the regex was built. If construction throws, the
        // next identical request is then not mistaken for a repeated search that would
        // run against the previous regex.
        lastSearchString = toSearch;

        // TODO: start from a specific location instead of the start of the page.
        startIndex = previous ? int.MaxValue : 0;
        matchesCache.Clear();
    }

    // Number of pages examined so far. Walking PageCount + 1 pages means every page was
    // seen and the start page was seen twice (the second time with the start index reset).
    // Unlike comparing against startPage, this also terminates when startPage is out of range.
    int pagesVisited = 0;

    for (int currentPage = startPage; ; currentPage += previous ? -1 : 1)
    {
        if (currentPage > canvas.PageCount)
        {
            if (!wrap)
            {
                // Nothing found and the end of the document is reached.
                SearchCompleted(null);
                return;
            }
            // Continue at the beginning of the document.
            currentPage = 1;
            startIndex = 0;
        }
        else if (currentPage <= 0)
        {
            if (!wrap)
            {
                // Nothing found backwards and the beginning of the document is reached.
                SearchCompleted(null);
                return;
            }
            // Continue at the end of the document.
            currentPage = canvas.PageCount;
            startIndex = int.MaxValue;
        }
        pagesVisited++;

        // Load this page and up to maxNumberOfPagesPreloaded - 1 following pages and join
        // their text, so that matches reaching into following pages can be found.
        StringBuilder searchableText = new StringBuilder("");
        for (int preloadPage = currentPage;
             preloadPage <= canvas.PageCount && preloadPage < currentPage + maxNumberOfPagesPreloaded;
             preloadPage++)
        {
            try
            {
                loadPage(preloadPage);
            }
            catch (PdfNoFileOpenedException)
            {
                // The file is not open anymore.
                SearchCompleted(null);
                return;
            }
            searchableText.Append(searchableTextCache[preloadPage]);
        }

        if (!matchesCache.ContainsKey(currentPage))
        {
            // Matches of this page are not cached yet. Only matches that START on the
            // current page are kept; later ones belong to the following pages.
            MatchCollection matches = regex.Matches(searchableText.ToString());
            matchesCache.Add(currentPage, new List<Match>());
            foreach (Match match in matches)
            {
                if (match.Index < searchableTextCache[currentPage].Length)
                {
                    matchesCache[currentPage].Add(match);
                }
                else
                {
                    break;
                }
            }
        }

        // Pick the match closest to startIndex in search direction.
        Match nextMatch = null;
        foreach (Match match in matchesCache[currentPage])
        {
            if (!previous)
            {
                if (match.Index >= startIndex && (nextMatch == null || match.Index < nextMatch.Index))
                {
                    nextMatch = match;
                }
            }
            else
            {
                if (match.Index <= startIndex && (nextMatch == null || match.Index > nextMatch.Index))
                {
                    nextMatch = match;
                }
            }
        }

        if (nextMatch == null)
        {
            if (pagesVisited > canvas.PageCount)
            {
                // The whole document was checked without success: give up.
                SearchCompleted(null);
                return;
            }
            // The start index is only valid on the page the search started on.
            startIndex = previous ? int.MaxValue : 0;
            continue;
        }

        // A match was found: translate it into rectangles of the text fragments it covers.
        Dictionary<int, IList<PdfSourceRect>> textRects = new Dictionary<int, IList<PdfSourceRect>>();
        textRects.Add(currentPage, new List<PdfSourceRect>());

        int fragmentIndex = 0;           // start offset of the current fragment, relative to currentPage
        int highlightPage = currentPage; // page the current fragment belongs to
        int previousPagesTextLength = 0; // text length of the pages between currentPage and highlightPage

        while (textFragmentIndexDict.ContainsKey(highlightPage))
        {
            // Fragments are looked up by their offset relative to their own page.
            int offsetOnPage = fragmentIndex - previousPagesTextLength;
            if (!textFragmentIndexDict[highlightPage].ContainsKey(offsetOnPage))
            {
                // No fragment starts here (e.g. a page without text): stop instead of failing the lookup.
                break;
            }
            PdfTextFragment fragment = textFragmentIndexDict[highlightPage][offsetOnPage];

            int startSubIndex = 0;                  // start of the match within this fragment
            int endSubIndex = fragment.Text.Length; // end of the match within this fragment

            if (nextMatch.Index > fragmentIndex + fragment.Text.Length)
            {
                // The region with the match is not reached yet: carry on with the next fragment.
            }
            else if (nextMatch.Index + nextMatch.Length < fragmentIndex)
            {
                // The match lies completely behind us.
                break;
            }
            else
            {
                // This fragment intersects the match.
                if (fragmentIndex < nextMatch.Index)
                {
                    // The match starts in the middle of this fragment.
                    startSubIndex = nextMatch.Index - fragmentIndex;
                }
                if (fragmentIndex + fragment.Text.Length > nextMatch.Index + nextMatch.Length)
                {
                    // The match ends before the end of this fragment.
                    endSubIndex = nextMatch.Index + nextMatch.Length - fragmentIndex;
                }

                PdfSourceRect rect = null;
                if (startSubIndex == 0 && endSubIndex == fragment.Text.Length)
                {
                    rect = fragment.RectOnUnrotatedPage;
                }
                else
                {
                    rect = fragment.GetRectOnUnrotatedPage(startSubIndex, endSubIndex);
                }
                textRects[highlightPage].Add(rect);
            }

            // Advance to the next fragment (the + 1 skips one character between fragments
            // in the searchable text) and, if necessary, to the next page.
            fragmentIndex += fragment.Text.Length + 1;
            if (!textFragmentIndexDict[highlightPage].ContainsKey(fragmentIndex - previousPagesTextLength))
            {
                if (highlightPage == canvas.PageCount)
                {
                    // End of the document.
                    break;
                }
                // No more fragments on this page: go to the next one.
                previousPagesTextLength += searchableTextCache[highlightPage].Length;
                highlightPage++;
                textRects.Add(highlightPage, new List<PdfSourceRect>());
            }
        }

        SearchCompleted(new SearchResult(currentPage, textRects, nextMatch.Index, nextMatch.Value));
        return;
    }
}