/// <summary>
/// Paints a semi-transparent highlight over the selected words of the current page,
/// one rectangle per text line.
/// </summary>
/// <remarks>
/// The bounds of consecutive words are unioned until a word flagged
/// <c>IsEndOfLine</c> is reached; the accumulated rectangle is then handed to
/// <c>HighlightLine</c>. The clip region of <paramref name="g"/> is set to the
/// transformed image rectangle and is left in place when the method returns.
/// </remarks>
/// <param name="g">Graphics surface to draw the highlight on.</param>
private void HighlightSelectedWords(Graphics g)
{
    MyWord[] words = _selectedText[_currentPageNumber];
    if (words == null)
    {
        // Treat a missing word list as "nothing selected" instead of failing in the loop below.
        words = new MyWord[0];
    }

    // Alpha 128 keeps the underlying text visible through the highlight color.
    using (Brush brush = new SolidBrush(Color.FromArgb(128, SystemColors.Highlight)))
    {
        LeadMatrix mm = _rasterImageViewer.GetImageTransformWithDpi(true);

        // Matrix wraps a native GDI+ object, so release it deterministically once drawing is done.
        using (Matrix m = new Matrix(
            (float)mm.M11, (float)mm.M12,
            (float)mm.M21, (float)mm.M22,
            (float)mm.OffsetX, (float)mm.OffsetY))
        {
            Transformer trans = new Transformer(m);

            // Clip to the current image bounds, expressed in the transformed coordinates.
            RectangleF clipRect = new RectangleF(
                0,
                0,
                _rasterImageViewer.Image.ImageWidth,
                _rasterImageViewer.Image.ImageHeight);
            clipRect = trans.RectangleToPhysical(clipRect);
            g.SetClip(clipRect);

            LeadRect lineBounds = LeadRect.Empty;

            foreach (MyWord word in words)
            {
                // Grow the rectangle of the line currently being collected.
                lineBounds = lineBounds.IsEmpty
                    ? word.Bounds
                    : LeadRect.Union(lineBounds, word.Bounds);

                if (word.IsEndOfLine)
                {
                    // The line is complete: highlight it and start a new one.
                    HighlightLine(g, trans, brush, lineBounds);
                    lineBounds = LeadRect.Empty;
                }
            }

            // Flush a trailing line whose last word was not flagged as end-of-line.
            if (!lineBounds.IsEmpty)
            {
                HighlightLine(g, trans, brush, lineBounds);
            }
        }
    }
}
/// <summary>
/// Groups the objects of every page of <paramref name="document"/> into words.
/// </summary>
/// <remarks>
/// Objects are consumed in order. A run ends after an object whose text properties
/// flag <c>IsEndOfWord</c> or <c>IsEndOfLine</c>, or at the end of the page. Within a
/// run, a non-text or white-space object restarts the word's character range at the
/// following object. Runs that end on such an object produce no word.
/// The last word of each page is always marked <c>IsEndOfLine</c>.
/// </remarks>
/// <param name="document">Document whose pages are scanned.</param>
/// <returns>
/// A dictionary keyed by 1-based page number. Each value holds the words found on
/// that page in object order; it is an empty array when the page yields no words.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
public static Dictionary<int, MyWord[]> BuildWord(PDFDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    Dictionary<int, MyWord[]> pageWords = new Dictionary<int, MyWord[]>();

    for (int pageNumber = 1; pageNumber <= document.Pages.Count; pageNumber++)
    {
        List<MyWord> words = new List<MyWord>();
        PDFDocumentPage page = document.Pages[pageNumber - 1];
        IList<PDFObject> objects = page.Objects;

        // A page without an object list is handled like a page with zero objects.
        int objectCount = (objects != null) ? objects.Count : 0;
        int objectIndex = 0;

        while (objectIndex < objectCount)
        {
            // Total bounding rectangle and first character index of the next word.
            LeadRect wordBounds = LeadRect.Empty;
            int firstObjectIndex = objectIndex;
            PDFObject obj;

            // Consume objects until one is flagged end-of-word/end-of-line or the page ends.
            do
            {
                obj = objects[objectIndex];

                // Must be text and not a white-space character.
                if (obj.ObjectType == PDFObjectType.Text && !Char.IsWhiteSpace(obj.Code))
                {
                    // Add the bounding rectangle of this object, converted to pixels.
                    PDFRect temp = page.ConvertRect(PDFCoordinateType.Pdf, PDFCoordinateType.Pixel, obj.Bounds);
                    LeadRect objectBounds = LeadRect.FromLTRB(
                        (int)temp.Left, (int)temp.Top, (int)temp.Right, (int)temp.Bottom);

                    wordBounds = wordBounds.IsEmpty
                        ? objectBounds
                        : LeadRect.Union(wordBounds, objectBounds);
                }
                else
                {
                    // Separator: the word's characters can only start after this object.
                    firstObjectIndex = objectIndex + 1;
                }

                objectIndex++;
            }
            while (objectIndex < objectCount
                   && !obj.TextProperties.IsEndOfWord
                   && !obj.TextProperties.IsEndOfLine);

            // The run ended on a separator, so there are no characters to collect.
            if (firstObjectIndex == objectIndex)
            {
                continue;
            }

            // From the begin and end index, collect the characters into a string.
            StringBuilder sb = new StringBuilder();
            for (int i = firstObjectIndex; i < objectIndex; i++)
            {
                if (objects[i].ObjectType == PDFObjectType.Text)
                {
                    sb.Append(objects[i].Code);
                }
            }

            // Add this word to the list; the run's last object decides the end-of-line flag.
            PDFObject lastObject = objects[objectIndex - 1];
            MyWord word = new MyWord();
            word.Value = sb.ToString();
            word.Bounds = wordBounds;
            word.IsEndOfLine = lastObject.TextProperties.IsEndOfLine;
            words.Add(word);
        }

        // Add "IsEndOfLine" to the last word in the page, just in case it does not have it.
        // Read-modify-write through the indexer so the change is stored back into the list.
        if (words.Count > 0)
        {
            int lastIndex = words.Count - 1;
            MyWord lastWord = words[lastIndex];
            lastWord.IsEndOfLine = true;
            words[lastIndex] = lastWord;
        }

        pageWords.Add(pageNumber, words.ToArray());
    }

    return pageWords;
}