Exemplo n.º 1
0
        /// <summary>
        /// Highlights the selected words of the current page with a half-transparent
        /// <see cref="SystemColors.Highlight"/> brush. Consecutive words are merged into a single
        /// bounding rectangle until a word flagged as end-of-line is reached; each merged rectangle
        /// is then handed to <c>HighlightLine</c>.
        /// </summary>
        /// <param name="g">
        /// Graphics surface to paint on. Its clip region is replaced with the transformed image
        /// bounds and is left that way when the method returns.
        /// </param>
        private void HighlightSelectedWords(Graphics g)
        {
            MyWord[] words = _selectedText[_currentPageNumber];
            if (words == null)
            {
                // Nothing selected on this page; avoid a NullReferenceException in the loop below
                return;
            }

            // Image-to-viewer transform, including DPI scaling
            LeadMatrix mm = _rasterImageViewer.GetImageTransformWithDpi(true);

            // Both the brush and the GDI+ matrix wrap native handles, so dispose them deterministically
            using (Brush brush = new SolidBrush(Color.FromArgb(128, SystemColors.Highlight)))
            using (Matrix m = new Matrix((float)mm.M11, (float)mm.M12, (float)mm.M21, (float)mm.M22, (float)mm.OffsetX, (float)mm.OffsetY))
            {
                Transformer trans = new Transformer(m);

                // Clip to the current image bounds
                RectangleF clipRect = new RectangleF(0, 0, _rasterImageViewer.Image.ImageWidth, _rasterImageViewer.Image.ImageHeight);
                clipRect = trans.RectangleToPhysical(clipRect);
                g.SetClip(clipRect);

                // Running union of the word rectangles that belong to the line being assembled
                LeadRect lineBounds = LeadRect.Empty;

                foreach (MyWord word in words)
                {
                    // Grow the line rectangle by this word
                    if (lineBounds.IsEmpty)
                    {
                        lineBounds = word.Bounds;
                    }
                    else
                    {
                        lineBounds = LeadRect.Union(lineBounds, word.Bounds);
                    }

                    if (word.IsEndOfLine)
                    {
                        // Line complete: highlight it and start a new one
                        HighlightLine(g, trans, brush, lineBounds);
                        lineBounds = LeadRect.Empty;
                    }
                }

                // Flush a trailing line whose last word was not flagged as end-of-line
                if (!lineBounds.IsEmpty)
                {
                    HighlightLine(g, trans, brush, lineBounds);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Groups the objects of every page of <paramref name="document"/> into words.
        /// </summary>
        /// <param name="document">The PDF document whose pages are scanned.</param>
        /// <returns>
        /// A dictionary keyed by 1-based page number. Each value holds the words of that page in
        /// object order; a page without words maps to an empty array. Word bounds are the union of
        /// the character rectangles converted to pixel coordinates and truncated to integers.
        /// The last word of every page is always flagged as end-of-line.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        public static Dictionary<int, MyWord[]> BuildWord(PDFDocument document)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            Dictionary<int, MyWord[]> pageWords = new Dictionary<int, MyWord[]>();

            for (int pageNumber = 1; pageNumber <= document.Pages.Count; pageNumber++)
            {
                List<MyWord> words = new List<MyWord>();

                PDFDocumentPage  page    = document.Pages[pageNumber - 1];
                IList<PDFObject> objects = page.Objects;
                if (objects != null && objects.Count > 0)
                {
                    int objectIndex = 0;
                    int objectCount = objects.Count;

                    // Loop through all the objects
                    while (objectIndex < objectCount)
                    {
                        // Find the total bounding rectangle, begin and end index of the next word
                        LeadRect wordBounds       = LeadRect.Empty;
                        int      firstObjectIndex = objectIndex;

                        // Loop till we reach EndOfWord/EndOfLine or run out of objects
                        bool more = true;
                        while (more)
                        {
                            PDFObject obj = objects[objectIndex];

                            // Must be text and not a white character
                            if (obj.ObjectType == PDFObjectType.Text && !Char.IsWhiteSpace(obj.Code))
                            {
                                // Add the bounding rectangle of this object
                                PDFRect  temp         = page.ConvertRect(PDFCoordinateType.Pdf, PDFCoordinateType.Pixel, obj.Bounds);
                                LeadRect objectBounds = LeadRect.FromLTRB((int)temp.Left, (int)temp.Top, (int)temp.Right, (int)temp.Bottom);

                                if (wordBounds.IsEmpty)
                                {
                                    wordBounds = objectBounds;
                                }
                                else
                                {
                                    wordBounds = LeadRect.Union(wordBounds, objectBounds);
                                }
                            }
                            else
                            {
                                // Not a word character: the word (if any) starts after this object
                                firstObjectIndex = objectIndex + 1;
                            }

                            objectIndex++;
                            more = (objectIndex < objectCount) && !obj.TextProperties.IsEndOfWord && !obj.TextProperties.IsEndOfLine;
                        }

                        // The object that terminated this run; the inner loop always consumes at least one
                        PDFObject lastObject = objects[objectIndex - 1];
                        bool      endsLine   = lastObject.TextProperties.IsEndOfLine;

                        if (firstObjectIndex == objectIndex)
                        {
                            // The run ended on a non-word object, so no word is emitted. If that object
                            // closed the line, carry the flag over to the previous word; otherwise the
                            // line break is lost and two lines would be treated as one.
                            if (endsLine && words.Count > 0)
                            {
                                MyWord previous = words[words.Count - 1];
                                previous.IsEndOfLine   = true;
                                words[words.Count - 1] = previous;
                            }

                            continue;
                        }

                        // From the begin and end index, collect the characters into a string
                        StringBuilder sb = new StringBuilder();
                        for (int i = firstObjectIndex; i < objectIndex; i++)
                        {
                            if (objects[i].ObjectType == PDFObjectType.Text)
                            {
                                sb.Append(objects[i].Code);
                            }
                        }

                        // Add this word to the list
                        MyWord word = new MyWord();
                        word.Value       = sb.ToString();
                        word.Bounds      = wordBounds;
                        word.IsEndOfLine = endsLine;

                        words.Add(word);
                    }
                }

                // Add "IsEndOfLine" to the last word in the page, just in case it does not have it.
                // Read-modify-write so this also works if MyWord is a value type.
                if (words.Count > 0)
                {
                    MyWord word = words[words.Count - 1];
                    word.IsEndOfLine       = true;
                    words[words.Count - 1] = word;
                }

                pageWords.Add(pageNumber, words.ToArray());
            }

            return pageWords;
        }