Пример #1
        /// <summary>
        /// Opens the Word document at <c>originalFilePath</c>, walks it page by page and
        /// highlights in yellow every paragraph whose lower-cased text contains one of the
        /// topic terms (or the term followed by "s").
        /// </summary>
        /// <param name="backgroundWorker">Worker handed to <c>OutputMg.OutputContent</c> together with each progress message.</param>
        /// <returns>
        /// A dictionary keyed by 1-based page number. Each value is the lower-cased text of the
        /// paragraphs highlighted on that page, each followed by a tab character; the value is an
        /// empty string when nothing on the page matched.
        /// </returns>
        /// <remarks>
        /// NOTE(review): block delimiters and some statements (for example the <c>try</c> that pairs
        /// with the <c>catch</c> below, and the bodies of several branches) are not visible in this
        /// listing, so the comments describe only what the visible statements do.
        /// NOTE(review): no Save/Close/Quit call on the Word document or application is visible
        /// here - confirm the highlighted document is saved and the Word instance is released.
        /// </remarks>
        public Dictionary<int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
            OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + originalFilePath);

            // Look for running Word processes that may already have the target document open.
            System.Diagnostics.Process[] processes = System.Diagnostics.Process.GetProcessesByName("WINWORD");
            if (processes != null)
                if (processes.Length > 0)
                    // Derive the text to search for in window titles from the part of
                    // highlightedFilePath that follows the last backslash.
                    string targetDocPath = "";
                    int dirIndex = highlightedFilePath.LastIndexOf("\\");
                    if (dirIndex > 0)
                        // NOTE(review): "\\" is a single character, so dirIndex + 2 also skips the
                        // first character of the file name - confirm whether + 1 was intended.
                        targetDocPath = highlightedFilePath.Substring(dirIndex + 2);
                    foreach (System.Diagnostics.Process process in processes)
                        string temp = process.MainWindowTitle.ToString();
                        // Branch on whether the process has a window title and whether that title
                        // mentions the target document.
                        // NOTE(review): the statements executed in each branch are not visible here.
                        if (temp.Length == 0)
                        else if (temp.Contains(targetDocPath))

            // Per-page result that is returned to the caller.
            Dictionary<int, string> pageContents = new Dictionary<int, string>();
            var app = new Microsoft.Office.Interop.Word.Application();

            // Keep the Word window hidden and open the original document with readOnly = false.
            app.Visible = false;
            object readOnly = false;
            object missing = System.Reflection.Missing.Value;
            var doc = app.Documents.Open(this.originalFilePath, missing, readOnly);

            int pageNum = doc.Content.ComputeStatistics(Microsoft.Office.Interop.Word.WdStatistic.wdStatisticPages); // total number of pages in the document

            // Terms to search for, parsed from topicTermPath for the topic named targetTopicName.
            List<string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);


            // Process the document one page at a time.
            for (int p = 1; p <= pageNum; p++)
                OutputMg.OutputContent(backgroundWorker, "Parsing page: " + p);

                // Accumulates the text of the paragraphs highlighted on this page.
                string pageHighlight = "";

                object what = WdGoToItem.wdGoToPage;
                object which = WdGoToDirection.wdGoToAbsolute;
                object nextPage = p + 1;
                Range startRange;
                Range endRange;

                    // Jump to page p and then to page p + 1 to obtain the two range positions
                    // that bracket the current page.
                    startRange = app.Selection.GoTo(ref what, ref which, p, ref missing);
                    endRange = app.Selection.GoTo(what, which, nextPage, missing);
                // Any exception from the navigation above is reported to the user as a locked document.
                // NOTE(review): what happens after the message box (return, continue, ...) is not visible here.
                catch (Exception)
                    MessageBox.Show("This document is locked by author. We cannot execute highlight", "Failed", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                // Both jumps landed on the same position - presumably there is no following page
                // (p is the last page; confirm). Take the end position from a go-to-last-line jump instead.
                if (startRange.Start == endRange.Start)
                    which = WdGoToDirection.wdGoToLast;
                    what = WdGoToItem.wdGoToLine;
                    endRange = app.Selection.GoTo(what, which, nextPage, missing);

                // Re-use endRange as the range spanning from the start position to the end position found above.
                endRange.SetRange(startRange.Start, endRange.End);

                foreach (Paragraph field in endRange.Paragraphs)
                    Range fieldRange = field.Range;
                    // The paragraph text is lower-cased before matching.
                    // NOTE(review): presumably topicTerms are lower-case as well - verify against ParseTopicTerms.
                    string paraText = fieldRange.Text.ToLower();

                    // NOTE(review): the statement guarded by this empty-text check is not visible here
                    // (possibly a skip of empty paragraphs); as listed, the check is followed directly
                    // by the term loop - confirm against the full source.
                    if (paraText.Length == 0)
                        foreach (string topicTerm in topicTerms)
                            // Match the term itself or the term with an appended "s".
                            if (paraText.Contains(topicTerm) || paraText.Contains(topicTerm + "s"))
                                fieldRange.HighlightColorIndex = WdColorIndex.wdYellow;
                                // NOTE(review): no break is visible, so a paragraph matching several
                                // terms would be appended once per matching term - confirm.
                                pageHighlight += paraText + "\t";
                // Record the highlighted text for page p.
                pageContents.Add(p, pageHighlight);


            return pageContents;
Пример #2
        public Dictionary <int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
            OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + pdfFilePath);

            List <string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);


            string origiFile = pdfFilePath;

            //Create a new file from our test file with highlighting
            string highLightFile = highlightedPDFPath;

            int pdfNum = 0;

            PdfReader reader = new PdfReader(origiFile);

            using (FileStream fs = new FileStream(highLightFile, FileMode.Create, FileAccess.Write, FileShare.None))
                using (PdfStamper stamper = new PdfStamper(reader, fs))
                    using (var r = new PdfReader(origiFile))
                        pdfNum = r.NumberOfPages;
                        string ex = "";
                        ITextExtractionStrategy strategy;

                        for (int i = 1; i <= pdfNum; i++)
                            OutputMg.OutputContent(backgroundWorker, "Parsing page: " + i);

                            Rectangle pageRect = r.GetPageSize(i);

                            Document doc = new Document(pageRect);

                            float leftMargin  = doc.LeftMargin;
                            float rightMargin = doc.RightMargin;
                            float lineWidth   = pageRect.Width;

                            var textPos = new FutherLocationTextExtractionStrategy(topicTerms);

                            //Create an instance of our strategy
                            ex = PdfTextExtractor.GetTextFromPage(r, i, textPos); //store the text and the position info in textPos
                            List <iTextSharp.text.Rectangle> quadList = new List <iTextSharp.text.Rectangle>();

                            foreach (var p in textPos.myPoints)
                                string p_text = p.Text;

                                iTextSharp.text.Rectangle rect = p.Rect;

                                quadList.Add(rect);//collect the coordination of keywords

                            List <string> pageContent = new List <string>();

                            if (quadList.Count > 0)
                                List <iTextSharp.text.Rectangle> orderedRect = orderRectByBottom(quadList);
                                //merge and adjust the rectangle, highlight the adjusted rect
                                List <iTextSharp.text.Rectangle> adjustedRect = adjustRect(orderedRect, lineWidth, leftMargin);
                                foreach (Rectangle rect in adjustedRect)
                                    //Create an array of quad points based on that rectangle. NOTE: The order below doesn't appear to match the actual spec but is what Acrobat produces
                                    //the co-ordination of four points
                                    float[] quad = { rect.Left, rect.Bottom, rect.Right, rect.Bottom, rect.Left, rect.Top, rect.Right, rect.Top };

                                    ////Create our hightlight
                                    PdfAnnotation highlight = PdfAnnotation.CreateMarkup(stamper.Writer, rect, null, PdfAnnotation.MARKUP_HIGHLIGHT, quad);

                                    ////Set the color
                                    highlight.Color = BaseColor.YELLOW;

                                    stamper.AddAnnotation(highlight, i); // i is the page

                                    //get the text of highlighting
                                    RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                                    strategy = new MyFilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                                    string text = PdfTextExtractor.GetTextFromPage(reader, i, strategy).Trim();
                                    if (!pageContent.Contains(text))
                                StringBuilder sb = new StringBuilder();

                                foreach (string tmp in pageContent)

                                pageContents.Add(i, sb.ToString());
