Пример #1
0
        /// <summary>
        /// Opens the Word document at <c>originalFilePath</c>, highlights in yellow every paragraph
        /// that contains one of the topic terms, saves the result to <c>highlightedFilePath</c> and
        /// returns the highlighted text grouped by page.
        /// </summary>
        /// <param name="backgroundWorker">Worker handed to <c>OutputMg.OutputContent</c> for progress messages.</param>
        /// <returns>
        /// Page number (starting at 1) mapped to the lower-cased text of the highlighted paragraphs on
        /// that page, each followed by a tab. A page without a match maps to an empty string. If page
        /// navigation fails, a warning is shown, nothing is saved and the pages processed so far are returned.
        /// </returns>
        public Dictionary<int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
        {
            OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + originalFilePath);

            // If the output document is currently open in Word, close it first.
            CloseWordInstancesHolding(highlightedFilePath);

            Dictionary<int, string> pageContents = new Dictionary<int, string>();
            bool navigationFailed = false;
            object missing = System.Reflection.Missing.Value;
            object doNotSave = WdSaveOptions.wdDoNotSaveChanges;

            var app = new Microsoft.Office.Interop.Word.Application();
            try
            {
                app.Visible = false;
                object readOnly = false;
                var doc = app.Documents.Open(this.originalFilePath, missing, readOnly);
                try
                {
                    int pageNum = doc.Content.ComputeStatistics(Microsoft.Office.Interop.Word.WdStatistic.wdStatisticPages); //doc page

                    List<string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);

                    AddUserSearchTerms(topicTerms);

                    // Walk the document page by page and inspect each paragraph on the page.
                    for (int p = 1; p <= pageNum; p++)
                    {
                        OutputMg.OutputContent(backgroundWorker, "Parsing page: " + p);

                        object what = WdGoToItem.wdGoToPage;
                        object which = WdGoToDirection.wdGoToAbsolute;
                        object nextPage = p + 1;
                        Range startRange;
                        Range endRange;

                        try
                        {
                            startRange = app.Selection.GoTo(what, which, p, missing);
                            endRange = app.Selection.GoTo(what, which, nextPage, missing);
                        }
                        catch (Exception)
                        {
                            // Stop processing; the document and Word itself are released by the
                            // finally blocks below, and the user is told once cleanup is done.
                            navigationFailed = true;
                            break;
                        }

                        if (startRange.Start == endRange.Start)
                        {
                            // Going to the "next" page did not move: this is the last page,
                            // so extend the range to the last line instead.
                            which = WdGoToDirection.wdGoToLast;
                            what = WdGoToItem.wdGoToLine;
                            endRange = app.Selection.GoTo(what, which, nextPage, missing);
                        }

                        endRange.SetRange(startRange.Start, endRange.End);

                        var pageHighlight = new System.Text.StringBuilder();

                        foreach (Paragraph field in endRange.Paragraphs)
                        {
                            Range fieldRange = field.Range;
                            string rawText = fieldRange.Text;
                            if (string.IsNullOrEmpty(rawText))
                            {
                                continue;
                            }

                            string paraText = rawText.ToLower();

                            foreach (string topicTerm in topicTerms)
                            {
                                // A plural formed by appending "s" necessarily contains the term
                                // itself, so a single substring test covers both forms.
                                if (paraText.Contains(topicTerm))
                                {
                                    fieldRange.HighlightColorIndex = WdColorIndex.wdYellow;
                                    pageHighlight.Append(paraText).Append("\t");
                                    break;
                                }
                            }
                        }

                        pageContents.Add(p, pageHighlight.ToString());
                    }

                    if (!navigationFailed)
                    {
                        doc.SaveAs2(this.highlightedFilePath);
                    }
                }
                finally
                {
                    // The highlighted copy (if any) has already been written by SaveAs2; never
                    // prompt for, or write, pending changes while closing the hidden instance.
                    doc.Close(ref doNotSave, ref missing, ref missing);
                }
            }
            finally
            {
                // Always shut the hidden Word instance down, even when an exception escapes.
                app.Quit();
            }

            if (navigationFailed)
            {
                MessageBox.Show("This document is locked by author. We cannot execute highlight", "Failed", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            }

            return pageContents;
        }

        /// <summary>
        /// Kills every running WINWORD process that has no main window title, and every one whose
        /// window title contains the file name of <paramref name="documentPath"/>; in the latter
        /// case the file at <paramref name="documentPath"/> is deleted as well.
        /// </summary>
        /// <param name="documentPath">Path of the document that must not stay open in Word.</param>
        private static void CloseWordInstancesHolding(string documentPath)
        {
            string fileName = System.IO.Path.GetFileName(documentPath);

            foreach (System.Diagnostics.Process process in System.Diagnostics.Process.GetProcessesByName("WINWORD"))
            {
                using (process)
                {
                    string title = process.MainWindowTitle;

                    if (title.Length == 0)
                    {
                        process.Kill();
                    }
                    else if (!string.IsNullOrEmpty(fileName) && title.Contains(fileName))
                    {
                        process.Kill();
                        // Kill is asynchronous: wait until the process is gone so the file is
                        // no longer held open when it is deleted.
                        process.WaitForExit();
                        System.IO.File.Delete(documentPath);
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Writes a copy of the PDF at <c>pdfFilePath</c> to <c>highlightedPDFPath</c> with yellow
        /// highlight annotations over the regions where topic terms were located, and records the
        /// highlighted text of each page in <c>pageContents</c>.
        /// </summary>
        /// <param name="backgroundWorker">Worker handed to <c>OutputMg.OutputContent</c> for progress messages.</param>
        /// <returns>
        /// <c>pageContents</c> (declared outside this method): page number (starting at 1) mapped to the
        /// distinct highlighted texts of that page, one per line. Pages on which nothing was
        /// located get no entry.
        /// </returns>
        public Dictionary <int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
        {
            OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + pdfFilePath);

            List <string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);

            AddUserSearchTerms(topicTerms);

            // "reader" feeds the stamper and the per-region text extraction; "r" is a second reader
            // over the same file used for page sizes and term location. Both are disposed on exit,
            // including when an exception is thrown.
            using (PdfReader reader = new PdfReader(pdfFilePath))
            using (FileStream fs = new FileStream(highlightedPDFPath, FileMode.Create, FileAccess.Write, FileShare.None))
            using (PdfStamper stamper = new PdfStamper(reader, fs))
            using (var r = new PdfReader(pdfFilePath))
            {
                int pdfNum = r.NumberOfPages;

                for (int i = 1; i <= pdfNum; i++)
                {
                    OutputMg.OutputContent(backgroundWorker, "Parsing page: " + i);

                    Rectangle pageRect = r.GetPageSize(i);

                    // A throw-away Document is created only to read its left margin for this page size.
                    Document doc = new Document(pageRect);

                    float leftMargin = doc.LeftMargin;
                    float lineWidth  = pageRect.Width;

                    var textPos = new FutherLocationTextExtractionStrategy(topicTerms);

                    // The returned text is not used; the call is made so that the strategy stores
                    // the text and position info in textPos.
                    PdfTextExtractor.GetTextFromPage(r, i, textPos);

                    // Collect the coordinates of the located keywords.
                    List <iTextSharp.text.Rectangle> quadList = new List <iTextSharp.text.Rectangle>();

                    foreach (var p in textPos.myPoints)
                    {
                        quadList.Add(p.Rect);
                    }

                    if (quadList.Count == 0)
                    {
                        continue;
                    }

                    List <string> pageContent = new List <string>();

                    List <iTextSharp.text.Rectangle> orderedRect = orderRectByBottom(quadList);
                    // Merge and adjust the rectangles; the adjusted rectangles are what gets highlighted.
                    List <iTextSharp.text.Rectangle> adjustedRect = adjustRect(orderedRect, lineWidth, leftMargin);

                    foreach (Rectangle rect in adjustedRect)
                    {
                        // Quad points for the rectangle. NOTE: The order below doesn't appear to match
                        // the actual spec but is what Acrobat produces.
                        float[] quad = { rect.Left, rect.Bottom, rect.Right, rect.Bottom, rect.Left, rect.Top, rect.Right, rect.Top };

                        PdfAnnotation highlight = PdfAnnotation.CreateMarkup(stamper.Writer, rect, null, PdfAnnotation.MARKUP_HIGHLIGHT, quad);
                        highlight.Color = BaseColor.YELLOW;

                        stamper.AddAnnotation(highlight, i); // i is the page

                        // Extract the text lying inside the highlighted rectangle.
                        RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                        ITextExtractionStrategy strategy = new MyFilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                        string text = PdfTextExtractor.GetTextFromPage(reader, i, strategy).Trim();

                        // Keep each distinct text once, in first-seen order.
                        if (!pageContent.Contains(text))
                        {
                            pageContent.Add(text);
                        }
                    }

                    StringBuilder sb = new StringBuilder();

                    foreach (string tmp in pageContent)
                    {
                        sb.AppendLine(tmp);
                    }

                    pageContents.Add(i, sb.ToString());
                }
            }

            return(pageContents);
        }