/// <summary>
/// Opens the Word document at <c>originalFilePath</c>, highlights in yellow every paragraph that
/// contains one of the topic terms, and saves the result to <c>highlightedFilePath</c>.
/// </summary>
/// <remarks>
/// Side effect: before Word is started, every running WINWORD process that has no main window
/// title, or whose title contains the output file name, is killed (see
/// <see cref="CloseWordProcessesHolding"/>).
/// </remarks>
/// <param name="backgroundWorker">Worker handed to <c>OutputMg.OutputContent</c> for progress messages.</param>
/// <returns>
/// Page number mapped to the lower-cased text of the paragraphs highlighted on that page, each
/// followed by a tab. If page navigation fails, a warning box is shown, nothing is saved, and only
/// the pages processed before the failure are returned.
/// </returns>
public Dictionary<int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + originalFilePath);

    // If the output document is open in Word, close it first so it can be overwritten.
    CloseWordProcessesHolding(highlightedFilePath);

    Dictionary<int, string> pageContents = new Dictionary<int, string>();
    object missing = System.Reflection.Missing.Value;
    object readOnly = false;
    object discardChanges = WdSaveOptions.wdDoNotSaveChanges;
    bool navigationFailed = false;

    var app = new Microsoft.Office.Interop.Word.Application();
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        app.Visible = false;
        doc = app.Documents.Open(this.originalFilePath, missing, readOnly);
        int pageNum = doc.Content.ComputeStatistics(Microsoft.Office.Interop.Word.WdStatistic.wdStatisticPages);

        List<string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);
        AddUserSearchTerms(topicTerms);

        for (int p = 1; p <= pageNum; p++)
        {
            OutputMg.OutputContent(backgroundWorker, "Parsing page: " + p);

            object what = WdGoToItem.wdGoToPage;
            object which = WdGoToDirection.wdGoToAbsolute;
            object currentPage = p;
            object nextPage = p + 1;
            Range startRange;
            Range endRange;
            try
            {
                startRange = app.Selection.GoTo(what, which, currentPage, missing);
                endRange = app.Selection.GoTo(what, which, nextPage, missing);
            }
            catch (Exception)
            {
                // Stop processing; the document is closed unsaved in the finally block below.
                navigationFailed = true;
                break;
            }

            // Going to page p + 1 landed on the same position as page p, i.e. there is no
            // following page: use the last line of the document as the end of this page instead.
            if (startRange.Start == endRange.Start)
            {
                what = WdGoToItem.wdGoToLine;
                which = WdGoToDirection.wdGoToLast;
                endRange = app.Selection.GoTo(what, which, nextPage, missing);
            }
            endRange.SetRange(startRange.Start, endRange.End);

            pageContents.Add(p, HighlightMatchingParagraphs(endRange, topicTerms));
        }

        // Saving is skipped after a navigation failure: the original code closed the document
        // in the catch block and then still called SaveAs2 on it.
        if (!navigationFailed)
        {
            doc.SaveAs2(this.highlightedFilePath);
        }
    }
    finally
    {
        // Always shut Word down, even on an exception, so no hidden WINWORD process is left behind.
        // The highlighted copy has already been written by SaveAs2 (or is intentionally dropped),
        // so the in-memory document is closed without saving and without a save prompt.
        try
        {
            if (doc != null)
            {
                doc.Close(discardChanges);
            }
        }
        finally
        {
            app.Quit();
        }
    }

    if (navigationFailed)
    {
        MessageBox.Show("This document is locked by author. \nWe cannot execute highlight", "Failed", MessageBoxButtons.OK, MessageBoxIcon.Warning);
    }

    return pageContents;
}

/// <summary>
/// Kills running WINWORD processes that have no main window title or whose title contains the
/// file name of <paramref name="documentPath"/>; in the latter case the file is deleted as well.
/// </summary>
private static void CloseWordProcessesHolding(string documentPath)
{
    // Path.GetFileName replaces the hand-rolled Substring(dirIndex + 2), which dropped the first
    // character of the name and threw when the path ended with a backslash.
    string fileName = System.IO.Path.GetFileName(documentPath);

    foreach (System.Diagnostics.Process process in System.Diagnostics.Process.GetProcessesByName("WINWORD"))
    {
        string title = process.MainWindowTitle;
        if (title.Length == 0)
        {
            process.Kill();
        }
        else if (!string.IsNullOrEmpty(fileName) && title.Contains(fileName))
        {
            // An empty file name must not be matched: Contains("") is true for every title.
            process.Kill();
            // Kill is asynchronous; wait for the process to exit before deleting the file.
            process.WaitForExit();
            System.IO.File.Delete(documentPath);
        }
    }
}

/// <summary>
/// Highlights in yellow each paragraph of <paramref name="pageRange"/> whose lower-cased text
/// contains any of <paramref name="topicTerms"/>, and returns those texts, each followed by a tab.
/// </summary>
private static string HighlightMatchingParagraphs(Range pageRange, List<string> topicTerms)
{
    System.Text.StringBuilder highlighted = new System.Text.StringBuilder();

    foreach (Paragraph paragraph in pageRange.Paragraphs)
    {
        Range paragraphRange = paragraph.Range;
        string rawText = paragraphRange.Text;
        if (string.IsNullOrEmpty(rawText))
        {
            continue;
        }

        string paraText = rawText.ToLower();
        foreach (string topicTerm in topicTerms)
        {
            // A separate check for topicTerm + "s" is unnecessary: any text containing the
            // plural form also contains the term itself.
            if (paraText.Contains(topicTerm))
            {
                paragraphRange.HighlightColorIndex = WdColorIndex.wdYellow;
                highlighted.Append(paraText).Append("\t");
                break;
            }
        }
    }

    return highlighted.ToString();
}
/// <summary>
/// Writes a copy of the PDF at <c>pdfFilePath</c> to <c>highlightedPDFPath</c> with a yellow
/// highlight annotation over each region where a topic term was located.
/// </summary>
/// <param name="backgroundWorker">Worker handed to <c>OutputMg.OutputContent</c> for progress messages.</param>
/// <returns>
/// <c>pageContents</c>: page number mapped to the distinct highlighted texts of that page, one per
/// line. Pages on which nothing was located get no entry.
/// </returns>
public Dictionary<int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + pdfFilePath);

    List<string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);
    AddUserSearchTerms(topicTerms);

    // stampReader feeds the stamper and was never closed in the original code; it is now disposed
    // last, after the stamper has been closed. scanReader is a second reader over the same file
    // used for page sizes and term location.
    using (PdfReader stampReader = new PdfReader(pdfFilePath))
    using (FileStream fs = new FileStream(highlightedPDFPath, FileMode.Create, FileAccess.Write, FileShare.None))
    using (PdfStamper stamper = new PdfStamper(stampReader, fs))
    using (PdfReader scanReader = new PdfReader(pdfFilePath))
    {
        int pageCount = scanReader.NumberOfPages;
        for (int page = 1; page <= pageCount; page++)
        {
            OutputMg.OutputContent(backgroundWorker, "Parsing page: " + page);

            Rectangle pageRect = scanReader.GetPageSize(page);
            // The Document is created only to read the left margin it assigns for this page size.
            float leftMargin = new Document(pageRect).LeftMargin;
            float lineWidth = pageRect.Width;

            // Running the extractor fills textPos.myPoints with the text and position of each
            // hit; the extracted page text itself is not needed.
            var textPos = new FutherLocationTextExtractionStrategy(topicTerms);
            PdfTextExtractor.GetTextFromPage(scanReader, page, textPos);

            // Collect the coordinates of the located keywords.
            List<iTextSharp.text.Rectangle> quadList = new List<iTextSharp.text.Rectangle>();
            foreach (var point in textPos.myPoints)
            {
                quadList.Add(point.Rect);
            }

            if (quadList.Count == 0)
            {
                continue;
            }

            // Merge and adjust the rectangles; the adjusted rectangles are what gets highlighted.
            List<iTextSharp.text.Rectangle> orderedRect = orderRectByBottom(quadList);
            List<iTextSharp.text.Rectangle> adjustedRect = adjustRect(orderedRect, lineWidth, leftMargin);

            List<string> pageContent = new List<string>();
            foreach (Rectangle rect in adjustedRect)
            {
                // Quad points (the four corners) for the rectangle.
                // NOTE: The order below doesn't appear to match the actual spec but is what Acrobat produces.
                float[] quad = { rect.Left, rect.Bottom, rect.Right, rect.Bottom, rect.Left, rect.Top, rect.Right, rect.Top };

                PdfAnnotation highlight = PdfAnnotation.CreateMarkup(stamper.Writer, rect, null, PdfAnnotation.MARKUP_HIGHLIGHT, quad);
                highlight.Color = BaseColor.YELLOW;
                stamper.AddAnnotation(highlight, page);

                // Extract the text lying inside the highlighted region.
                RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                ITextExtractionStrategy strategy = new MyFilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                string text = PdfTextExtractor.GetTextFromPage(stampReader, page, strategy).Trim();
                if (!pageContent.Contains(text))
                {
                    pageContent.Add(text);
                }
            }

            StringBuilder sb = new StringBuilder();
            foreach (string line in pageContent)
            {
                sb.AppendLine(line);
            }

            // NOTE(review): pageContents is not declared in this method (presumably a field).
            // Add throws on a duplicate page key, so confirm it is empty when this method runs.
            pageContents.Add(page, sb.ToString());
        }
    }

    return pageContents;
}