/// <summary>
/// Generates a summary for every component returned by the term combiner and
/// stores each one in <c>mCompSummDictionary</c>, reporting progress via <c>OutputMg</c>.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <param name="searchTermFilePath">Path handed to <c>CombineTerms.GetCombinedTerms</c>.</param>
/// <param name="cleanComponentDir">
/// Prefix of the per-component text files; the file read for a component is
/// <c>cleanComponentDir + component + ".txt"</c> (plain concatenation, no separator is inserted).
/// </param>
/// <remarks>
/// Components that already have an entry in <c>mCompSummDictionary</c> are skipped,
/// matching <c>GenerateCompSummary</c>; previously a second <c>Add</c> for the same key threw.
/// A null term map is treated as "nothing to summarise".
/// </remarks>
private void TryToLoadCompSummary(BackgroundWorker backgroundWorker, string searchTermFilePath, string cleanComponentDir)
{
    OutputMg.OutputContent(backgroundWorker, "Start to combine terms");
    CombineTerms termCombiner = new CombineTerms();
    Dictionary<string, List<string>> compTerms = termCombiner.GetCombinedTerms(searchTermFilePath, null);
    OutputMg.OutputContent(backgroundWorker, "Finished combining terms");

    OutputMg.OutputContent(backgroundWorker, "Start to generate summary");
    if (compTerms != null)
    {
        foreach (KeyValuePair<string, List<string>> entry in compTerms)
        {
            string comp = entry.Key;

            // Already summarised (e.g. by GenerateCompSummary): Dictionary.Add would throw.
            if (mCompSummDictionary.ContainsKey(comp))
            {
                continue;
            }

            OutputMg.OutputContent(backgroundWorker, "-- Start to generate summary for " + comp);
            List<string> compTermList = entry.Value;

            // Get the sentences of the component.
            string paraFile = cleanComponentDir + comp + ".txt";
            GenerateComponentSummary sentenceSplitter = new GenerateComponentSummary();
            List<string> candidateSentences = sentenceSplitter.SplitSingleFileSentence(paraFile);

            MMRSummary summaryGenerator = new MMRSummary();
            string summary = summaryGenerator.GenerateSummary(compTermList, candidateSentences);
            mCompSummDictionary.Add(comp, summary);
            OutputMg.OutputContent(backgroundWorker, "-- Finished generating summary for " + comp);
        }
    }
    OutputMg.OutputContent(backgroundWorker, "Finished generating summary");
}
/// <summary>
/// Generates and caches the summary of a single component.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <param name="searchTermFilePath">Path handed to <c>CombineTerms.GetCombinedTerms</c> when the term map has not been built yet.</param>
/// <param name="cleanComponentDir">Prefix concatenated with <paramref name="compName"/> and ".txt" to locate the component text.</param>
/// <param name="compName">Component whose summary is requested.</param>
/// <remarks>
/// Returns without logging a "finished" message when the term map is unavailable,
/// the component is unknown, or a summary is already stored for it.
/// </remarks>
public void GenerateCompSummary(BackgroundWorker backgroundWorker, string searchTermFilePath, string cleanComponentDir, string compName)
{
    OutputMg.OutputContent(backgroundWorker, "Start to generate summary");
    OutputMg.OutputContent(backgroundWorker, "-- Start to generate summary for " + compName);

    // The combined-term map is built on first use and kept in mCompTerms.
    if (mCompTerms == null)
    {
        OutputMg.OutputContent(backgroundWorker, "Start to combine terms");
        mCompTerms = new CombineTerms().GetCombinedTerms(searchTermFilePath, null);
        OutputMg.OutputContent(backgroundWorker, "Finished combining terms");
    }

    if (mCompTerms == null)
    {
        return;
    }

    if (!mCompTerms.ContainsKey(compName))
    {
        return;
    }

    if (mCompSummDictionary.ContainsKey(compName))
    {
        return;
    }

    List<string> termsForComponent = mCompTerms[compName];

    // Candidate sentences come from the component's cleaned text file.
    string sentenceSourcePath = cleanComponentDir + compName + ".txt";
    List<string> sentencePool = new GenerateComponentSummary().SplitSingleFileSentence(sentenceSourcePath);

    // Re-checked immediately before insertion, as in the original flow.
    if (!mCompSummDictionary.ContainsKey(compName))
    {
        string generatedSummary = new MMRSummary().GenerateSummary(termsForComponent, sentencePool);
        mCompSummDictionary.Add(compName, generatedSummary);
    }

    OutputMg.OutputContent(backgroundWorker, "-- Finished generating summary for " + compName);
    OutputMg.OutputContent(backgroundWorker, "Finished generating summary");
}
/// <summary>
/// Step 0 of the pipeline: removes previous TMT output/cache and, when configured,
/// clears and re-initialises the auto working folders.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <returns><c>false</c> only when the configured folder clearing throws; otherwise <c>true</c>.</returns>
private static bool ClearFolder(BackgroundWorker backgroundWorker)
{
    // Best-effort removal of stale TMT artefacts; any failure here is intentionally ignored.
    try
    {
        FileMg.DirectoryDelete(FileMg.AutoTmtOutputFileDir, true);
        FileMg.DeleteTmtCacheFile(FileMg.AutoTmtDataFileDir);
    }
    catch
    {
    }

    bool shouldClearExisting = Configures.GetAutoIsDeleteExistingFile();
    if (!shouldClearExisting)
    {
        return true;
    }

    OutputMg.OutputHeader1(backgroundWorker, "Step 0", "Clear output folder");
    OutputMg.OutputContent(backgroundWorker, "Start to clear");

    try
    {
        FileMg.ClearAutoFolder();
        FileMg.InitDataFolder();
    }
    catch
    {
        // Any failure is reported to the user with the Administrator hint.
        OutputMg.OutputHeader1(backgroundWorker, "Failed", "Clear folder failed. Please try to run this tool as Administrator.");
        return false;
    }

    OutputMg.OutputContent(backgroundWorker, "Finished clearing");
    return true;
}
/// <summary>
/// Calculates the density per document: finds the density of all components in
/// each document and writes the ranking result.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <remarks>
/// Files are read from two levels: directly under <c>txtCleanFileDir</c> and from
/// each of its immediate sub-directories. Any existing <c>rankResult</c> file is deleted first.
/// </remarks>
public void DoClumpingRank(BackgroundWorker backgroundWorker)
{
    // Start from a clean result file.
    if (File.Exists(rankResult))
    {
        File.Delete(rankResult);
    }

    OutputMg.OutputContent(backgroundWorker, "Start parsing topic terms");
    string rawTopicTerms = FileOperators.ReadFileText(topicTermsFilePath);
    string loweredTopicTerms = rawTopicTerms.ToLower();
    topicTerms = ParseTopicTerms.GetTopicTermValueList(loweredTopicTerms);
    OutputMg.OutputContent(backgroundWorker, "Finished parsing topic terms.");

    OutputMg.OutputContent(backgroundWorker, "Start ranking topic");
    docTopicDensityMap = new Dictionary<string, Dictionary<string, float>>();
    fileNameTopicDensityMap = new Dictionary<string, Dictionary<string, float>>();

    // Level 1: files directly in the clean-text directory.
    CalDocDensityMap(Directory.GetFiles(txtCleanFileDir));

    // Level 2: files in each immediate sub-directory.
    foreach (string subDirectory in Directory.GetDirectories(txtCleanFileDir))
    {
        CalDocDensityMap(Directory.GetFiles(subDirectory));
    }
    OutputMg.OutputContent(backgroundWorker, "Finished ranking topic");

    OutputMg.OutputContent(backgroundWorker, "Start writing ranking topic");
    WriteRankingResult();
    OutputMg.OutputContent(backgroundWorker, "Finished writing ranking topic");
}
/// <summary>
/// Step 4 of the manual workflow: ranks the files related to each manually defined topic.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> and to the ranker.</param>
/// <returns><c>true</c> when the related-files output exists after ranking; otherwise <c>false</c>.</returns>
/// <remarks>
/// Earlier revisions wired in other rankers here (RankDocByClumping, RankDocByClumpingImprove,
/// TopicDocRank, RankingDocByClumpingCaleb, VSMRankDoc.TopicDocRank) with the same three path arguments.
/// </remarks>
private static bool RankTopic(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 4", "Rank Topic Relative Files");
    OutputMg.OutputContent(backgroundWorker, "Start to rank topic related files.");

    string manualTermsPath = FileMg.ManualTopicLabelFileDir + Constants.TopicManualTermFileName;
    string relatedFilesPath = FileMg.ManualTopicLabelFileDir + Constants.TopicManualRelatedFileName;

    RankDocByClumpingLessClumps ranker = new RankDocByClumpingLessClumps(manualTermsPath, FileMg.ManualCleanTextFileDir, relatedFilesPath);
    ranker.DoClumpingRank(backgroundWorker);
    Console.WriteLine("output path:" + relatedFilesPath);

    // Success is judged solely by the presence of the output file.
    bool outputExists = File.Exists(relatedFilesPath);
    OutputMg.OutputContent(
        backgroundWorker,
        outputExists ? "Finished ranking topic related files." : "Failed to rank topic related files.");
    return outputExists;
}
/// <summary>
/// Runs the three stages of component-paragraph identification in order:
/// parse component terms, parse component files, then extract the component paragraphs.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <param name="searchTerms">
/// Search-terms file, e.g.
/// C:\Users\xlian\MyPapers\Simmons\qualityRequirements\ProgramImpl\TopicManualTerms.txt
/// </param>
/// <param name="compRelatedFile">
/// Component related-files list, e.g.
/// C:\Users\xlian\MyPapers\Simmons\qualityRequirements\ProgramImpl\TOpicManualRelatedFiles.txt
/// </param>
/// <param name="paras">Forwarded unchanged to <c>ExtractCompParagraphs</c>.</param>
/// <param name="storeFile">Forwarded unchanged to <c>ExtractCompParagraphs</c>.</param>
public void IdentifyComponentPara(BackgroundWorker backgroundWorker, string searchTerms, string compRelatedFile, string paras, string storeFile)
{
    // Stage 1: component terms.
    OutputMg.OutputContent(backgroundWorker, "Start parsing component terms");
    ParseCompTerms(searchTerms);
    OutputMg.OutputContent(backgroundWorker, "Parsing component terms has been done");

    // Stage 2: component files.
    OutputMg.OutputContent(backgroundWorker, "Start parsing component files");
    ParseCompFiles(compRelatedFile);
    OutputMg.OutputContent(backgroundWorker, "Parsing component file has been done");

    // Stage 3: paragraph extraction.
    OutputMg.OutputContent(backgroundWorker, "Start extracting component paragraphs");
    ExtractCompParagraphs(compRelatedFile, paras, storeFile);
    OutputMg.OutputContent(backgroundWorker, "Extracting component paragraphs has been done");
}
/// <summary>
/// Step 2 of the auto workflow: counts the source files and runs text extraction on them.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> and to the extractor.</param>
/// <returns>Always <c>true</c>; failures surface only as exceptions from the called helpers.</returns>
private static bool ExtractSourceFiles(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 2", "Extract Source File");

    OutputMg.OutputContent(backgroundWorker, "Start to count number under " + FileMg.AutoSourceFileDir);
    int totalSourceFiles = FileMg.CountFileNumber(FileMg.AutoSourceFileDir);
    OutputMg.OutputContent(backgroundWorker, "Finished counting. Total file number is: " + totalSourceFiles);

    OutputMg.OutputContent(backgroundWorker, "Start to extract files");
    ExtractMg.ExtractFile(
        FileMg.AutoSourceFileDir,
        FileMg.AutoExtractTextFileDir,
        FileMg.AutoCleanTextFileDir,
        FileMg.AutoSemiCleanTextFileDir,
        FileMg.AutoTmtDataFileDir + Constants.TmtInputFileName,
        backgroundWorker);
    OutputMg.OutputContent(backgroundWorker, "Finished extracting files");

    return true;
}
/// <summary>
/// Step 5 of the manual workflow: extracts the paragraphs belonging to each component.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> and to the extractor.</param>
/// <returns>Always <c>true</c>; failures surface only as exceptions from the extractor.</returns>
private static bool ExtractComponent(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 5", "Extract Component Files");
    OutputMg.OutputContent(backgroundWorker, "Start to extract component files.");

    IdentifyComParagraphs paragraphExtractor = new IdentifyComParagraphs();
    string manualTermsPath = FileMg.ManualTopicLabelFileDir + Constants.TopicManualTermFileName;
    string relatedFilesPath = FileMg.ManualTopicLabelFileDir + Constants.TopicManualRelatedFileName;
    paragraphExtractor.IdentifyComponentPara(
        backgroundWorker,
        manualTermsPath,
        relatedFilesPath,
        FileMg.ManualCleanTextFileDir,
        FileMg.ManualCleanComponentFileDir);

    OutputMg.OutputContent(backgroundWorker, "Finished extracting component files.");
    return true;
}
/// <summary>
/// Step 1 of the auto workflow: counts the files under <paramref name="dirName"/> and
/// copies them into <c>FileMg.AutoSourceFileDir</c>.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> and to the copy routine.</param>
/// <param name="dirName">Directory whose content is counted and copied.</param>
/// <returns><c>true</c> on success; <c>false</c> when counting or copying throws.</returns>
private static bool CopyRawFiles(BackgroundWorker backgroundWorker, string dirName)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 1", "Copy Source File");

    bool succeeded;
    try
    {
        OutputMg.OutputContent(backgroundWorker, "Start to count number under " + dirName);
        int filesFound = FileMg.CountFileNumber(dirName);
        OutputMg.OutputContent(backgroundWorker, "Finished counting. Total file number is: " + filesFound);

        OutputMg.OutputContent(backgroundWorker, "Starting copy files");
        int filesCopied = FileMg.DirectoryCopy(dirName, FileMg.AutoSourceFileDir, true, false, backgroundWorker);
        OutputMg.OutputContent(backgroundWorker, "Finished copying files. Total file number is: " + filesCopied);

        succeeded = true;
    }
    catch
    {
        // Any failure is reported to the user with the Administrator hint.
        OutputMg.OutputHeader1(backgroundWorker, "Failed", "Copy files failed. Please try to run this tool as Administrator.");
        succeeded = false;
    }

    return succeeded;
}
/// <summary>
/// Step 5 of the auto workflow: ranks the files related to each generated topic.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> and to the ranker.</param>
/// <returns><c>true</c> when the related-files output exists after ranking; otherwise <c>false</c>.</returns>
/// <remarks>
/// Earlier revisions used RankDocByClumping and RankDocByClumpingImprove here with the same path arguments.
/// </remarks>
private static bool RankTopic(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 5", "Rank Topic Relative Files");
    OutputMg.OutputContent(backgroundWorker, "Start to rank topic related files.");

    string topicTermsPath = FileMg.AutoTopicLabelFileDir + Constants.TopicTermFileName;
    string relatedFilesPath = FileMg.AutoTopicLabelFileDir + Constants.TopicRelatedFileName;

    RankDocByClumpingLessClumps ranker = new RankDocByClumpingLessClumps(topicTermsPath, FileMg.AutoCleanTextFileDir, relatedFilesPath);
    ranker.DoClumpingRank(backgroundWorker);

    // Success is judged solely by the presence of the output file.
    bool outputExists = File.Exists(relatedFilesPath);
    OutputMg.OutputContent(
        backgroundWorker,
        outputExists ? "Finished ranking topic related files." : "Failed to rank topic related files.");
    return outputExists;
}
/// <summary>
/// Step 3 of the manual workflow: parses the user-supplied model file into the manual topic-terms file.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <param name="modelFilePath">Path of the manual model file to parse.</param>
/// <returns><c>true</c> when the topic-terms file exists after parsing; otherwise <c>false</c>.</returns>
private static bool LabelTopic(BackgroundWorker backgroundWorker, string modelFilePath)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 3", "Label the Generated Topics");

    // Make sure the output directory is there before the parser writes into it.
    if (!Directory.Exists(FileMg.ManualTopicLabelFileDir))
    {
        Directory.CreateDirectory(FileMg.ManualTopicLabelFileDir);
    }

    OutputMg.OutputContent(backgroundWorker, "Start to parse manual model file.");
    string manualTermsPath = FileMg.ManualTopicLabelFileDir + Constants.TopicManualTermFileName;
    PrepareManualModel modelParser = new PrepareManualModel(modelFilePath, manualTermsPath);
    modelParser.ParseManualModel();

    if (File.Exists(manualTermsPath))
    {
        OutputMg.OutputContent(backgroundWorker, "Finished parsing manual model file.");
        return true;
    }

    OutputMg.OutputContent(backgroundWorker, "Failed to parse manual model file, please check the file format.");
    return false;
}
/// <summary>
/// Step 3 of the auto workflow: runs the TMT tool, verifies its term-distribution
/// archive exists, and unzips that archive into the R data directory.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <returns><c>true</c> when all three stages succeed; <c>false</c> at the first failing stage.</returns>
private static bool GenerateTopicWithTmt(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 3", "Generate Topic with TMT");

    // 1. Run the TMT tool.
    OutputMg.OutputContent(backgroundWorker, "Start to run TMT");
    bool tmtStarted = TmtToolMg.RunTmtTool(Configures.GetAutoWizardTopicNumberArray(), Configures.GetAutoWizardMaxIteration());
    if (!tmtStarted)
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to startup TMT. Make sure you have authority to run command.");
        return false;
    }

    // 2. Check the TMT output; the archive name embeds the zero-padded max iteration.
    int iterationLimit = Int32.Parse(Configures.GetAutoWizardMaxIteration());
    string paddedIteration = iterationLimit.ToString("D5");
    string termDistributionArchive = FileMg.AutoTmtOutputFileDir + string.Format(Constants.TmtOutputTopicTermDistZipFilePathTemp, paddedIteration);
    if (!File.Exists(termDistributionArchive))
    {
        OutputMg.OutputContent(backgroundWorker, "Cannot find the result file of topic modeling.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished generating topic and term distribution.");

    // 3. Unzip the term distribution; the R input file must exist afterwards.
    OutputMg.OutputContent(backgroundWorker, "Start to unzip term distribution file");
    bool unzipped = UnzipToolMg.RunUnzipTool(termDistributionArchive, FileMg.AutoRDataFileDir)
        && File.Exists(FileMg.AutoRDataFileDir + Constants.RInputFileName);
    if (!unzipped)
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to unzip term distribution file. Make sure you have setup 7-zip.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished unzipping term distribution file.");

    return true;
}
/// <summary>
/// Step 4 of the auto workflow: derives topic terms from the TMT summary, runs JNSP,
/// labels the topics, computes topic similarity, and builds the BiTree with the R tool.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <returns>
/// <c>true</c> when every stage produced its expected output file; <c>false</c> at the first stage
/// whose output is missing (or when the minimum topic number is -1).
/// </returns>
/// <remarks>
/// Each stage is validated by checking that its output file exists. The similarity stage now
/// checks the similarity file; it previously re-checked the label file, so a failed similarity
/// run could never be detected.
/// </remarks>
private static bool LabelTopic(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputHeader1(backgroundWorker, "Step 4", "Label Topic");

    /* 0. Create file directory */
    if (!Directory.Exists(FileMg.AutoTopicLabelFileDir))
    {
        Directory.CreateDirectory(FileMg.AutoTopicLabelFileDir);
    }

    /* 1. Get minimum topic number */
    OutputMg.OutputContent(backgroundWorker, "Getting minimum topic number.");
    int minmumTopicNumber = PrepareTopicFile.GetMinimumTopicNumber();
    int maxIteration = Int32.Parse(Configures.GetAutoWizardMaxIteration());
    if (minmumTopicNumber == -1)
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to get minimum topic number. Please check whether you run TMT successfully.");
        return false;
    }

    /* 2. Generate topic terms file from the TMT summary */
    OutputMg.OutputContent(backgroundWorker, "Minimum topic number is " + minmumTopicNumber);
    string summaryFilePath = FileMg.AutoTmtOutputFileDir + string.Format(Constants.TmtOutputSummaryFilePathTemp, maxIteration.ToString("D5"));
    string topicTermFilePath = FileMg.AutoTopicLabelFileDir + Constants.TopicTermFileName;
    OutputMg.OutputContent(backgroundWorker, "Start to generate topic terms file.");
    PrepareTopicFile.Execute(summaryFilePath, topicTermFilePath);
    if (!File.Exists(topicTermFilePath))
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to generate topic terms file.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished generating topic terms file.");

    /* 3. Run JNSP */
    OutputMg.OutputContent(backgroundWorker, "Start to run JNSP tool.");
    if (!Directory.Exists(FileMg.AutoJNSPDataFileDir))
    {
        Directory.CreateDirectory(FileMg.AutoJNSPDataFileDir);
    }
    JNSPToolMg.RunJNSPTool();
    string jnspOutputFileName = FileMg.AutoJNSPDataFileDir + Constants.JnspOptionCNTFileName + Constants.JnspOptionWindowNumber + ".cnt";
    if (!File.Exists(jnspOutputFileName))
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to run JNSP tool.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished running JNSP tool.");

    /* 4. Label topic */
    OutputMg.OutputContent(backgroundWorker, "Start to label topic.");
    string topicLabelFilePath = FileMg.AutoTopicLabelFileDir + Constants.TopicLabelFileName;
    KDDLabel kddLabel = new KDDLabel(jnspOutputFileName, topicTermFilePath, summaryFilePath, topicLabelFilePath);
    kddLabel.GenerateTopicLabel();
    if (!File.Exists(topicLabelFilePath))
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to label topic.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished labeling topic");

    /* 5. Generate Similarity */
    OutputMg.OutputContent(backgroundWorker, "Start to generate topic similarity.");
    string topicSimilarityFilePath = FileMg.AutoTopicLabelFileDir + Constants.TopicSimilarityFileName;
    TopicSim.CalTopicSimilarity(topicTermFilePath, topicSimilarityFilePath);
    // Validate the file this stage writes (was: the label file from stage 4).
    if (!File.Exists(topicSimilarityFilePath))
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to generate topic similarity.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished generating topic similarity");

    /* 6. Running R Tool */
    OutputMg.OutputContent(backgroundWorker, "Start to generate BiTree with R tool.");
    RToolMg.RunRTool();
    if (!File.Exists(FileMg.AutoRDataFileDir + Constants.ROutputFileName))
    {
        OutputMg.OutputContent(backgroundWorker, "Failed to generate BiTree with R tool. Please make sure you have setup R tool.");
        return false;
    }
    OutputMg.OutputContent(backgroundWorker, "Finished generating BiTree with R tool");

    return true;
}
/// <summary>
/// Recursively extracts text from the PDF and Word files under <paramref name="sourceDirName"/>,
/// cleans each extracted file, and appends it to the TMT input file.
/// </summary>
/// <param name="sourceDirName">Directory to scan; must exist.</param>
/// <param name="destDirName">Directory receiving the raw extracted <c>.txt</c> files (created if missing).</param>
/// <param name="cleanDirName">Directory receiving the cleaned <c>.txt</c> files (created if missing).</param>
/// <param name="semiCleanDirName">Only combined with sub-directory names and passed down the recursion.</param>
/// <param name="tmtInputFilePath">Path forwarded to <c>GenerateTmtInputFile</c>.</param>
/// <param name="backgroundWorker">Optional worker for progress messages.</param>
/// <returns>The value of <c>curDealFileIndex</c> after this directory tree has been processed.</returns>
/// <exception cref="DirectoryNotFoundException">When <paramref name="sourceDirName"/> does not exist.</exception>
/// <remarks>
/// Extensions are matched case-insensitively, so <c>.PDF</c> / <c>.DOCX</c> files are no longer skipped.
/// The earlier PowerPoint/Excel branches were unreachable (only pdf/doc/docx passed the outer
/// filter) and have been dropped. A failure on one file is reported and does not stop the run.
/// </remarks>
private static int ExecuteExtract(string sourceDirName, string destDirName, string cleanDirName, string semiCleanDirName, string tmtInputFilePath, BackgroundWorker backgroundWorker = null)
{
    if (backgroundWorker != null)
    {
        OutputMg.OutputContent(backgroundWorker, "Extrating file from " + sourceDirName + " to " + destDirName);
    }

    DirectoryInfo dir = new DirectoryInfo(sourceDirName);
    if (!dir.Exists)
    {
        throw new DirectoryNotFoundException(
                  "Source directory does not exist or could not be found: "
                  + sourceDirName);
    }

    // If the destination directories don't exist, create them.
    if (!Directory.Exists(destDirName))
    {
        Directory.CreateDirectory(destDirName);
    }
    if (!Directory.Exists(cleanDirName))
    {
        Directory.CreateDirectory(cleanDirName);
    }

    // Extract every supported file in this directory.
    FileInfo[] files = dir.GetFiles();
    foreach (FileInfo file in files)
    {
        curDealFileIndex++;
        if (backgroundWorker != null)
        {
            // NOTE(review): this message reports curCleanFileIndex while the success/failure
            // messages below report curDealFileIndex - confirm which counter is intended.
            OutputMg.OutputContent(backgroundWorker, "Extrating file " + file.Name, curCleanFileIndex);
        }

        try
        {
            bool isPdf = file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase);
            bool isWord = file.Name.EndsWith(".doc", StringComparison.OrdinalIgnoreCase)
                          || file.Name.EndsWith(".docx", StringComparison.OrdinalIgnoreCase);

            if (isPdf || isWord)
            {
                string tempDestFileName = file.Name + ".txt";
                string tempDestPath = System.IO.Path.Combine(destDirName, tempDestFileName);
                string tempCleanPath = System.IO.Path.Combine(cleanDirName, tempDestFileName);

                // Skip extraction when the raw text already exists from a previous run.
                if (!File.Exists(tempDestPath))
                {
                    if (isPdf)
                    {
                        ExtractPDF.ExecuteExtraction(file.FullName, tempDestPath);
                    }
                    else
                    {
                        ExtractWord.ExecuteWordExtraction(file.FullName, tempDestPath);
                    }
                }

                // Clean the extracted text.
                ExtractContent.Preprocess.SemiClean cleaner = new Preprocess.SemiClean();
                cleaner.CleanDir(tempDestPath, tempCleanPath);

                // Generate tmtInputFile.csv.
                GenerateTmtInputFile(tempCleanPath, tmtInputFilePath);

                if (backgroundWorker != null)
                {
                    OutputMg.OutputContent(backgroundWorker, "Successfuly Extrated file " + file.Name, curDealFileIndex);
                }
            }
        }
        catch (Exception ex)
        {
            // Report and carry on with the next file.
            OutputMg.OutputContent(backgroundWorker, "Failed to extract file " + file.Name + " with Exception: " + ex.Message, curDealFileIndex);
        }
    }

    // Recurse into sub-directories, mirroring the directory structure in every output root.
    DirectoryInfo[] dirs = dir.GetDirectories();
    foreach (DirectoryInfo subdir in dirs)
    {
        string subDestPath = System.IO.Path.Combine(destDirName, subdir.Name);
        string subCleanPath = System.IO.Path.Combine(cleanDirName, subdir.Name);
        string subSemiCleanPath = System.IO.Path.Combine(semiCleanDirName, subdir.Name);
        ExecuteExtract(subdir.FullName, subDestPath, subCleanPath, subSemiCleanPath, tmtInputFilePath, backgroundWorker);
    }

    return curDealFileIndex;
}
/// <summary>
/// Highlights, in a Word document, every paragraph that contains one of the target topic terms
/// and saves the result to <c>highlightedFilePath</c>.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <returns>
/// For each processed page number, the tab-separated lower-cased text of the highlighted paragraphs
/// (empty string when nothing matched). If page navigation fails, the pages collected so far.
/// </returns>
/// <remarks>
/// Before opening the document, WINWORD processes with an empty main-window title, or whose title
/// contains the highlighted file's name, are killed; in the latter case the old highlighted file is deleted.
/// When page navigation throws, the document and Word are closed, a warning is shown, and the method
/// returns immediately; it previously fell through to SaveAs2/Close/Quit on already-closed objects.
/// </remarks>
public Dictionary<int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + originalFilePath);

    // If the document is open, close it first.
    System.Diagnostics.Process[] processes = System.Diagnostics.Process.GetProcessesByName("WINWORD");
    if (processes != null && processes.Length > 0)
    {
        // Full file name of the highlighted document (was Substring(dirIndex + 2), which dropped
        // the first character of the name and left it empty for paths without a backslash).
        string targetDocName = System.IO.Path.GetFileName(highlightedFilePath);
        foreach (System.Diagnostics.Process process in processes)
        {
            string windowTitle = process.MainWindowTitle.ToString();
            if (windowTitle.Length == 0)
            {
                process.Kill();
            }
            else if (targetDocName.Length > 0 && windowTitle.Contains(targetDocName))
            {
                // An empty name would match every title, hence the length guard.
                process.Kill();
                System.IO.File.Delete(highlightedFilePath);
            }
        }
    }

    Dictionary<int, string> pageContents = new Dictionary<int, string>();
    var app = new Microsoft.Office.Interop.Word.Application();
    app.Visible = false;
    object readOnly = false;
    object missing = System.Reflection.Missing.Value;
    var doc = app.Documents.Open(this.originalFilePath, missing, readOnly);
    int pageNum = doc.Content.ComputeStatistics(Microsoft.Office.Interop.Word.WdStatistic.wdStatisticPages);

    List<string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);
    AddUserSearchTerms(topicTerms);

    for (int p = 1; p <= pageNum; p++)
    {
        OutputMg.OutputContent(backgroundWorker, "Parsing page: " + p);
        string pageHighlight = "";
        object what = WdGoToItem.wdGoToPage;
        object which = WdGoToDirection.wdGoToAbsolute;
        object nextPage = p + 1;
        Range startRange;
        Range endRange;
        try
        {
            startRange = app.Selection.GoTo(ref what, ref which, p, ref missing);
            endRange = app.Selection.GoTo(what, which, nextPage, missing);
        }
        catch (Exception)
        {
            doc.Close();
            app.Quit();
            MessageBox.Show("This document is locked by author. We cannot execute highlight", "Failed", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            // The document and application are closed now; nothing can be saved.
            return pageContents;
        }

        // Same start position for both ranges: fall back to the last line as the end.
        if (startRange.Start == endRange.Start)
        {
            which = WdGoToDirection.wdGoToLast;
            what = WdGoToItem.wdGoToLine;
            endRange = app.Selection.GoTo(what, which, nextPage, missing);
        }
        endRange.SetRange(startRange.Start, endRange.End);

        foreach (Paragraph field in endRange.Paragraphs)
        {
            Range fieldRange = field.Range;
            string paraText = fieldRange.Text.ToLower();
            if (paraText.Length == 0)
            {
                continue;
            }

            foreach (string topicTerm in topicTerms)
            {
                if (paraText.Contains(topicTerm) || paraText.Contains(topicTerm + "s"))
                {
                    // One matching term is enough to highlight the paragraph.
                    fieldRange.HighlightColorIndex = WdColorIndex.wdYellow;
                    pageHighlight += paraText + "\t";
                    break;
                }
            }
        }
        pageContents.Add(p, pageHighlight);
    }

    doc.SaveAs2(this.highlightedFilePath);
    doc.Close();
    app.Quit();
    return pageContents;
}
/// <summary>
/// Copies the files of a directory (and optionally its sub-directories) to a destination,
/// leaving files that already exist at the destination untouched.
/// </summary>
/// <param name="sourceDirName">Directory to copy from; must exist.</param>
/// <param name="destDirName">Directory to copy into (created if missing).</param>
/// <param name="copySubDirs">When <c>true</c>, sub-directories are copied as well.</param>
/// <param name="createDirs">Not used here other than being passed on to the sub-directory copy.</param>
/// <param name="backgroundWorker">Optional worker for progress messages.</param>
/// <param name="currentFileIndex">Running file counter carried through the recursion.</param>
/// <returns>The file counter after this directory (and its sub-directories) has been handled.</returns>
/// <exception cref="DirectoryNotFoundException">When <paramref name="sourceDirName"/> does not exist.</exception>
public static int ToolDirectoryCopy(string sourceDirName, string destDirName, bool copySubDirs, bool createDirs, BackgroundWorker backgroundWorker = null, int currentFileIndex = 0)
{
    bool hasWorker = backgroundWorker != null;
    if (hasWorker)
    {
        OutputMg.OutputContent(backgroundWorker, "Copying file from " + sourceDirName + " to " + destDirName);
    }

    DirectoryInfo source = new DirectoryInfo(sourceDirName);
    if (!source.Exists)
    {
        throw new DirectoryNotFoundException(
                  "Source directory does not exist or could not be found: "
                  + sourceDirName);
    }

    // Sub-directories are listed before the destination is created.
    DirectoryInfo[] childDirectories = source.GetDirectories();

    if (!Directory.Exists(destDirName))
    {
        Directory.CreateDirectory(destDirName);
    }

    foreach (FileInfo entry in source.GetFiles())
    {
        try
        {
            currentFileIndex++;
            OutputMg.OutputContent(backgroundWorker, "Copying file " + entry.Name, currentFileIndex);

            string targetPath = Path.Combine(destDirName, entry.Name);
            if (!File.Exists(targetPath))
            {
                entry.CopyTo(targetPath, false);
            }
        }
        catch (Exception ex)
        {
            // A failing file is reported and skipped; the copy continues.
            if (hasWorker)
            {
                OutputMg.OutputContent(backgroundWorker, "Copying file failed with exception: " + ex.Message);
            }
        }
    }

    if (!copySubDirs)
    {
        return currentFileIndex;
    }

    // NOTE(review): the recursion goes through DirectoryCopy, not ToolDirectoryCopy - confirm this is intended.
    foreach (DirectoryInfo child in childDirectories)
    {
        string childTarget = Path.Combine(destDirName, child.Name);
        currentFileIndex = DirectoryCopy(child.FullName, childTarget, copySubDirs, createDirs, backgroundWorker, currentFileIndex);
    }

    return currentFileIndex;
}
/// <summary>
/// Writes a copy of the PDF at <c>pdfFilePath</c> to <c>highlightedPDFPath</c> with yellow highlight
/// annotations over the regions where topic terms were located, and records the highlighted text per page.
/// </summary>
/// <param name="backgroundWorker">Worker forwarded to <c>OutputMg</c> for progress messages.</param>
/// <returns>
/// The <c>pageContents</c> member, which receives one entry (page number to highlighted text, one
/// distinct text per line) for every page on which at least one term region was found.
/// </returns>
/// <remarks>
/// The reader that backs the stamper is now disposed together with the other resources; it was
/// previously never closed. Unused locals were removed.
/// </remarks>
public Dictionary<int, string> ExecuteHighlight(BackgroundWorker backgroundWorker)
{
    OutputMg.OutputContent(backgroundWorker, "Starting highlight file " + pdfFilePath);
    List<string> topicTerms = ReadTargetTopicTerms.ParseTopicTerms(this.topicTermPath, this.targetTopicName);
    AddUserSearchTerms(topicTerms);

    string origiFile = pdfFilePath;
    // The highlighted copy is created from the original file.
    string highLightFile = highlightedPDFPath;

    // Disposal runs in reverse order: r, stamper, fs, then the stamper's reader.
    using (PdfReader reader = new PdfReader(origiFile))
    using (FileStream fs = new FileStream(highLightFile, FileMode.Create, FileAccess.Write, FileShare.None))
    using (PdfStamper stamper = new PdfStamper(reader, fs))
    using (var r = new PdfReader(origiFile))
    {
        int pdfNum = r.NumberOfPages;
        for (int i = 1; i <= pdfNum; i++)
        {
            OutputMg.OutputContent(backgroundWorker, "Parsing page: " + i);
            Rectangle pageRect = r.GetPageSize(i);
            Document doc = new Document(pageRect);
            float leftMargin = doc.LeftMargin;
            float lineWidth = pageRect.Width;

            // Running the extraction fills textPos.myPoints with the located terms and their
            // rectangles; the returned page text itself is not needed.
            var textPos = new FutherLocationTextExtractionStrategy(topicTerms);
            PdfTextExtractor.GetTextFromPage(r, i, textPos);

            // Collect the coordinates of the keywords.
            List<iTextSharp.text.Rectangle> quadList = new List<iTextSharp.text.Rectangle>();
            foreach (var p in textPos.myPoints)
            {
                quadList.Add(p.Rect);
            }

            if (quadList.Count == 0)
            {
                continue;
            }

            List<string> pageContent = new List<string>();
            List<iTextSharp.text.Rectangle> orderedRect = orderRectByBottom(quadList);
            // Merge and adjust the rectangles, then highlight the adjusted ones.
            List<iTextSharp.text.Rectangle> adjustedRect = adjustRect(orderedRect, lineWidth, leftMargin);
            foreach (Rectangle rect in adjustedRect)
            {
                // Quad points (coordinates of the four corners) for the rectangle.
                // NOTE: The order below doesn't appear to match the actual spec but is what Acrobat produces.
                float[] quad = { rect.Left, rect.Bottom, rect.Right, rect.Bottom, rect.Left, rect.Top, rect.Right, rect.Top };

                PdfAnnotation highlight = PdfAnnotation.CreateMarkup(stamper.Writer, rect, null, PdfAnnotation.MARKUP_HIGHLIGHT, quad);
                highlight.Color = BaseColor.YELLOW;
                stamper.AddAnnotation(highlight, i); // i is the page

                // Get the text inside the highlighted region.
                RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                ITextExtractionStrategy strategy = new MyFilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                string text = PdfTextExtractor.GetTextFromPage(reader, i, strategy).Trim();
                if (!pageContent.Contains(text))
                {
                    pageContent.Add(text);
                }
            }

            StringBuilder sb = new StringBuilder();
            foreach (string tmp in pageContent)
            {
                sb.AppendLine(tmp);
            }
            pageContents.Add(i, sb.ToString());
        }
    }

    return pageContents;
}