/// <summary>
/// Runs typo-correction detection on a log file, saves the detection results,
/// passes each detected pattern to <c>ProcessType1</c> or <c>ProcessType2</c>
/// together with the log's XML document, and saves that document under a new name.
/// </summary>
/// <param name="fileInfo">The log file to process.</param>
/// <returns>
/// A <see cref="FileInfo"/> for the saved document, located in the same directory as
/// the input and named with the configured prefix and postfix around the original name.
/// </returns>
private FileInfo RemoveTyposFromFile(FileInfo fileInfo)
{
    AppendResult(
        fileInfo.DirectoryName,
        fileInfo.Name,
        "=========================================" + Environment.NewLine + "Remove Typos Start: " + DateTime.Now.ToString());

    LogProvider provider = new LogProvider();
    provider.OpenLog(fileInfo.FullName);

    TypoCorrectionDetector typoDetector = TypoCorrectionDetector.GetInstance();
    var patterns = typoDetector.DetectAsPatternInstances(provider);

    // Save the results to a file.
    DetectionResult result = new DetectionResult(provider.LogPath, patterns);
    result.SaveToFile(GetSaveFileName(fileInfo.DirectoryName, fileInfo.Name));
    result.ExportToCSV(GetSaveFileName(fileInfo.DirectoryName, fileInfo.Name, "csv"));

    var xmlDoc = new XmlDocument();
    xmlDoc.Load(fileInfo.FullName);

    var documentChanges = provider.LoggedEvents.OfType<DocumentChange>().ToList();

    // This should be done in reverse order, to process consecutive typo corrections correctly.
    foreach (PatternInstance pattern in patterns.Reverse())
    {
        // Locate the pattern's primary event among the document changes.
        int startIndex = documentChanges.IndexOf(pattern.PrimaryEvent as DocumentChange);

        // IndexOf yields -1 when the primary event is not a DocumentChange (the 'as'
        // cast gives null) or is not in the list; without this guard the code would
        // silently inspect element 0. A primary event with no following change would
        // otherwise index past the end of the list. Such patterns are skipped.
        if (startIndex < 0 || startIndex + 1 >= documentChanges.Count)
        {
            continue;
        }

        // Determine the type from the change that follows the primary event.
        DocumentChange followingChange = documentChanges[startIndex + 1];

        if (followingChange is Delete)
        {
            // Type 1: Insert -> Delete -> Insert
            ProcessType1(xmlDoc, documentChanges, startIndex);
        }
        else if (followingChange is Replace)
        {
            // Type 2: Insert -> Replace
            ProcessType2(xmlDoc, documentChanges, startIndex);
        }
    }

    string newPath = Path.Combine(
        fileInfo.DirectoryName,
        _settings.Prefix + Path.GetFileNameWithoutExtension(fileInfo.Name) + _settings.Postfix + fileInfo.Extension);
    xmlDoc.Save(newPath);

    // The reported number is the count of detected patterns.
    AppendResult(
        fileInfo.DirectoryName,
        fileInfo.Name,
        string.Format("{0} typo corrections have been removed" + Environment.NewLine, patterns.Count()));

    return new FileInfo(newPath);
}
/// <summary>
/// Runs typo-correction detection on a log file, passes each detected pattern to
/// <c>ProcessType1</c> or <c>ProcessType2</c> together with the log's XML document,
/// and saves that document next to the original with the postfix text appended to
/// the file name.
/// </summary>
/// <param name="fileInfo">The log file to process.</param>
/// <returns>
/// A summary message containing the input file's full path and the number of
/// detected typo-correction patterns.
/// </returns>
private string RemoveTyposFromFile(FileInfo fileInfo)
{
    LogProvider provider = new LogProvider();
    provider.OpenLog(fileInfo.FullName);

    TypoCorrectionDetector typoDetector = TypoCorrectionDetector.GetInstance();
    var patterns = typoDetector.DetectAsPatternInstances(provider);

    var xmlDoc = new XmlDocument();
    xmlDoc.Load(fileInfo.FullName);

    var documentChanges = provider.LoggedEvents.OfType<DocumentChange>().ToList();

    // This should be done in reverse order, to process consecutive typo corrections correctly.
    foreach (PatternInstance pattern in patterns.Reverse())
    {
        // Locate the pattern's primary event among the document changes.
        int startIndex = documentChanges.IndexOf(pattern.PrimaryEvent as DocumentChange);

        // IndexOf yields -1 when the primary event is not a DocumentChange (the 'as'
        // cast gives null) or is not in the list; without this guard the code would
        // silently inspect element 0. A primary event with no following change would
        // otherwise index past the end of the list. Such patterns are skipped.
        if (startIndex < 0 || startIndex + 1 >= documentChanges.Count)
        {
            continue;
        }

        // Determine the type from the change that follows the primary event.
        DocumentChange followingChange = documentChanges[startIndex + 1];

        if (followingChange is Delete)
        {
            // Type 1: Insert -> Delete -> Insert
            ProcessType1(xmlDoc, documentChanges, startIndex);
        }
        else if (followingChange is Replace)
        {
            // Type 2: Insert -> Replace
            ProcessType2(xmlDoc, documentChanges, startIndex);
        }
    }

    string newPath = Path.Combine(
        fileInfo.DirectoryName,
        Path.GetFileNameWithoutExtension(fileInfo.Name) + textPostfix.Text + fileInfo.Extension);
    xmlDoc.Save(newPath);

    // The reported number is the count of detected patterns.
    return string.Format("[{0}] {1} typo corrections have been removed", fileInfo.FullName, patterns.Count());
}