/// <summary>
/// Handles completion of the background indexing run: resets the progress state and,
/// when a result is available, publishes its counts and word lists to the view properties.
/// </summary>
/// <param name="sender">Source of the completion event (not used).</param>
/// <param name="e">Completion data carrying the error, the cancellation flag or the result.</param>
private void BackgroundWorker_WorkCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    // Reset the run state first; this happens regardless of how the run ended.
    ProgressValue = 0;
    IsIndexingInProgress = false;
    HasIndexingResult = true;
    NotifyPropertyChanged(() => HasIndexingResult);
    UpdateCanStartIndexing();

    // An error takes precedence over cancellation; e.Result must not be read in either case.
    if (e.Error != null || e.Cancelled)
    {
        Status = e.Error != null ? e.Error.Message : "Process Cancelled.";
        return;
    }

    IndexerResult result = (IndexerResult)e.Result;
    if (result == null)
    {
        Status = "Process did not return any result.";
        return;
    }

    string lineBreak = Environment.NewLine;

    // Counts
    IdentifierCount = result.GetSplitResultList().Count;

    var naturalWords = result.GetDictionaryWordList();
    DictionaryWordsCount = naturalWords.Count;

    var abbreviations = result.GetTokenList();
    TokensCount = abbreviations.Count;

    var unknownWords = result.GetUnidentifiedList();
    UnidentifiedWordsCount = unknownWords.Count;

    // Display text: one entry per line.
    DictionaryWords = string.Join(
        lineBreak,
        naturalWords.Keys.OrderBy(word => word).ThenBy(word => word.Length));
    Tokens = string.Join(
        lineBreak,
        abbreviations.Keys.OrderBy(word => word).ThenBy(word => word.Length));
    // Unidentified words are ordered by length first, then by text.
    UnidentifiedWords = string.Join(
        lineBreak,
        unknownWords.Keys.OrderBy(word => word.Length).ThenBy(word => word));
    CorrectedWords = string.Join(
        lineBreak,
        result.GetCorrectionDictionary()
            .OrderBy(entry => entry.Key)
            .Select(entry => entry.Key + ": " + entry.Value.Word));
    StemmedWords = string.Join(
        lineBreak,
        result.GetStemmedDictionary()
            .OrderBy(entry => entry.Key)
            .Select(entry => entry.Key + ": " + entry.Value.Word));

    // Raise change notifications for everything assigned above.
    NotifyPropertyChanged(() => IdentifierCount);
    NotifyPropertyChanged(() => DictionaryWordsCount);
    NotifyPropertyChanged(() => TokensCount);
    NotifyPropertyChanged(() => UnidentifiedWordsCount);
    NotifyPropertyChanged(() => DictionaryWords);
    NotifyPropertyChanged(() => UnidentifiedWords);
    NotifyPropertyChanged(() => Tokens);
    NotifyPropertyChanged(() => StemmedWords);
    NotifyPropertyChanged(() => CorrectedWords);

    // Keep the result for Export.
    _result = result;
    Status = "";
}
/// <summary>
/// Exports the current indexing result into <paramref name="exportFolderPath"/> as three files
/// named after the project: "_file.txt" (words per selected file), "_tokens.txt"
/// (word lists grouped by category) and "_split.html" (colour-coded identifier splits).
/// </summary>
/// <param name="exportFolderPath">Target folder; trailing backslashes are ignored.</param>
/// <exception cref="ArgumentNullException"><paramref name="exportFolderPath"/> is null.</exception>
/// <exception cref="InvalidOperationException">No indexing result is available to export.</exception>
public void Export(string exportFolderPath)
{
    if (exportFolderPath == null)
    {
        throw new ArgumentNullException("exportFolderPath");
    }

    if (_result == null)
    {
        throw new InvalidOperationException("There is no indexing result to export.");
    }

    // Common prefix "<folder>\<project name>" shared by the three output files.
    string exportPrefix = exportFolderPath.TrimEnd('\\') + "\\" + _projectStat.Name;

    ExportFileWordIndex(exportPrefix + "_file.txt");
    ExportTokenSummary(exportPrefix + "_tokens.txt");
    ExportSplitHtml(exportPrefix + "_split.html");
}

/// <summary>
/// Writes one line per selected file: the file name followed by every distinct word found in it.
/// </summary>
private void ExportFileWordIndex(string path)
{
    // NOTE(review): this file is opened in append mode while the other two exports overwrite;
    // repeated exports therefore accumulate lines. Kept as-is - confirm whether that is intended.
    using (StreamWriter fileWriter = new StreamWriter(path, true))
    {
        // Iterate over a snapshot of the selection.
        foreach (var file in SelectedFiles.ToList())
        {
            // Union is deliberate here: a word is listed once per file even if it is in several lists.
            var allWords = _result.GetDictionaryWordList().Where(x => x.Value.Contains(file)).Select(x => x.Key).OrderBy(x => x).ThenBy(x => x.Length)
                .Union(_result.GetTokenList().Where(x => x.Value.Contains(file)).Select(x => x.Key).OrderBy(x => x).ThenBy(x => x.Length))
                .Union(_result.GetUnidentifiedList().Where(x => x.Value.Contains(file)).Select(x => x.Key).OrderBy(x => x.Length).ThenBy(x => x))
                .Union(_result.GetCorrectionDictionary().Where(x => x.Value.IndexerFiles.Contains(file)).Select(x => x.Key).OrderBy(x => x))
                .Union(_result.GetStemmedDictionary().Where(x => x.Value.IndexerFiles.Contains(file)).Select(x => x.Key).OrderBy(x => x));

            fileWriter.WriteLine(file.Name + " " + string.Join(" ", allWords));
        }
    }
}

/// <summary>
/// Writes every word list of the result under its own section header.
/// </summary>
private void ExportTokenSummary(string path)
{
    // Concat, not Union: Union is a set operation and would silently drop any line that
    // already appeared in an earlier section, leaving later sections incomplete.
    File.WriteAllLines(path,
        new[] { "Natural Words:" }
            .Concat(_result.GetDictionaryWordList().Keys.OrderBy(x => x).ThenBy(x => x.Length))
            .Concat(new[] { "Abbreviations:" })
            .Concat(_result.GetTokenList().Keys.OrderBy(x => x).ThenBy(x => x.Length))
            .Concat(new[] { "Unidentified Words:" })
            .Concat(_result.GetUnidentifiedList().Keys.OrderBy(x => x.Length).ThenBy(x => x))
            .Concat(new[] { "Spell Checking:" })
            .Concat(_result.GetCorrectionDictionary().OrderBy(x => x.Key).Select(x => x.Key + ": " + x.Value.Word))
            .Concat(new[] { "Stemmed Words:" })
            .Concat(_result.GetStemmedDictionary().OrderBy(x => x.Key).Select(x => x.Key + ": " + x.Value.Word)));
}

/// <summary>
/// Writes an HTML page listing every identifier with its splits, each split coloured by its classification.
/// </summary>
private void ExportSplitHtml(string path)
{
    using (StreamWriter fileWriter = new StreamWriter(path, false))
    {
        fileWriter.WriteLine(
            "<!DOCTYPE HTML><html><head><style>"
            + "span{margin: 5px;}"
            + ".unidentified{color: red;}"
            + ".identified{color: black;}"
            + ".token{color: blue;}"
            + ".misspelled{color: green}"
            + ".stemmed{color: #AA3333;}"
            + "</style></head><body>");
        fileWriter.WriteLine("<h3>Color Index</h3>");
        fileWriter.WriteLine("<span class='identified'>Natural word</span><br />");
        fileWriter.WriteLine("<span class='unidentified'>Unidentified word</span><br />");
        fileWriter.WriteLine("<span class='token'>Abbreviation</span><br />");
        fileWriter.WriteLine("<span class='misspelled'>Misspelled word</span><br />");
        fileWriter.WriteLine("<span class='stemmed'>Stemmed/Lemmatized word</span><br />");
        fileWriter.WriteLine("<br />");
        fileWriter.WriteLine("<h3>Splits</h3>");

        StringBuilder stringBuilder = new StringBuilder();
        foreach (var identifierSplitResult in _result.GetSplitResultList())
        {
            // Marker classes on the row; null entries are filtered out before joining.
            string[] rowMarkers =
            {
                identifierSplitResult.Splits.Any(x => x.SplitIdentification == SplitIdentification.Unidentified)
                    ? "has-unidentified" : null,
                identifierSplitResult.Splits.Any(x => x.SplitIdentification == SplitIdentification.Token || x.SplitIdentification == SplitIdentification.MergedToken)
                    ? "has-token" : null,
                identifierSplitResult.Splits.Any(x => x.SplitIdentification == SplitIdentification.TokenMisspelled || x.SplitIdentification == SplitIdentification.WordMisspelled)
                    ? "has-misspelled" : null
            };

            // Class names must be space-separated, otherwise they merge into one meaningless class.
            stringBuilder.Append("<div class='");
            stringBuilder.Append(string.Join(" ", rowMarkers.Where(marker => marker != null)));
            // NOTE(review): identifier and split text are written without HTML escaping.
            stringBuilder.Append("'>" + identifierSplitResult.Identifier + ": ");

            foreach (var split in identifierSplitResult.Splits)
            {
                switch (split.SplitIdentification)
                {
                    case SplitIdentification.Identified:
                        stringBuilder.Append("<span class='identified'>");
                        break;
                    case SplitIdentification.Unidentified:
                        stringBuilder.Append("<span class='unidentified'>");
                        break;
                    case SplitIdentification.MergedToken:
                    case SplitIdentification.Token:
                    case SplitIdentification.SingleLetterIdentifier:
                        stringBuilder.Append("<span class='token'>");
                        break;
                    case SplitIdentification.TokenMisspelled:
                    case SplitIdentification.WordMisspelled:
                        stringBuilder.Append("<span class='misspelled'>");
                        break;
                    case SplitIdentification.WordStemmed:
                    case SplitIdentification.TokenStemmed:
                        stringBuilder.Append("<span class='stemmed'>");
                        break;
                    default:
                        // Any other classification still opens a span so the closing tag below stays balanced.
                        stringBuilder.Append("<span>");
                        break;
                }

                stringBuilder.Append(split.Split);
                stringBuilder.Append("</span>");
            }

            stringBuilder.Append("</div>");
            fileWriter.WriteLine(stringBuilder.ToString());
            stringBuilder.Clear();
        }

        fileWriter.WriteLine("</body></html>");
    }
}
/// <summary>
/// Runs the indexing pipeline over every configured file: extracts identifiers, splits them,
/// folds in merged-token / misspelled / stemmed information, optionally stems the identified
/// dictionary words and finally filters the result.
/// </summary>
/// <returns>Indexer Result</returns>
/// <exception cref="KeyNotFoundException">
/// No extractor is registered for a file's extension and the user chose not to skip the file.
/// </exception>
/// <remarks>
/// Any other exception raised while processing a file is rethrown when the user chooses not to skip that file.
/// </remarks>
private IndexerResult GetResult()
{
    IndexerResult indexerResult = new IndexerResult();
    _configuration.Splitter.SetResultPhase(true);

    // extract
    int totalFileCount = _configuration.FilesToScan.Count;
    int currentFileCount = 0;
    foreach (IndexerFile file in _configuration.FilesToScan)
    {
        // Set when the user has already declined to skip this file, so the catch block
        // does not ask the same question a second time.
        bool skipAlreadyDeclined = false;
        try
        {
            _configuration.NotificationHandler.UpdateStatus(NotificationType.ReadingFileForIdentifiers, currentFileCount, totalFileCount, "Extracting file identifier: " + file.Name);

            // Single lookup instead of ContainsKey followed by the indexer.
            ITextExtractor textExtractor;
            if (!_textExtractors.TryGetValue(file.Extension, out textExtractor))
            {
                string message = "No extractor is defined for file extension: " + file.Extension + ". Do you want to skip this file?";
                if (_configuration.NotificationHandler.GetYesNoAnswer(QuestionType.NoTextExtratorDefined, message))
                {
                    continue;
                }

                // The file cannot be processed and must not be skipped: abort with a meaningful
                // message rather than falling through to a failing dictionary lookup.
                skipAlreadyDeclined = true;
                throw new KeyNotFoundException("No extractor is defined for file extension: " + file.Extension);
            }

            string fileText = File.ReadAllText(file.Path);
            foreach (string identifier in textExtractor.Extract(fileText, _configuration.ExtractType))
            {
                _configuration.NotificationHandler.UpdateStatus(NotificationType.Splitting, currentFileCount, totalFileCount, "Splitting token: " + identifier + " in file: " + file.Name);
                IdentifierSplitResult identifierSplitResult = new IdentifierSplitResult(identifier, file);
                identifierSplitResult.Add(_configuration.Splitter.Split(identifier));
                indexerResult.AddSplitResult(identifierSplitResult);
            }
        }
        catch (Exception e)
        {
            if (skipAlreadyDeclined)
            {
                throw;
            }

            string additionalMessage = "Error reading file. " + Environment.NewLine
                + "File: " + file.Name + Environment.NewLine
                + "Message: " + e.Message + Environment.NewLine
                + "Do you want to skip this file?";
            if (!_configuration.NotificationHandler.GetYesNoAnswer(QuestionType.ErrorReadingFile, additionalMessage))
            {
                throw;
            }
        }
        finally
        {
            // Runs for processed, skipped and failed files alike, so progress always advances.
            currentFileCount++;
        }
    }

    // Since while adding result we do not have merged token, misspelled and stemmed word info, filter them and add to respective list
    indexerResult.UpdateFromMergeToken(_tokenDictionary);
    indexerResult.UpdateFromMisspelled(_tokenDictionary);
    indexerResult.UpdateFromStemmed(_tokenDictionary);

    // Filter 3: Stem every identified. If the word is identified replace the word with stemmed word
    if (_configuration.Stemmer != null)
    {
        // Work on a copy of the keys: the loop body may modify the result while iterating.
        List<string> dictionaryWordList = indexerResult.GetDictionaryWordList().Keys.ToList();
        int totalIdentifiedCount = dictionaryWordList.Count;
        int currentIdentifiedCount = 0;
        foreach (string identified in dictionaryWordList)
        {
            currentIdentifiedCount++;
            _configuration.NotificationHandler.UpdateStatus(NotificationType.Stemming, currentIdentifiedCount, totalIdentifiedCount, "Stemming: " + identified);

            string stemmedText = _configuration.Stemmer.GetStemmedText(identified);
            // Only replace when the stem differs from the word and is itself a dictionary word.
            if (stemmedText != null && stemmedText != identified && _configuration.Dictionary.IsWord(stemmedText))
            {
                indexerResult.AddStemmedWordAndReplaceIdentified(identified, stemmedText);
            }
        }
    }

    // Filter result
    indexerResult.RemoveFilterWordAndTokenResult(_configuration.Dictionary);

    _configuration.NotificationHandler.UpdateStatus(NotificationType.IndexingCompleted, 1, 1, "Indexing Completed");
    return indexerResult;
}