/// <summary>
        /// Handles completion of the background indexing run. Resets the progress
        /// state, then either reports the error / cancellation through <c>Status</c>
        /// or publishes the counts and word lists taken from the indexer result.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Completion data carrying the error, the cancellation flag and the result.</param>
        private void BackgroundWorker_WorkCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            // Leave the "in progress" state first, whatever the outcome was.
            ProgressValue        = 0;
            IsIndexingInProgress = false;
            HasIndexingResult    = true;
            NotifyPropertyChanged(() => HasIndexingResult);
            UpdateCanStartIndexing();

            // Error and cancellation are checked before e.Result is touched.
            if (e.Error != null)
            {
                Status = e.Error.Message;
                return;
            }
            else if (e.Cancelled)
            {
                Status = "Process Cancelled.";
                return;
            }

            IndexerResult indexed = (IndexerResult)e.Result;
            if (indexed == null)
            {
                Status = "Process did not return any result.";
                return;
            }

            // Summary counters.
            IdentifierCount        = indexed.GetSplitResultList().Count;
            DictionaryWordsCount   = indexed.GetDictionaryWordList().Count;
            TokensCount            = indexed.GetTokenList().Count;
            UnidentifiedWordsCount = indexed.GetUnidentifiedList().Count;

            // Word lists, one entry per line.
            var sortedDictionaryWords = from word in indexed.GetDictionaryWordList().Keys
                                        orderby word, word.Length
                                        select word;
            DictionaryWords = string.Join(Environment.NewLine, sortedDictionaryWords);

            var sortedTokens = from word in indexed.GetTokenList().Keys
                               orderby word, word.Length
                               select word;
            Tokens = string.Join(Environment.NewLine, sortedTokens);

            // Unidentified words are ordered by length first, then by text.
            var sortedUnidentified = from word in indexed.GetUnidentifiedList().Keys
                                     orderby word.Length, word
                                     select word;
            UnidentifiedWords = string.Join(Environment.NewLine, sortedUnidentified);

            var correctionLines = from entry in indexed.GetCorrectionDictionary()
                                  orderby entry.Key
                                  select entry.Key + ": " + entry.Value.Word;
            CorrectedWords = string.Join(Environment.NewLine, correctionLines);

            var stemmedLines = from entry in indexed.GetStemmedDictionary()
                               orderby entry.Key
                               select entry.Key + ": " + entry.Value.Word;
            StemmedWords = string.Join(Environment.NewLine, stemmedLines);

            // Raise change notifications for everything assigned above.
            NotifyPropertyChanged(() => IdentifierCount);
            NotifyPropertyChanged(() => DictionaryWordsCount);
            NotifyPropertyChanged(() => TokensCount);
            NotifyPropertyChanged(() => UnidentifiedWordsCount);
            NotifyPropertyChanged(() => DictionaryWords);
            NotifyPropertyChanged(() => UnidentifiedWords);
            NotifyPropertyChanged(() => Tokens);
            NotifyPropertyChanged(() => StemmedWords);
            NotifyPropertyChanged(() => CorrectedWords);

            // Keep the result for a later Export call and clear the status text.
            _result = indexed;
            Status  = "";
        }
        /// <summary>
        /// Exports the stored indexing result into three files inside
        /// <paramref name="exportFolderPath"/>, each named after the project:
        /// "&lt;name&gt;_file.txt" (one line per selected file with its words),
        /// "&lt;name&gt;_tokens.txt" (word lists grouped under section headers) and
        /// "&lt;name&gt;_split.html" (color-coded splits of every identifier).
        /// </summary>
        /// <param name="exportFolderPath">Destination folder; trailing backslashes are ignored.</param>
        /// <exception cref="ArgumentNullException"><paramref name="exportFolderPath"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No indexing result has been stored yet.</exception>
        public void Export(string exportFolderPath)
        {
            if (exportFolderPath == null)
            {
                throw new ArgumentNullException("exportFolderPath");
            }

            if (_result == null)
            {
                // _result is only assigned after a successful indexing run.
                throw new InvalidOperationException("There is no indexing result to export.");
            }

            string filePrefix = exportFolderPath.TrimEnd('\\') + "\\" + _projectStat.Name;

            // NOTE(review): this writer appends while the other two exports overwrite, so exporting
            // twice to the same folder duplicates the per-file lines - confirm whether that is intended.
            using (StreamWriter fileWriter = new StreamWriter(filePrefix + "_file.txt", true))
            {
                foreach (var file in SelectedFiles.ToList())
                {
                    // Union keeps every word only once on the line of a given file.
                    var allWords = _result.GetDictionaryWordList().Where(x => x.Value.Contains(file)).Select(x => x.Key).OrderBy(x => x).ThenBy(x => x.Length)
                                   .Union(_result.GetTokenList().Where(x => x.Value.Contains(file)).Select(x => x.Key).OrderBy(x => x).ThenBy(x => x.Length))
                                   .Union(_result.GetUnidentifiedList().Where(x => x.Value.Contains(file)).Select(x => x.Key).OrderBy(x => x.Length).ThenBy(x => x))
                                   .Union(_result.GetCorrectionDictionary().Where(x => x.Value.IndexerFiles.Contains(file)).Select(x => x.Key).OrderBy(x => x))
                                   .Union(_result.GetStemmedDictionary().Where(x => x.Value.IndexerFiles.Contains(file)).Select(x => x.Key).OrderBy(x => x));
                    fileWriter.WriteLine(file.Name + " " + string.Join(" ", allWords));
                    fileWriter.Flush();
                }
            }

            // Concat (not Union): the sections are independent lists, so a line that already
            // appeared in an earlier section must not be dropped from a later one.
            var tokenLines = new[] { "Natural Words:" }
                             .Concat(_result.GetDictionaryWordList().Keys.OrderBy(x => x).ThenBy(x => x.Length))
                             .Concat(new[] { "Abbreviations:" })
                             .Concat(_result.GetTokenList().Keys.OrderBy(x => x).ThenBy(x => x.Length))
                             .Concat(new[] { "Unidentified Words:" })
                             .Concat(_result.GetUnidentifiedList().Keys.OrderBy(x => x.Length).ThenBy(x => x))
                             .Concat(new[] { "Spell Checking:" })
                             .Concat(_result.GetCorrectionDictionary().OrderBy(x => x.Key).Select(x => x.Key + ": " + x.Value.Word))
                             .Concat(new[] { "Stemmed Words:" })
                             .Concat(_result.GetStemmedDictionary().OrderBy(x => x.Key).Select(x => x.Key + ": " + x.Value.Word));
            File.WriteAllLines(filePrefix + "_tokens.txt", tokenLines);

            using (StreamWriter fileWriter = new StreamWriter(filePrefix + "_split.html", false))
            {
                fileWriter.WriteLine("<!DOCTYPE HTML><html><head><style>span{margin: 5px;}.unidentified{color: red;}.identified{color: black;}.token{color: blue;}.misspelled{color: green}.stemmed{color: #AA3333;}</style></head><body>");
                fileWriter.WriteLine("<h3>Color Index</h3>");
                fileWriter.WriteLine("<span class='identified'>Natural word</span><br />");
                fileWriter.WriteLine("<span class='unidentified'>Unidentified word</span><br />");
                fileWriter.WriteLine("<span class='token'>Abbreviation</span><br />");
                fileWriter.WriteLine("<span class='misspelled'>Misspelled word</span><br />");
                fileWriter.WriteLine("<span class='stemmed'>Stemmed/Lemmatized word</span><br />");
                fileWriter.WriteLine("<br />");
                fileWriter.WriteLine("<h3>Splits</h3>");

                StringBuilder stringBuilder = new StringBuilder();
                foreach (var identifierSplitResult in _result.GetSplitResultList())
                {
                    var splits = identifierSplitResult.Splits.ToList();

                    // Marker classes on the row; joined with spaces so each stays a separate CSS class.
                    string rowClasses = string.Join(" ", new[]
                    {
                        splits.Any(x => x.SplitIdentification == SplitIdentification.Unidentified)
                            ? "has-unidentified" : null,
                        splits.Any(x => x.SplitIdentification == SplitIdentification.Token || x.SplitIdentification == SplitIdentification.MergedToken)
                            ? "has-token" : null,
                        splits.Any(x => x.SplitIdentification == SplitIdentification.TokenMisspelled || x.SplitIdentification == SplitIdentification.WordMisspelled)
                            ? "has-misspelled" : null
                    }.Where(name => name != null));

                    stringBuilder.Append("<div class='").Append(rowClasses).Append("'>");
                    stringBuilder.Append(HtmlEncodeForExport(identifierSplitResult.Identifier)).Append(": ");

                    foreach (var split in splits)
                    {
                        string spanClass;
                        switch (split.SplitIdentification)
                        {
                        case SplitIdentification.Identified:
                            spanClass = "identified";
                            break;

                        case SplitIdentification.Unidentified:
                            spanClass = "unidentified";
                            break;

                        case SplitIdentification.MergedToken:
                        case SplitIdentification.Token:
                        case SplitIdentification.SingleLetterIdentifier:
                            spanClass = "token";
                            break;

                        case SplitIdentification.TokenMisspelled:
                        case SplitIdentification.WordMisspelled:
                            spanClass = "misspelled";
                            break;

                        case SplitIdentification.WordStemmed:
                        case SplitIdentification.TokenStemmed:
                            spanClass = "stemmed";
                            break;

                        default:
                            // Any other value still gets an opening tag so the closing </span> stays balanced.
                            spanClass = null;
                            break;
                        }

                        stringBuilder.Append(spanClass == null ? "<span>" : "<span class='" + spanClass + "'>");
                        stringBuilder.Append(HtmlEncodeForExport(split.Split));
                        stringBuilder.Append("</span>");
                    }

                    stringBuilder.Append("</div>");
                    fileWriter.WriteLine(stringBuilder.ToString());
                    stringBuilder.Clear();
                }

                fileWriter.WriteLine("</body></html>");
            }
        }

        /// <summary>
        /// Converts <paramref name="value"/> to text and HTML-encodes it so that characters such as
        /// '&lt;' or '&amp;' in exported identifiers cannot break the generated markup.
        /// </summary>
        private static string HtmlEncodeForExport(object value)
        {
            return System.Net.WebUtility.HtmlEncode(Convert.ToString(value));
        }
        // Example #3
        /// <summary>
        /// Builds the indexer result: extracts identifiers from every configured file, splits
        /// them, merges the merged-token / misspelled / stemmed information from the token
        /// dictionary, optionally stems the dictionary words, and finally filters the result.
        /// </summary>
        /// <returns>Indexer Result</returns>
        /// <remarks>
        /// A per-file failure is reported through the notification handler; the exception is
        /// rethrown only when the user answers that the file must not be skipped.
        /// </remarks>
        private IndexerResult GetResult()
        {
            IndexerResult indexerResult = new IndexerResult();

            _configuration.Splitter.SetResultPhase(true);

            // extract
            int totalFileCount   = _configuration.FilesToScan.Count;
            int currentFileCount = 0;

            foreach (IndexerFile file in _configuration.FilesToScan)
            {
                try
                {
                    _configuration.NotificationHandler.UpdateStatus(NotificationType.ReadingFileForIdentifiers, currentFileCount, totalFileCount, "Extracting file identifier: " + file.Name);

                    ITextExtractor textExtractor;
                    if (!_textExtractors.TryGetValue(file.Extension, out textExtractor))
                    {
                        string message = "No extractor is defined for file extension: " + file.Extension + ".  Do you want to skip this file?";
                        if (_configuration.NotificationHandler.GetYesNoAnswer(QuestionType.NoTextExtratorDefined, message))
                        {
                            continue;
                        }

                        // The file cannot be processed without an extractor. Fail with a descriptive
                        // message instead of an opaque key-lookup error; the catch below still lets
                        // the user decide whether to skip the file or abort.
                        throw new NotSupportedException("No extractor is defined for file extension: " + file.Extension);
                    }

                    string fileText = File.ReadAllText(file.Path);
                    foreach (string identifier in textExtractor.Extract(fileText, _configuration.ExtractType))
                    {
                        _configuration.NotificationHandler.UpdateStatus(NotificationType.Splitting, currentFileCount, totalFileCount, "Splitting token: " + identifier + " in file: " + file.Name);
                        IdentifierSplitResult identifierSplitResult = new IdentifierSplitResult(identifier, file);
                        identifierSplitResult.Add(_configuration.Splitter.Split(identifier));
                        indexerResult.AddSplitResult(identifierSplitResult);
                    }
                }
                catch (Exception e)
                {
                    string additionalMessage = "Error reading file. " + Environment.NewLine + "File: " + file.Name +
                                               Environment.NewLine + "Message: " + e.Message + Environment.NewLine +
                                               "Do you want to skip this file?";
                    if (!_configuration.NotificationHandler.GetYesNoAnswer(QuestionType.ErrorReadingFile, additionalMessage))
                    {
                        throw;
                    }
                }
                finally
                {
                    // Counted for processed, skipped and failed files alike.
                    currentFileCount++;
                }
            }

            // Since while adding result we do not have merged token, misspelled and stemmed word info, filter them and add to respective list
            indexerResult.UpdateFromMergeToken(_tokenDictionary);
            indexerResult.UpdateFromMisspelled(_tokenDictionary);
            indexerResult.UpdateFromStemmed(_tokenDictionary);

            // Filter 3: Stem every identified. If the word is identified replace the word with stemmed word
            if (_configuration.Stemmer != null)
            {
                // Snapshot of the keys taken up front, before the loop below starts replacing words.
                List<string> dictionaryWordList     = indexerResult.GetDictionaryWordList().Keys.ToList();
                int          totalIdentifiedCount   = dictionaryWordList.Count;
                int          currentIdentifiedCount = 0;
                foreach (string identified in dictionaryWordList)
                {
                    currentIdentifiedCount++;
                    _configuration.NotificationHandler.UpdateStatus(NotificationType.Stemming, currentIdentifiedCount, totalIdentifiedCount, "Stemming: " + identified);
                    string stemmedText = _configuration.Stemmer.GetStemmedText(identified);

                    // Replace only when stemming changed the word and the stem is itself a dictionary word.
                    if (stemmedText != null && stemmedText != identified && _configuration.Dictionary.IsWord(stemmedText))
                    {
                        indexerResult.AddStemmedWordAndReplaceIdentified(identified, stemmedText);
                    }
                }
            }

            // Filter result
            indexerResult.RemoveFilterWordAndTokenResult(_configuration.Dictionary);

            _configuration.NotificationHandler.UpdateStatus(NotificationType.IndexingCompleted, 1, 1, "Indexing Completed");
            return indexerResult;
        }