/// <summary>
/// Extracts the identifiers of a single file, splits each one with the configured splitter
/// and returns the resulting splits after passing them through <c>Filter</c>.
/// </summary>
/// <param name="file">File to process; its extension selects the text extractor.</param>
/// <returns>
/// A deferred LINQ query over the filtered splits, with null and empty entries removed.
/// </returns>
/// <exception cref="Exception">No text extractor is registered for the file's extension.</exception>
public IEnumerable<string> GetResult(IndexerFile file)
{
    IndexerResult indexerResult = new IndexerResult();
    _configuration.Splitter.SetResultPhase(true);

    // Single dictionary lookup instead of ContainsKey followed by the indexer.
    ITextExtractor textExtractor;
    if (!_textExtractors.TryGetValue(file.Extension, out textExtractor))
    {
        string message = "No extractor is defined for file extension: " + file.Extension + ".";
        throw new Exception(message);
    }

    string fileText = File.ReadAllText(file.Path);

    foreach (string identifier in textExtractor.Extract(fileText, _configuration.ExtractType))
    {
        try
        {
            IdentifierSplitResult identifierSplitResult = new IdentifierSplitResult(identifier, file);
            identifierSplitResult.Add(_configuration.Splitter.Split(identifier));
            indexerResult.AddSplitResult(identifierSplitResult);
        }
        catch (Exception)
        {
            // Deliberate best effort: an identifier that fails to split is left out of the
            // result and processing moves on to the next identifier.
            continue;
        }
    }

    return indexerResult.GetSplitResultList()
        .SelectMany(x => x.Splits)
        .Select(Filter)
        .Where(x => !string.IsNullOrEmpty(x));
}
/// <summary>
/// Builds the indexer result for every file in the configuration's scan list.
/// </summary>
/// <remarks>
/// Steps, in order: extract and split the identifiers of each file, update the result from the
/// token dictionary (merged tokens, misspelled words, stemmed words), optionally stem the
/// dictionary words, and finally remove filtered words and tokens. Progress is reported through
/// the configured notification handler, which is also asked whether a problematic file should
/// be skipped.
/// </remarks>
/// <returns>Indexer Result</returns>
private IndexerResult GetResult()
{
    IndexerResult indexerResult = new IndexerResult();
    _configuration.Splitter.SetResultPhase(true);

    // Extract
    int totalFileCount = _configuration.FilesToScan.Count;
    int currentFileCount = 0;
    foreach (IndexerFile file in _configuration.FilesToScan)
    {
        try
        {
            _configuration.NotificationHandler.UpdateStatus(NotificationType.ReadingFileForIdentifiers, currentFileCount, totalFileCount, "Extracting file identifier: " + file.Name);

            // Single dictionary lookup instead of ContainsKey followed by the indexer.
            ITextExtractor textExtractor;
            if (!_textExtractors.TryGetValue(file.Extension, out textExtractor))
            {
                string message = "No extractor is defined for file extension: " + file.Extension + ". Do you want to skip this file?";
                if (_configuration.NotificationHandler.GetYesNoAnswer(QuestionType.NoTextExtratorDefined, message))
                {
                    continue;
                }

                // The user declined to skip the file. Fail explicitly, with the same exception
                // type the dictionary indexer would have raised, but with a message that names
                // the missing extension so the error prompt below is meaningful.
                throw new KeyNotFoundException("No extractor is defined for file extension: " + file.Extension + ".");
            }

            string fileText = File.ReadAllText(file.Path);
            foreach (string identifier in textExtractor.Extract(fileText, _configuration.ExtractType))
            {
                _configuration.NotificationHandler.UpdateStatus(NotificationType.Splitting, currentFileCount, totalFileCount, "Splitting token: " + identifier + " in file: " + file.Name);
                IdentifierSplitResult identifierSplitResult = new IdentifierSplitResult(identifier, file);
                identifierSplitResult.Add(_configuration.Splitter.Split(identifier));
                indexerResult.AddSplitResult(identifierSplitResult);
            }
        }
        catch (Exception e)
        {
            string additionalMessage = "Error reading file. " + Environment.NewLine
                + "File: " + file.Name + Environment.NewLine
                + "Message: " + e.Message + Environment.NewLine
                + "Do you want to skip this file?";

            // Rethrow (preserving the stack trace) unless the user agrees to skip the file.
            if (!_configuration.NotificationHandler.GetYesNoAnswer(QuestionType.ErrorReadingFile, additionalMessage))
            {
                throw;
            }
        }
        finally
        {
            // Count the file whether it was processed, skipped or failed.
            currentFileCount++;
        }
    }

    // Merged-token, misspelled and stemmed word information is not available while split results
    // are being added, so the result is updated from the token dictionary afterwards.
    indexerResult.UpdateFromMergeToken(_tokenDictionary);
    indexerResult.UpdateFromMisspelled(_tokenDictionary);
    indexerResult.UpdateFromStemmed(_tokenDictionary);

    // Filter 3: stem every identified word. When the stemmed form differs from the word and is
    // itself a dictionary word, replace the identified word with the stemmed form.
    if (_configuration.Stemmer != null)
    {
        // Snapshot the keys first; the loop below modifies indexerResult.
        List<string> dictionaryWordList = indexerResult.GetDictionaryWordList().Keys.ToList();
        int totalIdentifiedCount = dictionaryWordList.Count;
        int currentIdentifiedCount = 0;
        foreach (string identified in dictionaryWordList)
        {
            currentIdentifiedCount++;
            _configuration.NotificationHandler.UpdateStatus(NotificationType.Stemming, currentIdentifiedCount, totalIdentifiedCount, "Stemming: " + identified);

            string stemmedText = _configuration.Stemmer.GetStemmedText(identified);
            if (stemmedText != null && stemmedText != identified && _configuration.Dictionary.IsWord(stemmedText))
            {
                indexerResult.AddStemmedWordAndReplaceIdentified(identified, stemmedText);
            }
        }
    }

    // Filter result
    indexerResult.RemoveFilterWordAndTokenResult(_configuration.Dictionary);

    _configuration.NotificationHandler.UpdateStatus(NotificationType.IndexingCompleted, 1, 1, "Indexing Completed");
    return indexerResult;
}