/// <summary>
/// Builds a <see cref="Sentence"/> from already validated text, splitting it into
/// <see cref="Word"/> entries that carry per-sentence occurrence counts.
/// </summary>
/// <param name="bookId">Book identifier passed through to the created sentence.</param>
/// <param name="validatedSentence">
/// Sentence text. It is first rebuilt as the text processor's words joined by single spaces,
/// and that rebuilt value is what gets stored and split into words.
/// </param>
/// <param name="indexInBook">Index value passed through to the created sentence.</param>
/// <returns>
/// A sentence with a freshly generated id. Its type is <c>SentenceType.Other</c> when any word
/// is longer than one character and made up only of punctuation characters, or ends with
/// ".?" or ".!"; otherwise it is <c>SentenceType.Normal</c>.
/// </returns>
public Sentence CreateSentence(BookId bookId, string validatedSentence, int indexInBook)
{
    // Rebuild the text so that words are separated by exactly one space.
    validatedSentence = string.Join(" ", _textProcessor.GetWordsEnumerable(validatedSentence));

    var sentenceId = SentenceId.New();
    var tokens = _textProcessor.GetWordsEnumerable(validatedSentence).ToArray();

    // How many times each word (exactly as written) occurs in this sentence.
    var tokenCounts = tokens
        .GroupBy(token => token)
        .ToDictionary(group => group.Key, group => group.Count());

    // A single unusual token is enough to mark the whole sentence as "Other".
    var hasUnusualToken = tokens.Any(token =>
        (token.All(character => TextConstants.PunctuationCharacters.Contains(character)) && token.Length > 1)
        || token.EndsWith(".?", StringComparison.Ordinal)
        || token.EndsWith(".!", StringComparison.Ordinal));
    var sentenceType = hasUnusualToken ? SentenceType.Other : SentenceType.Normal;

    // How many times each normalized word occurs in this sentence.
    var normalizedCounts = tokens
        .Select(token => _textProcessor.NormalizeWord(token))
        .GroupBy(normalized => normalized)
        .ToDictionary(group => group.Key, group => group.Count());

    var sentenceWords = new List<Word>(tokens.Length);
    for (var position = 0; position < tokens.Length; position++)
    {
        var token = tokens[position];
        var keyPairs = CreateKeyPairs(validatedSentence, token);
        var normalized = _textProcessor.NormalizeWord(token);

        // A word that normalizes to an empty string reports a raw count of zero.
        var normalizedCount = normalized == string.Empty ? 0 : normalizedCounts[normalized];

        sentenceWords.Add(new Word(
            sentenceId,
            position,
            token,
            normalized,
            tokenCounts[token],
            normalizedCount,
            keyPairs));
    }

    return new Sentence(bookId, sentenceId, sentenceType, indexInBook, validatedSentence, sentenceWords);
}
/// <summary>
/// Splits <paramref name="text"/> into words with the given text processor, normalizes each
/// word, and yields the distinct normalized words that are not empty.
/// </summary>
/// <param name="textProcessor">Text processor used to split the text and normalize the words.</param>
/// <param name="text">Text to extract the words from.</param>
/// <returns>A lazily evaluated sequence of distinct, non-empty normalized words.</returns>
public static IEnumerable<string> GetNormalizedWordsEnumerable(
    this ITextProcessor textProcessor,
    string text)
{
    var nonEmptyNormalizedWords =
        from word in textProcessor.GetWordsEnumerable(text)
        select textProcessor.NormalizeWord(word) into normalized
        where normalized.Length > 0
        select normalized;

    return nonEmptyNormalizedWords.Distinct();
}
/// <summary>
/// Splits <paramref name="text"/> into words with the given text processor, normalizes each
/// word, and yields the distinct normalized words that are accepted by
/// <paramref name="languageInformation"/> and are not empty.
/// </summary>
/// <param name="textProcessor">Text processor used to split the text and normalize the words.</param>
/// <param name="text">Text to extract the words from.</param>
/// <param name="languageInformation">
/// Language description whose <c>IsAllLettersAllowed</c> check every normalized word must pass.
/// </param>
/// <returns>A lazily evaluated sequence of distinct normalized words that passed both filters.</returns>
public static IEnumerable<string> GetNormalizedWordsEnumerable(
    this ITextProcessor textProcessor,
    string text,
    LanguageInformation languageInformation)
{
    var words = textProcessor.GetWordsEnumerable(text);
    var normalizedWords = words.Select(word => textProcessor.NormalizeWord(word));

    // The language check runs first; the length check is only reached for allowed words.
    var acceptedWords = normalizedWords.Where(
        word => languageInformation.IsAllLettersAllowed(word) && word.Length > 0);

    return acceptedWords.Distinct();
}
/// <summary>
/// Picks <paramref name="maxCount"/> words at random (repetitions possible) from the distinct
/// words of randomly selected sentences.
/// </summary>
/// <param name="maxCount">
/// Number of words to return; also passed to <c>FindRandomSentencesAsync</c> as its first argument.
/// </param>
/// <param name="rawWords">
/// When <c>true</c>, every word is normalized with the text processor before duplicates are removed.
/// </param>
/// <returns>
/// A list of <paramref name="maxCount"/> randomly chosen words, or an empty list when the
/// selected sentences contain no words at all.
/// </returns>
private async ValueTask<IEnumerable<string>> FindRandomWordsAsync(int maxCount, bool rawWords)
{
    // NOTE(review): the meaning of the second argument (1) is not visible here - confirm
    // against FindRandomSentencesAsync.
    var randomSentences = await FindRandomSentencesAsync(maxCount, 1)
        .ConfigureAwait(false);

    var wordsEnumerable = randomSentences.SelectMany(
        sentence => _textProcessor.GetWordsEnumerable(sentence.Value));

    if (rawWords)
    {
        wordsEnumerable = wordsEnumerable.Select(word => _textProcessor.NormalizeWord(word));
    }

    var distinctWords = wordsEnumerable.Distinct().ToList();

    if (distinctWords.Count == 0)
    {
        // RandomNumberGenerator.GetInt32(0, 0) throws ArgumentException because the exclusive
        // upper bound must be greater than the lower bound. With nothing to pick from,
        // the only sensible result is an empty list.
        return new List<string>();
    }

    return Enumerable.Range(0, maxCount)
        .Select(_ => distinctWords[RandomNumberGenerator.GetInt32(0, distinctWords.Count)])
        .ToList();
}