/// <summary>
/// Enumerates the sentences the text processor yields for <paramref name="text"/>, keeping only those
/// for which <c>languageInformation.IsAllLettersAllowed</c> returns <c>true</c>.
/// </summary>
/// <param name="textProcessor">Processor whose single-argument <c>GetSentencesEnumerable</c> supplies the sentences.</param>
/// <param name="text">Text handed to the processor.</param>
/// <param name="languageInformation">Supplies the per-sentence check applied as a filter.</param>
/// <returns>The filtered sequence; the filter is attached with <c>Where</c> and nothing is materialized here.</returns>
public static IEnumerable<string> GetSentencesEnumerable(
    this ITextProcessor textProcessor,
    string text,
    LanguageInformation languageInformation)
{
    var allSentences = textProcessor.GetSentencesEnumerable(text);

    return allSentences.Where(candidate => languageInformation.IsAllLettersAllowed(candidate));
}
/// <summary>
/// Reads the book content as text, collects the sentences that are too short or fail the language
/// check, and saves the sentences produced by the book content processor.
/// </summary>
/// <param name="book">Book whose <c>Language</c> and <c>BookId</c> are used for the import.</param>
/// <param name="bookContent">Wrapper whose <c>Content</c> is read to the end through a <see cref="StreamReader"/>.</param>
/// <returns>
/// A <see cref="BookImportResult"/> built from the book, the distinct too-short sentences, the distinct
/// rejected sentences, and the distinct rejected characters joined into a single string.
/// </returns>
private async ValueTask<BookImportResult> ImportBookAsync(Book book, BookContent bookContent)
{
    using var contentReader = new StreamReader(bookContent.Content);
    var text = await contentReader.ReadToEndAsync().ConfigureAwait(false);
    var language = await _languageProvider
        .FindLanguageInformationAsync(book.Language)
        .ConfigureAwait(false);

    // Predicates shared by the report queries below.
    bool IsTooShort(string sentence) => sentence.Length < MinSentenceLengthCharacters;
    bool IsRejected(string candidate) => !language.IsAllLettersAllowed(candidate);

    // Uses the language-aware overload, so only sentences that pass the language check are considered.
    var tooShort = _textProcessor
        .GetSentencesEnumerable(text, language)
        .Where(IsTooShort)
        .Distinct()
        .ToList();

    // Uses the single-argument overload on purpose: the sentences failing the check are the ones wanted.
    var rejectedSentences = _textProcessor
        .GetSentencesEnumerable(text)
        .Where(IsRejected)
        .Distinct()
        .ToList();

    // Each character of a rejected sentence is checked on its own as a one-character string.
    var rejectedCharacters = rejectedSentences
        .SelectMany(sentence => sentence.Where(symbol => IsRejected(symbol.ToString())))
        .Distinct()
        .ToList();

    var sentencesToSave = _bookContentProcessor.ProcessBookContent(book.BookId, text, language);
    await _sentenceRepository.SaveByBatchesAsync(sentencesToSave).ConfigureAwait(false);

    var rejectedCharactersText = string.Join(string.Empty, rejectedCharacters);
    return new BookImportResult(book, tooShort, rejectedSentences, rejectedCharactersText);
}
/// <summary>
/// Builds the <see cref="Sentence"/> sequence for a book from its text content.
/// </summary>
/// <param name="bookId">Identifier passed to the sentence factory for every created sentence.</param>
/// <param name="content">Text handed to the text processor.</param>
/// <param name="languageInformation">Language data forwarded to the language-aware sentence enumeration.</param>
/// <returns>
/// One <see cref="Sentence"/> per enumerated sentence whose length is at least
/// <c>MinSentenceLengthCharacters</c>. The index given to the factory counts only sentences that
/// passed the length filter, starting at zero. The query is composed with <c>Where</c>/<c>Select</c>
/// and is not materialized here.
/// </returns>
public IEnumerable<Sentence> ProcessBookContent(
    BookId bookId,
    string content,
    LanguageInformation languageInformation)
{
    var longEnoughSentences = _textProcessor
        .GetSentencesEnumerable(content, languageInformation)
        .Where(text => text.Length >= MinSentenceLengthCharacters);

    // The index overload of Select numbers the sentences after the length filter has been applied.
    return longEnoughSentences.Select(
        (text, position) => _sentenceFactory.CreateSentence(bookId, text, position));
}