Пример #1
0
    /// <summary>
    /// Builds a <see cref="Sentence"/> together with one <see cref="Word"/> entry per token of the text.
    /// </summary>
    /// <remarks>
    /// The text is first canonicalized by tokenizing it with the text processor and re-joining the
    /// tokens with single spaces. The sentence is classified as <c>SentenceType.Other</c> when any
    /// token either (a) is longer than one character and consists solely of
    /// <c>TextConstants.PunctuationCharacters</c>, or (b) ends with ".?" or ".!"; otherwise it is
    /// <c>SentenceType.Normal</c>.
    /// </remarks>
    /// <param name="bookId">Identifier passed through to the created sentence.</param>
    /// <param name="validatedSentence">Sentence text; it is re-tokenized and re-joined before use.</param>
    /// <param name="indexInBook">Index passed through to the created sentence.</param>
    /// <returns>The new sentence, carrying the canonicalized text and its words in token order.</returns>
    public Sentence CreateSentence(BookId bookId, string validatedSentence, int indexInBook)
    {
        // Canonical form: tokens separated by exactly one space.
        validatedSentence = string.Join(" ", _textProcessor.GetWordsEnumerable(validatedSentence));

        var sentenceId = SentenceId.New();
        var words = _textProcessor.GetWordsEnumerable(validatedSentence).ToArray();

        // How many times each token occurs in this sentence.
        var wordCounts = words
            .GroupBy(word => word)
            .ToDictionary(group => group.Key, group => group.Count());

        var sentenceType = words.Any(word =>
                (word.Length > 1 &&
                 word.All(character => TextConstants.PunctuationCharacters.Contains(character))) ||
                word.EndsWith(".?", StringComparison.Ordinal) ||
                word.EndsWith(".!", StringComparison.Ordinal))
            ? SentenceType.Other
            : SentenceType.Normal;

        // Normalize every token exactly once; rawWords[i] corresponds to words[i].
        var rawWords = words
            .Select(word => _textProcessor.NormalizeWord(word))
            .ToArray();

        // How many times each normalized form occurs in this sentence.
        var rawWordCounts = rawWords
            .GroupBy(rawWord => rawWord)
            .ToDictionary(group => group.Key, group => group.Count());

        var wordsList = new List<Word>(words.Length);

        for (var index = 0; index < words.Length; index++)
        {
            var word = words[index];
            var rawWord = rawWords[index];
            var keyPairs = CreateKeyPairs(validatedSentence, word);

            // A token whose normalized form is empty is reported with a raw count of zero.
            var rawCount = rawWord == string.Empty ? 0 : rawWordCounts[rawWord];

            wordsList.Add(new Word(
                sentenceId, index,
                word, rawWord,
                wordCounts[word], rawCount,
                keyPairs));
        }

        return new Sentence(bookId, sentenceId, sentenceType, indexInBook, validatedSentence, wordsList);
    }
 /// <summary>
 /// Splits <paramref name="text"/> into words, normalizes each one, drops the words whose
 /// normalized form is empty and removes duplicates.
 /// </summary>
 /// <param name="textProcessor">Processor used both to split the text and to normalize words.</param>
 /// <param name="text">Text to split into words.</param>
 /// <returns>A lazily evaluated sequence of distinct, non-empty normalized words.</returns>
 public static IEnumerable<string> GetNormalizedWordsEnumerable(
     this ITextProcessor textProcessor, string text)
 {
     var normalizedWords =
         from word in textProcessor.GetWordsEnumerable(text)
         select textProcessor.NormalizeWord(word) into normalizedWord
         where normalizedWord.Length > 0
         select normalizedWord;

     return normalizedWords.Distinct();
 }
 /// <summary>
 /// Splits <paramref name="text"/> into words, normalizes each one, keeps only the normalized
 /// words accepted by <paramref name="languageInformation"/>, drops empty ones and removes duplicates.
 /// </summary>
 /// <param name="textProcessor">Processor used both to split the text and to normalize words.</param>
 /// <param name="text">Text to split into words.</param>
 /// <param name="languageInformation">
 /// Supplies the <c>IsAllLettersAllowed</c> check applied to every normalized word.
 /// </param>
 /// <returns>A lazily evaluated sequence of distinct, non-empty, allowed normalized words.</returns>
 public static IEnumerable<string> GetNormalizedWordsEnumerable(
     this ITextProcessor textProcessor,
     string text,
     LanguageInformation languageInformation)
 {
     // The letter check runs before the emptiness check, so it also sees empty normalized words.
     var allowedWords =
         from word in textProcessor.GetWordsEnumerable(text)
         select textProcessor.NormalizeWord(word) into normalizedWord
         where languageInformation.IsAllLettersAllowed(normalizedWord)
         where normalizedWord.Length > 0
         select normalizedWord;

     return allowedWords.Distinct();
 }
Пример #4
0
    /// <summary>
    /// Draws <paramref name="maxCount"/> words at random, with replacement, from the distinct words
    /// of the sentences returned by <c>FindRandomSentencesAsync(maxCount, 1)</c>.
    /// </summary>
    /// <param name="maxCount">
    /// Number of words to draw; also forwarded as the first argument of <c>FindRandomSentencesAsync</c>.
    /// </param>
    /// <param name="rawWords">
    /// When <c>true</c>, every word is passed through <c>NormalizeWord</c> before de-duplication.
    /// </param>
    /// <returns>
    /// A list of <paramref name="maxCount"/> words (the same word may appear more than once), or an
    /// empty list when the sentences contain no words at all.
    /// </returns>
    private async ValueTask<IEnumerable<string>> FindRandomWordsAsync(int maxCount, bool rawWords)
    {
        var randomSentences = await FindRandomSentencesAsync(maxCount, 1)
            .ConfigureAwait(false);

        var candidateWords = randomSentences
            .SelectMany(sentence => _textProcessor.GetWordsEnumerable(sentence.Value));

        if (rawWords)
        {
            // NOTE(review): normalized words are not filtered for emptiness here — confirm
            // whether an empty normalized word is an acceptable result for callers.
            candidateWords = candidateWords.Select(word => _textProcessor.NormalizeWord(word));
        }

        var distinctWords = candidateWords.Distinct().ToList();

        // RandomNumberGenerator.GetInt32(0, 0) throws because the upper bound must be greater
        // than the lower bound, so there is nothing to draw from an empty pool.
        if (distinctWords.Count == 0)
        {
            return new List<string>();
        }

        return Enumerable.Range(0, maxCount)
            .Select(_ => distinctWords[RandomNumberGenerator.GetInt32(0, distinctWords.Count)])
            .ToList();
    }