Beispiel #1
0
        /// <summary>
        /// Checks that the 2-grams produced by <c>NGramProcessor.MakeNgrams</c> for
        /// <c>Sentence</c> include a set of expected word pairs.
        /// </summary>
        public void Handle_bigrams()
        {
            var produced = NGramProcessor.MakeNgrams(Sentence, 2);

            // Reuse the result when it is already an array; otherwise materialise it once
            // so the assertions below do not re-enumerate the sequence.
            var bigrams = produced as string[];
            if (bigrams == null)
            {
                bigrams = produced.ToArray();
            }

            var expectedPairs = new[] { "hello this", "an example", "I hope", "ain't that" };
            foreach (var pair in expectedPairs)
            {
                bigrams.Should().Contain(pair);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Rebuilds <c>Words</c>, <c>WordsCleaned</c> and <c>Ngrams</c> from the current
        /// <c>Sentence</c>.
        /// </summary>
        /// <param name="arg">
        /// Optional replacement for <c>Sentence</c>. When null or empty, the existing
        /// <c>Sentence</c> value is used unchanged.
        /// </param>
        public void CreateNgrams(string arg = null)
        {
            if (!string.IsNullOrEmpty(arg))
            {
                Sentence = arg;
            }

            // The template carries exactly one placeholder for the one value supplied;
            // the earlier template named two properties but passed a single argument.
            Log.Verbose("text match {text}", Sentence);

            // Normalise every word, then discard the ones IsGibberish rejects.
            Words = Sentence.SplitSentenceIntoWords()
                    .Select(x => x.ToLowerInvariantWithOutSpaces())
                    .Where(x => !x.IsGibberish())
                    .ToList();

            // Words that end up empty or whitespace after ToAlphaNumericOnly are dropped.
            WordsCleaned = Words.Select(x => x.ToAlphaNumericOnly())
                           .Where(x => !string.IsNullOrWhiteSpace(x))
                           .ToList();

            var text = string.Join(" ", WordsCleaned);

            // Keyed by n-gram size. A size is only added when there are at least that many
            // cleaned words; key 1 always maps to the cleaned word list itself.
            Ngrams = new ConcurrentDictionary <int, IEnumerable <string> >();
            if (WordsCleaned.Count > 3)
            {
                Ngrams.Add(new KeyValuePair <int, IEnumerable <string> >(4, NGramProcessor.MakeNgrams(text, 4)));
            }

            if (WordsCleaned.Count > 2)
            {
                Ngrams.Add(new KeyValuePair <int, IEnumerable <string> >(3, NGramProcessor.MakeNgrams(text, 3)));
            }

            if (WordsCleaned.Count > 1)
            {
                Ngrams.Add(new KeyValuePair <int, IEnumerable <string> >(2, NGramProcessor.MakeNgrams(text, 2)));
            }

            Ngrams.Add(new KeyValuePair <int, IEnumerable <string> >(1, WordsCleaned));
        }