/// <summary>
/// Exercises a dictionary whose extractor is built with <c>Trigram</c> only:
/// lookups with zero, one or two tokens must throw, while three-token lookups
/// return the counted frequency.
/// </summary>
public void TestTrigrams()
        {
            var trigrams = new NGramDictionary(new NGramExtractor(Trigram));
            foreach (var text in new[] { Text1, Text2, Text3 })
            {
                // Split(null) tokenizes on whitespace.
                trigrams.AddSequence(text.Split(null).ToList());
            }

            // Token counts outside the extractor's configured size are rejected.
            ArgumentException noTokens = Assert.Throws<ArgumentException>(() => trigrams.GetFrequency());
            Assert.That(noTokens.Message,
                Is.EqualTo(@"Length of nGramTokens (0) must not be greater than maxNGramSize or less than minNGramSize"));

            ArgumentException oneToken = Assert.Throws<ArgumentException>(() => trigrams.GetFrequency("I"));
            Assert.That(oneToken.Message,
                Is.EqualTo(@"Length of nGramTokens (1) must not be greater than maxNGramSize or less than minNGramSize"));

            ArgumentException twoTokens = Assert.Throws<ArgumentException>(() => trigrams.GetFrequency("I", "am"));
            Assert.That(twoTokens.Message,
                Is.EqualTo(@"Length of nGramTokens (2) must not be greater than maxNGramSize or less than minNGramSize"));

            // Each of these trigrams is expected exactly once across the three texts.
            Assert.AreEqual(1, trigrams.GetFrequency("I", "am", "Sam"));
            Assert.AreEqual(1, trigrams.GetFrequency("Sam", "I", "am"));
            Assert.AreEqual(1, trigrams.GetFrequency("not", "like", "green"));
        }
Exemple #2
0
 /// <summary>
 /// Creates a model for n-grams of size 1 up to <paramref name="nGramSize"/> and
 /// reads the lines of the file at <paramref name="modelFilepath"/>.
 /// </summary>
 /// <param name="nGramSize">Largest n-gram size; stored in <c>maxNGramSize</c>.</param>
 /// <param name="modelFilepath">Path of the model file passed to <c>File.ReadAllLines</c>.</param>
 /// <remarks>
 /// Any exception thrown by <c>File.ReadAllLines</c> (for example, a missing file) propagates to the caller.
 /// </remarks>
 public NGramModel(int nGramSize, string modelFilepath)
 {
     // The extractor must exist before the dictionary is built from it;
     // previously the dictionary was constructed while extractor was still unassigned.
     maxNGramSize    = nGramSize;
     extractor       = new NGramExtractor(1, maxNGramSize);
     nGramDictionary = new NGramDictionary(extractor);

     // TODO(review): the file content is read but not yet loaded into the dictionary;
     // the AddAll call below was already disabled in the original code.
     string[] lines = File.ReadAllLines(modelFilepath);
     //AddAll(lines);
 }
        /// <summary>
        /// Round-trips a dictionary through <c>ToString</c> / <c>DeserializeFrom</c>,
        /// then adds the same extra sequence to both instances and checks that
        /// their string forms still match.
        /// </summary>
        public void SerializationTest()
        {
            var original = new NGramDictionary(new NGramExtractor(Unigram, Trigram));
            foreach (var text in new[] { Text1, Text2 })
            {
                original.AddSequence(text.Split(null).ToList());
            }

            // Snapshot taken before the third text is added.
            string serialized = original.ToString();
            NGramDictionary restored = NGramDictionary.DeserializeFrom(serialized);

            // Both instances receive their own freshly tokenized copy of the third text.
            original.AddSequence(Text3.Split(null).ToList());
            restored.AddSequence(Text3.Split(null).ToList());

            Assert.AreEqual(original.ToString(), restored.ToString());
        }
Exemple #4
0
 /// <summary>
 /// Creates an empty model covering n-gram sizes from 1 up to <paramref name="nGramSize"/>.
 /// </summary>
 /// <param name="nGramSize">Largest n-gram size; stored in <c>maxNGramSize</c>.</param>
 public NGramModel(int nGramSize)
 {
     const int smallestNGramSize = 1;

     // Order matters: the extractor reads maxNGramSize, the dictionary reads the extractor.
     this.maxNGramSize = nGramSize;
     this.extractor = new NGramExtractor(smallestNGramSize, this.maxNGramSize);
     this.nGramDictionary = new NGramDictionary(this.extractor);
 }
        /// <summary>
        /// Counts unigrams, bigrams and trigrams over three sentences that are padded
        /// with two <c>&lt;s&gt;</c> start symbols and one <c>&lt;/s&gt;</c> stop symbol each.
        /// </summary>
        public void TestUnigramsBigramsTrigramsWithStartStopSymbols()
        {
            const string text1 = "<s> <s> I am Sam </s>";
            const string text2 = "<s> <s> Sam I am </s>";
            const string text3 = "<s> <s> I do not like green eggs and ham </s>";

            var corpus = new NGramDictionary(new NGramExtractor(Unigram, Trigram));
            foreach (var sentence in new[] { text1, text2, text3 })
            {
                corpus.AddSequence(sentence.Split(null).ToList());
            }

            // An empty lookup is rejected.
            ArgumentException noTokens = Assert.Throws<ArgumentException>(() => corpus.GetFrequency());
            Assert.That(noTokens.Message,
                Is.EqualTo(@"Length of nGramTokens (0) must not be greater than maxNGramSize or less than minNGramSize"));

            // Boundary symbols: two starts and one stop per sentence, three sentences.
            Assert.AreEqual(6, corpus.GetFrequency("<s>"));
            Assert.AreEqual(3, corpus.GetFrequency("</s>"));
            Assert.AreEqual(3, corpus.GetFrequency("<s>", "<s>"));

            // Plain unigrams, including a word that never occurs.
            Assert.AreEqual(2, corpus.GetFrequency("am"));
            Assert.AreEqual(1, corpus.GetFrequency("not"));
            Assert.AreEqual(0, corpus.GetFrequency("the"));

            // N-grams that involve the start symbol.
            Assert.AreEqual(1, corpus.GetFrequency("<s>", "I", "am"));
            Assert.AreEqual(2, corpus.GetFrequency("<s>", "<s>", "I"));
            Assert.AreEqual(1, corpus.GetFrequency("<s>", "<s>", "Sam"));
            Assert.AreEqual(1, corpus.GetFrequency("<s>", "Sam", "I"));

            // "am I" never appears as adjacent tokens.
            Assert.AreEqual(0, corpus.GetFrequency("am", "I"));

            // N-grams that involve the stop symbol.
            Assert.AreEqual(1, corpus.GetFrequency("I", "am", "</s>"));
            Assert.AreEqual(1, corpus.GetFrequency("am", "</s>"));

            // Interior trigrams.
            Assert.AreEqual(1, corpus.GetFrequency("I", "am", "Sam"));
            Assert.AreEqual(1, corpus.GetFrequency("Sam", "I", "am"));
            Assert.AreEqual(1, corpus.GetFrequency("not", "like", "green"));
        }
        /// <summary>
        /// Checks unigram and bigram frequencies for a dictionary whose extractor is
        /// built with <c>Unigram</c> and <c>Bigram</c>, after adding the three shared texts.
        /// </summary>
        public void TestUnigramsBigrams()
        {
            var corpus = new NGramDictionary(new NGramExtractor(Unigram, Bigram));
            foreach (var text in new[] { Text1, Text2, Text3 })
            {
                corpus.AddSequence(text.Split(null).ToList());
            }

            // An empty lookup is rejected.
            ArgumentException noTokens = Assert.Throws<ArgumentException>(() => corpus.GetFrequency());
            Assert.That(noTokens.Message,
                Is.EqualTo(@"Length of nGramTokens (0) must not be greater than maxNGramSize or less than minNGramSize"));

            // Unigrams, including a word expected to be absent.
            Assert.AreEqual(3, corpus.GetFrequency("I"));
            Assert.AreEqual(2, corpus.GetFrequency("am"));
            Assert.AreEqual(1, corpus.GetFrequency("not"));
            Assert.AreEqual(0, corpus.GetFrequency("the"));

            // Bigrams; the last one is expected never to occur in that order.
            Assert.AreEqual(2, corpus.GetFrequency("I", "am"));
            Assert.AreEqual(1, corpus.GetFrequency("Sam", "I"));
            Assert.AreEqual(0, corpus.GetFrequency("am", "I"));
        }
Exemple #7
0
 /// <summary>
 /// Creates an empty model covering n-gram sizes from 1 up to <paramref name="nGramSize"/>.
 /// </summary>
 /// <param name="nGramSize">Largest n-gram size; stored in <c>maxNGramSize</c>.</param>
 public NGramModel(int nGramSize)
 {
     const int smallestNGramSize = 1;

     // Order matters: the extractor reads maxNGramSize, the dictionary reads the extractor.
     this.maxNGramSize = nGramSize;
     this.extractor = new NGramExtractor(smallestNGramSize, this.maxNGramSize);
     this.nGramDictionary = new NGramDictionary(this.extractor);
 }
Exemple #8
-1
 /// <summary>
 /// Creates a model for n-grams of size 1 up to <paramref name="nGramSize"/> and
 /// reads the lines of the file at <paramref name="modelFilepath"/>.
 /// </summary>
 /// <param name="nGramSize">Largest n-gram size; stored in <c>maxNGramSize</c>.</param>
 /// <param name="modelFilepath">Path of the model file passed to <c>File.ReadAllLines</c>.</param>
 /// <remarks>
 /// Any exception thrown by <c>File.ReadAllLines</c> (for example, a missing file) propagates to the caller.
 /// </remarks>
 public NGramModel(int nGramSize, string modelFilepath)
 {
     // The extractor must exist before the dictionary is built from it;
     // previously the dictionary was constructed while extractor was still unassigned.
     maxNGramSize = nGramSize;
     extractor = new NGramExtractor(1, maxNGramSize);
     nGramDictionary = new NGramDictionary(extractor);

     // TODO(review): the file content is read but not yet loaded into the dictionary;
     // the AddAll call below was already disabled in the original code.
     string[] lines = File.ReadAllLines(modelFilepath);
     //AddAll(lines);
 }