예제 #1
0
        /// <summary>
        /// Creates an analyser from raw text samples. Each sample is tokenised with the
        /// tokeniser of a newly created <see cref="DocumentIndex"/> and wrapped in a
        /// <see cref="TokenisedTextDocument"/> whose identifier is a freshly generated GUID string.
        /// </summary>
        /// <param name="samples">The raw text samples to tokenise and analyse.</param>
        public CorpusAnalyser(IEnumerable<string> samples)
        {
            _index = new DocumentIndex();
            _markovChain = new DiscreteMarkovChain<string>();

            // Select is deferred: a document (and its GUID) is only produced when the
            // resulting sequence is enumerated.
            var documents = samples.Select(text =>
            {
                var id = Guid.NewGuid().ToString();
                var tokens = _index.Tokeniser.Tokenise(text);

                return new TokenisedTextDocument(id, tokens);
            });

            _documentTermMatrix = Analyse(documents);
        }
예제 #2
0
        /// <summary>
        /// Creates an analyser from documents that have already been tokenised.
        /// </summary>
        /// <param name="samples">The tokenised documents handed to <c>Analyse</c>.</param>
        public CorpusAnalyser(IEnumerable<TokenisedTextDocument> samples)
        {
            // The index and the Markov chain are created before the analysis runs.
            this._index = new DocumentIndex();
            this._markovChain = new DiscreteMarkovChain<string>();

            // The result of the analysis is kept as the document-term matrix.
            this._documentTermMatrix = Analyse(samples);
        }
예제 #3
0
        // Parameterised check: analyses `sequence` with a chain constructed with the
        // argument 2, then verifies that the frequencies returned for `test` contain
        // `totalCount` entries and that the entry for `assertChar` equals `assertFreq`.
        public void SecondOrder_AddSequence_and_GetFrequencies_ReturnsCorrectFrequency(string sequence, char test, char assertChar, int assertFreq, int totalCount)
        {
            // Arrange
            var chain = new DiscreteMarkovChain<char>(2);
            chain.AnalyseSequence(sequence);

            // Act
            var frequencies = chain.GetFrequencies(test);

            // Assert
            Assert.That(frequencies.Count, Is.EqualTo(totalCount));
            Assert.That(frequencies[assertChar], Is.EqualTo(assertFreq));
        }
예제 #4
0
        // Verifies the effect of Prune(3) on a chain built from "a1b1c1b2c2b3":
        // 'b' keeps its three frequency entries while 'a' drops from one entry to none.
        public void AddSequence_and_Prune()
        {
            var chain = new DiscreteMarkovChain<char>();
            chain.AnalyseSequence("a1b1c1b2c2b3");

            // Before pruning: 'b' is followed by '1', '2' and '3'; 'a' only by '1'.
            var bEntriesBefore = chain.GetFrequencies('b').Count;
            Assert.That(bEntriesBefore, Is.EqualTo(3));
            var aEntriesBefore = chain.GetFrequencies('a').Count;
            Assert.That(aEntriesBefore, Is.EqualTo(1));

            chain.Prune(3);

            // After pruning the frequencies are queried again rather than reused.
            var bEntriesAfter = chain.GetFrequencies('b').Count;
            Assert.That(bEntriesAfter, Is.EqualTo(3));
            var aEntriesAfter = chain.GetFrequencies('a').Count;
            Assert.That(aEntriesAfter, Is.EqualTo(0));
        }
예제 #5
0
        // Verifies that merging a second chain into the first combines their frequencies:
        // the entries for 'b' grow from three to four and the count for '1' after 'b'
        // rises from 1 to 2.
        public void AddSequence_and_Merge_WithOther()
        {
            var target = new DiscreteMarkovChain<char>();
            var other = new DiscreteMarkovChain<char>();

            target.AnalyseSequence("a1b1c1b2c2b3");
            other.AnalyseSequence("b5b1");

            // State of the target chain before the merge.
            var entriesBefore = target.GetFrequencies('b').Count;
            Assert.That(entriesBefore, Is.EqualTo(3));
            var onesBefore = target.GetFrequencies('b')['1'];
            Assert.That(onesBefore, Is.EqualTo(1));

            target.Merge(other);

            // The other chain contributes a new successor ('5') and one more '1'.
            var entriesAfter = target.GetFrequencies('b').Count;
            Assert.That(entriesAfter, Is.EqualTo(4));
            var onesAfter = target.GetFrequencies('b')['1'];
            Assert.That(onesAfter, Is.EqualTo(2));
        }
예제 #6
0
        // Analyses "abcabdabcabcabn" with a chain constructed with the argument 2 and
        // checks the frequencies returned for "ab": three entries, 'c' -> 3, 'd' -> 1, 'n' -> 1.
        public void AddSequence_GetFrequencies_ReturnsExpectedValues()
        {
            var chain = new DiscreteMarkovChain<char>(2);
            chain.AnalyseSequence("abcabdabcabcabn");

            var frequencies = chain.GetFrequencies("ab");

            // Expected successor counts, checked in the order 'c', 'd', 'n'.
            var expectations = new[]
            {
                new { Symbol = 'c', Count = 3 },
                new { Symbol = 'd', Count = 1 },
                new { Symbol = 'n', Count = 1 }
            };

            Assert.That(frequencies.Count, Is.EqualTo(3));

            foreach (var expected in expectations)
            {
                Assert.That(frequencies[expected.Symbol], Is.EqualTo(expected.Count));
            }
        }
예제 #7
0
        // Analyses "abcabdabcabcabn" with a chain constructed with the argument 2 and
        // checks the probability reported for 'c' following "ab".
        public void AddSequence_ProbabilityOf()
        {
            var mkc = new DiscreteMarkovChain<char>(2);

            var seq = "abcabdabcabcabn";

            mkc.AnalyseSequence(seq);

            var p = mkc.ProbabilityOfEvent("ab", 'c');

            // "ab" occurs five times in the sequence and is followed by 'c' three times.
            // Compare with a tolerance: exact equality on floating-point values is brittle
            // and would break if the probability were computed in a different (but
            // mathematically equivalent) way.
            Assert.That(p.Value, Is.EqualTo(3d / 5d).Within(1e-9));
        }
예제 #8
0
        // Analyses "abcabdabcabcabn" with a chain constructed with the argument 2, runs a
        // simulation starting from 'b' and checks that it yields at least one item and
        // that every item is a character that appears in the analysed sequence.
        public void AddSequence_Simulate()
        {
            var mkc = new DiscreteMarkovChain<char>(2);

            var seq = "abcabdabcabcabn";

            mkc.AnalyseSequence(seq);

            var simulation = mkc.Simulate('b').ToList();

            // Constraint-based assertion so a failure reports the actual count
            // instead of a bare "expected true but was false".
            Assert.That(simulation.Count, Is.GreaterThan(0));

            // The message identifies which expectation failed.
            Assert.That(
                simulation.All(c => seq.IndexOf(c) > -1),
                "Simulation produced a character that does not occur in the analysed sequence.");
        }