/// <summary>
/// Creates an analyser from raw text samples. Each sample is tokenised with the
/// index's tokeniser and wrapped in a <see cref="TokenisedTextDocument"/> whose
/// identifier is a freshly generated GUID string; the resulting documents are
/// then passed to <c>Analyse</c>.
/// </summary>
/// <param name="samples">The raw text samples to analyse.</param>
/// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
public CorpusAnalyser(IEnumerable<string> samples)
{
    if (samples == null)
    {
        throw new ArgumentNullException(nameof(samples));
    }

    // The index must exist before the projection below runs, because the
    // projection reads _index.Tokeniser.
    _index = new DocumentIndex();
    _markovChain = new DiscreteMarkovChain<string>();

    // Materialise the documents exactly once. The projection calls Guid.NewGuid(),
    // so a deferred query would hand out different identifiers (and re-tokenise
    // every sample) each time the consumer enumerated it.
    IEnumerable<TokenisedTextDocument> documents = samples
        .Select(s => new TokenisedTextDocument(Guid.NewGuid().ToString(), _index.Tokeniser.Tokenise(s)))
        .ToList();

    _documentTermMatrix = Analyse(documents);
}
/// <summary>
/// Creates an analyser from documents that have already been tokenised.
/// </summary>
/// <param name="samples">The tokenised documents handed to <c>Analyse</c>.</param>
public CorpusAnalyser(IEnumerable<TokenisedTextDocument> samples)
{
    // Set up the index and the chain first so both fields are assigned
    // before the analysis step runs.
    _index = new DocumentIndex();
    _markovChain = new DiscreteMarkovChain<string>();

    // The result of the analysis becomes the document-term matrix.
    var matrix = Analyse(samples);
    _documentTermMatrix = matrix;
}
/// <summary>
/// Parameterised check: analyses <paramref name="sequence"/> with a chain
/// constructed with the argument 2 (presumably the chain order, per the test
/// name), then verifies the frequencies returned for <paramref name="test"/>.
/// </summary>
/// <param name="sequence">The character sequence fed to the chain.</param>
/// <param name="test">The character whose frequencies are requested.</param>
/// <param name="assertChar">The key looked up in the returned frequencies.</param>
/// <param name="assertFreq">The value expected for <paramref name="assertChar"/>.</param>
/// <param name="totalCount">The expected number of entries in the returned frequencies.</param>
public void SecondOrder_AddSequence_and_GetFrequencies_ReturnsCorrectFrequency(string sequence, char test, char assertChar, int assertFreq, int totalCount)
{
    // Arrange
    var chain = new DiscreteMarkovChain<char>(2);
    chain.AnalyseSequence(sequence);

    // Act
    var frequencies = chain.GetFrequencies(test);

    // Assert
    Assert.That(frequencies.Count, Is.EqualTo(totalCount));
    Assert.That(frequencies[assertChar], Is.EqualTo(assertFreq));
}
/// <summary>
/// Analyses a fixed sequence, checks the frequency counts for 'b' and 'a',
/// calls <c>Prune(3)</c>, and then expects 'b' to keep its three entries while
/// 'a' has none left.
/// </summary>
public void AddSequence_and_Prune()
{
    // Arrange
    var chain = new DiscreteMarkovChain<char>();
    chain.AnalyseSequence("a1b1c1b2c2b3");

    // Counts before pruning.
    Assert.That(chain.GetFrequencies('b').Count, Is.EqualTo(3));
    Assert.That(chain.GetFrequencies('a').Count, Is.EqualTo(1));

    // Act
    chain.Prune(3);

    // Counts after pruning: 'b' is unchanged, 'a' is emptied.
    Assert.That(chain.GetFrequencies('b').Count, Is.EqualTo(3));
    Assert.That(chain.GetFrequencies('a').Count, Is.EqualTo(0));
}
/// <summary>
/// Analyses one sequence in each of two chains, merges the second chain into
/// the first, and expects the first chain's frequencies for 'b' to gain one
/// entry and the count for '1' to rise from 1 to 2.
/// </summary>
public void AddSequence_and_Merge_WithOther()
{
    // Arrange: two independent chains, each fed its own sequence.
    var target = new DiscreteMarkovChain<char>();
    var other = new DiscreteMarkovChain<char>();
    target.AnalyseSequence("a1b1c1b2c2b3");
    other.AnalyseSequence("b5b1");

    // State of the target chain before the merge.
    Assert.That(target.GetFrequencies('b').Count, Is.EqualTo(3));
    Assert.That(target.GetFrequencies('b')['1'], Is.EqualTo(1));

    // Act
    target.Merge(other);

    // State of the target chain after the merge.
    Assert.That(target.GetFrequencies('b').Count, Is.EqualTo(4));
    Assert.That(target.GetFrequencies('b')['1'], Is.EqualTo(2));
}
/// <summary>
/// Analyses a fixed sequence with a chain constructed with the argument 2 and
/// checks that the frequencies returned for "ab" hold exactly three entries:
/// 'c' = 3, 'd' = 1 and 'n' = 1.
/// </summary>
public void AddSequence_GetFrequencies_ReturnsExpectedValues()
{
    // Arrange
    var chain = new DiscreteMarkovChain<char>(2);
    chain.AnalyseSequence("abcabdabcabcabn");

    // Act
    var frequencies = chain.GetFrequencies("ab");

    // Assert
    Assert.That(frequencies.Count, Is.EqualTo(3));
    Assert.That(frequencies['c'], Is.EqualTo(3));
    Assert.That(frequencies['d'], Is.EqualTo(1));
    Assert.That(frequencies['n'], Is.EqualTo(1));
}
/// <summary>
/// Analyses a fixed sequence with a chain constructed with the argument 2 and
/// expects the value reported by <c>ProbabilityOfEvent("ab", 'c')</c> to equal 3/5.
/// </summary>
public void AddSequence_ProbabilityOf()
{
    // Arrange
    var input = "abcabdabcabcabn";
    var chain = new DiscreteMarkovChain<char>(2);
    chain.AnalyseSequence(input);

    // Act
    var probability = chain.ProbabilityOfEvent("ab", 'c');

    // Assert
    var expected = 3d / 5d;
    Assert.That(probability.Value, Is.EqualTo(expected));
}
/// <summary>
/// Analyses a fixed sequence, runs a simulation starting from 'b', and checks
/// that the simulation is non-empty and that every element it produced occurs
/// somewhere in the analysed sequence.
/// </summary>
public void AddSequence_Simulate()
{
    // Arrange
    var seq = "abcabdabcabcabn";
    var chain = new DiscreteMarkovChain<char>(2);
    chain.AnalyseSequence(seq);

    // Act: materialise the simulation so it can be inspected more than once.
    var generated = chain.Simulate('b').ToList();

    // Assert: something was produced...
    var producedAnything = generated.Count > 0;
    Assert.That(producedAnything);

    // ...and every produced element can be found in the input sequence.
    var onlyKnownSymbols = generated.All(symbol => seq.IndexOf(symbol) > -1);
    Assert.That(onlyKnownSymbols);
}