/// <summary>
/// Verifies <c>SequenceProbability</c> on a compiled <c>HierarchicalNGram</c> of size 3:
/// a model with no data must report 0, and a model trained on a single sequence must match
/// the weighted sum of separately built uni-, bi- and tri-gram models.
/// </summary>
public void TestSequenceProbability()
{
    HierarchicalNGram hierarchical = new HierarchicalNGram(3, 0.9f);

    // Nothing has been added yet, so the compiled model is expected to report 0.
    Assert.AreEqual(
        0,
        hierarchical.Compile().SequenceProbability(new string[] { "a", "b", "c" }));

    // Normalised blending weights; 0.9, 0.81 and 0.729 are 0.9^1..0.9^3
    // (presumably mirroring the 0.9f passed to the constructor above).
    double weightTotal = 0.9 + 0.81 + 0.729;
    double weightTri = 0.9 / weightTotal;
    double weightBi = 0.81 / weightTotal;
    double weightUni = 0.729 / weightTotal;

    // Reference models built by hand, one per n-gram order.
    UniGram unigram = new UniGram();
    unigram.AddData(null, "a");
    unigram.AddData(null, "a");
    unigram.AddData(null, "b");

    NGram bigram = new NGram(2);
    bigram.AddData(new string[] { "a" }, "a");
    bigram.AddData(new string[] { "a" }, "b");

    NGram trigram = new NGram(3);
    trigram.AddData(new string[] { "a", "a" }, "b");

    // The model under test only receives the trigram observation.
    hierarchical.AddData(new string[] { "a", "a" }, "b");

    ICompiledGram compiledUni = unigram.Compile();
    ICompiledGram compiledBi = bigram.Compile();
    ICompiledGram compiledTri = trigram.Compile();

    string[] sequence = new string[] { "a", "a", "b" };
    double expected =
        weightUni * compiledUni.SequenceProbability(sequence) +
        weightBi * compiledBi.SequenceProbability(sequence) +
        weightTri * compiledTri.SequenceProbability(sequence);

    double actual = hierarchical.Compile().SequenceProbability(new string[] { "a", "a", "b" });

    // Compared as floats so that rounding noise in the weighted sum does not fail the test.
    bool closeEnough = Mathf.Approximately((float)expected, (float)actual);
    Assert.IsTrue(closeEnough, $"Expected {expected} but received {actual}.");
}
/// <summary>
/// Runs <c>numSimulations</c> level generations with the configured gram and records the results.
/// </summary>
/// <remarks>
/// Output layout:
/// <list type="bullet">
/// <item><description>
/// A summary file <c>{basePath}/{extension}_{N}.txt</c> with one CSV-style row per simulation
/// (sequence probability, its reciprocal labelled "Perplexity", the positions reported by
/// <c>LevelAnalyzer.Positions</c>, and the leniency of the simplified columns).
/// </description></item>
/// <item><description>
/// A directory <c>{basePath}/{extension}_{N}</c> holding one <c>{i}.txt</c> file per simulation
/// with the generated columns, one per line.
/// </description></item>
/// </list>
/// </remarks>
public void Execute()
{
    string keyDirectory = Path.Combine(basePath, $"{extension}_{gram.GetN()}");
    if (!Directory.Exists(keyDirectory))
    {
        Directory.CreateDirectory(keyDirectory);
    }

    // 'using' guarantees the summary file is flushed and closed even if a simulation throws.
    using (StreamWriter writer = File.CreateText($"{keyDirectory}.txt"))
    {
        writer.WriteLine("Sequence_Probability,Perplexity,Linearity_JSON_Positions,Leniency");

        ICompiledGram compiled = gram.Compile();
        ICompiledGram simpleCompiled = simplifiedGram?.Compile();

        // Plain NGram instances use the semi-guaranteed generator; everything else best-guess.
        bool isPlainNGram = gram is NGram;

        for (int i = 0; i < numSimulations; ++i)
        {
            // The previous seed, new DateTime().Millisecond, is always 0 because new DateTime()
            // is DateTime.MinValue, so every simulation was seeded identically. Derive the seed
            // from the clock instead and offset it by i so that iterations falling within the
            // same clock tick still get distinct seeds.
            UtilityRandom.SetSeed(unchecked((int)DateTime.UtcNow.Ticks + i));

            Tuple<List<string>, List<string>> tuple = isPlainNGram
                ? GetColumnsSemiGuaranteed(compiled, simpleCompiled)
                : GetColumnsBestGuess(compiled, simpleCompiled);

            List<string> columns = tuple.Item1;
            List<string> simplified = tuple.Item2;
            string[] columnsArray = columns.ToArray();

            List<int> positions = LevelAnalyzer.Positions(columnsArray);
            JsonArray jsonPositions = new JsonArray();
            foreach (int pos in positions)
            {
                jsonPositions.Add(pos);
            }

            double sequenceProbability = compiled.SequenceProbability(columnsArray);
            writer.Write($"{sequenceProbability},");

            // The "Perplexity" column is the reciprocal of the probability; 0 is written
            // instead of dividing by zero.
            if (sequenceProbability == 0)
            {
                writer.Write("0,");
            }
            else
            {
                writer.Write($"{1d / sequenceProbability},");
            }

            writer.Write($"{jsonPositions},");
            writer.Write($"{LevelAnalyzer.Leniency(simplified.ToArray())}\n");

            // Each generated level goes to its own file inside the key directory.
            using (StreamWriter levelWriter = File.CreateText(Path.Combine(keyDirectory, $"{i}.txt")))
            {
                levelWriter.Write(string.Join("\n", columnsArray));
            }

            // Periodically push buffered rows to disk so partial results survive a long run.
            if (i % 200 == 0)
            {
                writer.Flush();
            }
        }
    }
}