Exemple #1
0
        /// <summary>
        /// Checks <c>SequenceProbability</c> on a compiled <c>HierarchicalNGram</c>:
        /// an untrained model must report 0, and a trained model must match the
        /// weighted sum of stand-alone uni-, bi- and tri-gram models fed the data
        /// set up below.
        /// </summary>
        public void TestSequenceProbability()
        {
            HierarchicalNGram hierarchical = new HierarchicalNGram(3, 0.9f);

            // Nothing has been added yet, so the compiled model is expected to return 0.
            Assert.AreEqual(
                0,
                hierarchical.Compile().SequenceProbability(new[] { "a", "b", "c" }));

            // Reference models, one per order, built by hand.
            UniGram unigram = new UniGram();
            unigram.AddData(null, "a");
            unigram.AddData(null, "a");
            unigram.AddData(null, "b");

            NGram bigram = new NGram(2);
            bigram.AddData(new[] { "a" }, "a");
            bigram.AddData(new[] { "a" }, "b");

            NGram trigram = new NGram(3);
            trigram.AddData(new[] { "a", "a" }, "b");

            // The model under test receives a single trigram observation.
            hierarchical.AddData(new[] { "a", "a" }, "b");

            // Expected blend weights: 0.9, 0.9^2 and 0.9^3, normalised to sum to 1.
            double weightTotal   = 0.9 + 0.81 + 0.729;
            double trigramWeight = 0.9 / weightTotal;
            double bigramWeight  = 0.81 / weightTotal;
            double unigramWeight = 0.729 / weightTotal;

            ICompiledGram compiledUni = unigram.Compile();
            ICompiledGram compiledBi  = bigram.Compile();
            ICompiledGram compiledTri = trigram.Compile();

            string[] sequence = { "a", "a", "b" };

            double unigramTerm = unigramWeight * compiledUni.SequenceProbability(sequence);
            double bigramTerm  = bigramWeight * compiledBi.SequenceProbability(sequence);
            double trigramTerm = trigramWeight * compiledTri.SequenceProbability(sequence);
            double expected    = unigramTerm + bigramTerm + trigramTerm;

            double actual = hierarchical.Compile().SequenceProbability(new[] { "a", "a", "b" });

            bool closeEnough = Mathf.Approximately((float)expected, (float)actual);
            Assert.IsTrue(closeEnough, $"Expected {expected} but received {actual}.");
        }
Exemple #2
0
        /// <summary>
        /// Runs <c>numSimulations</c> generation passes with the configured gram and
        /// records the results on disk.
        /// </summary>
        /// <remarks>
        /// Output layout, derived from <c>basePath</c>, <c>extension</c> and <c>gram.GetN()</c>:
        /// <list type="bullet">
        /// <item><c>{keyDirectory}.txt</c> - one CSV-style row per simulation with the
        /// sequence probability, its reciprocal (written under the "Perplexity" header,
        /// 0 when the probability is 0), the JSON position list and the leniency.</item>
        /// <item><c>{keyDirectory}/{i}.txt</c> - the generated columns of simulation
        /// <c>i</c>, one per line.</item>
        /// </list>
        /// Both writers are wrapped in <c>using</c> so the files are closed even if a
        /// simulation throws.
        /// </remarks>
        public void Execute()
        {
            string keyDirectory = Path.Combine(basePath, $"{extension}_{gram.GetN()}");

            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(keyDirectory);

            using (StreamWriter writer = File.CreateText($"{keyDirectory}.txt"))
            {
                writer.WriteLine("Sequence_Probability,Perplexity,Linearity_JSON_Positions,Leniency");

                ICompiledGram compiled       = gram.Compile();
                ICompiledGram simpleCompiled = simplifiedGram?.Compile();

                // The generation strategy depends only on the gram's type, which
                // does not change between simulations.
                bool isPlainNGram = gram is NGram;

                for (int i = 0; i < numSimulations; ++i)
                {
                    // Seed from the current clock. The previous `new DateTime().Millisecond`
                    // always evaluated to 0 because a default-constructed DateTime is
                    // DateTime.MinValue. The loop index is mixed in so two iterations that
                    // land on the same tick still get different seeds.
                    UtilityRandom.SetSeed(unchecked((int)DateTime.Now.Ticks + i));

                    Tuple <List <string>, List <string> > tuple = isPlainNGram
                        ? GetColumnsSemiGuaranteed(compiled, simpleCompiled)
                        : GetColumnsBestGuess(compiled, simpleCompiled);

                    List <string> columns    = tuple.Item1;
                    List <string> simplified = tuple.Item2;

                    string[]   columnsArray  = columns.ToArray();
                    List <int> positions     = LevelAnalyzer.Positions(columnsArray);
                    JsonArray  jsonPositions = new JsonArray();
                    foreach (int pos in positions)
                    {
                        jsonPositions.Add(pos);
                    }

                    double sequenceProbability = compiled.SequenceProbability(columnsArray);
                    writer.Write($"{sequenceProbability},");

                    // Guard the reciprocal against division by zero; 0 is written instead.
                    if (sequenceProbability == 0)
                    {
                        writer.Write("0,");
                    }
                    else
                    {
                        writer.Write($"{1d/sequenceProbability},");
                    }

                    writer.Write($"{jsonPositions},");
                    writer.Write($"{LevelAnalyzer.Leniency(simplified.ToArray())}\n");

                    // Persist the generated level itself next to the summary file.
                    using (StreamWriter levelWriter = File.CreateText(Path.Combine(keyDirectory, $"{i}.txt")))
                    {
                        levelWriter.Write(string.Join("\n", columnsArray));
                    }

                    // Periodically push buffered rows to disk so a long run that is
                    // interrupted still leaves partial results behind.
                    if (i % 200 == 0)
                    {
                        writer.Flush();
                    }
                }
            }
        }