// Verifies that a compiled hierarchical 3-gram offers every token it was
// trained to emit ("a", "c" and "d") as a guess for each trained prior.
public void TestGetGuesses()
{
    HierarchicalNGram uncompiled = new HierarchicalNGram(3, 0.6f);
    uncompiled.AddData(new string[] { "b", "a" }, "c");
    uncompiled.AddData(new string[] { "b", "c" }, "c");
    uncompiled.AddData(new string[] { "b", "a" }, "a");
    uncompiled.AddData(new string[] { "a", "a" }, "d");
    ICompiledGram compiled = uncompiled.Compile();

    // Each prior is its own array instance, exactly one GetGuesses call per prior.
    string[][] priors =
    {
        new string[] { "b", "a" },
        new string[] { "b", "c" },
        new string[] { "a", "a" },
    };

    foreach (string[] prior in priors)
    {
        string[] guesses = compiled.GetGuesses(prior);
        foreach (string token in new string[] { "a", "c", "d" })
        {
            Assert.IsTrue(guesses.Contains(token));
        }
    }
}
/// <summary>
/// Collects the guesses of every compiled sub-grammar, visiting
/// <c>CompiledGrammars</c> from the last index down to the first, and
/// returns them without duplicates in first-seen order.
/// </summary>
/// <param name="inData">
/// Prior tokens. Each sub-grammar is given the trailing
/// <c>GetN() - 1</c> entries of this array.
/// </param>
/// <returns>The de-duplicated guesses.</returns>
public string[] GetGuesses(string[] inData)
{
    List<string> ordered = new List<string>();
    HashSet<string> alreadySeen = new HashSet<string>();
    int inputLength = inData.Length;

    for (int level = CompiledGrammars.Length; level-- > 0;)
    {
        ICompiledGram current = CompiledGrammars[level];
        int contextLength = current.GetN() - 1;

        // Only the tail of the prior is relevant to this sub-grammar.
        string[] context = new ArraySegment<string>(
            inData,
            inputLength - contextLength,
            contextLength).ToArray();

        foreach (string candidate in current.GetGuesses(context))
        {
            // HashSet.Add reports whether the value was newly inserted.
            if (alreadySeen.Add(candidate))
            {
                ordered.Add(candidate);
            }
        }
    }

    return ordered.ToArray();
}
/// <summary>
/// Produces the generated columns together with their simplified tokens.
/// Without a simplified gram the columns are generated directly and then
/// simplified; otherwise the simplified sequence is generated first and
/// used to restrict the column generation.
/// </summary>
/// <param name="compiled">Compiled gram used to generate the columns.</param>
/// <param name="simpleCompiled">
/// Compiled simplified gram; only used when <c>simplifiedGram</c> is set.
/// </param>
/// <returns>Item1 holds the columns, Item2 the simplified tokens.</returns>
private Tuple<List<string>, List<string>> GetColumnsSemiGuaranteed(
    ICompiledGram compiled,
    ICompiledGram simpleCompiled)
{
    if (simplifiedGram == null)
    {
        List<string> directColumns = NGramGenerator.Generate(
            compiled,
            startInput,
            size,
            includeStart: false);
        List<string> derivedTokens =
            LevelParser.BreakColumnsIntoSimplifiedTokens(directColumns, game);

        return Tuple.Create(directColumns, derivedTokens);
    }

    List<string> simplifiedStart =
        LevelParser.BreakColumnsIntoSimplifiedTokens(startInput, game);
    List<string> simplified = NGramGenerator.GenerateBestAttempt(
        simpleCompiled,
        simplifiedStart,
        size,
        maxAttempts);

    // The classifier lambda captures a local copy of the game field.
    Games localGame = game;
    List<string> columns = NGramGenerator.GenerateRestricted(
        compiled,
        startInput,
        simplified,
        inColumn => LevelParser.ClassifyColumn(inColumn, localGame),
        includeStart: false);

    return Tuple.Create(columns, simplified);
}
/// <summary>
/// Compiles every grammar of <paramref name="hierarchicalGram"/> and gives
/// each one a normalised weight. Walking from the last grammar index down
/// to the first, the running weight is multiplied by
/// <c>CompiledMemoryUpdate</c> at each step; afterwards all weights are
/// divided by their sum.
/// </summary>
/// <param name="hierarchicalGram">The uncompiled hierarchical gram.</param>
public CompiledHierarchicalNGram(HierarchicalNGram hierarchicalGram)
{
    n = hierarchicalGram.N;
    weightMultiplier = hierarchicalGram.CompiledMemoryUpdate;
    CompiledGrammars = new ICompiledGram[hierarchicalGram.N];
    Weights = new float[hierarchicalGram.N];

    float runningWeight = 1f;
    float total = 0f;
    int level = hierarchicalGram.N - 1;
    while (level >= 0)
    {
        CompiledGrammars[level] = hierarchicalGram.Grammars[level].Compile();

        runningWeight *= weightMultiplier;
        total += runningWeight;
        Weights[level] = runningWeight;

        --level;
    }

    // Normalise by the accumulated total.
    for (int index = 0; index < Weights.Length; ++index)
    {
        Weights[index] /= total;
    }
}
// Checks the content and the ordering of the guesses returned by a compiled
// back-off 3-gram for three different priors.
public void TestGetGuesses()
{
    BackOffNGram gram = new BackOffNGram(3, 0.6f);
    gram.AddData(new string[] { "b", "a" }, "a");
    gram.AddData(new string[] { "b", "a" }, "c");
    gram.AddData(new string[] { "b", "c" }, "c");
    gram.AddData(new string[] { "a", "a" }, "d");
    ICompiledGram compiledGram = gram.Compile();

    // Prior (b, a): all four tokens are expected, including "b", which was
    // only ever supplied as part of a prior.
    string[] guesses = compiledGram.GetGuesses(new string[] { "b", "a" });
    Assert.AreEqual(4, guesses.Length);
    Assert.IsTrue(guesses.Contains("a"));
    Assert.IsTrue(guesses.Contains("b"));
    Assert.IsTrue(guesses.Contains("c"));
    Assert.IsTrue(guesses.Contains("d"));
    // "a" and "c" (the two continuations recorded for this prior) must fill
    // the first two slots, in either order.
    Assert.IsTrue(guesses[0] == "a" || guesses[0] == "c");
    Assert.IsTrue(guesses[1] == "a" || guesses[1] == "c");
    Assert.AreNotEqual(guesses[0], guesses[1]);
    // "b" and "d" must fill the last two slots, in either order.
    Assert.IsTrue(guesses[2] == "b" || guesses[2] == "d");
    Assert.IsTrue(guesses[3] == "b" || guesses[3] == "d");
    Assert.AreNotEqual(guesses[2], guesses[3]);

    // Prior (b, c): "c" must come first, the remaining three follow.
    guesses = compiledGram.GetGuesses(new string[] { "b", "c" });
    Assert.AreEqual(4, guesses.Length);
    Assert.IsTrue(guesses.Contains("a"));
    Assert.IsTrue(guesses.Contains("b"));
    Assert.IsTrue(guesses.Contains("c"));
    Assert.IsTrue(guesses.Contains("d"));
    Assert.AreEqual("c", guesses[0]);
    Assert.IsTrue(guesses[1] == "a" || guesses[1] == "b" || guesses[1] == "d");
    Assert.IsTrue(guesses[2] == "a" || guesses[2] == "b" || guesses[2] == "d");
    Assert.IsTrue(guesses[3] == "a" || guesses[3] == "b" || guesses[3] == "d");
    // NOTE(review): slots 1 and 3 are never compared with each other directly;
    // distinctness is only implied by the Contains checks above.
    Assert.AreNotEqual(guesses[1], guesses[2]);
    Assert.AreNotEqual(guesses[2], guesses[3]);

    // Prior (a, a): "d" must come first, the remaining three follow.
    guesses = compiledGram.GetGuesses(new string[] { "a", "a" });
    Assert.AreEqual(4, guesses.Length);
    Assert.IsTrue(guesses.Contains("a"));
    Assert.IsTrue(guesses.Contains("b"));
    Assert.IsTrue(guesses.Contains("c"));
    Assert.IsTrue(guesses.Contains("d"));
    Assert.AreEqual("d", guesses[0]);
    Assert.IsTrue(guesses[1] == "a" || guesses[1] == "b" || guesses[1] == "c");
    Assert.IsTrue(guesses[2] == "a" || guesses[2] == "b" || guesses[2] == "c");
    Assert.IsTrue(guesses[3] == "a" || guesses[3] == "b" || guesses[3] == "c");
    Assert.AreNotEqual(guesses[1], guesses[2]);
    Assert.AreNotEqual(guesses[2], guesses[3]);
}
/// <summary>
/// Builds (and caches) a compiled unigram for the given prior. Sub-grammars
/// are visited from index <c>size - 1</c> down to 0; the first sub-grammar
/// that supplies a value for a token decides that token's entry, scaled by
/// that sub-grammar's weight.
/// </summary>
/// <param name="inData">Prior tokens; must be non-null with length <c>size - 1</c>.</param>
/// <param name="size">Number of sub-grammars to consult.</param>
/// <returns>The compiled unigram built from the weighted values.</returns>
private ICompiledGram GetCompiledUniGram(string[] inData, int size)
{
    Assert.IsNotNull(inData);
    Assert.AreEqual(size - 1, inData.Length);

    // The cache is keyed on array identity, not on array contents.
    // NOTE(review): a caller that mutates the same array in place and calls
    // again would receive the previously cached gram - confirm callers always
    // pass a fresh array.
    if (cachedInData == inData)
    {
        return cachedGram;
    }

    UniGram gram = new UniGram();
    int length = inData.Length;

    for (int i = size - 1; i >= 0; --i)
    {
        ICompiledGram level = CompiledGrammars[i];
        int contextLength = level.GetN() - 1;

        // Scoped per level so that a level without a match contributes nothing
        // instead of re-scanning the values of the previous level.
        Dictionary<string, float> values = null;
        if (contextLength == 0)
        {
            values = level.GetValues(null);
        }
        else
        {
            string[] input = new ArraySegment<string>(
                inData,
                length - contextLength,
                contextLength).ToArray();

            if (level.HasNextStep(input))
            {
                values = level.GetValues(input);
            }
        }

        if (values == null)
        {
            continue;
        }

        foreach (KeyValuePair<string, float> kvp in values)
        {
            // Tokens already supplied by an earlier-visited level keep that value.
            if (gram.Grammar.ContainsKey(kvp.Key) == false)
            {
                gram.Grammar.Add(kvp.Key, kvp.Value * Weights[i]);
            }
        }
    }

    cachedGram = gram.Compile();
    cachedInData = inData;
    return cachedGram;
}
// Asserts that a clone reports the same N as the original and that every
// value the original returns for the given key is present, with an equal
// value, in the clone's result.
private void TestValues(ICompiledGram original, ICompiledGram clone, string[] key)
{
    Assert.IsNotNull(original);
    Assert.IsNotNull(clone);
    Assert.AreEqual(original.GetN(), clone.GetN());

    Dictionary<string, float> fromClone = clone.GetValues(key);
    Dictionary<string, float> fromOriginal = original.GetValues(key);

    foreach (KeyValuePair<string, float> entry in fromOriginal)
    {
        float cloned;
        bool present = fromClone.TryGetValue(entry.Key, out cloned);

        Assert.IsTrue(present);
        Assert.AreEqual(entry.Value, cloned);
    }
}
// A cloned compiled unigram must report the same N and the same values as
// the instance it was cloned from.
public void TestClone()
{
    UniGram source = new UniGram();
    foreach (string token in new string[] { "a", "a", "b" })
    {
        source.AddData(null, token);
    }

    CompiledUniGram original = new CompiledUniGram(source.Grammar);
    ICompiledGram copy = original.Clone();

    Assert.AreEqual(original.GetN(), copy.GetN());
    Assert.AreEqual(original.GetValues(null), copy.GetValues(null));
}
// Calls Get on the compiled gram up to `iterations` times and reports
// whether `expected` was returned at least once; stops at the first hit.
private bool FoundValue(ICompiledGram compiledGram, string expected, string[] input, int iterations)
{
    for (int attempt = 0; attempt < iterations; ++attempt)
    {
        string produced = compiledGram.Get(input);
        if (produced == expected)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Creates a copy of this gram: every compiled sub-grammar is cloned and
/// the weights are copied into a new array.
/// </summary>
/// <returns>A new <c>CompiledBackOffNGram</c> built from the copies.</returns>
public ICompiledGram Clone()
{
    int count = CompiledGrammars.Length;
    float[] weightsCopy = new float[count];
    ICompiledGram[] grammarsCopy = new ICompiledGram[count];

    int index = 0;
    foreach (ICompiledGram grammar in CompiledGrammars)
    {
        grammarsCopy[index] = grammar.Clone();
        weightsCopy[index] = Weights[index];
        ++index;
    }

    return new CompiledBackOffNGram(weightsCopy, grammarsCopy, weightMultiplier);
}
/// <summary>
/// Depth-first search for a sequence of <paramref name="size"/> tokens.
/// Guesses are tried in the order the gram returns them; the first branch
/// that can be completed is returned.
/// </summary>
/// <param name="gram">Compiled gram supplying guesses.</param>
/// <param name="prior">Current context; it is cloned before being extended.</param>
/// <param name="size">Number of tokens still to generate.</param>
/// <param name="index">Position in <paramref name="acceptedTypes"/> for the next token.</param>
/// <param name="acceptedTypes">Required classification per position; only read when a classifier is given.</param>
/// <param name="classifier">Maps a token to its type, or null to accept every guess.</param>
/// <returns>The generated tokens, or null when no branch could be completed.</returns>
private static List<string> GenerateTree(
    ICompiledGram gram,
    CircularQueue<string> prior,
    int size,
    int index,
    List<string> acceptedTypes,
    Func<string, string> classifier)
{
    foreach (string candidate in gram.GetGuesses(prior.ToArray()))
    {
        bool wrongType =
            classifier != null &&
            !classifier(candidate).Equals(acceptedTypes[index]);
        if (wrongType)
        {
            continue;
        }

        CircularQueue<string> extended = prior.Clone();
        extended.Add(candidate);

        // Last token needed: this branch is complete.
        if (size <= 1)
        {
            return new List<string>() { candidate };
        }

        if (!gram.HasNextStep(extended.ToArray()))
        {
            continue;
        }

        List<string> tail = GenerateTree(
            gram,
            extended,
            size - 1,
            index + 1,
            acceptedTypes,
            classifier);

        if (tail == null)
        {
            // Dead end below this candidate; try the next guess.
            continue;
        }

        tail.Insert(0, candidate);
        return tail;
    }

    return null;
}
// Compares the sequence probability reported by a compiled hierarchical
// 3-gram with a hand-computed weighted sum of a unigram, a bigram and a
// trigram built separately in this test.
public void TestSequenceProbability()
{
    HierarchicalNGram gram = new HierarchicalNGram(3, 0.9f);

    // Without any training data the probability is expected to be 0.
    Assert.AreEqual(
        0,
        gram.Compile().SequenceProbability(new string[] { "a", "b", "c" }));

    // Expected weights: successive powers of 0.9, normalised by their sum.
    // The largest share goes to the trigram, the smallest to the unigram.
    double denominator = 0.9 + 0.81 + 0.729;
    double triWeight = 0.9 / denominator;
    double biWeight = 0.81 / denominator;
    double uniweight = 0.729 / denominator;

    // Reference grams built by hand.
    // NOTE(review): presumably these mirror what the hierarchical gram stores
    // internally after the single AddData call below - confirm against
    // HierarchicalNGram.AddData.
    UniGram gram1 = new UniGram();
    gram1.AddData(null, "a");
    gram1.AddData(null, "a");
    gram1.AddData(null, "b");
    NGram gram2 = new NGram(2);
    gram2.AddData(new string[] { "a" }, "a");
    gram2.AddData(new string[] { "a" }, "b");
    NGram gram3 = new NGram(3);
    gram3.AddData(new string[] { "a", "a" }, "b");

    // The gram under test only receives the trigram sample.
    gram.AddData(new string[] { "a", "a" }, "b");

    ICompiledGram c1 = gram1.Compile();
    ICompiledGram c2 = gram2.Compile();
    ICompiledGram c3 = gram3.Compile();
    string[] input = new string[] { "a", "a", "b" };
    double expected = uniweight * c1.SequenceProbability(input) + biWeight * c2.SequenceProbability(input) + triWeight * c3.SequenceProbability(input);
    double actual = gram.Compile().SequenceProbability(new string[] { "a", "a", "b" });

    // Compared as floats with Unity's approximate equality.
    Assert.IsTrue(
        Mathf.Approximately((float)expected, (float)actual),
        $"Expected {expected} but received {actual}.");
}
// Checks that Get on a compiled hierarchical 3-gram can produce "c", "d" and
// "a" for trained priors as well as for an unseen prior, and that inputs of
// the wrong size (or null) raise an assertion exception.
public void TestGet()
{
    HierarchicalNGram uncompiled = new HierarchicalNGram(3, 0.6f);
    uncompiled.AddData(new string[] { "b", "a" }, "a");
    uncompiled.AddData(new string[] { "b", "a" }, "c");
    uncompiled.AddData(new string[] { "b", "c" }, "c");
    uncompiled.AddData(new string[] { "a", "a" }, "d");
    ICompiledGram compiledGram = uncompiled.Compile();

    string[][] priors =
    {
        new string[] { "b", "a" },
        new string[] { "b", "c" },
        new string[] { "a", "a" },
        new string[] { "z", "d" },
    };
    string[] expectedTokens = { "c", "d", "a" };

    foreach (string[] prior in priors)
    {
        foreach (string expectedToken in expectedTokens)
        {
            // A fresh array is built for every lookup.
            string[] input = new string[] { prior[0], prior[1] };
            Assert.IsTrue(FoundValue(compiledGram, expectedToken, input, 1000));
        }
    }

    string[][] invalidInputs =
    {
        null,
        new string[] { "z" },
        new string[] { "z", "a", "d" },
    };

    foreach (string[] invalid in invalidInputs)
    {
        Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
        {
            compiledGram.Get(invalid);
        });
    }
}
// Exercises GetGuesses on a compiled 3-gram: input validation first, then
// the guesses returned as training data is added step by step.
public void TestGetGuesses()
{
    NGram ngram = new NGram(3);
    ICompiledGram compiledGram = ngram.Compile();

    // Null input and inputs that are not exactly two tokens long must raise
    // an assertion exception.
    Assert.Throws <UnityEngine.Assertions.AssertionException>(() =>
    {
        compiledGram.GetGuesses(null);
    });
    Assert.Throws <UnityEngine.Assertions.AssertionException>(() =>
    {
        compiledGram.GetGuesses(new string[] { "b" });
    });
    Assert.Throws <UnityEngine.Assertions.AssertionException>(() =>
    {
        compiledGram.GetGuesses(new string[] { "b", "c", "d" });
    });

    // A correctly sized prior on an empty gram yields an empty, non-null array.
    string[] res = compiledGram.GetGuesses(new string[] { "b", "c" });
    Assert.IsNotNull(res);
    Assert.AreEqual(0, res.Length);

    // One sample: a single guess. The gram is recompiled after each change.
    ngram.AddData(new string[] { "a", "b" }, "c");
    string[] guesses = ngram.Compile().GetGuesses(new string[] { "a", "b" });
    Assert.AreEqual(1, guesses.Length);
    Assert.AreEqual("c", guesses[0]);

    // A repeated sample plus a new one: two distinct guesses.
    ngram.AddData(new string[] { "a", "b" }, "c");
    ngram.AddData(new string[] { "a", "b" }, "d");
    guesses = ngram.Compile().GetGuesses(new string[] { "a", "b" });
    Assert.AreEqual(2, guesses.Length);
    Assert.IsTrue(guesses.Contains("c"));
    Assert.IsTrue(guesses.Contains("d"));

    // A different prior only sees its own continuation.
    ngram.AddData(new string[] { "a", "e" }, "e");
    guesses = ngram.Compile().GetGuesses(new string[] { "a", "e" });
    Assert.AreEqual(1, guesses.Length);
    Assert.AreEqual("e", guesses[0]);
}
/// <summary>
/// Generates output whose tokens must follow a caller-supplied sequence of
/// simplified categories. This method cannot implement a generic classifier
/// itself, so it receives a lambda from the user that handles the
/// implementation-specific mapping from a token to its category. The token
/// generated for position i is only accepted when the classifier maps it to
/// <c>acceptedTypes[i]</c>.
///
/// A possible extension would be to make each accepted type a list so that a
/// user can say that any of several types is acceptable at a position.
/// </summary>
/// <param name="grammar">Compiled gram used for generation; must not be null.</param>
/// <param name="startInput">
/// Seed tokens; must not be null and must hold more than
/// <c>grammar.GetN() - 1</c> entries.
/// </param>
/// <param name="acceptedTypes">
/// Required category per output position; must not be null or empty.
/// Generation starts at position <c>startInput.Count</c>.
/// </param>
/// <param name="classifier">Maps a token to its category; must not be null.</param>
/// <param name="includeStart">When true the result is prefixed with <paramref name="startInput"/>.</param>
/// <returns>The generated tokens, or null when no matching sequence was found.</returns>
public static List<string> GenerateRestricted(
    ICompiledGram grammar,
    List<string> startInput,
    List<string> acceptedTypes,
    Func<string, string> classifier,
    bool includeStart = true)
{
    Assert.IsNotNull(acceptedTypes);
    Assert.IsTrue(acceptedTypes.Count > 0);
    Assert.IsNotNull(classifier);
    Assert.IsNotNull(grammar);
    Assert.IsNotNull(startInput);

    int contextLength = grammar.GetN() - 1;
    Assert.IsTrue(startInput.Count > contextLength);

    CircularQueue<string> context = new CircularQueue<string>(contextLength);
    context.AddRange(startInput);

    int tokensToGenerate = acceptedTypes.Count - startInput.Count;
    List<string> generated = GenerateTree(
        grammar,
        context,
        tokensToGenerate,
        startInput.Count,
        acceptedTypes,
        classifier);

    // Nothing to prepend to: either the start is not wanted or generation failed.
    if (!includeStart || generated == null)
    {
        return generated;
    }

    List<string> withStart = new List<string>(startInput);
    withStart.AddRange(generated);
    return withStart;
}
// @NOTE: this is used for simulation. Do not use outside of it.
/// <summary>
/// Tries up to <paramref name="maxAttempts"/> times to generate
/// <paramref name="size"/> tokens by repeatedly calling <c>Get</c> on the
/// gram. Returns the first attempt that reaches the full size, otherwise
/// the longest attempt seen.
/// </summary>
/// <param name="gram">Compiled gram used to produce tokens.</param>
/// <param name="start">Seed tokens loaded into the context at the start of every attempt.</param>
/// <param name="size">Number of tokens wanted per attempt.</param>
/// <param name="maxAttempts">Maximum number of attempts.</param>
/// <returns>
/// The generated tokens (the start is not included), or null when
/// <paramref name="maxAttempts"/> is not positive.
/// </returns>
public static List<string> GenerateBestAttempt(
    ICompiledGram gram,
    List<string> start,
    int size,
    int maxAttempts)
{
    List<string> best = null;

    for (int attempt = 0; attempt < maxAttempts; ++attempt)
    {
        CircularQueue<string> prior = new CircularQueue<string>(gram.GetN() - 1);
        prior.AddRange(start);

        List<string> output = new List<string>();

        // Every attempt gets its own budget. Counting down the parameter itself
        // let a failed attempt shrink the target for all attempts after it.
        int remaining = size;
        while (remaining > 0 && gram.HasNextStep(prior.ToArray()))
        {
            string nextToken = gram.Get(prior.ToArray());
            output.Add(nextToken);
            prior.Add(nextToken);
            --remaining;
        }

        // Full length reached: no need to try again.
        if (remaining <= 0)
        {
            return output;
        }

        if (best == null || output.Count > best.Count)
        {
            best = output;
        }
    }

    return best;
}
/// <summary>
/// Returns the first guess of the first sub-grammar that has any guess for
/// the prior, visiting <c>CompiledGrammars</c> from the last index down to
/// the first.
/// </summary>
/// <param name="inData">Prior tokens; must be non-null with length <c>n - 1</c>.</param>
/// <returns>The selected token, or null when no sub-grammar has a guess.</returns>
public string Get(string[] inData)
{
    Assert.IsNotNull(inData);
    Assert.AreEqual(n - 1, inData.Length);

    int inputLength = inData.Length;
    for (int level = CompiledGrammars.Length; level-- > 0;)
    {
        ICompiledGram current = CompiledGrammars[level];

        // Named differently from the field n to avoid shadowing it.
        int contextLength = current.GetN() - 1;
        string[] context = new ArraySegment<string>(
            inData,
            inputLength - contextLength,
            contextLength).ToArray();

        string[] candidates = current.GetGuesses(context);
        if (candidates.Length > 0)
        {
            return candidates[0];
        }
    }

    return null;
}
/// <summary>
/// This will generate a list of output where it assumes that there is a
/// final state that can be reached. Like how a mario level always ends
/// with a flag. If there is no final state this will be slow and fail.
/// </summary>
/// <param name="grammar">Compiled gram used for generation.</param>
/// <param name="startInput">Seed tokens loaded into the generation context.</param>
/// <param name="size">Number of tokens to generate.</param>
/// <param name="includeStart">When true the result is prefixed with <paramref name="startInput"/>.</param>
/// <returns>
/// The generated tokens, or null when no sequence of the requested size
/// could be found.
/// </returns>
public static List<string> Generate(
    ICompiledGram grammar,
    List<string> startInput,
    int size,
    bool includeStart = true)
{
    CircularQueue<string> queue = new CircularQueue<string>(grammar.GetN() - 1);
    queue.AddRange(startInput);

    // No classifier: every guess is acceptable.
    List<string> outputLevel = GenerateTree(grammar, queue, size, 0, null, null);

    // A failed search yields null. Report that to the caller instead of passing
    // null to AddRange, matching what GenerateRestricted does on failure.
    if (outputLevel == null || !includeStart)
    {
        return outputLevel;
    }

    List<string> level = new List<string>(startInput);
    level.AddRange(outputLevel);
    return level;
}
// Checks GetValues of a compiled hierarchical 2-gram: input validation, the
// empty gram, and the weighted values after one and after three samples.
public void TestGetValues()
{
    // Null and inputs that are not exactly one token long must be rejected.
    string[][] invalidInputs =
    {
        null,
        new string[] { "a", "b" },
        new string[] { "a", "b", "c" },
    };
    string[] allTokens = { "a", "b", "c", "d" };

    HierarchicalNGram source = new HierarchicalNGram(2, 0.6f);
    ICompiledGram compiled = source.Compile();

    foreach (string[] invalid in invalidInputs)
    {
        Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
        {
            compiled.GetValues(invalid);
        });
    }

    Assert.AreEqual(0, compiled.GetValues(new string[] { "a" }).Keys.Count);

    // Test with one entry a->c
    source.AddData(new string[] { "a" }, "c");
    compiled = source.Compile();

    foreach (string[] invalid in invalidInputs)
    {
        Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
        {
            compiled.GetValues(invalid);
        });
    }

    float unigramWeight = (0.36f / (0.6f + 0.36f));
    float bigramWeight = (0.6f / (0.6f + 0.36f));

    Dictionary<string, float> result = compiled.GetValues(new string[] { "z" });
    Assert.AreEqual(2, result.Keys.Count);
    Assert.AreEqual(0.5f, result["a"]);
    Assert.AreEqual(0.5f, result["c"]);

    result = compiled.GetValues(new string[] { "a" });
    Assert.AreEqual(2, result.Keys.Count);
    Assert.AreEqual(0.5 * unigramWeight, result["a"]);
    Assert.AreEqual(bigramWeight + 0.5 * unigramWeight, result["c"]);

    // test with three entries a->c, b->c & b->d
    source.AddData(new string[] { "b" }, "c");
    source.AddData(new string[] { "b" }, "d");
    compiled = source.Compile();

    // in this case we haven't seen the prior "z" so we only have the
    // unigram to work with (counts: a=1, b=2, c=2, d=1)
    result = compiled.GetValues(new string[] { "z" });
    Assert.AreEqual(4, result.Keys.Count);
    foreach (string token in allTokens)
    {
        Assert.IsTrue(result.ContainsKey(token));
    }

    Assert.AreEqual(1 / 6f, result["a"]);
    Assert.AreEqual(2 / 6f, result["b"]);
    Assert.AreEqual(2 / 6f, result["c"]);
    Assert.AreEqual(1 / 6f, result["d"]);

    // we have the prior a, so we are working with it and the unigram
    result = compiled.GetValues(new string[] { "a" });
    Assert.AreEqual(4, result.Keys.Count);
    foreach (string token in allTokens)
    {
        Assert.IsTrue(result.ContainsKey(token));
    }

    Assert.AreEqual(1 / 6f * unigramWeight, result["a"]);                 // only unigram
    Assert.AreEqual(2 / 6f * unigramWeight, result["b"]);                 // only unigram
    Assert.AreEqual(bigramWeight + 2 / 6f * unigramWeight, result["c"]);  // uni-gram and bi-gram
    Assert.AreEqual(1 / 6f * unigramWeight, result["d"]);                 // only unigram

    // we have the prior b, so we are working with it and the unigram
    result = compiled.GetValues(new string[] { "b" });
    Assert.AreEqual(4, result.Keys.Count);
    foreach (string token in allTokens)
    {
        Assert.IsTrue(result.ContainsKey(token));
    }

    Assert.AreEqual(1 / 6f * unigramWeight, result["a"]);                        // only unigram
    Assert.AreEqual(2 / 6f * unigramWeight, result["b"]);                        // only unigram
    Assert.AreEqual(0.5f * bigramWeight + 2 / 6f * unigramWeight, result["c"]);  // uni-gram and bi-gram
    Assert.AreEqual(0.5f * bigramWeight + 1 / 6f * unigramWeight, result["d"]);  // uni-gram and bi-gram
}
// @NOTE: this is used for simulation. Do not use outside of it.
/// <summary>
/// Tries up to <paramref name="maxAttempts"/> times to generate one token
/// per entry of <paramref name="acceptedTypes"/>, where each token must be
/// classified as the type required at its position. Returns the first
/// attempt that covers every position, otherwise the longest attempt seen.
/// </summary>
/// <param name="gram">Compiled gram supplying guesses.</param>
/// <param name="start">Seed tokens loaded into the context at the start of every attempt.</param>
/// <param name="acceptedTypes">Required type per output position.</param>
/// <param name="classifier">Maps a token to its type.</param>
/// <param name="maxAttempts">Maximum number of attempts.</param>
/// <returns>
/// The generated tokens (the start is not included), or null when
/// <paramref name="maxAttempts"/> is not positive.
/// </returns>
public static List<string> GenerateBestRestrictedAttempt(
    ICompiledGram gram,
    List<string> start,
    List<string> acceptedTypes,
    Func<string, string> classifier,
    int maxAttempts)
{
    List<string> best = null;

    for (int attempt = 0; attempt < maxAttempts; ++attempt)
    {
        CircularQueue<string> prior = new CircularQueue<string>(gram.GetN() - 1);
        prior.AddRange(start);

        List<string> output = new List<string>();
        int typeIndex = 0;

        while (typeIndex < acceptedTypes.Count && gram.HasNextStep(prior.ToArray()))
        {
            string[] candidates = gram.GetGuesses(prior.ToArray());
            string wantedType = acceptedTypes[typeIndex];

            // Every candidate is classified and the LAST match is kept.
            // NOTE(review): GenerateTree takes the first matching guess instead -
            // confirm that keeping the last one here is intended.
            string chosen = null;
            foreach (string candidate in candidates)
            {
                if (classifier.Invoke(candidate).Equals(wantedType))
                {
                    chosen = candidate;
                }
            }

            if (chosen == null)
            {
                break;
            }

            output.Add(chosen);
            prior.Add(chosen);
            typeIndex += 1;
        }

        // Every position was filled: stop trying.
        if (output.Count == acceptedTypes.Count)
        {
            return output;
        }

        if (best == null || output.Count > best.Count)
        {
            best = output;
        }
    }

    return best;
}
/// <summary>
/// Generates the level columns (optionally guided by the simplified gram),
/// appends a finishing column and builds the level on the black board.
/// </summary>
/// <returns>True when columns were generated and the level was built.</returns>
private bool GenerateLevel()
{
    if (blackBoard.ConfigUI.Config.UsingSimplifiedNGram)
    {
        ICompiledGram simpleCompiled = simpleGrammar.Compile();

        // The same randomly chosen source level seeds both generations.
        int levelIndex = levelColumns.RandomIndex();
        int startLength = blackBoard.ConfigUI.Config.N + 7;
        List<string> simpleStart =
            simplifiedLevelColumns[levelIndex].GetRange(0, startLength);
        blackBoard.LevelColumns =
            levelColumns[levelIndex].GetRange(0, startLength);

        blackBoard.SimpleLevelColumns = NGramGenerator.Generate(
            simpleCompiled,
            simpleStart,
            blackBoard.ConfigUI.Config.LevelSize);

        ICompiledGram fullCompiled = grammar.Compile();
        blackBoard.LevelColumns = NGramGenerator.GenerateRestricted(
            fullCompiled,
            blackBoard.LevelColumns,
            blackBoard.SimpleLevelColumns,
            inColumn => LevelParser.ClassifyColumn(
                inColumn,
                blackBoard.ConfigUI.Config.Game));
    }
    else
    {
        ICompiledGram fullCompiled = grammar.Compile();
        List<string> start = levelColumns.RandomValue().GetRange(
            0,
            blackBoard.ConfigUI.Config.N + 7);

        blackBoard.LevelColumns = NGramGenerator.Generate(
            fullCompiled,
            start,
            blackBoard.ConfigUI.Config.LevelSize);
    }

    if (blackBoard.LevelColumns == null)
    {
        return false;
    }

    List<List<char>> level = new List<List<char>>();
    foreach (string column in blackBoard.LevelColumns)
    {
        level.Add(new List<char>(column));
    }

    // add ending column to the level: one finish tile per row of the first column
    char flagChar = Tile.playerOneFinish.ToChar();
    level.Add(new List<char>(new string(flagChar, level[0].Count)));

    blackBoard.LevelInfo = LevelLoader.Build(
        level,
        blackBoard.Tilemap,
        blackBoard.CameraFollow);

    return true;
}
/// <summary>
/// Runs <c>numSimulations</c> generations. Each generated level is written to
/// "&lt;keyDirectory&gt;/&lt;i&gt;.txt" and one CSV row per level (sequence probability,
/// its inverse, JSON positions, leniency) is appended to "&lt;keyDirectory&gt;.txt".
/// </summary>
public void Execute()
{
    string keyDirectory = Path.Combine(basePath, $"{extension}_{gram.GetN()}");

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(keyDirectory);

    // using blocks close both writers even when a simulation throws.
    using (StreamWriter writer = File.CreateText($"{keyDirectory}.txt"))
    {
        writer.WriteLine("Sequence_Probability,Perplexity,Linearity_JSON_Positions,Leniency");

        ICompiledGram compiled = gram.Compile();
        ICompiledGram simpleCompiled = simplifiedGram?.Compile();

        for (int i = 0; i < numSimulations; ++i)
        {
            // `new DateTime()` is 0001-01-01 00:00:00.000, so its Millisecond is
            // always 0 and every simulation received the same seed. Derive the
            // seed from the current clock; adding i keeps seeds distinct when
            // two iterations fall within one clock tick.
            UtilityRandom.SetSeed(unchecked((int)DateTime.UtcNow.Ticks + i));

            Tuple<List<string>, List<string>> tuple;
            if (!(gram is NGram))
            {
                tuple = GetColumnsBestGuess(compiled, simpleCompiled);
            }
            else
            {
                tuple = GetColumnsSemiGuaranteed(compiled, simpleCompiled);
            }

            // NOTE(review): GetColumnsSemiGuaranteed takes its columns from
            // NGramGenerator.GenerateRestricted, which can return null; a failed
            // generation would throw on ToArray below - confirm desired handling.
            List<string> columns = tuple.Item1;
            List<string> simplified = tuple.Item2;

            string[] columnsArray = columns.ToArray();
            List<int> positions = LevelAnalyzer.Positions(columnsArray);
            JsonArray jsonPositions = new JsonArray();
            foreach (int pos in positions)
            {
                jsonPositions.Add(pos);
            }

            double sequenceProbability = compiled.SequenceProbability(columnsArray);
            writer.Write($"{sequenceProbability},");

            // Avoid dividing by zero for impossible sequences.
            if (sequenceProbability == 0)
            {
                writer.Write("0,");
            }
            else
            {
                writer.Write($"{1d/sequenceProbability},");
            }

            writer.Write($"{jsonPositions},");
            writer.Write($"{LevelAnalyzer.Leniency(simplified.ToArray())}\n");

            using (StreamWriter levelWriter = File.CreateText(Path.Combine(keyDirectory, $"{i}.txt")))
            {
                levelWriter.Write(string.Join("\n", columnsArray));
            }

            // Periodic flush so partial results survive an aborted run.
            if (i % 200 == 0)
            {
                writer.Flush();
            }
        }
    }
}
// Checks GetValues of a compiled back-off 2-gram: input validation, the
// empty gram, and the values after one and after three samples.
public void TestGetValues()
{
    // Null and inputs that are not exactly one token long must be rejected.
    string[][] invalidInputs =
    {
        null,
        new string[] { "a", "b" },
        new string[] { "a", "b", "c" },
    };

    BackOffNGram source = new BackOffNGram(2, 0.6f);
    ICompiledGram compiled = source.Compile();

    foreach (string[] invalid in invalidInputs)
    {
        Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
        {
            compiled.GetValues(invalid);
        });
    }

    Assert.AreEqual(0, compiled.GetValues(new string[] { "a" }).Keys.Count);

    // Test with one entry a->c
    source.AddData(new string[] { "a" }, "c");
    compiled = source.Compile();

    foreach (string[] invalid in invalidInputs)
    {
        Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
        {
            compiled.GetValues(invalid);
        });
    }

    float unigramWeight = (0.36f / (0.6f + 0.36f));
    float bigramWeight = (0.6f / (0.6f + 0.36f));

    Dictionary<string, float> result = compiled.GetValues(new string[] { "z" });
    Assert.AreEqual(2, result.Keys.Count);
    Assert.AreEqual(0.5f, result["a"]);
    Assert.AreEqual(0.5f, result["c"]);

    result = compiled.GetValues(new string[] { "a" });
    Assert.AreEqual(2, result.Keys.Count);

    float wanted = 0.5f * unigramWeight / (0.5f * unigramWeight + bigramWeight);
    Assert.IsTrue(
        Mathf.Approximately(wanted, result["a"]),
        $"Expected {wanted} but received {result["a"]}.");

    wanted = bigramWeight / (0.5f * unigramWeight + bigramWeight);
    Assert.IsTrue(
        Mathf.Approximately(wanted, result["c"]),
        $"Expected {wanted} but received {result["c"]}.");

    // test with three entries a->c, b->c & b->d
    source.AddData(new string[] { "b" }, "c");
    source.AddData(new string[] { "b" }, "d");
    compiled = source.Compile();

    // in this case we haven't seen the prior "z" so we only have the
    // unigram to work with (counts: a=1, b=2, c=2, d=1)
    result = compiled.GetValues(new string[] { "z" });
    Assert.AreEqual(4, result.Keys.Count);
    foreach (string token in new string[] { "a", "b", "c", "d" })
    {
        Assert.IsTrue(result.ContainsKey(token));
    }

    Assert.AreEqual(1 / 6f, result["a"]);
    Assert.AreEqual(2 / 6f, result["b"]);
    Assert.AreEqual(2 / 6f, result["c"]);
    Assert.AreEqual(1 / 6f, result["d"]);
}
// Samples Get on a compiled 2-gram and checks that only trained
// continuations are produced, that each of them shows up at least once, and
// that invalid or unseen priors raise an assertion exception.
public void TestGet()
{
    // { prior, continuation } pairs, added in this exact order.
    string[][] samples =
    {
        new string[] { "b", "c" },
        new string[] { "b", "c" },
        new string[] { "b", "a" },
        new string[] { "b", "c" },
        new string[] { "a", "e" },
        new string[] { "a", "e" },
        new string[] { "a", "z" },
        new string[] { "a", "z" },
    };

    NGram ngram = new NGram(2);
    foreach (string[] sample in samples)
    {
        ngram.AddData(new string[] { sample[0] }, sample[1]);
    }

    ICompiledGram compiled = ngram.Compile();

    bool sawA = false;
    bool sawC = false;
    bool sawE = false;
    bool sawZ = false;

    for (int round = 0; round < 500; ++round)
    {
        string produced = compiled.Get(new string[] { "b" });
        if (produced == "a")
        {
            sawA = true;
        }
        else if (produced == "c")
        {
            sawC = true;
        }
        else
        {
            Assert.Fail($"{produced} should not be possible.");
        }
    }

    for (int round = 0; round < 500; ++round)
    {
        string produced = compiled.Get(new string[] { "a" });
        if (produced == "e")
        {
            sawE = true;
        }
        else if (produced == "z")
        {
            sawZ = true;
        }
        else
        {
            Assert.Fail($"{produced} should not be possible.");
        }
    }

    // in theory, we could potentially not see one of these but it is very unlikely.
    Assert.IsTrue(sawA);
    Assert.IsTrue(sawC);
    Assert.IsTrue(sawE);
    Assert.IsTrue(sawZ);

    // Null, wrongly sized priors and the unseen prior "z" must all be rejected.
    string[][] rejectedInputs =
    {
        null,
        new string[] { "b", "c" },
        new string[] { "b", "c", "d" },
        new string[] { "z" },
    };

    foreach (string[] rejected in rejectedInputs)
    {
        Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
        {
            compiled.Get(rejected);
        });
    }
}