/// <summary>
/// Builds a single compiled grammar by combining the values of the compiled
/// grammars at indices <c>size - 1</c> down to <c>0</c>. Each grammar is
/// queried with the trailing tokens of <paramref name="inData"/> that it
/// needs (none for a grammar whose GetN() is 1). A key that has already been
/// added by a grammar visited earlier is never overwritten, so grammars at
/// higher indices take precedence. Every value is multiplied by the weight
/// stored at the same index as the grammar it came from.
/// </summary>
/// <param name="inData">Preceding tokens; must hold exactly <c>size - 1</c> entries.</param>
/// <param name="size">Number of compiled grammars to visit.</param>
/// <returns>
/// The compiled result. It is cached and handed back unchanged when the same
/// array instance is passed again.
/// </returns>
private ICompiledGram GetCompiledUniGram(string[] inData, int size)
{
    Assert.IsNotNull(inData);
    Assert.AreEqual(size - 1, inData.Length);

    // The cache is keyed on the array reference, not on its contents.
    if (cachedInData == inData)
    {
        return cachedGram;
    }

    UniGram gram = new UniGram();
    int length = inData.Length;

    for (int i = size - 1; i >= 0; --i)
    {
        ICompiledGram compiled = CompiledGrammars[i];
        int contextLength = compiled.GetN() - 1;

        // Scoped to the iteration so a grammar without a next step does not
        // re-walk the values left over from the previous grammar.
        Dictionary<string, float> values = null;

        if (contextLength == 0)
        {
            values = compiled.GetValues(null);
        }
        else
        {
            // Copy the trailing tokens once and reuse the copy for both calls.
            ArraySegment<string> segment = new ArraySegment<string>(inData, length - contextLength, contextLength);
            string[] input = segment.ToArray();
            if (compiled.HasNextStep(input))
            {
                values = compiled.GetValues(input);
            }
        }

        if (values == null)
        {
            continue;
        }

        foreach (KeyValuePair<string, float> kvp in values)
        {
            // First writer wins: keys supplied by an earlier grammar are kept.
            if (!gram.Grammar.ContainsKey(kvp.Key))
            {
                gram.Grammar.Add(kvp.Key, kvp.Value * Weights[i]);
            }
        }
    }

    cachedGram = gram.Compile();
    cachedInData = inData;
    return cachedGram;
}
/// <summary>
/// For some set of input columns, this function goes through every compiled
/// grammar and feeds it the number of columns it needs. For a unigram that
/// means no columns. For a bi-gram that means the last column only. For the
/// largest n-gram that means the entire input. For each grammar, GetValues
/// is called and every returned value is multiplied by the pre-calculated
/// weight before being added to the resulting UniGram. Most likely the
/// result will be compiled immediately by the caller.
///
/// The most recent result is cached against the input array reference. If
/// more speed is needed, results could instead be held in a Dictionary keyed
/// by the input converted to a comma separated string.
/// </summary>
/// <param name="inData">Preceding columns; must hold exactly <c>n - 1</c> entries.</param>
/// <returns>
/// The weighted UniGram. The same instance is returned again when the same
/// array instance is passed on the next call.
/// </returns>
private UniGram GetUniGram(string[] inData)
{
    Assert.IsNotNull(inData);
    Assert.AreEqual(n - 1, inData.Length);

    // The cache is keyed on the array reference, not on its contents.
    if (cachedInData == inData)
    {
        return cachedUniGram;
    }

    UniGram grammar = new UniGram();
    int length = inData.Length;

    foreach (ICompiledGram gram in CompiledGrammars)
    {
        Dictionary<string, float> grammarValues = null;

        // Named differently from the member n that is asserted on above.
        int contextLength = gram.GetN() - 1;

        if (contextLength == 0)
        {
            // unigram special case
            grammarValues = gram.GetValues(null);
        }
        else
        {
            // n-gram generic case: copy the trailing columns once and reuse
            // the copy for both calls.
            ArraySegment<string> segment = new ArraySegment<string>(inData, length - contextLength, contextLength);
            string[] input = segment.ToArray();
            if (gram.HasNextStep(input))
            {
                grammarValues = gram.GetValues(input);
            }
        }

        if (grammarValues == null)
        {
            continue;
        }

        foreach (KeyValuePair<string, float> kvp in grammarValues)
        {
            grammar.AddData(kvp.Key, kvp.Value * Weights[contextLength]);
        }
    }

    cachedUniGram = grammar;
    cachedInData = inData;
    return grammar;
}
/// <summary>
/// Merges another n-gram of the same order into this one. For every key in
/// the other grammar, an empty UniGram is created here when the key is not
/// yet known, and the other grammar's entry for that key is then added to it.
/// </summary>
/// <param name="gram">Grammar to merge; its GetN() must equal N and it is cast to NGram.</param>
public void AddGrammar(IGram gram)
{
    Assert.IsTrue(gram.GetN() == N);

    NGram other = (NGram)gram;
    foreach (string context in other.Grammar.Keys)
    {
        bool known = Grammar.ContainsKey(context);
        if (!known)
        {
            Grammar[context] = new UniGram();
        }

        Grammar[context].AddGrammar(other.Grammar[context]);
    }
}
/// <summary>
/// Records that <paramref name="outData"/> followed the given input tokens.
/// The input tokens are joined with commas to form the key under which the
/// per-context UniGram is stored.
/// </summary>
/// <param name="inData">Preceding tokens; must hold exactly <c>N - 1</c> entries.</param>
/// <param name="outData">Token observed after <paramref name="inData"/>.</param>
public void AddData(string[] inData, string outData)
{
    Assert.IsTrue(inData.Length == N - 1);

    string key = string.Join(",", inData);

    if (!Grammar.ContainsKey(key))
    {
        // First sighting of this context: fill a new UniGram, then store it.
        UniGram created = new UniGram();
        created.AddData(null, outData);
        Grammar[key] = created;
        return;
    }

    Grammar[key].AddData(null, outData);
}
/// <summary>
/// Creates a hierarchical n-gram holding one grammar per order from 1 to
/// <paramref name="n"/>: a UniGram in slot 0 and an NGram of order
/// <c>k</c> in slot <c>k - 1</c>.
/// </summary>
/// <param name="n">Highest order; must be greater than 1.</param>
/// <param name="compiledMemoryUpdate">Value strictly between 0 and 1, stored in CompiledMemoryUpdate.</param>
public HierarchicalNGram(int n, float compiledMemoryUpdate)
{
    Assert.IsTrue(compiledMemoryUpdate > 0);
    Assert.IsTrue(compiledMemoryUpdate < 1);
    Assert.IsTrue(n > 1);

    CompiledMemoryUpdate = compiledMemoryUpdate;
    N = n;

    Grammars = new IGram[n];
    Grammars[0] = new UniGram();

    // Slot index is one less than the order of the grammar stored in it.
    for (int slot = 1; slot < n; ++slot)
    {
        Grammars[slot] = new NGram(slot + 1);
    }
}
/// <summary>
/// Creates an empty grammar of the requested order: a UniGram when
/// <paramref name="n"/> is 1, otherwise an NGram of order <paramref name="n"/>.
/// </summary>
/// <param name="n">Order of the grammar; must be at least 1.</param>
/// <returns>The newly created grammar.</returns>
public static IGram InitGrammar(int n)
{
    Assert.IsTrue(n >= 1);

    if (n == 1)
    {
        return new UniGram();
    }

    return new NGram(n);
}
/// <summary>
/// Creates an empty back-off grammar of the requested order: a UniGram when
/// <paramref name="n"/> is 1, otherwise a BackOffNGram of order
/// <paramref name="n"/>.
/// </summary>
/// <param name="n">Order of the grammar; must be at least 1.</param>
/// <param name="weightMultiplier">Passed to the BackOffNGram constructor; not used when <paramref name="n"/> is 1.</param>
/// <returns>The newly created grammar.</returns>
public static IGram InitBackOffNGram(int n, float weightMultiplier)
{
    Assert.IsTrue(n >= 1);

    if (n == 1)
    {
        return new UniGram();
    }

    return new BackOffNGram(n, weightMultiplier);
}
/// <summary>
/// Merges another unigram into this one. A key that is not yet present is
/// copied over with the other grammar's value; a key that is already present
/// has the other grammar's value added to its current value.
/// </summary>
/// <param name="gram">Grammar to merge; its GetN() must be 1 and it is cast to UniGram.</param>
public void AddGrammar(IGram gram)
{
    Assert.IsTrue(gram.GetN() == 1);

    UniGram other = (UniGram)gram;
    foreach (KeyValuePair<string, float> entry in other.Grammar)
    {
        // One lookup plus one write instead of ContainsKey + read + write.
        float current;
        if (Grammar.TryGetValue(entry.Key, out current))
        {
            Grammar[entry.Key] = current + entry.Value;
        }
        else
        {
            Grammar[entry.Key] = entry.Value;
        }
    }
}