Beispiel #1
0
        /// <summary>
        /// Builds one weighted unigram out of the compiled grammars at indices
        /// <paramref name="size"/> - 1 down to 0 and returns its compiled form.
        ///
        /// Each grammar is queried with the trailing (GetN() - 1) entries of
        /// <paramref name="inData"/>; a grammar whose GetN() is 1 is queried
        /// with null. A returned key is added, scaled by Weights[i], only when
        /// no previously visited grammar has already supplied that key.
        ///
        /// The result is cached. The cache is hit only when the very same array
        /// instance is passed again, because the arrays are compared by
        /// reference and not by content.
        /// </summary>
        /// <param name="inData">Input sequence; asserted non-null with exactly size - 1 entries.</param>
        /// <param name="size">Number of compiled grammars to consult.</param>
        /// <returns>The compiled weighted unigram.</returns>
        private ICompiledGram GetCompiledUniGram(string[] inData, int size)
        {
            Assert.IsNotNull(inData);
            Assert.AreEqual(size - 1, inData.Length);

            // Reference comparison: only the same array instance hits the cache.
            if (cachedInData == inData)
            {
                return(cachedGram);
            }

            UniGram gram   = new UniGram();
            int     length = inData.Length;

            for (int i = size - 1; i >= 0; --i)
            {
                ICompiledGram compiled      = CompiledGrammars[i];
                int           contextLength = compiled.GetN() - 1;

                // Scoped to this iteration so a grammar without a next step
                // contributes nothing, instead of re-walking the values left
                // over from the previously visited grammar.
                Dictionary <string, float> values = null;

                if (contextLength == 0)
                {
                    // unigram special case: no input is required
                    values = compiled.GetValues(null);
                }
                else
                {
                    // n-gram generic case: feed the last contextLength entries
                    ArraySegment <string> segment = new ArraySegment <string>(inData, length - contextLength, contextLength);
                    string[] input = segment.ToArray();
                    if (compiled.HasNextStep(input))
                    {
                        values = compiled.GetValues(input);
                    }
                }

                if (values == null)
                {
                    continue;
                }

                foreach (KeyValuePair <string, float> kvp in values)
                {
                    // Grammars visited earlier take precedence for a shared key.
                    if (gram.Grammar.ContainsKey(kvp.Key) == false)
                    {
                        gram.Grammar.Add(kvp.Key, kvp.Value * Weights[i]);
                    }
                }
            }

            cachedGram   = gram.Compile();
            cachedInData = inData;

            return(cachedGram);
        }
        /// <summary>
        /// Combines every compiled grammar into one weighted unigram for the
        /// given input. Each grammar receives the trailing (GetN() - 1) entries
        /// of <paramref name="inData"/>: a unigram receives no input (null), a
        /// bi-gram the last entry only, and the largest n-gram the entire
        /// input. Every value returned by GetValues is multiplied by
        /// Weights[GetN() - 1] and passed to AddData on the result. The
        /// un-compiled UniGram is returned; most likely the caller will
        /// compile it immediately.
        ///
        /// The result is cached, but the cache is hit only when the very same
        /// array instance is passed again (reference comparison). If speed is a
        /// concern and callers pass fresh arrays, results could instead be kept
        /// in a Dictionary keyed by the input joined into a comma separated
        /// string.
        /// </summary>
        /// <param name="inData">Input sequence; asserted non-null with exactly n - 1 entries.</param>
        /// <returns>The weighted, un-compiled unigram.</returns>
        private UniGram GetUniGram(string[] inData)
        {
            Assert.IsNotNull(inData);
            Assert.AreEqual(n - 1, inData.Length);

            // Reference comparison: only the same array instance hits the cache.
            if (cachedInData == inData)
            {
                return(cachedUniGram);
            }

            UniGram grammar = new UniGram();
            int     length  = inData.Length;

            foreach (ICompiledGram gram in CompiledGrammars)
            {
                Dictionary <string, float> grammarValues = null;

                // Named so it does not shadow the member `n` asserted on above.
                int contextLength = gram.GetN() - 1;

                if (contextLength == 0)
                {
                    // unigram special case
                    grammarValues = gram.GetValues(null);
                }
                else
                {
                    // n-gram generic case
                    ArraySegment <string> segment = new ArraySegment <string>(inData, length - contextLength, contextLength);
                    string[] input = segment.ToArray();
                    if (gram.HasNextStep(input))
                    {
                        grammarValues = gram.GetValues(input);
                    }
                }

                if (grammarValues != null)
                {
                    foreach (KeyValuePair <string, float> kvp in grammarValues)
                    {
                        grammar.AddData(kvp.Key, kvp.Value * Weights[contextLength]);
                    }
                }
            }

            cachedUniGram = grammar;
            cachedInData  = inData;

            return(grammar);
        }
Beispiel #3
0
        /// <summary>
        /// Merges another grammar into this one. The argument is asserted to
        /// report the same GetN() as this instance's N and is then cast to
        /// NGram. For every key of the other grammar, an empty UniGram is
        /// created here when the key is absent, after which the other
        /// grammar's entry for that key is merged in via AddGrammar.
        /// </summary>
        /// <param name="gram">The grammar to merge in; must be an NGram.</param>
        public void AddGrammar(IGram gram)
        {
            Assert.IsTrue(gram.GetN() == N);
            NGram other = (NGram)gram;

            foreach (string prior in other.Grammar.Keys)
            {
                // Make sure there is a target to merge into.
                if (!Grammar.ContainsKey(prior))
                {
                    Grammar[prior] = new UniGram();
                }

                Grammar[prior].AddGrammar(other.Grammar[prior]);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Records <paramref name="outData"/> under the key formed by joining
        /// <paramref name="inData"/> with commas. A new UniGram is created for
        /// a key that has not been seen before.
        /// </summary>
        /// <param name="inData">Input sequence; asserted to hold exactly N - 1 entries.</param>
        /// <param name="outData">The value forwarded to the UniGram's AddData.</param>
        public void AddData(string[] inData, string outData)
        {
            Assert.IsTrue(inData.Length == N - 1);
            string key = string.Join(",", inData);

            if (!Grammar.ContainsKey(key))
            {
                // First time this key is seen: populate a fresh UniGram, then store it.
                UniGram fresh = new UniGram();
                fresh.AddData(null, outData);
                Grammar[key] = fresh;
                return;
            }

            Grammar[key].AddData(null, outData);
        }
Beispiel #5
0
        /// <summary>
        /// Creates a hierarchical n-gram holding <paramref name="n"/> grammars:
        /// a UniGram at index 0 and, at every later index, an NGram constructed
        /// with (index + 1).
        /// </summary>
        /// <param name="n">Stored in N; asserted to be greater than 1.</param>
        /// <param name="compiledMemoryUpdate">
        /// Stored in CompiledMemoryUpdate; asserted to lie strictly between 0 and 1.
        /// </param>
        public HierarchicalNGram(int n, float compiledMemoryUpdate)
        {
            Assert.IsTrue(compiledMemoryUpdate > 0);
            Assert.IsTrue(compiledMemoryUpdate < 1);
            Assert.IsTrue(n > 1);

            CompiledMemoryUpdate = compiledMemoryUpdate;
            N = n;

            Grammars = new IGram[n];
            Grammars[0] = new UniGram();

            // Slot `index` holds the grammar constructed with index + 1.
            for (int index = 1; index < n; ++index)
            {
                Grammars[index] = new NGram(index + 1);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Creates an empty grammar for the requested <paramref name="n"/>.
        /// </summary>
        /// <param name="n">Asserted to be at least 1.</param>
        /// <returns>A UniGram when n is 1; otherwise an NGram constructed with n.</returns>
        public static IGram InitGrammar(int n)
        {
            Assert.IsTrue(n >= 1);

            if (n == 1)
            {
                return new UniGram();
            }

            return new NGram(n);
        }
Beispiel #7
0
        /// <summary>
        /// Creates an empty back-off grammar for the requested <paramref name="n"/>.
        /// </summary>
        /// <param name="n">Asserted to be at least 1.</param>
        /// <param name="weightMultiplier">
        /// Forwarded to the BackOffNGram constructor; unused when n is 1.
        /// </param>
        /// <returns>
        /// A UniGram when n is 1; otherwise a BackOffNGram constructed with
        /// n and weightMultiplier.
        /// </returns>
        public static IGram InitBackOffNGram(int n, float weightMultiplier)
        {
            Assert.IsTrue(n >= 1);

            // The cast gives both branches of the conditional a common type.
            return n == 1
                ? (IGram)new UniGram()
                : new BackOffNGram(n, weightMultiplier);
        }
Beispiel #8
0
        /// <summary>
        /// Merges another unigram into this one. The argument is asserted to
        /// report GetN() == 1 and is then cast to UniGram. Values for keys
        /// already present here are summed; keys that are absent are copied over.
        /// </summary>
        /// <param name="gram">The grammar to merge in; must be a UniGram.</param>
        public void AddGrammar(IGram gram)
        {
            Assert.IsTrue(gram.GetN() == 1);
            UniGram other = (UniGram)gram;

            foreach (KeyValuePair <string, float> entry in other.Grammar)
            {
                string token  = entry.Key;
                float  amount = entry.Value;

                if (Grammar.ContainsKey(token))
                {
                    Grammar[token] += amount;
                }
                else
                {
                    Grammar[token] = amount;
                }
            }
        }