Ejemplo n.º 1
0
        /// <summary>
        ///     Creates and returns a new NGramDictionary from a string.<br />
        ///     This string must be produced by the ToString() method of a NGramDictionary object.
        /// </summary>
        /// <remarks>
        ///     The first line is read as "minimum size, tab, maximum size". Every following line is read as
        ///     "whitespace-separated tokens, tab, frequency". Blank lines (for example the empty entry left
        ///     behind by a trailing line break) are skipped. When the same n-gram appears on several lines,
        ///     only the first frequency is kept.
        /// </remarks>
        /// <param name="str">A string produced by the ToString() method of a NGramDictionary object.</param>
        /// <returns>A new NGramDictionary object</returns>
        public static NGramDictionary DeserializeFrom(string str)
        {
            string[] lines = str.Split('\n');

            // Split the header once and read both bounds from it.
            string[] header = lines[0].Split('\t');
            int minNGram = Int32.Parse(header[0]);
            int maxNGram = Int32.Parse(header[1]);

            IDictionary<NGram, int> nGrams = new Dictionary<NGram, int>();
            var extractor = new NGramExtractor(minNGram, maxNGram);

            foreach (string line in lines.Skip(1))
            {
                // A blank line has no tab-separated frequency column; indexing row[1] would throw.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                string[] row = line.Split('\t');

                // Split(null) splits the token column on any white-space character.
                var nGram = new NGram(row[0].Split(null));
                int freq = Int32.Parse(row[1]);

                // First occurrence wins; later duplicates are ignored.
                if (!nGrams.ContainsKey(nGram))
                {
                    nGrams.Add(nGram, freq);
                }
            }

            return new NGramDictionary(extractor, nGrams);
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Looks up the frequency of the n-gram made of the given tokens.
        /// </summary>
        /// <param name="nGramTokens">Tokens of the n-gram, in order.</param>
        /// <returns>The value returned by the NGram-based GetFrequency overload for these tokens.</returns>
        public int GetFrequency(params string[] nGramTokens)
        {
            // Validate is called first, before the NGram is constructed.
            Validate(nGramTokens);

            return GetFrequency(new NGram(nGramTokens));
        }
Ejemplo n.º 3
0
 /// <summary>
 ///     Returns the frequency of the n-gram.
 /// </summary>
 /// <param name="nGram">The n-gram to look up.</param>
 /// <returns>The frequency stored for <paramref name="nGram" />, or 0 when it is not stored.</returns>
 public int GetFrequency(NGram nGram)
 {
     int frequency;

     // One lookup instead of ContainsKey followed by the indexer.
     return nGrams.TryGetValue(nGram, out frequency) ? frequency : 0;
 }
Ejemplo n.º 4
0
        /// <summary>
        ///     Builds one n-gram via GetNGram for every window size from minNGramSize up to the smaller of
        ///     maxNGramSize and the number of tokens from <paramref name="index" /> to the end of the list.
        /// </summary>
        /// <param name="tokens">The token list passed on to GetNGram.</param>
        /// <param name="index">The position passed on to GetNGram.</param>
        /// <returns>The n-grams in ascending window-size order; empty when no window size fits.</returns>
        private IEnumerable <NGram> GetNGrams(IList <string> tokens, int index)
        {
            var collected = new List<NGram>();

            int remaining   = tokens.Count - index;
            int largestSize = Math.Min(remaining, maxNGramSize);

            int size = minNGramSize;
            while (size <= largestSize)
            {
                collected.Add(GetNGram(tokens, index, size));
                size++;
            }

            return collected;
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Divides the frequency of <paramref name="nominatorNGram" /> by the frequency of
        ///     <paramref name="denominatorNGram" />, both taken from nGramDictionary.
        /// </summary>
        /// <param name="denominatorNGram">N-gram whose frequency is the divisor.</param>
        /// <param name="nominatorNGram">N-gram whose frequency is the dividend.</param>
        /// <returns>The ratio of the two frequencies, or 0 when the divisor frequency is 0.</returns>
        public double GetMLE(NGram denominatorNGram, NGram nominatorNGram)
        {
            int numeratorCount   = nGramDictionary.GetFrequency(nominatorNGram);
            int denominatorCount = nGramDictionary.GetFrequency(denominatorNGram);

            // Floating-point division; a zero divisor short-circuits to 0 instead.
            return denominatorCount != 0
                ? (double)numeratorCount / denominatorCount
                : 0.0;
        }
Ejemplo n.º 6
0
        /// <summary>
        ///     Returns the base-10 logarithm of the sentence probability, computed as the sum of
        ///     Math.Log10(GetMLE(...)) over consecutive pairs of extracted n-grams.
        ///     When maxNGramSize is 1 the call is delegated to GetSentenceProbabilityForUnigrams.
        /// </summary>
        /// <remarks>
        ///     GetMLE returns 0 when the frequency of its first argument is 0; Math.Log10(0) is negative
        ///     infinity, so a single such pair makes the whole result negative infinity.
        /// </remarks>
        /// <param name="tokens">Tokens of the sentence; the list itself is copied before further processing.</param>
        /// <returns>The summed log10 values (a logarithm, not a raw probability).</returns>
        public double GetSentenceProbability(IList <string> tokens)
        {
            if (maxNGramSize == 1)
            {
                return GetSentenceProbabilityForUnigrams(tokens);
            }

            // AddStartStopSymbols receives a copy, not the caller's list.
            List<string> tokenList = tokens.ToList();
            AddStartStopSymbols(tokenList);

            var ext = new NGramExtractor(maxNGramSize - 1, maxNGramSize);
            IList<NGram> nGrams = ext.ExtractAsList(tokenList);

            // NOTE(review): presumably drops a trailing (n-1)-gram that has no matching n-gram,
            // leaving an even number of entries - confirm against NGramExtractor.ExtractAsList.
            nGrams.RemoveAt(nGrams.Count - 1);

            double logP = 0;

            // Entries are consumed in pairs: nGrams[i] is passed as the denominator n-gram and
            // nGrams[i + 1] as the nominator n-gram (assumes ExtractAsList alternates them - TODO confirm).
            for (int i = 0; i < nGrams.Count; i += 2)
            {
                logP += Math.Log10(GetMLE(nGrams[i], nGrams[i + 1]));
            }

            return logP;
        }
Ejemplo n.º 7
0
 /// <summary>
 ///     Divides the frequency of <paramref name="nGram" /> in nGramDictionary by tokenCount.
 /// </summary>
 /// <param name="nGram">The n-gram whose frequency is looked up.</param>
 /// <returns>The frequency divided by tokenCount, as a floating-point value.</returns>
 public double GetUnigramMLE(NGram nGram)
 {
     double frequency = nGramDictionary.GetFrequency(nGram);
     double total     = (double)tokenCount;

     return frequency / total;
 }
Ejemplo n.º 8
0
 /// <summary>
 ///     Compares this n-gram with <paramref name="other" /> by comparing their Tokens sequences
 ///     element by element, in order.
 /// </summary>
 /// <param name="other">The n-gram to compare with.</param>
 /// <returns>true when both Tokens sequences are equal; otherwise false.</returns>
 private bool Equals(NGram other)
 {
     var ownTokens   = Tokens;
     var otherTokens = other.Tokens;

     return ownTokens.SequenceEqual(otherTokens);
 }
Ejemplo n.º 9
-1
        /// <summary>
        ///     Creates and returns a new NGramDictionary from a string.<br />
        ///     This string must be produced by the ToString() method of a NGramDictionary object.
        /// </summary>
        /// <remarks>
        ///     Line one is parsed as the minimum and maximum n-gram size, separated by a tab. Each later
        ///     line is parsed as whitespace-separated tokens, a tab, and the frequency. Lines that are empty
        ///     or contain only white space are ignored, so a trailing line break does not cause a failure.
        ///     If an n-gram is listed more than once, the first frequency is the one that is stored.
        /// </remarks>
        /// <param name="str">A string produced by the ToString() method of a NGramDictionary object.</param>
        /// <returns>A new NGramDictionary object</returns>
        public static NGramDictionary DeserializeFrom(string str)
        {
            string[] lines = str.Split('\n');

            // Parse both size bounds from a single split of the header line.
            string[] sizes = lines[0].Split('\t');
            int minNGram = Int32.Parse(sizes[0]);
            int maxNGram = Int32.Parse(sizes[1]);

            IDictionary<NGram, int> nGrams = new Dictionary<NGram, int>();
            var extractor = new NGramExtractor(minNGram, maxNGram);

            foreach (string line in lines.Skip(1))
            {
                // Without this guard an empty line yields a one-element row and row[1] throws.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                string[] row = line.Split('\t');

                // Passing null splits on white-space characters.
                var nGram = new NGram(row[0].Split(null));
                int freq = Int32.Parse(row[1]);

                // Keep the first frequency seen for an n-gram.
                if (!nGrams.ContainsKey(nGram))
                {
                    nGrams.Add(nGram, freq);
                }
            }

            return new NGramDictionary(extractor, nGrams);
        }
Ejemplo n.º 10
-1
        /// <summary>
        ///     Checks NGram equality and hash codes for both construction styles (token list and
        ///     individual token arguments): same tokens in the same order must be equal with equal
        ///     hash codes, the same tokens in a different order must not be.
        /// </summary>
        public void TestEquals()
        {
            // List constructor, identical order.
            var listSameA = new NGram(new List<string> {"one", "two", "three"});
            var listSameB = new NGram(new List<string> {"one", "two", "three"});
            Assert.AreEqual(listSameA, listSameB);
            Assert.AreEqual(listSameA.GetHashCode(), listSameB.GetHashCode());

            // List constructor, first two tokens swapped.
            var listSwapped = new NGram(new List<string> {"two", "one", "three"});
            var listOrdered = new NGram(new List<string> {"one", "two", "three"});
            Assert.AreNotEqual(listSwapped, listOrdered);
            Assert.AreNotEqual(listSwapped.GetHashCode(), listOrdered.GetHashCode());

            // Token-argument constructor, identical order.
            var argsSameA = new NGram("one", "two", "three");
            var argsSameB = new NGram("one", "two", "three");
            Assert.AreEqual(argsSameA, argsSameB);
            Assert.AreEqual(argsSameA.GetHashCode(), argsSameB.GetHashCode());

            // Token-argument constructor, first two tokens swapped.
            var argsSwapped = new NGram("two", "one", "three");
            var argsOrdered = new NGram("one", "two", "three");
            Assert.AreNotEqual(argsSwapped, argsOrdered);
            Assert.AreNotEqual(argsSwapped.GetHashCode(), argsOrdered.GetHashCode());
        }
Ejemplo n.º 11
-1
        /// <summary>
        ///     Returns the frequency of <paramref name="nominatorNGram" /> divided by the frequency of
        ///     <paramref name="denominatorNGram" />; both frequencies come from nGramDictionary.
        /// </summary>
        /// <param name="denominatorNGram">N-gram supplying the divisor frequency.</param>
        /// <param name="nominatorNGram">N-gram supplying the dividend frequency.</param>
        /// <returns>The frequency ratio, or 0 when the divisor frequency is 0.</returns>
        public double GetMLE(NGram denominatorNGram, NGram nominatorNGram)
        {
            int hits  = nGramDictionary.GetFrequency(nominatorNGram);
            int total = nGramDictionary.GetFrequency(denominatorNGram);

            if (total != 0)
            {
                // Cast forces floating-point instead of integer division.
                return hits / (double) total;
            }

            return 0;
        }
Ejemplo n.º 12
-1
 /// <summary>
 ///     Returns the frequency of the n-gram.
 /// </summary>
 /// <param name="nGram">The n-gram whose frequency is requested.</param>
 /// <returns>The stored frequency of <paramref name="nGram" />, or 0 when there is no entry for it.</returns>
 public int GetFrequency(NGram nGram)
 {
     int stored;

     // TryGetValue performs the presence check and the read in a single lookup.
     if (nGrams.TryGetValue(nGram, out stored))
     {
         return stored;
     }

     return 0;
 }
Ejemplo n.º 13
-1
 /// <summary>
 ///     Returns the frequency of the n-gram formed by the given tokens.
 /// </summary>
 /// <param name="nGramTokens">Tokens of the n-gram, in order.</param>
 /// <returns>The result of the NGram-based GetFrequency overload for an n-gram built from these tokens.</returns>
 public int GetFrequency(params string[] nGramTokens)
 {
     // Validate runs before the tokens are wrapped in an NGram.
     Validate(nGramTokens);

     NGram lookupKey = new NGram(nGramTokens);
     int frequency = GetFrequency(lookupKey);

     return frequency;
 }
Ejemplo n.º 14
-1
 /// <summary>
 ///     Tests whether this n-gram and <paramref name="other" /> hold equal Tokens sequences
 ///     (same elements in the same order).
 /// </summary>
 /// <param name="other">The n-gram compared against this one.</param>
 /// <returns>true when the Tokens sequences match; otherwise false.</returns>
 private bool Equals(NGram other)
 {
     var mine   = Tokens;
     var theirs = other.Tokens;
     bool sameSequence = mine.SequenceEqual(theirs);

     return sameSequence;
 }
Ejemplo n.º 15
-1
 /// <summary>
 ///     Returns the frequency of <paramref name="nGram" /> from nGramDictionary divided by tokenCount.
 /// </summary>
 /// <param name="nGram">The n-gram to look up.</param>
 /// <returns>The floating-point quotient of the frequency and tokenCount.</returns>
 public double GetUnigramMLE(NGram nGram)
 {
     double occurrences = nGramDictionary.GetFrequency(nGram);
     double allTokens   = (double) tokenCount;

     return occurrences / allTokens;
 }