예제 #1
0
        /// <summary>
        /// Computes the perplexity of <paramref name="model"/> on <paramref name="testCorpus"/>:
        /// 2 raised to the negated sum of log2 word probabilities divided by
        /// <c>testCorpus.TotalWordCount</c>.
        /// </summary>
        /// <param name="model">Model queried as <c>P(w-2, w-1, w)</c> for every word of every sentence.</param>
        /// <param name="trainingCorpus">
        /// Its <c>UniqueWords</c> is the known vocabulary; a test word that is not a key there
        /// is scored as <c>Constants.Unknown</c>.
        /// </param>
        /// <param name="testCorpus">Sentences to score.</param>
        /// <param name="testStats">
        /// Receives the counters gathered while scoring: sentences, words, unknown words,
        /// the set of distinct raw words that were unknown, and the set of distinct tokens
        /// that were actually scored (all unknown words collapse into <c>Constants.Unknown</c> there).
        /// </param>
        /// <returns>The perplexity value.</returns>
        /// <exception cref="InvalidOperationException">
        /// A sentence's log2 probability, or the running corpus total, is infinite
        /// (for example because the model returned a probability of zero).
        /// </exception>
        public static double CalculatePerplexity(ILanguageModel model, CorpusParsingResult trainingCorpus, StringParsingResult testCorpus, out TestStats testStats)
        {
            testStats = new TestStats
            {
                UniqueUnksFound = new HashSet<string>(),
                UniqueWordsFound = new HashSet<string>()
            };

            double logSumOfCorpus = 0;
            for (int k = 0; k < testCorpus.Sentences.Count; k++)
            {
                Sentence sentence = testCorpus.Sentences[k];
                double logOfSentence = 0;

                // Every sentence starts with a context of two start markers.
                string previousWord = Constants.Start;
                string previousPreviousWord = Constants.Start;

                // Remembered only for diagnostics: the first word whose log probability was infinite.
                string offendingWord = null;
                string offendingToken = null;

                testStats.TotalSentencesFound++;
                for (int i = 0; i < sentence.Words.Length; i++)
                {
                    string rawWord = sentence.Words[i];
                    string calculatedWord = rawWord;
                    if (!trainingCorpus.UniqueWords.ContainsKey(rawWord))
                    {
                        // Out-of-vocabulary: score the unknown token, but record the raw word.
                        calculatedWord = Constants.Unknown;
                        testStats.TotalUnksFound++;
                        testStats.UniqueUnksFound.Add(rawWord);
                    }
                    testStats.TotalWordsFound++;
                    testStats.UniqueWordsFound.Add(calculatedWord);

                    double modelP = model.P(previousPreviousWord, previousWord, calculatedWord);
                    double logModelP = Math.Log(modelP, 2);
                    logOfSentence += logModelP;

                    if (offendingWord == null && Double.IsInfinity(logModelP))
                    {
                        offendingWord = rawWord;
                        offendingToken = calculatedWord;
                    }

                    // Slide the two-word context window.
                    previousPreviousWord = previousWord;
                    previousWord = calculatedWord;
                }

                if (Double.IsInfinity(logOfSentence))
                {
                    throw new InvalidOperationException(string.Format(
                        "Sentence {0} has an infinite log2 probability; first offending word: '{1}' (scored as '{2}').",
                        k,
                        offendingWord ?? "<none>",
                        offendingToken ?? "<none>"));
                }
                logSumOfCorpus += logOfSentence;
                if (Double.IsInfinity(logSumOfCorpus))
                {
                    throw new InvalidOperationException(string.Format(
                        "The accumulated corpus log2 probability became infinite at sentence {0}.",
                        k));
                }

                // Progress output is emitted only for Problem1Model, every 100th sentence.
                if (model is Problem1Model && k % 100 == 0)
                {
                    Console.WriteLine("Now at sentence {0}/{1}", k, testCorpus.Sentences.Count);
                }
            }

            // NOTE(review): the divisor is testCorpus.TotalWordCount, not the
            // testStats.TotalWordsFound counted above, and a zero count is not guarded
            // against here - confirm both are intended.
            double averageLog = logSumOfCorpus / testCorpus.TotalWordCount;
            return Math.Pow(2, -averageLog);
        }
예제 #2
0
        /// <summary>
        /// Runs <c>Perplexity.CalculatePerplexity</c> for the given model and corpora,
        /// writes the perplexity and the collected test statistics to the console,
        /// and returns the perplexity.
        /// </summary>
        /// <param name="model">Model to evaluate.</param>
        /// <param name="trainingCorpus">Training corpus forwarded to the perplexity calculation.</param>
        /// <param name="testCorpus">Parsed test corpus forwarded to the perplexity calculation.</param>
        /// <returns>The value returned by <c>Perplexity.CalculatePerplexity</c>.</returns>
        private static double CalculatePerplexityWrapper(ILanguageModel model, CorpusParsingResult trainingCorpus, StringParsingResult testCorpus)
        {
            Perplexity.TestStats stats;
            double result = Perplexity.CalculatePerplexity(model, trainingCorpus, testCorpus, out stats);

            // Report: tab-separated perplexity line first, then the statistics dump.
            Console.WriteLine("{0}\tPerplexity", result);
            Console.WriteLine("Test stats:");
            string statsText = stats.ToString();
            Console.WriteLine(statsText);

            return result;
        }
예제 #3
0
        /// <summary>
        /// Builds a <c>Problem1Model</c> from <paramref name="trainingCorpus"/>, parses either the
        /// development or the evaluation part of <paramref name="evaluatingCorpus"/>, and reports
        /// the model's perplexity on it via <c>CalculatePerplexityWrapper</c>.
        /// </summary>
        /// <param name="trainingCorpus">Corpus the model is constructed from.</param>
        /// <param name="evaluatingCorpus">Corpus supplying the text to evaluate on.</param>
        /// <param name="development">
        /// When true, <c>evaluatingCorpus.Development</c> is parsed; otherwise <c>evaluatingCorpus.Evaluation</c>.
        /// </param>
        /// <returns>The perplexity returned by <c>CalculatePerplexityWrapper</c>.</returns>
        private static double CalculateProblem1ModelPerplexityOnTestCorpus(CorpusParsingResult trainingCorpus, ReadCorpusResult evaluatingCorpus, bool development)
        {
            // Header lines identifying the training and evaluation corpora.
            Console.WriteLine("Calculating Perplexity after training on {0}", trainingCorpus.CorpusName);
            Console.WriteLine("Calculating perplexity for {0}", evaluatingCorpus.CorpusName);
            Console.WriteLine("{0}\tProblem", evaluatingCorpus.CorpusName.ToString());

            ILanguageModel problem1Model = new Problem1Model(trainingCorpus);
            Console.WriteLine("{0}\tModel", problem1Model.GetModelName());

            // Pick which part of the evaluating corpus to score.
            var selectedText = development ? evaluatingCorpus.Development : evaluatingCorpus.Evaluation;
            StringParsingResult parsedTestCorpus = CorpusParsing.ParseString(selectedText);

            double result = CalculatePerplexityWrapper(problem1Model, trainingCorpus, parsedTestCorpus);
            Console.WriteLine("============================================================");
            return result;
        }
예제 #4
0
 /// <summary>
 /// Creates a <c>TrigramModel</c> that keeps the given parsing result in <c>_result</c>.
 /// </summary>
 /// <param name="result">Corpus parsing result to store; must not be null.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="result"/> is null.</exception>
 public TrigramModel(CorpusParsingResult result)
 {
     // Fail fast here rather than with a NullReferenceException on first use of _result.
     if (result == null)
     {
         throw new System.ArgumentNullException("result");
     }

     _result = result;
 }
예제 #5
0
 /// <summary>
 /// Creates a <c>LinearModel</c> that keeps the given parsing result in <c>_result</c>.
 /// </summary>
 /// <param name="result">Corpus parsing result to store; must not be null.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="result"/> is null.</exception>
 public LinearModel(CorpusParsingResult result)
 {
     // Fail fast here rather than with a NullReferenceException on first use of _result.
     if (result == null)
     {
         throw new System.ArgumentNullException("result");
     }

     _result = result;
 }
예제 #6
0
 /// <summary>
 /// Creates a <c>Problem1Model</c> that keeps the given parsing result in <c>_result</c>.
 /// </summary>
 /// <param name="result">Corpus parsing result to store; must not be null.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="result"/> is null.</exception>
 public Problem1Model(CorpusParsingResult result)
 {
     // Fail fast here rather than with a NullReferenceException on first use of _result.
     if (result == null)
     {
         throw new System.ArgumentNullException("result");
     }

     _result = result;
 }