/// <summary>
/// Computes the perplexity of <paramref name="model"/> over <paramref name="testCorpus"/>:
/// 2 raised to the negated sum of log2 trigram probabilities divided by
/// <c>testCorpus.TotalWordCount</c>.
/// </summary>
/// <param name="model">Language model queried through <c>P(previousPrevious, previous, word)</c>.</param>
/// <param name="trainingCorpus">
/// Training corpus; any test word that is not a key of its <c>UniqueWords</c> is scored as
/// <c>Constants.Unknown</c>.
/// </param>
/// <param name="testCorpus">Parsed test sentences to score.</param>
/// <param name="testStats">
/// Receives the counters gathered while scoring: sentences, words, unknown words, the distinct
/// original spellings of unknown words, and the distinct words as they were scored.
/// </param>
/// <returns>The perplexity of the model on the test corpus.</returns>
/// <exception cref="InvalidOperationException">
/// The log-probability of a sentence, or the running corpus total, became infinite.
/// </exception>
public static double CalculatePerplexity(ILanguageModel model, CorpusParsingResult trainingCorpus, StringParsingResult testCorpus, out TestStats testStats)
{
    testStats = new TestStats
    {
        UniqueUnksFound = new HashSet<string>(),
        UniqueWordsFound = new HashSet<string>()
    };

    double logSumOfCorpus = 0;
    for (int k = 0; k < testCorpus.Sentences.Count; k++)
    {
        Sentence sentence = testCorpus.Sentences[k];
        double logOfSentence = 0;

        // Every sentence starts with a trigram history made of two start markers.
        string previousWord = Constants.Start;
        string previousPreviousWord = Constants.Start;
        testStats.TotalSentencesFound++;

        foreach (string originalWord in sentence.Words)
        {
            string calculatedWord = originalWord;
            if (!trainingCorpus.UniqueWords.ContainsKey(originalWord))
            {
                // Words unseen in training are scored as the unknown token; the original
                // spelling is still remembered in the unknown-word statistics.
                calculatedWord = Constants.Unknown;
                testStats.TotalUnksFound++;
                testStats.UniqueUnksFound.Add(originalWord);
            }

            testStats.TotalWordsFound++;
            // Records the word as scored, so all unknown words collapse into one entry.
            testStats.UniqueWordsFound.Add(calculatedWord);

            double modelP = model.P(previousPreviousWord, previousWord, calculatedWord);
            logOfSentence += Math.Log(modelP, 2);

            // Slide the trigram history window forward by one word.
            previousPreviousWord = previousWord;
            previousWord = calculatedWord;
        }

        if (Double.IsInfinity(logOfSentence))
        {
            throw new InvalidOperationException(string.Format(
                "Log-probability of test sentence at index {0} is infinite; the model most likely returned a zero probability for one of its words.",
                k));
        }

        logSumOfCorpus += logOfSentence;
        if (Double.IsInfinity(logSumOfCorpus))
        {
            throw new InvalidOperationException(string.Format(
                "Accumulated corpus log-probability became infinite after adding the sentence at index {0}.",
                k));
        }

        // Progress output is emitted only for Problem1Model, once every 100 sentences.
        if (model is Problem1Model && k % 100 == 0)
        {
            Console.WriteLine("Now at sentence {0}/{1}", k, testCorpus.Sentences.Count);
        }
    }

    double sum = logSumOfCorpus / testCorpus.TotalWordCount;
    return Math.Pow(2, -sum);
}
/// <summary>
/// Calls <c>Perplexity.CalculatePerplexity</c> for the given model and corpora, writes the
/// resulting perplexity and the collected test statistics to the console, and returns the
/// perplexity.
/// </summary>
/// <param name="model">Language model to evaluate.</param>
/// <param name="trainingCorpus">Training corpus passed through to the calculation.</param>
/// <param name="testCorpus">Parsed test corpus passed through to the calculation.</param>
/// <returns>The perplexity reported by <c>Perplexity.CalculatePerplexity</c>.</returns>
private static double CalculatePerplexityWrapper(ILanguageModel model, CorpusParsingResult trainingCorpus, StringParsingResult testCorpus)
{
    Perplexity.TestStats stats;
    double result = Perplexity.CalculatePerplexity(model, trainingCorpus, testCorpus, out stats);

    // Tab-separated value/label line first, then the statistics block.
    Console.WriteLine("{0}\tPerplexity", result);
    Console.WriteLine("Test stats:");
    string statsText = stats.ToString();
    Console.WriteLine(statsText);

    return result;
}
/// <summary>
/// Builds a <c>Problem1Model</c> from <paramref name="trainingCorpus"/>, parses one split of
/// <paramref name="evaluatingCorpus"/> and reports the model's perplexity on it to the console.
/// </summary>
/// <param name="trainingCorpus">Parsed corpus the model is constructed from.</param>
/// <param name="evaluatingCorpus">Corpus whose development or evaluation split is scored.</param>
/// <param name="development">
/// <c>true</c> to score <c>evaluatingCorpus.Development</c>; <c>false</c> to score
/// <c>evaluatingCorpus.Evaluation</c>.
/// </param>
/// <returns>The perplexity returned by <c>CalculatePerplexityWrapper</c>.</returns>
private static double CalculateProblem1ModelPerplexityOnTestCorpus(CorpusParsingResult trainingCorpus, ReadCorpusResult evaluatingCorpus, bool development)
{
    // Header lines identifying the training corpus and the corpus being scored.
    Console.WriteLine("Calculating Perplexity after training on {0}", trainingCorpus.CorpusName);
    Console.WriteLine("Calculating perplexity for {0}", evaluatingCorpus.CorpusName);
    Console.WriteLine("{0}\tProblem", evaluatingCorpus.CorpusName.ToString());

    ILanguageModel problem1Model = new Problem1Model(trainingCorpus);
    var modelName = problem1Model.GetModelName();
    Console.WriteLine("{0}\tModel", modelName);

    // Pick the split to score, then parse it into sentences.
    var selectedSplit = development ? evaluatingCorpus.Development : evaluatingCorpus.Evaluation;
    StringParsingResult parsedSplit = CorpusParsing.ParseString(selectedSplit);

    double result = CalculatePerplexityWrapper(problem1Model, trainingCorpus, parsedSplit);

    Console.WriteLine("============================================================");
    return result;
}
/// <summary>
/// Creates a trigram model over the given parsed corpus.
/// </summary>
/// <param name="result">Corpus parsing result; the reference is stored in <c>_result</c>.</param>
public TrigramModel(CorpusParsingResult result)
{
    this._result = result;
}
/// <summary>
/// Creates a linear model over the given parsed corpus.
/// </summary>
/// <param name="result">Corpus parsing result; the reference is stored in <c>_result</c>.</param>
public LinearModel(CorpusParsingResult result)
{
    this._result = result;
}
/// <summary>
/// Creates a Problem 1 model over the given parsed corpus.
/// </summary>
/// <param name="result">Corpus parsing result; the reference is stored in <c>_result</c>.</param>
public Problem1Model(CorpusParsingResult result)
{
    this._result = result;
}