/// <summary>
/// Runs a train / validate / test pass over a corpus and reports the results on the console.
/// </summary>
/// <remarks>
/// The corpus is partitioned by <c>SplitCorpus(corpusPath, 80, 10, 10)</c>. The first partition
/// is handed to <paramref name="lm"/> for training; perplexity and the unknown-word count are
/// then computed and printed for the second partition and for the third partition.
/// NOTE(review): 80/10/10 presumably are the percentage shares of the three partitions —
/// confirm against <c>SplitCorpus</c>.
/// </remarks>
/// <param name="corpusPath">Path of the corpus; echoed to the console and passed to <c>SplitCorpus</c>.</param>
/// <param name="lm">Language model that is trained on the first partition and then evaluated.</param>
private static void Analyze(string corpusPath, LanguageModel lm) {
    Console.WriteLine("Corpus path: {0}", corpusPath);
    Console.WriteLine();

    // Phase 1: partition the corpus and report the size of each part.
    Console.WriteLine("Splitting corpus.");
    List<List<string>> partitions = SplitCorpus(corpusPath, 80, 10, 10);

    Console.WriteLine("Splitted corpus as follow:");
    List<string> trainingSet = partitions[0];
    Console.WriteLine("Training: {0}", trainingSet.Count);
    List<string> validationSet = partitions[1];
    Console.WriteLine("Validate: {0}", validationSet.Count);
    List<string> testSet = partitions[2];
    Console.WriteLine("Test: {0}", testSet.Count);

    // Phase 2: train the model on the first partition only.
    Console.WriteLine("Training model.");
    lm.Train(trainingSet);

    // Phase 3: evaluate on the second partition, then on the third.
    Console.WriteLine("Calculate Perplextiy with validate set.");
    PerplexityCalculator calculator = new PerplexityCalculator(lm);

    int unknownCount;
    double validationPerplexity = calculator.GetPerplexity(validationSet, out unknownCount);
    Console.WriteLine("Found {0} unknown words:", unknownCount);
    Console.WriteLine("Perplexity of validation is {0}", validationPerplexity);

    // The same out variable is reused; it is overwritten by the second call.
    double testPerplexity = calculator.GetPerplexity(testSet, out unknownCount);
    Console.WriteLine("Found {0} unknown words:", unknownCount);
    Console.WriteLine("Perplexity of testing is {0}", testPerplexity);
}