Ejemplo n.º 1
0
        /// <summary>
        /// Splits the corpus at <paramref name="corpusPath"/> into three partitions,
        /// trains <paramref name="lm"/> on the first one and reports the perplexity
        /// measured on the second (validation) and third (test) partitions.
        /// Progress and results are written to the console.
        /// </summary>
        /// <param name="corpusPath">Path handed to <c>SplitCorpus</c>; also echoed to the console.</param>
        /// <param name="lm">Language model that is trained and then evaluated.</param>
        private static void Analyze(string corpusPath, LanguageModel lm)
        {
            Console.WriteLine("Corpus path: {0}", corpusPath);
            Console.WriteLine();

            // NOTE(review): 80/10/10 is presumably the train/validate/test ratio
            // in percent -- confirm against SplitCorpus.
            Console.WriteLine("Splitting corpus.");
            List<List<string>> partitions = SplitCorpus(corpusPath, 80, 10, 10);

            // Each partition is fetched right before it is first reported so the
            // console output produced ahead of any indexing failure stays the same.
            Console.WriteLine("Splitted corpus as follow:");
            List<string> trainingSet = partitions[0];
            Console.WriteLine("Training: {0}", trainingSet.Count);
            List<string> validationSet = partitions[1];
            Console.WriteLine("Validate: {0}", validationSet.Count);
            List<string> testSet = partitions[2];
            Console.WriteLine("Test: {0}", testSet.Count);

            Console.WriteLine("Training model.");
            lm.Train(trainingSet);

            Console.WriteLine("Calculate Perplextiy with validate set.");
            PerplexityCalculator calculator = new PerplexityCalculator(lm);

            // Assigned by GetPerplexity through its out parameter on every call.
            int unknownCount;

            double validationPerplexity = calculator.GetPerplexity(validationSet, out unknownCount);
            Console.WriteLine("Found {0} unknown words:", unknownCount);
            Console.WriteLine("Perplexity of validation is {0}", validationPerplexity);

            double testPerplexity = calculator.GetPerplexity(testSet, out unknownCount);
            Console.WriteLine("Found {0} unknown words:", unknownCount);
            Console.WriteLine("Perplexity of testing is {0}", testPerplexity);
        }