Example #1
        public static void Parse(string[] args)
        {
            if (args == null) throw new ArgumentNullException(nameof(args));
            if (args.Length < 2)
            {
                ShowUsage();
                return;
            }

            CorpusPath = args[0];
            switch (args[1].ToUpperInvariant())
            {
                case "LINEARINTERPOLATION":
                    LanguageModel = new LinearInterpolationModel();
                    break;
                case "BACKOFF":
                    LanguageModel = new ExampleBackOffModelWithDiscounting();
                    break;
                default:
                    // Unrecognized model name: show usage and stop parsing.
                    ShowUsage();
                    return;
            }

            if (args.Length > 2)
            {
                // The three split percentages must be supplied together.
                if (args.Length < 5)
                {
                    ShowUsage();
                    return;
                }

                TrainingPercentage = double.Parse(args[2]);
                ValidatePercentage = double.Parse(args[3]);
                TestPercentage = double.Parse(args[4]);
            }
            else
            {
                TrainingPercentage = 80;
                ValidatePercentage = 10;
                TestPercentage = 10;
            }

            if (args.Length > 5)
            {
                // The optional "Optimize" flag must be followed by an integer value.
                if (args.Length < 7)
                {
                    ShowUsage();
                    return;
                }

                if (string.Equals(args[5], "Optimize", StringComparison.OrdinalIgnoreCase))
                {
                    Optimize = true;
                    OptimzeValue = int.Parse(args[6]);
                }
            }
        }
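
For reference, the sketch below shows how this parser might be invoked. It assumes the method lives on a static ArgumentParser class exposing the properties assigned above (CorpusPath, LanguageModel, the three percentages, Optimize, OptimzeValue), as the commented-out call in Example #2 suggests; the corpus path and values are purely illustrative.

        // A minimal usage sketch (hypothetical path and values), assuming the static
        // ArgumentParser class referenced in Example #2 below. Runs inside Main or a test method.
        string[] sampleArgs =
        {
            @"C:\corpora\brown.txt",   // args[0]: path to the corpus file
            "LinearInterpolation",     // args[1]: model name, matched case-insensitively
            "80", "10", "10",          // args[2..4]: train / validation / test percentages
            "Optimize", "1000"         // args[5..6]: optional optimizer flag and iteration count
        };

        ArgumentParser.Parse(sampleArgs);

        Console.WriteLine(ArgumentParser.CorpusPath);          // C:\corpora\brown.txt
        Console.WriteLine(ArgumentParser.TrainingPercentage);  // 80
        Console.WriteLine(ArgumentParser.Optimize);            // True
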
Example #2
File: Program.cs Project: elendil326/UWNLP
        static void Main(string[] args)
        {
            //ArgumentParser.Parse(args);
            //if (ArgumentParser.ShowedUsage) return;

            string brownCorpus = @"C:\Users\azend\Documents\GitHubVisualStudio\UWNLP\Assignment1\LanguageModels.UnitTests\TestData\brown.txt";
            string gutenberg = @"C:\Users\azend\Documents\GitHubVisualStudio\UWNLP\Assignment1\LanguageModels.UnitTests\TestData\gutenberg.txt";
            string reuters = @"C:\Users\azend\Documents\GitHubVisualStudio\UWNLP\Assignment1\LanguageModels.UnitTests\TestData\reuters.txt";

            // Analyze with Linear interpolation
            Console.WriteLine("First analyzing lambas");
            LinearInterpolationModel linearModel = new LinearInterpolationModel();
            Analyze(brownCorpus, linearModel);
            linearModel = new LinearInterpolationModel();
            Analyze(gutenberg, linearModel);
            linearModel = new LinearInterpolationModel();
            Analyze(reuters, linearModel);

            // Analyze with back-off
            Console.WriteLine("Analyze back-off");
            ExampleBackOffModelWithDiscounting backOff = new ExampleBackOffModelWithDiscounting();
            Analyze(brownCorpus, backOff);
            backOff = new ExampleBackOffModelWithDiscounting();
            Analyze(gutenberg, backOff);
            backOff = new ExampleBackOffModelWithDiscounting();
            Analyze(reuters, backOff);

            // Optimize lambda
            List<List<string>> splitBrownCorpus = SplitCorpus(brownCorpus, 80, 10, 10);
            Console.WriteLine("Running optimizer ({0} iterations) with validation set for Brown.", 1000);
            linearModel = new LinearInterpolationModel();
            List<DoubleCombination> optimumCombinations = Optimizer.GetOptimumCombination(1000, linearModel, splitBrownCorpus[1]);

            Console.WriteLine("These are the optimum combinations:");
            foreach (DoubleCombination combination in optimumCombinations)
            {
                Console.WriteLine(combination.ToString());
            }

            // Optimize lambda
            List<List<string>> splitGutenbergsCorpus = SplitCorpus(gutenberg, 80, 10, 10);
            Console.WriteLine("Running optimizer ({0} iterations) with validation set for Gutenberg.", 1000);
            linearModel = new LinearInterpolationModel();
            optimumCombinations = Optimizer.GetOptimumCombination(1000, linearModel, splitGutenbergsCorpus[1]);

            Console.WriteLine("These are the optimum combinations:");
            foreach (DoubleCombination combination in optimumCombinations)
            {
                Console.WriteLine(combination.ToString());
            }

            // Optimize lambda
            List<List<string>> splitReutersCorpus = SplitCorpus(reuters, 80, 10, 10);
            Console.WriteLine("Running optimizer ({0} iterations) with validation set for Reuters.", 1000);
            linearModel = new LinearInterpolationModel();
            optimumCombinations = Optimizer.GetOptimumCombination(1000, linearModel, splitReutersCorpus[1]);

            Console.WriteLine("These are the optimum combinations:");
            foreach (DoubleCombination combination in optimumCombinations)
            {
                Console.WriteLine(combination.ToString());
            }

            // Train and test on different corpora
            Console.WriteLine("Train on Reuters, test on Brown");
            linearModel = new LinearInterpolationModel();
            Analyze(reuters, brownCorpus, linearModel);
            IEnumerable<KeyValuePair<NGram, int>> commonTrigrams = linearModel.NGramCounter.NGramCountDictionaries[3].OrderByDescending(kvp => kvp.Value).Take(100);
            Console.WriteLine("------------ Common 100 Trigrams -------");
            foreach (KeyValuePair<NGram, int> kvp in commonTrigrams)
            {
                Console.WriteLine("{0}\t\t{1}", kvp.Key, kvp.Value);
            }
            Console.WriteLine();

            // Train and test on different corpora
            Console.WriteLine("Train on Brown, test on Gutenberg");
            linearModel = new LinearInterpolationModel();
            Analyze(brownCorpus, gutenberg, linearModel);
            commonTrigrams = linearModel.NGramCounter.NGramCountDictionaries[3].OrderByDescending(kvp => kvp.Value).Take(100);
            Console.WriteLine("------------ Common 100 Trigrams -------");
            foreach (KeyValuePair<NGram, int> kvp in commonTrigrams)
            {
                Console.WriteLine("{0}\t\t{1}", kvp.Key, kvp.Value);
            }
            Console.WriteLine();

            // Train and test on different corpora
            Console.WriteLine("Train on Gutenberg, test on Reuters");
            linearModel = new LinearInterpolationModel();
            Analyze(gutenberg, reuters, linearModel);
            commonTrigrams = linearModel.NGramCounter.NGramCountDictionaries[3].OrderByDescending(kvp => kvp.Value).Take(100);
            Console.WriteLine("------------ Common 100 Trigrams -------");
            foreach (KeyValuePair<NGram, int> kvp in commonTrigrams)
            {
                Console.WriteLine("{0}\t\t{1}", kvp.Key, kvp.Value);
            }
            Console.WriteLine();
        }
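
The trigram report above is copied verbatim three times. A small helper along the lines of the sketch below would remove that duplication; the name PrintTopTrigrams is hypothetical, but the body uses only members already visible in this listing (NGramCounter.NGramCountDictionaries, NGram).

        // Hypothetical helper (name is illustrative) factoring out the repeated trigram
        // report; relies only on the LinearInterpolationModel members used in Main above.
        static void PrintTopTrigrams(LinearInterpolationModel model, int count = 100)
        {
            IEnumerable<KeyValuePair<NGram, int>> commonTrigrams =
                model.NGramCounter.NGramCountDictionaries[3]
                     .OrderByDescending(kvp => kvp.Value)
                     .Take(count);

            Console.WriteLine("------------ {0} Most Common Trigrams ------------", count);
            foreach (KeyValuePair<NGram, int> kvp in commonTrigrams)
            {
                Console.WriteLine("{0}\t\t{1}", kvp.Key, kvp.Value);
            }
            Console.WriteLine();
        }

Each repeated block in Main would then collapse to a single call, e.g. PrintTopTrigrams(linearModel);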