/// <summary>
/// Parses command-line arguments into the static configuration properties.
/// Expected shape: &lt;corpusPath&gt; &lt;model&gt; [train% validate% test%] [Optimize &lt;value&gt;]
/// where &lt;model&gt; is "LinearInterpolation" or "BackOff" (case-insensitive).
/// Shows usage and returns early on any malformed input.
/// </summary>
/// <param name="args">Raw command-line arguments; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="args"/> is null.</exception>
public static void Parse(string[] args)
{
    if (args == null)
    {
        throw new ArgumentNullException(nameof(args));
    }

    // Corpus path and model name are mandatory.
    if (args.Length < 2)
    {
        ShowUsage();
        return;
    }

    CorpusPath = args[0];

    // args[1] selects the language model implementation.
    switch (args[1].ToUpperInvariant())
    {
        case "LINEARINTERPOLATION":
            LanguageModel = new LinearInterpolationModel();
            break;
        case "BACKOFF":
            LanguageModel = new ExampleBackOffModelWithDiscounting();
            break;
        default:
            // BUG FIX: previously fell through after ShowUsage() and continued
            // with LanguageModel left null.
            ShowUsage();
            return;
    }

    if (args.Length > 2)
    {
        // All three percentages must be supplied together.
        if (args.Length < 5)
        {
            // BUG FIX: previously continued after ShowUsage() and indexed
            // args[3]/args[4] out of range.
            ShowUsage();
            return;
        }

        // Invariant culture so parsing does not depend on the OS locale.
        TrainingPercentage = double.Parse(args[2], System.Globalization.CultureInfo.InvariantCulture);
        ValidatePercentage = double.Parse(args[3], System.Globalization.CultureInfo.InvariantCulture);
        TestPercentage = double.Parse(args[4], System.Globalization.CultureInfo.InvariantCulture);
    }
    else
    {
        // Default 80/10/10 split.
        TrainingPercentage = 80;
        ValidatePercentage = 10;
        TestPercentage = 10;
    }

    // Optional optimizer flag: args[5] == "Optimize", args[6] == value.
    // BUG FIX: the original guard was args.Length > 6, which made the inner
    // args.Length < 7 check unreachable and silently ignored a lone "Optimize"
    // argument with no value.
    if (args.Length > 5)
    {
        if (args.Length < 7)
        {
            ShowUsage();
            return;
        }

        if (string.Equals(args[5], "Optimize", StringComparison.OrdinalIgnoreCase))
        {
            Optimize = true;
            OptimzeValue = int.Parse(args[6], System.Globalization.CultureInfo.InvariantCulture);
        }
    }
}
/// <summary>
/// Entry point: runs the full experiment suite over the three corpora —
/// per-corpus analysis with both models, lambda optimization on each
/// validation set, and cross-corpus train/test runs with trigram reports.
/// Console output is unchanged from the original inline version.
/// </summary>
static void Main(string[] args)
{
    //ArgumentParser.Parse(args);
    //if (ArgumentParser.ShowedUsage) return;

    string brownCorpus = @"C:\Users\azend\Documents\GitHubVisualStudio\UWNLP\Assignment1\LanguageModels.UnitTests\TestData\brown.txt";
    string gutenberg = @"C:\Users\azend\Documents\GitHubVisualStudio\UWNLP\Assignment1\LanguageModels.UnitTests\TestData\gutenberg.txt";
    string reuters = @"C:\Users\azend\Documents\GitHubVisualStudio\UWNLP\Assignment1\LanguageModels.UnitTests\TestData\reuters.txt";

    // Analyze with linear interpolation (fresh model per corpus so counts don't mix).
    Console.WriteLine("First analyzing lambas");
    Analyze(brownCorpus, new LinearInterpolationModel());
    Analyze(gutenberg, new LinearInterpolationModel());
    Analyze(reuters, new LinearInterpolationModel());

    // Analyze with back-off.
    Console.WriteLine("Analyze back-off");
    Analyze(brownCorpus, new ExampleBackOffModelWithDiscounting());
    Analyze(gutenberg, new ExampleBackOffModelWithDiscounting());
    Analyze(reuters, new ExampleBackOffModelWithDiscounting());

    // Optimize lambdas against each corpus's validation set.
    OptimizeLambdas(brownCorpus, "Brown");
    OptimizeLambdas(gutenberg, "Gutenberg");
    OptimizeLambdas(reuters, "Reuters");

    // Train and test on different corpora; report the 100 most common trigrams.
    // (Messages reproduce the original output verbatim, typos included.)
    TrainAndTest(reuters, brownCorpus, "Train in Reuters, test in Brown");
    TrainAndTest(brownCorpus, gutenberg, "Train in Brown, test in Gutenberg");
    TrainAndTest(gutenberg, reuters, "Train in Gutenbergs, test in Reuters");
}

/// <summary>
/// Splits <paramref name="corpusPath"/> 80/10/10, runs the lambda optimizer
/// (1000 iterations) against the validation partition (index 1), and prints
/// the optimum combinations found.
/// </summary>
/// <param name="corpusPath">Path to the corpus file.</param>
/// <param name="corpusName">Display name used in the progress message.</param>
private static void OptimizeLambdas(string corpusPath, string corpusName)
{
    List<List<string>> splitCorpus = SplitCorpus(corpusPath, 80, 10, 10);
    // FIX: the original passed 1000 as an unused WriteLine format argument
    // (the message has no {0} placeholder); it is dropped here — output is identical.
    Console.WriteLine("Running optimizer with validation set for " + corpusName + ".");
    LinearInterpolationModel linearModel = new LinearInterpolationModel();
    List<DoubleCombination> optimumCombinations = Optimizer.GetOptimumCombination(1000, linearModel, splitCorpus[1]);
    Console.WriteLine("These are the optimum combinations:");
    foreach (DoubleCombination combination in optimumCombinations)
    {
        Console.WriteLine(combination.ToString());
    }
}

/// <summary>
/// Trains a fresh linear-interpolation model on one corpus, tests on another,
/// and prints the 100 most frequent trigrams observed during training.
/// </summary>
/// <param name="trainCorpus">Path to the training corpus.</param>
/// <param name="testCorpus">Path to the test corpus.</param>
/// <param name="description">Header line printed before the run.</param>
private static void TrainAndTest(string trainCorpus, string testCorpus, string description)
{
    Console.WriteLine(description);
    LinearInterpolationModel linearModel = new LinearInterpolationModel();
    Analyze(trainCorpus, testCorpus, linearModel);
    // Index 3 holds the trigram counts; take the top 100 by frequency.
    IEnumerable<KeyValuePair<NGram, int>> commonTrigrams = linearModel.NGramCounter.NGramCountDictionaries[3].OrderByDescending(kvp => kvp.Value).Take(100);
    Console.WriteLine("------------ Common 100 Trigrams -------");
    foreach (KeyValuePair<NGram, int> kvp in commonTrigrams)
    {
        Console.WriteLine("{0}\t\t{1}", kvp.Key, kvp.Value);
    }
    Console.WriteLine();
}