/// <summary>
/// Creates the model together with its unigram, bigram and trigram sub-models.
/// All three sub-models, and this model itself, receive the same
/// <paramref name="smoother"/> instance.
/// </summary>
/// <param name="l1">Value stored in <c>L1</c>.</param>
/// <param name="l2">Value stored in <c>L2</c>.</param>
/// <param name="l3">Value stored in <c>L3</c>.</param>
/// <param name="smoother">Smoother shared by this model and its three sub-models.</param>
/// <remarks>
/// NOTE(review): L1/L2/L3 are presumably the linear-interpolation weights suggested by
/// the class name; which weight applies to which n-gram order is decided where they are
/// consumed, not here - confirm against the probability computation.
/// </remarks>
public TrigramWithLinearInterpolationLanguageModel(double l1, double l2, double l3, ISmoother smoother)
{
    // Sub-models are built and assigned one at a time, lowest order first.
    var unigramModel = new UnigramLanguageModel { Smoother = smoother };
    this.UnigramLM = unigramModel;

    var bigramModel = new BigramLanguageModel { Smoother = smoother };
    this.BigramLM = bigramModel;

    var trigramModel = new TrigramLanguageModel { Smoother = smoother };
    this.TrigramLM = trigramModel;

    // The combined model keeps a reference to the very same smoother instance.
    this.Smoother = smoother;

    this.L1 = l1;
    this.L2 = l2;
    this.L3 = l3;
}
/// <summary>
/// Builds a <see cref="LanguageModelHyperparameters"/> instance from a space-separated,
/// command-line style argument string.
/// </summary>
/// <remarks>
/// The whole string is lower-cased (culture-invariantly) before it is split on spaces, so
/// option names and values are matched case-insensitively. Options read:
/// <list type="bullet">
/// <item><description><c>-smoothingtechnique</c>: <c>ml</c>, <c>addk</c>, <c>jm</c>, <c>dirichlet</c>, <c>ad</c> or <c>ts</c>.</description></item>
/// <item><description><c>-lm</c>: <c>unigram</c>, <c>bigram</c>, <c>trigram</c> or <c>trigramwithlinearinterpolation</c>.</description></item>
/// <item><description><c>-l1</c>, <c>-l2</c>, <c>-l3</c>: numeric values. <c>-l1</c> is required by every smoother except <c>ml</c>, <c>-l2</c> additionally by <c>ts</c>, and all three by <c>trigramwithlinearinterpolation</c>. When absent they are reported as 0.0 on the result.</description></item>
/// <item><description><c>-unkratio</c>: numeric, defaults to 0.1 when absent.</description></item>
/// <item><description><c>-ignorecase</c>: flag, true when present.</description></item>
/// </list>
/// Numbers are parsed with the invariant culture (decimal point is <c>.</c>), so the same
/// argument string yields the same values regardless of the machine's regional settings.
/// An absent or unrecognised <c>-smoothingtechnique</c> leaves the smoother <c>null</c>, and
/// an absent or unrecognised <c>-lm</c> yields <c>null</c> model entries; this method does
/// not reject them.
/// </remarks>
/// <param name="args">The argument string, e.g. <c>-lm bigram -smoothingtechnique jm -l1 0.3</c>.</param>
/// <returns>
/// Hyperparameters holding one language model per key of <c>Corpus.CategoriesMap</c>.
/// All models share a single smoother instance.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="args"/> is <c>null</c>.</exception>
/// <exception cref="FormatException">
/// A numeric option that is needed is missing, has no value after it, or its value is not a valid number.
/// </exception>
public static LanguageModelHyperparameters GenerateFromArguments(string args)
{
    if (args == null)
    {
        throw new ArgumentNullException(nameof(args));
    }

    // Invariant lower-casing: option names such as "-ignorecase" must match on every culture
    // (a culture-sensitive ToLower maps 'I' differently under Turkish cultures).
    var splittedArgs = args.ToLowerInvariant().Split(' ', StringSplitOptions.RemoveEmptyEntries);

    // Smoothers
    // Create the collection-level unigram model with no smoothing (max-likelihood) used in some smoothing techniques
    INGramLanguageModel collectionLevelLanguageModel = new UnigramLanguageModel { Smoother = new MaxLikelihoodSmoother() };

    // Stays null when the technique is absent or not one of the names below.
    ISmoother smoother = null;
    switch (FindOptionValue(splittedArgs, "-smoothingtechnique"))
    {
        case "ml":
            smoother = new MaxLikelihoodSmoother();
            break;
        case "addk":
            smoother = new AddKSmoother
            {
                K = ParseOptionValue(splittedArgs, "-l1")
            };
            break;
        case "jm":
            smoother = new JelinekMercerSmoother
            {
                CollectionLevelLanguageModel = collectionLevelLanguageModel,
                L = ParseOptionValue(splittedArgs, "-l1")
            };
            break;
        case "dirichlet":
            smoother = new DirichletSmoother
            {
                CollectionLevelLanguageModel = collectionLevelLanguageModel,
                M = ParseOptionValue(splittedArgs, "-l1")
            };
            break;
        case "ad":
            smoother = new AbsoluteDiscountSmoother
            {
                CollectionLevelLanguageModel = collectionLevelLanguageModel,
                D = ParseOptionValue(splittedArgs, "-l1")
            };
            break;
        case "ts":
            smoother = new TwoStageSmoother
            {
                CollectionLevelLanguageModel = collectionLevelLanguageModel,
                L = ParseOptionValue(splittedArgs, "-l1"),
                M = ParseOptionValue(splittedArgs, "-l2")
            };
            break;
    }

    // LM
    // The model name is the same for every category, so it is looked up once up front.
    string languageModelName = FindOptionValue(splittedArgs, "-lm");
    var nGramLanguageModels = Corpus.CategoriesMap.ToDictionary(cdp => cdp.Key, cdp =>
    {
        // Stays null when the model name is absent or not one of the names below.
        INGramLanguageModel modelToUse = null;
        switch (languageModelName)
        {
            case "unigram":
                modelToUse = new UnigramLanguageModel { Smoother = smoother };
                break;
            case "bigram":
                modelToUse = new BigramLanguageModel { Smoother = smoother };
                break;
            case "trigram":
                modelToUse = new TrigramLanguageModel { Smoother = smoother };
                break;
            case "trigramwithlinearinterpolation":
                modelToUse = new TrigramWithLinearInterpolationLanguageModel(
                    ParseOptionValue(splittedArgs, "-l1"),
                    ParseOptionValue(splittedArgs, "-l2"),
                    ParseOptionValue(splittedArgs, "-l3"),
                    smoother);
                break;
        }

        return modelToUse;
    });

    return new LanguageModelHyperparameters
    {
        CategoryNGramLanguageModelsMap = nGramLanguageModels,
        UnkRatio = ParseOptionValueOrDefault(splittedArgs, "-unkratio", 0.1),
        IgnoreCase = Array.IndexOf(splittedArgs, "-ignorecase") >= 0,
        L1 = ParseOptionValueOrDefault(splittedArgs, "-l1", 0.0),
        L2 = ParseOptionValueOrDefault(splittedArgs, "-l2", 0.0),
        L3 = ParseOptionValueOrDefault(splittedArgs, "-l3", 0.0),
        CollectionLevelLanguageModel = collectionLevelLanguageModel,
    };
}

/// <summary>
/// Returns the token that follows <paramref name="option"/> in <paramref name="tokens"/>,
/// or <c>null</c> when the option is absent or is the last token.
/// </summary>
private static string FindOptionValue(string[] tokens, string option)
{
    int position = Array.IndexOf(tokens, option);

    // Guard both ends: IndexOf yields -1 for a missing option (which would otherwise make
    // "position + 1" point at token 0), and a trailing option has nothing after it.
    bool hasValue = position >= 0 && position + 1 < tokens.Length;
    return hasValue ? tokens[position + 1] : null;
}

/// <summary>
/// Parses the value following <paramref name="option"/> as an invariant-culture double;
/// throws <see cref="FormatException"/> when the option or its value is missing or malformed.
/// </summary>
private static double ParseOptionValue(string[] tokens, string option)
{
    string rawValue = FindOptionValue(tokens, option);
    if (rawValue == null)
    {
        throw new FormatException($"Option '{option}' is required here and must be followed by a numeric value.");
    }

    // Fully qualified so the block does not depend on a using directive for System.Globalization.
    return Double.Parse(rawValue, System.Globalization.CultureInfo.InvariantCulture);
}

/// <summary>
/// Like <see cref="ParseOptionValue"/>, but returns <paramref name="fallback"/> when
/// <paramref name="option"/> does not appear in <paramref name="tokens"/> at all.
/// </summary>
private static double ParseOptionValueOrDefault(string[] tokens, string option, double fallback)
{
    return Array.IndexOf(tokens, option) >= 0
        ? ParseOptionValue(tokens, option)
        : fallback;
}