/// <summary>
/// Builds a distribution from the supplied tokens and prunes it, first by
/// occurrence count and then by rank.
/// </summary>
/// <typeparam name="T">Type of the tokens being counted.</typeparam>
/// <param name="tokens">Tokens to add to the distribution as events.</param>
/// <param name="minOccuranceNumberThreshold">
/// Occurrence-count threshold. Count pruning is applied only when this value is
/// positive, and it is applied with the threshold lowered by one (see the comment
/// in the method body).
/// </param>
/// <param name="maxTokensInDistribution">
/// Value handed to the rank-pruning step; always applied.
/// </param>
/// <returns>The pruned distribution.</returns>
public static IDistribution<T> CreateLangaugeModel<T>(IEnumerable<T> tokens, int minOccuranceNumberThreshold, int maxTokensInDistribution)
{
    var eventStore = new Bag<T>();
    IModifiableDistribution<T> model = new Distribution<T>(eventStore);
    model.AddEventRange(tokens);

    // Pruning order mirrors text_cat: by count first, then by rank.
    //
    // text_cat excludes tokens whose count equals the threshold, whereas
    // (per the original author's note) PruneByCount keeps them in the
    // distribution. Passing (threshold - 1) compensates for that difference.
    // todo: remove correction, update documentation and comments
    if (minOccuranceNumberThreshold >= 1)
    {
        int correctedThreshold = minOccuranceNumberThreshold - 1;
        model.PruneByCount(correctedThreshold);
    }

    model.PruneByRank(maxTokensInDistribution);

    return model;
}