예제 #1
0
        /// <summary>
        /// Builds a distribution from the supplied tokens and prunes it,
        /// first by occurrence count and then by rank.
        /// </summary>
        /// <typeparam name="T">Type of the tokens (events) being counted.</typeparam>
        /// <param name="tokens">Tokens that are added to the distribution as events.</param>
        /// <param name="minOccuranceNumberThreshold">
        /// Occurrence-count threshold. When greater than zero, the distribution is pruned
        /// by count using this value minus one; otherwise no count pruning is performed.
        /// </param>
        /// <param name="maxTokensInDistribution">
        /// Value handed to the rank pruning step (presumably the maximum number of
        /// tokens to keep; confirm against <c>PruneByRank</c>).
        /// </param>
        /// <returns>The distribution after both pruning steps.</returns>
        public static IDistribution <T> CreateLangaugeModel <T>(IEnumerable <T> tokens, int minOccuranceNumberThreshold, int maxTokensInDistribution)
        {
            IModifiableDistribution<T> model = new Distribution<T>(new Bag<T>());
            model.AddEventRange(tokens);

            // Mirror text_cat: prune by count first, then by rank.
            bool countPruningRequested = minOccuranceNumberThreshold >= 1;
            if (countPruningRequested)
            {
                // text_cat drops entries sitting exactly on the threshold, whereas
                // PruneByCount keeps them, so the threshold is lowered by one here.
                // todo: remove correction, update documentation and comments
                int correctedThreshold = minOccuranceNumberThreshold - 1;
                model.PruneByCount(correctedThreshold);
            }

            model.PruneByRank(maxTokensInDistribution);

            return model;
        }