Esempio n. 1
0
        /// <summary>
        /// Builds a text summary of per-feature gains, one line per feature, ordered from the
        /// highest gain to the lowest. Each line has the form
        /// <c>C:{comment number}=FG:I{input id}:{feature name}:{gain}</c> followed by '\n'.
        /// </summary>
        /// <param name="fmap">Supplies the feature count and the display name for each feature index.</param>
        /// <param name="featureToID">Maps a feature index to the input id written to the output;
        /// features missing from this map are written with id 0.</param>
        /// <param name="prefix">Number of leading trees to take into account. A negative value, or a
        /// value larger than the number of trees, selects all trees.</param>
        /// <param name="includeZeroGainFeatures">When true, every feature index in
        /// [0, <paramref name="fmap"/>.Count) is touched in the gain map so that it shows up in the output.</param>
        /// <param name="normalize">Forwarded to the gain map; when true the reported value is the square
        /// root of the gain divided by the square root of the largest gain (unless the largest gain is 0).</param>
        /// <param name="startingCommentNumber">Comment number used for the first output line; it is
        /// incremented by one for every following line.</param>
        /// <returns>The formatted summary, or an empty string when there are no trees or no gain entries.</returns>
        public string ToGainSummary(FeaturesToContentMap fmap, Dictionary <int, int> featureToID, int prefix, bool includeZeroGainFeatures, bool normalize, int startingCommentNumber)
        {
            if (_trees.Count == 0)
            {
                return string.Empty;
            }

            // Use only the first `prefix` trees; out-of-range values fall back to the whole ensemble.
            if (prefix > _trees.Count || prefix < 0)
            {
                prefix = _trees.Count;
            }
            FeatureToGainMap gainMap = new FeatureToGainMap(_trees.Take(prefix).ToList(), normalize);

            if (includeZeroGainFeatures)
            {
                // Touch every feature exactly once. The loop header is the only place the index
                // advances; an extra increment here would skip every other feature.
                // NOTE(review): relies on FeatureToGainMap's indexer creating missing entries
                // (it is defined outside this view) - confirm.
                for (int ifeat = 0; ifeat < fmap.Count; ++ifeat)
                {
                    gainMap[ifeat] += 0.0;
                }
            }

            // Materialize the ordering once: it is needed both for the maximum and for the output
            // loop, and a deferred query would re-sort on every enumeration.
            var sortedByGain = gainMap.OrderByDescending(pair => pair.Value).ToList();
            if (sortedByGain.Count == 0)
            {
                // Nothing to report (e.g. prefix == 0 and zero-gain features were not requested).
                return string.Empty;
            }

            // The list is sorted in descending order, so the first entry carries the largest gain.
            var    maxValue          = sortedByGain[0].Value;
            double normalizingFactor = normalize && maxValue != 0 ? Math.Sqrt(maxValue) : 1.0;
            double power             = normalize ? 0.5 : 1.0;

            StringBuilder output = new StringBuilder();
            foreach (var pair in sortedByGain)
            {
                // Single dictionary lookup; features without a mapping are reported with id 0.
                int outputInputId;
                if (!featureToID.TryGetValue(pair.Key, out outputInputId))
                {
                    outputInputId = 0;
                }

                output.AppendFormat("C:{0}=FG:I{1}:{2}:{3}\n", startingCommentNumber++, outputInputId,
                                    fmap.GetName(pair.Key), Math.Pow(pair.Value, power) / normalizingFactor);
            }
            return output.ToString();
        }