Exemple #1
0
        /// <summary>
        /// Builds the feature-gain comment lines for the ensemble: one line per entry of the gain map,
        /// ordered by descending gain, each of the form <c>C:{n}=FG:I{inputId}:{featureName}:{gain}</c>
        /// followed by a newline.
        /// </summary>
        /// <param name="fmap">Supplies the feature count and the feature names written to each line.</param>
        /// <param name="featureToID">Maps a feature index to its ini input id; features absent from the map are written with id 0.</param>
        /// <param name="prefix">Number of leading trees to take into account; a negative value or one larger than the tree count means "all trees".</param>
        /// <param name="includeZeroGainFeatures">When true, every feature index in [0, fmap.Count) is touched in the gain map so it appears in the output.</param>
        /// <param name="normalize">Forwarded to the gain map; when true each reported value is the square root of the gain divided by the square root of the largest gain (no division when the largest gain is 0).</param>
        /// <param name="startingCommentNumber">Number used for the first emitted comment line; incremented by one per line.</param>
        /// <returns>The concatenated comment lines, or an empty string when there are no trees or no gain entries.</returns>
        public string ToGainSummary(FeaturesToContentMap fmap, Dictionary <int, int> featureToID, int prefix, bool includeZeroGainFeatures, bool normalize, int startingCommentNumber)
        {
            if (_trees.Count == 0)
            {
                return string.Empty;
            }

            // use only first prefix trees
            if (prefix > _trees.Count || prefix < 0)
            {
                prefix = _trees.Count;
            }
            FeatureToGainMap gainMap = new FeatureToGainMap(_trees.Take(prefix).ToList(), normalize);

            if (includeZeroGainFeatures)
            {
                // Adding 0.0 touches the entry for every feature without changing any gain.
                // The loop header is the only place the index advances, so no feature is skipped.
                // NOTE(review): presumably the map's indexer yields 0 for a missing key - confirm in FeatureToGainMap.
                for (int ifeat = 0; ifeat < fmap.Count; ++ifeat)
                {
                    gainMap[ifeat] += 0.0;
                }
            }

            // Materialize once: the sorted sequence is read for the maximum and then iterated,
            // and a deferred query would sort the map again on each enumeration.
            var sortedByGain = gainMap.OrderByDescending(pair => pair.Value).ToList();
            if (sortedByGain.Count == 0)
            {
                // Nothing to report; avoids First()/indexing on an empty sequence.
                return string.Empty;
            }

            var    maxValue          = sortedByGain[0].Value;
            double normalizingFactor = normalize && maxValue != 0 ? Math.Sqrt(maxValue) : 1.0;
            double power             = normalize ? 0.5 : 1.0;

            StringBuilder output = new StringBuilder();
            foreach (var pair in sortedByGain)
            {
                // Single lookup; features without an assigned input id are reported as input 0.
                int outputInputId;
                if (!featureToID.TryGetValue(pair.Key, out outputInputId))
                {
                    outputInputId = 0;
                }

                output.AppendFormat("C:{0}=FG:I{1}:{2}:{3}\n", startingCommentNumber++, outputInputId,
                                    fmap.GetName(pair.Key), Math.Pow(pair.Value, power) / normalizingFactor);
            }
            return output.ToString();
        }
        /// <summary>
        /// Serializes the ensemble as text in the production TreeEnsemble ini format.
        /// </summary>
        /// <param name="fmap">Feature map handed to each tree's serializer and to the gain summary.</param>
        /// <param name="trainingParams">Training parameters forwarded to <c>AppendComments</c>.</param>
        /// <param name="appendFeatureGain">When true, the feature-gain summary is appended after the comments.</param>
        /// <param name="includeZeroGainFeatures">Forwarded to <c>ToGainSummary</c>; only relevant when <paramref name="appendFeatureGain"/> is true.</param>
        /// <returns>The complete ini text.</returns>
        internal string ToTreeEnsembleIni(FeaturesToContentMap fmap,
                                          string trainingParams, bool appendFeatureGain, bool includeZeroGainFeatures = true)
        {
            bool hasPretrainedInput = _firstInputInitializationContent != null;

            // Mapping from feature to ini input id
            var inputIdByFeature = new Dictionary <int, int>();
            var inputSection     = new StringBuilder();
            var evaluatorSection = new StringBuilder();

            // The pretrained input, when present, is written as [Input:1] and registered under feature -1.
            if (hasPretrainedInput)
            {
                inputIdByFeature[-1] = 0;
                inputSection.AppendFormat("\n[Input:1]\n{0}\n", _firstInputInitializationContent);
            }

            // Each tree appends its own input and evaluator sections and advances the evaluator counter.
            int evaluatorCount = 0;
            for (int t = 0; t < NumTrees; ++t)
            {
                _trees[t].ToTreeEnsembleFormat(evaluatorSection, inputSection, fmap, ref evaluatorCount, inputIdByFeature);
            }

            // The aggregator has one node per evaluator, plus one for the pretrained input if there is one.
            int aggregatorNodeCount = evaluatorCount + (hasPretrainedInput ? 1 : 0);

            var ini = new StringBuilder();
            ini.AppendFormat("[TreeEnsemble]\nInputs={0}\nEvaluators={1}\n", inputIdByFeature.Count, evaluatorCount + 1);
            ini.Append(inputSection);
            ini.Append(evaluatorSection);

            // Final aggregator header
            ini.AppendFormat("\n[Evaluator:{0}]\nEvaluatorType=Aggregator\nNumNodes={1}\nNodes=", evaluatorCount + 1, aggregatorNodeCount);

            // Nodes: tab-separated, the pretrained input first, then E:1..E:NumTrees
            if (hasPretrainedInput)
            {
                ini.Append("I:1");
            }
            for (int t = 0; t < NumTrees; ++t)
            {
                if (t > 0 || hasPretrainedInput)
                {
                    ini.Append("\t");
                }
                ini.AppendFormat("E:{0}", t + 1);
            }

            // Weights: tab-separated, in the same order as the nodes
            ini.Append("\nWeights=");
            if (hasPretrainedInput)
            {
                ini.Append("1");
            }
            if (NumTrees > 0)
            {
                if (hasPretrainedInput)
                {
                    ini.Append("\t");
                }
                ini.AppendFormat("{0}", _trees[0].Weight);

                for (int t = 1; t < NumTrees; ++t)
                {
                    ini.Append("\t");
                    ini.Append(_trees[t].Weight);
                }
            }

            ini.AppendFormat("\nBias={0}", Bias);
            ini.Append("\nType=Linear");

            // Comments section with the training parameters; the count of comments written
            // becomes the starting number for the feature-gain comment lines.
            int commentsWritten = AppendComments(ini, trainingParams);

            if (appendFeatureGain)
            {
                ini.Append(ToGainSummary(fmap, inputIdByFeature, NumTrees, includeZeroGainFeatures,
                                         normalize: false, startingCommentNumber: commentsWritten));
            }

            return ini.ToString();
        }