public string ToGainSummary(FeaturesToContentMap fmap, Dictionary<int, int> featureToID, int prefix,
    bool includeZeroGainFeatures, bool normalize, int startingCommentNumber)
{
    if (_trees.Count == 0)
        return string.Empty;

    StringBuilder output = new StringBuilder();

    // Use only the first 'prefix' trees; fall back to all trees when the value is out of range.
    if (prefix > _trees.Count || prefix < 0)
        prefix = _trees.Count;

    FeatureToGainMap gainMap = new FeatureToGainMap(_trees.Take(prefix).ToList(), normalize);

    if (includeZeroGainFeatures)
    {
        // Touch every feature so that features with zero gain still appear in the summary.
        for (int ifeat = 0; ifeat < fmap.Count; ++ifeat)
            gainMap[ifeat] += 0.0;
    }

    var sortedByGain = gainMap.OrderByDescending(pair => pair.Value).AsEnumerable();
    var maxValue = sortedByGain.First().Value;
    double normalizingFactor = normalize && maxValue != 0 ? Math.Sqrt(maxValue) : 1.0;
    double power = normalize ? 0.5 : 1.0;

    foreach (var pair in sortedByGain)
    {
        // Features not present in the mapping fall back to input id 0.
        int outputInputId = featureToID.ContainsKey(pair.Key) ? featureToID[pair.Key] : 0;
        output.Append(string.Format("C:{0}=FG:I{1}:{2}:{3}\n",
            startingCommentNumber++, outputInputId, fmap.GetName(pair.Key),
            Math.Pow(pair.Value, power) / normalizingFactor));
    }
    return output.ToString();
}
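// Illustrative sketch only (not from this source): a hypothetical call and the shape of its
// output, with made-up feature names, input ids, and gain values.
//
//   string summary = ensemble.ToGainSummary(fmap, featureToID, prefix: ensemble.NumTrees,
//       includeZeroGainFeatures: true, normalize: false, startingCommentNumber: 0);
//
// Each emitted line follows the "C:{commentNumber}=FG:I{inputId}:{featureName}:{gain}" pattern,
// one line per feature, sorted by descending gain, e.g.:
//
//   C:0=FG:I2:NumClicks:145.7
//   C:1=FG:I1:DwellTime:98.2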
/// <summary>
/// Returns the ensemble in the production TreeEnsemble ini format.
/// </summary>
internal string ToTreeEnsembleIni(FeaturesToContentMap fmap, string trainingParams, bool appendFeatureGain,
    bool includeZeroGainFeatures = true)
{
    StringBuilder sbEvaluator = new StringBuilder();
    StringBuilder sbInput = new StringBuilder();
    StringBuilder sb = new StringBuilder();

    // Mapping from feature index to ini input id.
    Dictionary<int, int> featureToID = new Dictionary<int, int>();

    int numNodes = 0;

    // Append the pretrained input.
    if (_firstInputInitializationContent != null)
    {
        numNodes++;
        featureToID[-1] = 0;
        sbInput.AppendFormat("\n[Input:1]\n{0}\n", _firstInputInitializationContent);
    }

    int evaluatorCounter = 0;
    for (int w = 0; w < NumTrees; ++w)
        _trees[w].ToTreeEnsembleFormat(sbEvaluator, sbInput, fmap, ref evaluatorCounter, featureToID);
    numNodes += evaluatorCounter;

    // Header, then all inputs, then all per-tree evaluators.
    sb.AppendFormat("[TreeEnsemble]\nInputs={0}\nEvaluators={1}\n", featureToID.Count, evaluatorCounter + 1);
    sb.Append(sbInput);
    sb.Append(sbEvaluator);

    // Append the final aggregator.
    sb.AppendFormat("\n[Evaluator:{0}]\nEvaluatorType=Aggregator\nNumNodes={1}\nNodes=",
        evaluatorCounter + 1, numNodes);

    // Nodes: the optional pretrained input followed by one entry per tree evaluator, tab-separated.
    if (_firstInputInitializationContent != null)
        sb.Append("I:1");
    if (NumTrees > 0)
    {
        if (_firstInputInitializationContent != null)
            sb.Append("\t");
        sb.Append("E:1");
    }
    for (int w = 1; w < NumTrees; ++w)
        sb.AppendFormat("\tE:{0}", w + 1);

    // Weights: 1 for the pretrained input, then the weight of each tree.
    sb.Append("\nWeights=");
    if (_firstInputInitializationContent != null)
        sb.Append("1");
    if (NumTrees > 0)
    {
        if (_firstInputInitializationContent != null)
            sb.Append("\t");
        sb.AppendFormat("{0}", _trees[0].Weight);
    }
    for (int w = 1; w < NumTrees; ++w)
    {
        sb.Append("\t");
        sb.Append(_trees[w].Weight);
    }

    sb.AppendFormat("\nBias={0}", Bias);
    sb.Append("\nType=Linear");

    // Add the comments section with training parameters.
    int commentsWritten = AppendComments(sb, trainingParams);

    if (appendFeatureGain)
    {
        var gainSummary = ToGainSummary(fmap, featureToID, NumTrees, includeZeroGainFeatures,
            normalize: false, startingCommentNumber: commentsWritten);
        sb.Append(gainSummary);
    }

    return sb.ToString();
}
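// Rough shape of the ini text assembled by ToTreeEnsembleIni, sketched for a hypothetical
// ensemble of two trees with no pretrained input. Counts and values are illustrative; the
// per-tree [Input:*] and [Evaluator:*] sections are produced by ToTreeEnsembleFormat and are
// abbreviated here.
//
//   [TreeEnsemble]
//   Inputs=3
//   Evaluators=3
//
//   [Input:1]
//   ...
//
//   [Evaluator:1]
//   ...
//   [Evaluator:2]
//   ...
//
//   [Evaluator:3]
//   EvaluatorType=Aggregator
//   NumNodes=2
//   Nodes=E:1   E:2        (tab-separated)
//   Weights=1   1          (tab-separated tree weights)
//   Bias=0
//   Type=Linear
//
// followed by the comments written by AppendComments and, when appendFeatureGain is true,
// the feature-gain lines from ToGainSummary.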