// Inserts a new partition split into the queue, keeping the queue ordered by
// ascending SplittingImprovement. The scan starts at the first element of the queue.
// A split whose improvement equals that of existing entries is placed in front of them.
// Original note: elements are removed from the queue at the last position.
// The scan is linear in the queue length; the splits could be organized as a heap
// to improve runtime (see alglib tsort).
private void InsertSortedQueue(PartitionSplits split) {
  // default to appending at the end when every queued split has a smaller improvement
  int insertAt = queue.Count;
  for (int pos = 0; pos < queue.Count; pos++) {
    // stop at the first queued split that is NOT strictly smaller than the new one
    if (!(queue[pos].SplittingImprovement < split.SplittingImprovement)) {
      insertAt = pos;
      break;
    }
  }
  queue.Insert(insertAt, split);
}
// Calculates the optimal split for the partition [startIdx .. endIdx] (inclusive),
// which is represented by the leaf node with the specified nodeIdx, and enqueues it.
// Nothing is enqueued when the partition contains fewer than two rows or when
// FindBestVariableAndThreshold reports that no split is possible.
private void EnqueuePartitionSplit(int nodeIdx, int startIdx, int endIdx) {
  // only enqueue a new split if there are at least 2 rows left
  if (startIdx >= endIdx) {
    return;
  }

  double threshold, improvement;
  string bestVariableName;
  // ... and a split is possible
  if (!FindBestVariableAndThreshold(startIdx, endIdx, out threshold, out bestVariableName, out improvement)) {
    return;
  }

  var split = new PartitionSplits() {
    ParentNodeIdx = nodeIdx,
    StartIdx = startIdx,
    EndIndex = endIdx,
    SplittingThreshold = threshold,
    SplittingVariable = bestVariableName,
    // InsertSortedQueue orders the queue by SplittingImprovement, so the improvement
    // found for this split must be stored; otherwise every split compares as equal.
    SplittingImprovement = improvement
  };
  InsertSortedQueue(split);
}