/// <summary>
/// Find an optimal stump for the sub dataset identified by <paramref name="flag"/>,
/// scanning every dimension and keeping the stump with the smallest Pm.
/// </summary>
/// <param name="sortdata">sorted X[][dim]</param>
/// <param name="flag">flag of the sub dataset</param>
/// <param name="datasetFlags">flags of the whole dataset</param>
/// <param name="weight">current weight</param>
/// <returns>the stump of min Pm, or null when no dimension yields a usable stump</returns>
Stump OptimalOneNode(SortedData sortdata, int flag, int[] datasetFlags, double[] weight)
{
    int N = sortdata.N, MaxDim = sortdata.MaxDim;
    List<Stump> stumpArray = new List<Stump>(MaxDim);
    double[] sortedWeight = new double[N];
    for (int dim = 1; dim <= MaxDim; dim++)
    {
        SortedNode[] sortedDim = sortdata[dim];
        // Re-order the sample weights to follow the sort order of this dimension.
        for (int n = 0; n < N; n++)
        {
            sortedWeight[n] = weight[sortedDim[n].N];
        }
        // OptimalOneDim may return null; nulls are filtered out below.
        Stump stump = OptimalOneDim(sortedDim, flag, datasetFlags, sortedWeight, N, dim);
        stumpArray.Add(stump);
    }
    // Default comparer sorts null entries before any instance, so pop from the
    // front until the first non-null (i.e. best) stump is found.
    stumpArray.Sort();
    Stump beststump = null;
    do
    {
        beststump = stumpArray[0];
        stumpArray.RemoveAt(0);
    } while (beststump == null && stumpArray.Count != 0);
    return beststump;
}
/// <summary>
/// Build the sorted (dense) training data for this stump learner.
/// </summary>
/// <param name="prob">the raw problem to convert</param>
/// <returns>dense sorted training data</returns>
/// <exception cref="NotSupportedException">thrown when the generated data is sparse</exception>
public TrainData CreateTrainData(Problem prob)
{
    SortedData data = new SortedData();
    data.GenTrainData(prob);
    if (data.IsSparse)
    {
        // NotSupportedException derives from Exception, so existing catch sites still work.
        throw new NotSupportedException("StumpLearner do not support sparse data!");
    }
    return data;
}
/// <summary>
/// Build the sorted (dense) training data for this tree learner.
/// </summary>
/// <param name="prob">the raw problem to convert</param>
/// <returns>dense sorted training data</returns>
/// <exception cref="NotSupportedException">thrown when the generated data is sparse</exception>
public TrainData CreateTrainData(Problem prob)
{
    SortedData data = new SortedData();
    data.GenTrainData(prob);
    if (data.IsSparse)
    {
        // NotSupportedException derives from Exception, so existing catch sites still work.
        throw new NotSupportedException("TreeLearner do not support sparse data!");
    }
    return data;
}
/// <summary>
/// Train a single decision stump: evaluate every dimension and keep the
/// stump with the smallest weighted error.
/// </summary>
/// <param name="data">sorted training data (expected to be a SortedData)</param>
/// <param name="weight">current sample weights, indexed by original sample number</param>
/// <returns>the minimum weighted error (Pm) of the chosen stump</returns>
public double Train(TrainData data, double[] weight)
{
    SortedData sorted = data as SortedData;
    int sampleCount = sorted.N;
    int dimCount = sorted.MaxDim;
    List<Stump> candidates = new List<Stump>(dimCount);
    double[] alignedWeight = new double[sorted.N];
    for (int d = 1; d <= dimCount; d++)
    {
        SortedNode[] column = sorted[d];
        // Align the weights with the sort order of this dimension.
        for (int i = 0; i < sampleCount; i++)
        {
            alignedWeight[i] = weight[column[i].N];
        }
        // Best stump restricted to dimension d.
        candidates.Add(OptimalOneDim(column, alignedWeight, sampleCount, d));
    }
    // After sorting, the best candidate sits at the front.
    candidates.Sort();
    _stump = candidates[0];
    return candidates[0].Pm;
}
/// <summary>
/// Cut the sub dataset of treeNode into 2 branches.
/// Each sample of the node's subset is relabeled with leftFlag or rightFlag,
/// depending on which side of the node's threshold it falls.
/// </summary>
/// <param name="treeNode">the treeNode to cut</param>
/// <param name="sortdata">sorted X[][dim]</param>
/// <param name="leftFlag">flag of the left branch (value <= threshold)</param>
/// <param name="rightFlag">flag of the right branch (value > threshold)</param>
/// <param name="datasetFlag">flags of the whole dataset, updated in place</param>
private void CutDataSet(TreeNode treeNode, SortedData sortdata, int leftFlag, int rightFlag, ref int[] datasetFlag)
{
    int N = sortdata.N;
    SortedNode[] sortedDim = sortdata[treeNode.InnerStump.Dim];
    double Thr = treeNode.InnerStump.Thr;
    for (int n = 0; n < N; n++)
    {
        // Only relabel samples that currently belong to this node's subset.
        if (datasetFlag[sortedDim[n].N] == treeNode.Flag)
        {
            datasetFlag[sortedDim[n].N] = sortedDim[n].Value > Thr ? rightFlag : leftFlag;
        }
    }
}
/// <summary>
/// Train a decision tree greedily: repeatedly take the queued leaf whose split
/// gives the best improvement and split it, for at most _maxSplit splits.
/// </summary>
/// <param name="data">sorted training data (expected to be a SortedData)</param>
/// <param name="weight">current sample weights, indexed by original sample number</param>
/// <returns>the weighted training error (Pm) of the grown tree</returns>
/// <exception cref="Exception">thrown when no stump can classify the root dataset</exception>
public double Train(TrainData data, double[] weight)
{
    SortedData sortdata = data as SortedData;
    int N = sortdata.N, MaxDim = sortdata.MaxDim;
    // datasetFlags[i] records which tree node sample i currently belongs to.
    int[] datasetFlags = new int[sortdata.N];
    int flag = 0;
    Stump stump = OptimalOneNode(sortdata, flag, datasetFlags, weight);
    if (stump == null)
    {
        throw new Exception(Messege.CouldNotClassify);
    }
    TreeNode treeNode = new TreeNode();
    treeNode.InnerStump = stump;
    treeNode.Parent = null;
    treeNode.Flag = flag;
    // Delta = error reduction obtained by attaching this node's split.
    treeNode.Delta = 0.5 - treeNode.InnerStump.Pm;
    // Sorted list used as a priority queue; after Sort() the best candidate is at index 0.
    List <TreeNode> priorityQueue = new List <TreeNode>();
    priorityQueue.Add(treeNode);
    double Pm = stump.Pm;
    for (int splitIndex = 0; splitIndex < _maxSplit; splitIndex++)
    {
        // Pop entries until a non-null node is found (nulls mark dead branches, see below).
        do
        {
            treeNode = priorityQueue[0];
            priorityQueue.RemoveAt(0);
        } while (treeNode == null && priorityQueue.Count != 0);
        if (treeNode == null)
        {
            // Queue exhausted: nothing left to split.
            break;
        }
        if (treeNode.Parent == null)
        {
            _treeRoot = treeNode;
        }
        else
        {
            // Flags are handed out in (left, right) pairs starting from 1,
            // so left children carry odd flags and right children even ones.
            if (treeNode.Flag % 2 != 0)
            {
                treeNode.Parent.Left = treeNode;
            }
            else
            {
                treeNode.Parent.Right = treeNode;
            }
            // Attaching this node realizes its promised error reduction.
            Pm = Pm - treeNode.Delta;
        }
        int leftFlag = ++flag;
        int rightFlag = ++flag;
        CutDataSet(treeNode, sortdata, leftFlag, rightFlag, ref datasetFlags);
        TreeNode leftNode = null, rightNode = null;
        // Only search a branch whose weighted mass is non-negligible.
        if (treeNode.InnerStump.Pl > double.Epsilon)
        {
            stump = OptimalOneNode(sortdata, leftFlag, datasetFlags, weight);
            if (stump != null)
            {
                leftNode = new TreeNode();
                leftNode.InnerStump = stump;
                leftNode.Parent = treeNode;
                leftNode.Flag = leftFlag;
                leftNode.Delta = treeNode.InnerStump.Pl - stump.Pm;
            }
        }
        if (treeNode.InnerStump.Pr > double.Epsilon)
        {
            stump = OptimalOneNode(sortdata, rightFlag, datasetFlags, weight);
            if (stump != null)
            {
                rightNode = new TreeNode();
                rightNode.InnerStump = stump;
                rightNode.Parent = treeNode;
                rightNode.Flag = rightFlag;
                rightNode.Delta = treeNode.InnerStump.Pr - stump.Pm;
            }
        }
        // Nulls are enqueued too; they are skipped when popped.
        priorityQueue.Add(leftNode);
        priorityQueue.Add(rightNode);
        priorityQueue.Sort();
    }
    return(Pm);
}