Example #1
        /// <summary>
        /// Find the optimal stump within a sub-dataset.
        /// </summary>
        /// <param name="sortdata">sorted X[][dim]</param>
        /// <param name="flag">flag of the sub-dataset</param>
        /// <param name="datasetFlags">flags of the whole dataset</param>
        /// <param name="weight">current sample weights</param>
        /// <returns>the stump with the minimal Pm</returns>
        Stump OptimalOneNode(SortedData sortdata, int flag, int[] datasetFlags, double[] weight)
        {
            int          N = sortdata.N, MaxDim = sortdata.MaxDim;
            List <Stump> stumpArray = new List <Stump>(MaxDim);

            double[] sortedWeight = new double[sortdata.N];
            object   _lock        = new object(); // only needed if the Parallel.For variant below is enabled

            for (int dim = 1; dim <= MaxDim; dim++)
            //Parallel.For(1, MaxDim + 1, dim =>
            {
                SortedNode[] sortedDim = sortdata[dim];
                for (int n = 0; n < N; n++)
                {
                    sortedWeight[n] = weight[sortedDim[n].N];
                }
                Stump stump = OptimalOneDim(sortedDim, flag, datasetFlags, sortedWeight, N, dim);
                stumpArray.Add(stump);
            }//);
            stumpArray.Sort();
            Stump beststump = null;

            // null entries sort to the front; pop until a real stump is found
            do
            {
                beststump = stumpArray[0];
                stumpArray.RemoveAt(0);
            } while (beststump == null && stumpArray.Count != 0);
            return(beststump);
        }
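
stumpArray.Sort() above relies on Stump having a natural ordering by its weighted error Pm, so that the best stump ends up at index 0. The library's actual Stump class is not shown here; the following is a minimal sketch under that assumption, with member names taken from the constructor call Stump(dim, threshold, sign, Pm, Pl, Pr) and the accesses InnerStump.Dim/.Thr/.Pm/.Pl/.Pr seen in the other examples.

        // Hypothetical sketch (not the library's actual class): a decision stump
        // ordered ascending by its weighted error Pm, so List<Stump>.Sort() puts
        // the best stump first. Null entries are placed first by Comparer<T>.Default.
        public class Stump : IComparable<Stump>
        {
            public int    Dim  { get; }   // feature dimension the stump splits on
            public double Thr  { get; }   // threshold on that dimension
            public double Sign { get; }   // split orientation (+1 or -1)
            public double Pm   { get; }   // weighted error of the stump
            public double Pl   { get; }   // weighted error mass in the left branch
            public double Pr   { get; }   // weighted error mass in the right branch

            public Stump(int dim, double thr, double sign, double pm, double pl, double pr)
            {
                Dim = dim; Thr = thr; Sign = sign; Pm = pm; Pl = pl; Pr = pr;
            }

            public int CompareTo(Stump other)
            {
                if (other == null) return 1;       // non-null sorts after null
                return Pm.CompareTo(other.Pm);     // ascending by error
            }
        }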
Example #2
        public TrainData CreateTrainData(Problem prob)
        {
            SortedData data = new SortedData();

            data.GenTrainData(prob);
            if (data.IsSparse)
            {
                string mesg = /*this.GetType().ToString()+*/ "StumpLearner does not support sparse data!";
                throw new Exception(mesg);
            }
            return(data);
        }
Example #3
        public TrainData CreateTrainData(Problem prob)
        {
            SortedData data = new SortedData();

            data.GenTrainData(prob);
            if (data.IsSparse)
            {
                string mesg = "TreeLearner does not support sparse data!";
                throw new Exception(mesg);
            }
            return(data);
        }
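
For context, a minimal usage sketch (the calling code is an assumption, not part of the source): CreateTrainData is invoked once on the Problem before training, and sparse problems are rejected up front by the exception above.

        // Hypothetical caller (assumption): build the sorted training data once and
        // reuse it across training rounds; sparse inputs fail fast with a clear message.
        TrainData PrepareTrainData(TreeLearner learner, Problem prob)
        {
            try
            {
                return learner.CreateTrainData(prob);
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine("Cannot prepare training data: " + ex.Message);
                throw;
            }
        }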
Example #4
        public double Train(TrainData data, double[] weight)
        {
            SortedData   sortdata = data as SortedData;
            int          N = sortdata.N, MaxDim = sortdata.MaxDim;
            List <Stump> stumpArray = new List <Stump>(MaxDim);

            double[] sortedWeight = new double[sortdata.N];
            for (int dim = 1; dim <= MaxDim; dim++)
            {
                SortedNode[] sortedDim = sortdata[dim];
                // reorder the weights to match the order of sortedDim
                for (int n = 0; n < N; n++)
                {
                    sortedWeight[n] = weight[sortedDim[n].N];
                }
                // find the best stump in this dimension
                Stump stump = OptimalOneDim(sortedDim, sortedWeight, N, dim);
                stumpArray.Add(stump);
            }
            stumpArray.Sort();
            _stump = stumpArray[0];
            return(stumpArray[0].Pm);
        }
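
Train returns the minimal weighted error Pm of the selected stump, which is exactly what an AdaBoost-style driver needs to compute the stump's vote and update the sample weights. A sketch of one such boosting round follows; the driver itself, the label array y, and the predict delegate are assumptions, not part of the library.

        // Hypothetical AdaBoost-style round (assumption): train on the current weights,
        // turn the returned error Pm into the stump's vote alpha, then re-weight and
        // renormalize the samples. y[n] is the label (+1/-1) and predict(n) is the
        // trained stump's output on sample n; both are placeholders.
        static double[] BoostOneRound(StumpLearner learner, TrainData data,
                                      double[] w, int[] y, Func<int, double> predict)
        {
            int    N     = w.Length;
            double pm    = learner.Train(data, w);           // weighted error of the best stump
            double alpha = 0.5 * Math.Log((1 - pm) / pm);    // stump's vote

            double[] next = new double[N];
            double   z    = 0;
            for (int n = 0; n < N; n++)
            {
                next[n] = w[n] * Math.Exp(-alpha * y[n] * predict(n));
                z      += next[n];
            }
            for (int n = 0; n < N; n++)
            {
                next[n] /= z;                                // weights sum to 1 again
            }
            return next;
        }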
Example #5
//         private Stump OptimalOneDim2(SortedNode[] sortedDim,  int flag, int[] datasetFlags, double[] sortedWeight, int N, int dim)
//         {
//             //integral of (yn*wn)  for speed improvement
//             List<double> integralPos = new List<double>(N);
//             List<double> integralNeg = new List<double>(N);
//             List<double> valueCollection = new List<double>(N);
//             for (int n = 0; n < N; )
//             {
//                 if (datasetFlags[sortedDim[n].N] != flag)
//                 {
//                     n++;
//                     continue;
//                 }
//
//                 int offset = 0;
//                 while (n + offset < N && sortedDim[n].Value == sortedDim[n + offset].Value)
//                 {
//                     if (offset == 0)
//                     {
//                         integralPos.Add(0);
//                         integralNeg.Add(0);
//                     }
//                     int nx = n + offset;
//                     if (datasetFlags[sortedDim[nx].N] == flag)
//                     {
//                         if (sortedDim[n + offset].Y > 0)
//                             integralPos[integralPos.Count - 1] += sortedWeight[nx];
//                         else
//                             integralNeg[integralNeg.Count - 1] += sortedWeight[nx];
//                     }
//                     offset++;
//                 }
//                 valueCollection.Add(sortedDim[n].Value);
//                 n += offset;
//             }
//             for (int v = 1; v < integralNeg.Count; v++)
//             {
//                 integralPos[v] += integralPos[v - 1];
//                 integralNeg[v] += integralNeg[v - 1];
//             }
//
//             // calculate error rates of (Count-1) split
//             double[] errorPos = new double[integralPos.Count - 1];
//             double[] errorNeg = new double[integralNeg.Count - 1];
//             for (int v = 0; v < integralNeg.Count - 1; v++)
//             {
//                 errorPos[v] = integralPos[v] + integralNeg[integralNeg.Count - 1] - integralNeg[v];
//                 errorNeg[v] = integralNeg[v] + integralPos[integralPos.Count - 1] - integralPos[v];
//             }
//
//             //find the min error rate as Pm and return the best stump of this dim
//             int minPosIndex = 0, minNegIndex = 0;
//             double errorPosMin = 1, errorNegMin = 1;
//             for (int v = 1; v < errorPos.Length; v++)
//             {
//                 if (errorPosMin >= errorPos[v])
//                 {
//                     minPosIndex = v;
//                     errorPosMin = errorPos[v];
//                 }
//                 if (errorNegMin >= errorNeg[v])
//                 {
//                     minNegIndex = v;
//                     errorNegMin = errorNeg[v];
//                 }
//             }
//
//             double sign = 1, threshold = 0, Pm = 0.49, Pl = 0.49, Pr = 0.49;
//             if (errorNeg.Length < 1 || errorPos.Length < 1)
//             {
//                 sign = -1;
//                 threshold = valueCollection[0];
//                 Pm = 1;
//                 return null;
//             }
//             else if (errorNegMin > errorPosMin)
//             {
//                 sign = 1;
//                 threshold = 0.5 * (valueCollection[minPosIndex] + valueCollection[minPosIndex + 1]);
//                 Pm = errorPosMin;
//                 Pl = integralPos[minPosIndex];
//                 Pr = integralNeg[integralNeg.Count - 1] - integralNeg[minPosIndex];
//             }
//             else
//             {
//                 threshold = 0.5 * (valueCollection[minNegIndex] + valueCollection[minNegIndex + 1]);
//                 sign = -1;
//                 Pm = errorNegMin;
//                 Pl = integralNeg[minNegIndex];
//                 Pr = integralPos[integralPos.Count - 1] - integralPos[minNegIndex];
//             }
//             return new Stump(dim, threshold, sign, Pm, Pl, Pr);
//         }
        /// <summary>
        /// Split the sub-dataset of treeNode into two branches;
        /// the branches are tagged with leftFlag and rightFlag respectively.
        /// </summary>
        /// <param name="treeNode">the tree node to split</param>
        /// <param name="sortdata">sorted X[][dim]</param>
        /// <param name="leftFlag">flag of the left branch</param>
        /// <param name="rightFlag">flag of the right branch</param>
        /// <param name="datasetFlag">flags of the whole dataset</param>
        private void CutDataSet(TreeNode treeNode, SortedData sortdata, int leftFlag, int rightFlag, ref int[] datasetFlag)
        {
            int N = sortdata.N;

            SortedNode[] sortedDim = sortdata[treeNode.InnerStump.Dim];
            double       Thr       = treeNode.InnerStump.Thr;

            for (int n = 0; n < N; n++)
            {
                //SortedNode node = sortedDim[n];
                if (datasetFlag[sortedDim[n].N] == treeNode.Flag)
                {
                    if (sortedDim[n].Value > Thr)
                    {
                        datasetFlag[sortedDim[n].N] = rightFlag;
                    }
                    else
                    {
                        datasetFlag[sortedDim[n].N] = leftFlag;
                    }
                }
            }
        }
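
CutDataSet only relabels the samples that currently carry the node's own flag, so the whole dataset keeps living in one datasetFlag array while the tree in Example #6 grows. Because each split takes its two flags from ++flag, left branches always receive odd flags, which is why Example #6 attaches a node to Parent.Left when Flag % 2 != 0. A tiny illustration of the flag bookkeeping (values are illustrative only):

        // Illustrative only: how flags evolve over two splits.
        int flag = 0;                 // root sub-dataset carries flag 0
        int leftFlag  = ++flag;       // 1  (odd  -> left child)
        int rightFlag = ++flag;       // 2  (even -> right child)
        // splitting the node with flag 2 next:
        leftFlag  = ++flag;           // 3
        rightFlag = ++flag;           // 4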
Example #6
        public double Train(TrainData data, double[] weight)
        {
            SortedData sortdata = data as SortedData;
            int        N = sortdata.N, MaxDim = sortdata.MaxDim;

            int[] datasetFlags = new int[sortdata.N];
            int   flag         = 0;

            Stump stump = OptimalOneNode(sortdata, flag, datasetFlags, weight);

            if (stump == null)
            {
                throw new Exception(Messege.CouldNotClassify);
            }

            TreeNode treeNode = new TreeNode();

            treeNode.InnerStump = stump;
            treeNode.Parent     = null;
            treeNode.Flag       = flag;
            treeNode.Delta      = 0.5 - treeNode.InnerStump.Pm;

            List <TreeNode> priorityQueue = new List <TreeNode>();

            priorityQueue.Add(treeNode);
            double Pm = stump.Pm;

            for (int splitIndex = 0; splitIndex < _maxSplit; splitIndex++)
            {
                // null entries sort to the front; pop until a real node is found
                do
                {
                    treeNode = priorityQueue[0];
                    priorityQueue.RemoveAt(0);
                } while (treeNode == null && priorityQueue.Count != 0);

                if (treeNode == null)
                {
                    break;
                }

                if (treeNode.Parent == null)
                {
                    _treeRoot = treeNode;
                }
                else
                {
                    if (treeNode.Flag % 2 != 0)
                    {
                        treeNode.Parent.Left = treeNode;
                    }
                    else
                    {
                        treeNode.Parent.Right = treeNode;
                    }
                    Pm = Pm - treeNode.Delta;
                }

                int leftFlag  = ++flag;
                int rightFlag = ++flag;
                CutDataSet(treeNode, sortdata, leftFlag, rightFlag, ref datasetFlags);

                TreeNode leftNode = null, rightNode = null;
                if (treeNode.InnerStump.Pl > double.Epsilon)
                {
                    stump = OptimalOneNode(sortdata, leftFlag, datasetFlags, weight);
                    if (stump != null)
                    {
                        leftNode            = new TreeNode();
                        leftNode.InnerStump = stump;
                        leftNode.Parent     = treeNode;
                        leftNode.Flag       = leftFlag;
                        leftNode.Delta      = treeNode.InnerStump.Pl - stump.Pm;
                    }
                }
                if (treeNode.InnerStump.Pr > double.Epsilon)
                {
                    stump = OptimalOneNode(sortdata, rightFlag, datasetFlags, weight);
                    if (stump != null)
                    {
                        rightNode            = new TreeNode();
                        rightNode.InnerStump = stump;
                        rightNode.Parent     = treeNode;
                        rightNode.Flag       = rightFlag;
                        rightNode.Delta      = treeNode.InnerStump.Pr - stump.Pm;
                    }
                }

                priorityQueue.Add(leftNode);
                priorityQueue.Add(rightNode);
                priorityQueue.Sort();
            }
            return(Pm);
        }
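
priorityQueue.Sort() above implies that TreeNode is also comparable, presumably ordered so that the node whose split promises the largest error reduction Delta comes first (best-first growth, with Pm reduced by Delta each time a node is attached). The actual TreeNode class is not shown; a minimal sketch under that assumption:

        // Hypothetical sketch (not the library's actual class): nodes sorted descending
        // by Delta so the most promising split is expanded next. List<T>.Sort's default
        // comparer places null entries first, which the do/while above skips over.
        public class TreeNode : IComparable<TreeNode>
        {
            public Stump    InnerStump;          // split stored at this node
            public TreeNode Parent, Left, Right; // tree links
            public int      Flag;                // flag of the sub-dataset this node owns
            public double   Delta;               // expected drop in weighted error

            public int CompareTo(TreeNode other)
            {
                if (other == null) return 1;             // keep consistent with the default comparer
                return other.Delta.CompareTo(Delta);     // larger Delta sorts first
            }
        }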