Exemple #1
0
        /// <summary>
        /// Builds <paramref name="ntrees"/> trees by dividing the work between one task
        /// per processor reported by <see cref="Environment.ProcessorCount"/>.
        /// </summary>
        /// <param name="ntrees">Total number of trees to build across all tasks.</param>
        /// <returns>A single list holding the trees produced by every task, in task order.</returns>
        public List <Tree> MakeTreesParallel(int ntrees)
        {
            int workerCount = Environment.ProcessorCount;

            int baseShare = ntrees / workerCount;   // truncating integer division
            int leftover  = ntrees % workerCount;   // trees left over after the even split

            // Every worker starts with the base share; the first `leftover` workers take one extra tree.
            int[] shares = Yarr.Repeat(baseShare, workerCount);
            for (int worker = 0; worker < leftover; worker++)
            {
                shares[worker] += 1;
            }

            // The lambda parameter is a fresh variable per worker, so each task captures its own index.
            Task <List <Tree> >[] workers = Enumerable.Range(0, workerCount)
                                            .Select(worker => Task.Factory.StartNew(() => MakeTrees(shares[worker])))
                                            .ToArray();

            // Block until every worker has finished before reading any result.
            Task.WaitAll(workers);

            IEnumerable <Tree> allTrees = workers.SelectMany(finished => finished.Result);
            return(allTrees.ToList());
        }
Exemple #2
0
        /// <summary>
        /// Scores <paramref name="data"/> and returns the signal and background score arrays
        /// wrapped in a <c>Score</c>.
        /// </summary>
        /// <param name="data">Records to score; both output arrays have one slot per row of this set.</param>
        /// <param name="parallel">Accepted for signature compatibility; this method does not read it.</param>
        /// <returns>A <c>Score</c> built from the two output arrays after <c>_Score</c> has run.</returns>
        public Score Score(RecordSet data, bool parallel = true)
        {
            // NOTE: the parallel parameter is ignored

            // Both outputs start out as all-NaN, sized to the unfiltered input.
            int      totalRows        = data.NRows;
            double[] signalScores     = Yarr.Repeat(double.NaN, totalRows);
            double[] backgroundScores = Yarr.Repeat(double.NaN, totalRows);

            // Mask is the negation of HasNaN over this object's target features;
            // presumably Filter keeps the rows whose mask entry is true -- TODO confirm.
            bool[] keepMask = Yarr.InlineNot(data.HasNaN(this.TargetFeatures));
            var    usable   = data.Filter(keepMask);

            // Drop our reference to the unfiltered set; other references likely keep it alive, but it is harmless.
            data = null;

            // NOTE(review): _Score is not visible here; it is assumed to write into the two arrays it is handed.
            var everyUsableRow = Yarr.Range(usable.NRows).MakeSlice();
            this._Score(usable, everyUsableRow, signalScores, backgroundScores);

            return(new Score(signalScores, backgroundScores));
        }
Exemple #3
0
        /// <summary>
        /// Creates a tree node over <paramref name="trainPoints"/> bounded by the given corners
        /// and computes its signal and background densities.
        /// </summary>
        /// <param name="trainPoints">Training records held by this node; its labels are counted for the densities.</param>
        /// <param name="targetFeatures">Feature indices this node works on; its length becomes <c>NDim</c>.</param>
        /// <param name="minCorner">Stored as <c>MinCorner</c>.</param>
        /// <param name="maxCorner">Stored as <c>MaxCorner</c>.</param>
        /// <param name="includeMax">Stored as <c>IncludeMax</c>; when null, an all-true array of length <c>NDim</c> is used.</param>
        /// <param name="normalizingConstant">Multiplier applied to the volume; when absent, the number of training rows is used.</param>
        public Tree(
            TrainingRecordSet trainPoints,
            int[] targetFeatures,
            double[] minCorner,
            double[] maxCorner,
            bool[] includeMax           = null,
            double? normalizingConstant = null
            )
        {
            this.TargetFeatures = targetFeatures;
            int dimensionCount = targetFeatures.Length;
            this.NDim = dimensionCount;

            this.TrainPoints = trainPoints;
            int pointCount = trainPoints.NRows;
            this.NTrainPoints = pointCount;

            this.MinCorner = minCorner;
            this.MaxCorner = maxCorner;

            // Fall back to defaults for the optional arguments.
            this.IncludeMax          = includeMax ?? Yarr.Repeat(true, dimensionCount);
            this.NormalizingConstant = normalizingConstant ?? pointCount;

            // CalcVolume() is called only after the corners and flags above have been stored.
            double scaledVolume = CalcVolume() * this.NormalizingConstant;

            // Rows labelled 's' are counted as signal; every other row counts as background.
            int signalCount     = trainPoints.Labels.CountEqu('s');
            int backgroundCount = pointCount - signalCount;

            this.SDensity = signalCount / scaledVolume;
            this.BDensity = backgroundCount / scaledVolume;
        }
Exemple #4
0
//		private Score ScoreParallel(RecordSet data)
//		{
//
//		}

        /// <summary>
        /// Combines several scores into one by taking, for each row, the geometric mean of the
        /// non-NaN signal scores and, separately, of the non-NaN background scores.
        /// </summary>
        /// <param name="scores">Scores to combine; enumerated exactly once.</param>
        /// <param name="nrows">Number of rows read from each score and written to the result.</param>
        /// <returns>
        /// A <c>Score</c> whose entries are exp(mean of logs). A row with no non-NaN input
        /// divides 0.0 by 0 and therefore comes out as NaN.
        /// </returns>
        private Score GMean(IEnumerable <Score> scores, int nrows)
        {
            double[] signalLogSum     = Yarr.Repeat <double>(0.0, nrows);
            double[] backgroundLogSum = Yarr.Repeat <double>(0.0, nrows);
            int[]    signalUsed       = Yarr.Repeat <int>(0, nrows);
            int[]    backgroundUsed   = Yarr.Repeat <int>(0, nrows);

            // Accumulate log(score) per row, skipping NaN entries.
            foreach (Score current in scores)
            {
                var signalValues     = current.SScores;
                var backgroundValues = current.BScores;

                for (int row = 0; row < nrows; row++)
                {
                    double signalValue = signalValues[row];
                    if (double.IsNaN(signalValue) == false)
                    {
                        signalLogSum[row] += Math.Log(signalValue);
                        signalUsed[row]   += 1;
                    }

                    double backgroundValue = backgroundValues[row];
                    if (double.IsNaN(backgroundValue) == false)
                    {
                        backgroundLogSum[row] += Math.Log(backgroundValue);
                        backgroundUsed[row]   += 1;
                    }
                }
            }

            // exp(average log) is the geometric mean of the contributing values.
            double[] signalMeans     = new double[nrows];
            double[] backgroundMeans = new double[nrows];
            for (int row = 0; row < nrows; row++)
            {
                double signalAverageLog     = signalLogSum[row] / signalUsed[row];
                double backgroundAverageLog = backgroundLogSum[row] / backgroundUsed[row];

                signalMeans[row]     = Math.Exp(signalAverageLog);
                backgroundMeans[row] = Math.Exp(backgroundAverageLog);
            }

            return(new Score(signalMeans, backgroundMeans));
        }
Exemple #5
0
        /// <summary>
        /// Tries a fixed number of random split positions along one target dimension and reports
        /// the one with the largest expected information.
        /// </summary>
        /// <param name="localDimIndex">Index into <c>TargetFeatures</c> selecting the dimension to split.</param>
        /// <returns>
        /// A tuple of (largest expected information, split position that achieved it). When no
        /// candidate scores above 0.0 the tuple is (0.0, NaN).
        /// </returns>
        private Tuple <double, double> FindBestSplit(int localDimIndex)
        {
            const int CandidateCount = 5;
            double    entropyBefore  = TotalEntropy();

            int featureIndex = this.TargetFeatures[localDimIndex];

            // The range helpers take an index array, so wrap the single feature index in one.
            int[] singleFeature = Yarr.Repeat <int>(featureIndex, 1);

            double[] lowerBounds = this.TrainPoints.CalcLocalMins(singleFeature);
            double[] upperBounds = this.TrainPoints.CalcLocalMaxs(singleFeature);

            double[] candidates = RandomUtils.RandBetween(lowerBounds[0], upperBounds[0], CandidateCount);

            double bestInfo      = 0.0;
            double bestThreshold = double.NaN;

            for (int candidate = 0; candidate < CandidateCount; candidate++)
            {
                double threshold = candidates[candidate];

                int signalAbove     = 0;
                int backgroundAbove = 0;
                int signalBelow     = 0;
                int backgroundBelow = 0;

                // Tally each training row by side of the threshold and by label.
                for (int row = 0; row < NTrainPoints; row++)
                {
                    double value    = TrainPoints.FeatureCols[featureIndex][row];
                    bool   isSignal = TrainPoints.Labels[row] == 's';

                    // A value equal to the threshold counts as above; a NaN compares false and lands below.
                    bool isAbove = value >= threshold;

                    if (isAbove && isSignal)
                    {
                        signalAbove++;
                    }
                    else if (isAbove)
                    {
                        backgroundAbove++;
                    }
                    else if (isSignal)
                    {
                        signalBelow++;
                    }
                    else
                    {
                        backgroundBelow++;
                    }
                }

                int    countAbove  = signalAbove + backgroundAbove;
                double weightAbove = ((double)countAbove) / NTrainPoints;
                double weightBelow = 1.0 - weightAbove;

                double entropyAbove = Entropy(signalAbove, backgroundAbove);
                double entropyBelow = Entropy(signalBelow, backgroundBelow);

                // Expected information = entropy before the split minus the weighted entropy after it.
                double entropyAfter = (weightAbove * entropyAbove) + (weightBelow * entropyBelow);
                double info         = entropyBefore - entropyAfter;

                if (info > bestInfo)
                {
                    bestInfo      = info;
                    bestThreshold = threshold;
                }
            }

            return(new Tuple <double, double>(bestInfo, bestThreshold));
        }
Exemple #6
0
        /// <summary>
        /// Drains <paramref name="scores"/> until it is marked complete and returns, per row, the
        /// geometric mean of the non-NaN signal scores and of the non-NaN background scores.
        /// </summary>
        /// <remarks>
        /// The calling thread forwards each score's signal and background arrays to two bounded
        /// queues. One worker task per queue accumulates log-sums and counts into its own arrays,
        /// so the two workers never write to the same array.
        /// </remarks>
        /// <param name="scores">Producer/consumer queue of scores; this method blocks until adding is completed and the queue is empty.</param>
        /// <param name="nrows">Number of rows read from each score array and written to the result.</param>
        /// <returns>
        /// A <c>Score</c> whose entries are exp(mean of logs). A row with no non-NaN input
        /// divides 0.0 by 0 and therefore comes out as NaN.
        /// </returns>
        private Score ParallelGmeaner(BlockingCollection <Score> scores, int nrows)
        {
            double[] sSums   = Yarr.Repeat <double>(0.0, nrows);
            double[] bSums   = Yarr.Repeat <double>(0.0, nrows);
            int[]    sCounts = Yarr.Repeat <int>(0, nrows);
            int[]    bCounts = Yarr.Repeat <int>(0, nrows);

            // Bounded queues: Add() blocks once a worker falls cores * 10 arrays behind.
            int cores = Environment.ProcessorCount;
            BlockingCollection <double[]> sScoresCollection = new BlockingCollection <double[]>(cores * 10);
            BlockingCollection <double[]> bScoresCollection = new BlockingCollection <double[]>(cores * 10);

            // Starts a worker that accumulates log(score) per row from one queue until it is completed and drained.
            Func <int[], double[], BlockingCollection <double[]>, Task> taskMaker =
                (counts, sums, scoreCollection) => Task.Factory.StartNew(
                    () =>
                    {
                        // GetConsumingEnumerable blocks for items and ends cleanly on completion,
                        // so no exception is needed to detect the end of the stream.
                        foreach (double[] scoreArr in scoreCollection.GetConsumingEnumerable())
                        {
                            for (int rowIndex = 0; rowIndex < nrows; rowIndex++)
                            {
                                double val = scoreArr[rowIndex];
                                if (!double.IsNaN(val))
                                {
                                    sums[rowIndex] += Math.Log(val);
                                    counts[rowIndex]++;
                                }
                            }
                        }
                    }
                    );

            Task sTask = taskMaker(sCounts, sSums, sScoresCollection);
            Task bTask = taskMaker(bCounts, bSums, bScoresCollection);

            try
            {
                foreach (Score score in scores.GetConsumingEnumerable())
                {
                    sScoresCollection.Add(score.SScores);
                    bScoresCollection.Add(score.BScores);
                }
            }
            finally
            {
                // Always signal completion, otherwise a failure above would leave both workers blocked forever.
                sScoresCollection.CompleteAdding();
                bScoresCollection.CompleteAdding();
            }

            try
            {
                Task.WaitAll(sTask, bTask);
            }
            finally
            {
                // WaitAll returns or throws only after both workers have finished, so nothing is
                // still using the queues when they are disposed.
                sScoresCollection.Dispose();
                bScoresCollection.Dispose();
            }

            double[] sScores = new double[nrows];
            double[] bScores = new double[nrows];
            for (int rowIndex = 0; rowIndex < nrows; rowIndex++)
            {
                sScores[rowIndex] = Math.Exp(sSums[rowIndex] / sCounts[rowIndex]);
                bScores[rowIndex] = Math.Exp(bSums[rowIndex] / bCounts[rowIndex]);
            }

            return(new Score(sScores, bScores));
        }