// Builds 'ntrees' trees using one worker task per logical core.
// The workload is split as evenly as possible; the first
// (ntrees % cores) workers each build one extra tree.
public List <Tree> MakeTreesParallel(int ntrees)
{
    int workerCount = Environment.ProcessorCount;
    int baseChunk = ntrees / workerCount; // integer division == floor
    int[] chunkSizes = Yarr.Repeat(baseChunk, workerCount);
    int leftover = ntrees - (baseChunk * workerCount);
    for (int w = 0; w < leftover; w++)
    {
        chunkSizes[w]++;
    }

    // One task per core, each producing its own chunk of trees.
    Task <List <Tree> >[] workers = new Task <List <Tree> > [workerCount];
    for (int w = 0; w < workerCount; w++)
    {
        int chunkIndex = w; // per-iteration copy so the closure doesn't share the loop variable
        workers[w] = Task.Factory.StartNew(
            () => MakeTrees(chunkSizes[chunkIndex])
        );
    }
    Task.WaitAll(workers);

    // Flatten the per-worker results into a single list.
    return workers.SelectMany(worker => worker.Result).ToList();
}
// Scores every row of 'data', skipping rows that have NaN in any target
// feature; skipped rows keep NaN in the returned score arrays.
// NOTE: the 'parallel' parameter is currently ignored.
public Score Score(RecordSet data, bool parallel = true)
{
    double[] signalScores = Yarr.Repeat(double.NaN, data.NRows);
    double[] backgroundScores = Yarr.Repeat(double.NaN, data.NRows);

    // Keep only rows with valid values in every target feature.
    bool[] keepRow = Yarr.InlineNot(data.HasNaN(this.TargetFeatures));
    var usableData = data.Filter(keepRow);
    data = null; // unlikely to let anything be GC'ed (lots of references to same obj) but it can't hurt

    this._Score(
        usableData,
        Yarr.Range(usableData.NRows).MakeSlice(),
        signalScores,
        backgroundScores
    );
    return new Score(signalScores, backgroundScores);
}
// Constructs a tree node over the axis-aligned box [minCorner, maxCorner]
// containing 'trainPoints', and precomputes signal/background densities
// (class count / (volume * normalizing constant)).
public Tree(
    TrainingRecordSet trainPoints,
    int[] targetFeatures,
    double[] minCorner,
    double[] maxCorner,
    bool[] includeMax = null,
    double?normalizingConstant = null
)
{
    this.TargetFeatures = targetFeatures;
    int ndim = this.NDim = targetFeatures.Length;
    this.TrainPoints = trainPoints;
    int npoints = this.NTrainPoints = trainPoints.NRows;
    this.MinCorner = minCorner;
    this.MaxCorner = maxCorner;

    // Default: the upper bound is inclusive in every dimension.
    this.IncludeMax = includeMax ?? Yarr.Repeat(true, ndim);

    // Default normalization is the total number of training points.
    this.NormalizingConstant = normalizingConstant ?? npoints;

    double normalizedVolume = CalcVolume() * this.NormalizingConstant;
    int signalCount = trainPoints.Labels.CountEqu('s');
    int backgroundCount = npoints - signalCount;
    this.SDensity = signalCount / normalizedVolume;
    this.BDensity = backgroundCount / normalizedVolume;
}
// Per-row geometric mean of an ensemble of Scores, computed as
// exp(mean(log(score))) over the non-NaN entries for each row.
// Rows where every score is NaN end up as 0/0 -> NaN, preserving the
// NaN-propagation convention used by the scoring code.
private Score GMean(IEnumerable <Score> scores, int nrows)
{
    // C# arrays are zero-initialized by the runtime, so plain allocation
    // replaces the previous explicit zero-fill helper calls.
    double[] sSums = new double[nrows];
    double[] bSums = new double[nrows];
    int[] sCounts = new int[nrows];
    int[] bCounts = new int[nrows];

    // Accumulate log-scores and valid-entry counts per row.
    foreach (Score score in scores)
    {
        for (int rowIndex = 0; rowIndex < nrows; rowIndex++)
        {
            double sScore = score.SScores[rowIndex];
            if (!double.IsNaN(sScore))
            {
                sSums[rowIndex] += Math.Log(sScore);
                sCounts[rowIndex]++;
            }
            double bScore = score.BScores[rowIndex];
            if (!double.IsNaN(bScore))
            {
                bSums[rowIndex] += Math.Log(bScore);
                bCounts[rowIndex]++;
            }
        }
    }

    // Finish: geometric mean = exp(sum of logs / count).
    double[] sScores = new double[nrows];
    double[] bScores = new double[nrows];
    for (int rowIndex = 0; rowIndex < nrows; rowIndex++)
    {
        sScores[rowIndex] = Math.Exp(sSums[rowIndex] / sCounts[rowIndex]);
        bScores[rowIndex] = Math.Exp(bSums[rowIndex] / bCounts[rowIndex]);
    }
    return new Score(sScores, bScores);
}
// Tries NSPLITS random thresholds along one target dimension and returns
// (best expected information gain, corresponding threshold). If no
// candidate improves on zero gain, the threshold comes back as NaN.
private Tuple <double, double> FindBestSplit(int localDimIndex)
{
    const int NSPLITS = 5;
    double baseEntropy = TotalEntropy();
    int globalDimIndex = this.TargetFeatures[localDimIndex];

    // Current data extent along this dimension.
    int[] dimQuery = Yarr.Repeat <int>(globalDimIndex, 1);
    double dimMin = this.TrainPoints.CalcLocalMins(dimQuery)[0];
    double dimMax = this.TrainPoints.CalcLocalMaxs(dimQuery)[0];

    double[] candidates = RandomUtils.RandBetween(dimMin, dimMax, NSPLITS);

    double bestGain = 0.0;
    double bestThreshold = double.NaN;
    foreach (double threshold in candidates)
    {
        // Count signal/background rows on each side; values >= threshold
        // fall on the "above" side.
        int sAbove = 0, bAbove = 0, sBelow = 0, bBelow = 0;
        for (int rowNum = 0; rowNum < NTrainPoints; rowNum++)
        {
            bool isSignal = TrainPoints.Labels[rowNum] == 's';
            if (TrainPoints.FeatureCols[globalDimIndex][rowNum] >= threshold)
            {
                if (isSignal) { sAbove++; } else { bAbove++; }
            }
            else
            {
                if (isSignal) { sBelow++; } else { bBelow++; }
            }
        }

        int nAbove = sAbove + bAbove;
        double probAbove = ((double)nAbove) / NTrainPoints;
        double probBelow = 1.0 - probAbove; // == nBelow / NTrainPoints

        // Expected information gain of splitting at this threshold.
        double gain = baseEntropy
            - ((probAbove * Entropy(sAbove, bAbove)) + (probBelow * Entropy(sBelow, bBelow)));
        if (gain > bestGain)
        {
            bestGain = gain;
            bestThreshold = threshold;
        }
    }
    return new Tuple <double, double>(bestGain, bestThreshold);
}
// Parallel geometric-mean combiner. This (producer) thread drains the
// incoming 'scores' queue and forwards the signal and background arrays
// to two dedicated consumer tasks, each of which accumulates per-row
// log-sums and non-NaN counts into its own arrays (no locking needed:
// each array is written by exactly one task). After both queues are
// completed and both workers join, the final per-row geometric mean is
// exp(sum of logs / count).
private Score ParallelGmeaner(BlockingCollection <Score> scores, int nrows)
{
    // Per-row accumulators handed to the worker tasks.
    double[] sSums = Yarr.Repeat <double>(0.0, nrows);
    double[] bSums = Yarr.Repeat <double>(0.0, nrows);
    int[] sCounts = Yarr.Repeat <int>(0, nrows);
    int[] bCounts = Yarr.Repeat <int>(0, nrows);

    // Bounded hand-off queues (capacity cores*10) so this producer cannot
    // run unboundedly ahead of the consumers.
    int cores = Environment.ProcessorCount;
    BlockingCollection <double[]> sScoresCollection = new BlockingCollection <double[]>(cores * 10);
    BlockingCollection <double[]> bScoresCollection = new BlockingCollection <double[]>(cores * 10);

    // Factory for a consumer task: takes arrays off 'scoreCollection' and
    // folds Log(value) into 'sums' / bumps 'counts' for non-NaN entries.
    Func <int[], double[], BlockingCollection <double[]>, Task> taskMaker =
        (counts, sums, scoreCollection) => Task.Factory.StartNew(
            () =>
            {
                double[] scoreArr;
                while (!scoreCollection.IsCompleted)
                {
                    try
                    {
                        // Take() throws InvalidOperationException if the
                        // collection is marked complete while we block;
                        // loop back so IsCompleted ends the task cleanly.
                        scoreArr = scoreCollection.Take();
                    }
                    catch (InvalidOperationException)
                    {
                        continue;
                    }
                    for (int rowIndex = 0; rowIndex < nrows; rowIndex++)
                    {
                        double val = scoreArr[rowIndex];
                        if (!double.IsNaN(val))
                        {
                            sums[rowIndex] += Math.Log(val);
                            counts[rowIndex]++;
                        }
                    }
                }
            }
        );
    Task sTask = taskMaker(sCounts, sSums, sScoresCollection);
    Task bTask = taskMaker(bCounts, bSums, bScoresCollection);

    // Producer loop: split each Score into its two arrays and enqueue.
    Score score;
    while (!scores.IsCompleted)
    {
        try
        {
            score = scores.Take();
        }
        catch (InvalidOperationException)
        {
            continue;
        }
        sScoresCollection.Add(score.SScores);
        bScoresCollection.Add(score.BScores);
    }
    // Signal the workers that no more arrays are coming, then join them
    // before reading the shared accumulators.
    sScoresCollection.CompleteAdding();
    bScoresCollection.CompleteAdding();
    Task.WaitAll(sTask, bTask);

    // Geometric mean per row: exp(mean(log(x))); rows with zero valid
    // entries yield 0/0 -> NaN.
    double[] sScores = new double[nrows];
    double[] bScores = new double[nrows];
    for (int rowIndex = 0; rowIndex < nrows; rowIndex++)
    {
        sScores[rowIndex] = Math.Exp(sSums[rowIndex] / sCounts[rowIndex]);
        bScores[rowIndex] = Math.Exp(bSums[rowIndex] / bCounts[rowIndex]);
    }
    return(new Score(sScores, bScores));
}