/// <summary>
/// Checks that <c>MedianUtils.Median</c> returns 3f for several four-element float arrays.
/// </summary>
/// <remarks>
/// Every input holds 1, 2 and 3 in a different order plus one larger value, so the expected
/// result is always the third-smallest element.
/// </remarks>
public void FloatSortMedian()
{
    var samples = new float[][]
    {
        new float[] { 1f, 2f, 3f, 10f },
        new float[] { 2f, 1f, 3f, 40f },
        new float[] { 3f, 1f, 2f, 45f },
        new float[] { 1f, 3f, 2f, 43f },
        new float[] { 2f, 3f, 1f, 44f },
        new float[] { 3f, 2f, 1f, 54f },
    };

    // The inputs are checked in the listed order; the first mismatch fails the test.
    foreach (float[] sample in samples)
    {
        Assert.True(MedianUtils.Median(sample) == 3f);
    }
}
/// <summary>
/// Checks that <c>MedianUtils.Median</c> returns 2f for every ordering of the values 1, 2 and 3.
/// </summary>
public void FloatTripleMedian()
{
    // All six permutations of { 1, 2, 3 }.
    var permutations = new float[][]
    {
        new float[] { 1f, 2f, 3f },
        new float[] { 2f, 1f, 3f },
        new float[] { 3f, 1f, 2f },
        new float[] { 1f, 3f, 2f },
        new float[] { 2f, 3f, 1f },
        new float[] { 3f, 2f, 1f },
    };

    foreach (float[] permutation in permutations)
    {
        Assert.True(MedianUtils.Median(permutation) == 2f);
    }
}
/// <summary>
/// Checks <c>MedianUtils.Median</c> on arrays of one, two and three <c>Vector3</c> values.
/// </summary>
/// <remarks>
/// The expected results are: the single vector itself, (1.5, 2.5, 2) for two vectors,
/// and (2, 2, 2) for three vectors.
/// </remarks>
public void Vector3Median()
{
    var first = new Vector3(1f, 2f, 3f);
    var second = new Vector3(2f, 3f, 1f);
    var third = new Vector3(3f, 1f, 2f);

    // One element.
    var medianOfOne = MedianUtils.Median(new Vector3[] { first });
    var expectedForOne = new Vector3(1f, 2f, 3f);
    Assert.True(medianOfOne == expectedForOne);

    // Two elements.
    var medianOfTwo = MedianUtils.Median(new Vector3[] { first, second });
    var expectedForTwo = new Vector3(1.5f, 2.5f, 2f);
    Assert.True(medianOfTwo == expectedForTwo);

    // Three elements.
    var medianOfThree = MedianUtils.Median(new Vector3[] { first, second, third });
    var expectedForThree = new Vector3(2f, 2f, 2f);
    Assert.True(medianOfThree == expectedForThree);
}
/// <summary>
/// Splits this box into a lower and an upper child along one of the dimensions listed in
/// <c>dimensionsToSplit</c>, if the observed boundary reports at least one of them as splittable.
/// </summary>
/// <remarks>
/// The split value is the estimated median of the unpropagated datapoints' coordinates in the
/// chosen dimension, falling back to the midpoint of the current boundary when the estimate
/// lands exactly on one of its edges. Each child receives at most a quarter of the pending
/// datapoints, after which this box's list of unpropagated datapoints is cleared.
/// If no dimension is splittable the method returns without changing anything.
/// </remarks>
public void ConsiderSplitting()
{
    // make sure there is a splittable dimension where the spread is nonzero
    // NOTE(review): there is no break, so the LAST splittable entry of dimensionsToSplit wins,
    // while RemoveAt(0) further down always drops the FIRST entry of the split order.
    // Confirm that this combination is intended.
    int dimension = -1;
    foreach (int coordinate in this.dimensionsToSplit)
    {
        if (this.observedBoundary.Coordinates[coordinate].IsSplittable)
        {
            dimension = coordinate;
        }
    }
    if (dimension < 0)
    {
        return;
    }

    // Counted once; the collection is not modified until it is replaced at the end of this method.
    int pendingCount = this.unpropogatedDatapoints.Count();

    // now split
    // compute the coordinates of each child
#if true
    List<double> inputs = new List<double>(pendingCount);
    foreach (IDatapoint<ScoreType> datapoint in this.unpropogatedDatapoints)
    {
        inputs.Add(datapoint.InputCoordinates[dimension]);
    }
    double splitValue = MedianUtils.EstimateMedian(inputs);
    // An estimate sitting exactly on an edge of the current boundary would leave one child with
    // a zero-width range, so fall back to the midpoint of the current boundary.
    if (splitValue == this.currentBoundary.Coordinates[dimension].HighCoordinate || splitValue == this.currentBoundary.Coordinates[dimension].LowCoordinate)
    {
        splitValue = (this.currentBoundary.Coordinates[dimension].LowCoordinate + this.currentBoundary.Coordinates[dimension].HighCoordinate) / 2;
    }
#else
    //double splitValue = (this.currentBoundary.Coordinates[dimension].LowCoordinate + this.currentBoundary.Coordinates[dimension].HighCoordinate) / 2;
    double splitValue = (this.observedBoundary.Coordinates[dimension].LowCoordinate + this.observedBoundary.Coordinates[dimension].HighCoordinate) / 2;
#endif

    // The lower child owns the split value itself (high edge inclusive) ...
    HyperBox<ScoreType> lowerBoundary = new HyperBox<ScoreType>(this.currentBoundary);
    lowerBoundary.Coordinates[dimension].HighCoordinate = splitValue;
    lowerBoundary.Coordinates[dimension].HighInclusive = true;
    // ... and the upper child starts strictly above it (low edge exclusive).
    HyperBox<ScoreType> upperBoundary = new HyperBox<ScoreType>(this.currentBoundary);
    upperBoundary.Coordinates[dimension].LowCoordinate = splitValue;
    upperBoundary.Coordinates[dimension].LowInclusive = false;

    // determine the split order for the children
    List<int> childSplitOrder = new List<int>(this.dimensionsToSplit);
    childSplitOrder.RemoveAt(0);
    childSplitOrder.Add(dimension);

    // fill data into the children
    this.lowerChild = new SimpleInterpolationBox<ScoreType>(lowerBoundary, childSplitOrder, this.dimensionToSort, this.scoreHandler);
    this.upperChild = new SimpleInterpolationBox<ScoreType>(upperBoundary, childSplitOrder, this.dimensionToSort, this.scoreHandler);

    // skip half of the datapoints because it saves a lot of time (the skipping compounds in
    // grandchildren etc) and shouldn't make much difference in our decision of which dim to split
#if false
    int desiredNumPointsPerChild = pendingCount;
#else
    int desiredNumPointsPerChild = pendingCount / 4;
#endif
    List<IDatapoint<ScoreType>> lowerPoints = new List<IDatapoint<ScoreType>>(desiredNumPointsPerChild);
    List<IDatapoint<ScoreType>> upperPoints = new List<IDatapoint<ScoreType>>(desiredNumPointsPerChild);
    foreach (IDatapoint<ScoreType> newDatapoint in this.unpropogatedDatapoints)
    {
        // Strictly greater: a point exactly at splitValue belongs to the lower child, matching
        // HighInclusive = true on the lower boundary and LowInclusive = false on the upper one.
        if (newDatapoint.InputCoordinates[dimension] > splitValue)
        {
            if (upperPoints.Count < desiredNumPointsPerChild)
            {
                upperPoints.Add(newDatapoint);
            }
        }
        else
        {
            if (lowerPoints.Count < desiredNumPointsPerChild)
            {
                lowerPoints.Add(newDatapoint);
            }
        }
    }

    this.unpropogatedDatapoints = new List<IDatapoint<ScoreType>>(0);
    this.lowerChild.AddDatapoints(lowerPoints);
    this.upperChild.AddDatapoints(upperPoints);
}
/// <summary>
/// Checks that <c>MedianUtils.Median</c>, called through the <c>IEnumerable&lt;float&gt;</c>
/// argument type, returns 6f for the two values 4 and 8.
/// </summary>
public void FloatMedianAverage()
{
    // Typed as IEnumerable<float> so the call binds with that static argument type.
    IEnumerable<float> pair = new float[] { 4f, 8f };

    var median = MedianUtils.Median(pair);

    Assert.True(median == 6f);
}
/// <summary>
/// Checks that <c>MedianUtils.Median</c>, called through the <c>IEnumerable&lt;float&gt;</c>
/// argument type, returns the only element of a one-element sequence.
/// </summary>
public void FloatMedianSingleData()
{
    // Typed as IEnumerable<float> so the call binds with that static argument type.
    IEnumerable<float> onlyValue = new float[] { 15f };

    var median = MedianUtils.Median(onlyValue);

    Assert.True(median == 15f);
}
/// <summary>
/// Checks that <c>MedianUtils.Median</c>, called through the <c>IEnumerable&lt;float&gt;</c>
/// argument type, throws <c>MedianUtils.EmptyData</c> for an empty sequence.
/// </summary>
public void FloatMedianEmptyData()
{
    // Typed as IEnumerable<float> so the call binds with that static argument type.
    IEnumerable<float> noValues = new float[0];

    Assert.Throws<MedianUtils.EmptyData>(() => MedianUtils.Median(noValues));
}
/// <summary>
/// Splits this box along <paramref name="dimension"/> into a lower and an upper child and hands
/// each child the datapoints that its boundary contains.
/// </summary>
/// <remarks>
/// The split value starts as the estimated median of the datapoints' coordinates in that
/// dimension. If it does not lie strictly inside the observed range, the midpoint of the observed
/// range is tried instead; if that fails as well, both children are set to <c>null</c> and the
/// method returns. <c>splitDimension</c> is assigned before any of these checks.
/// </remarks>
/// <param name="dimension">Index of the input coordinate to split along.</param>
public void Split(int dimension)
{
    this.splitDimension = dimension;

    // Decide where to cut.
#if true
    var coordinateValues = new List<double>(this.datapoints.Count);
    foreach (IDatapoint<SummaryType> point in this.datapoints)
    {
        coordinateValues.Add(point.InputCoordinates[dimension]);
    }
    double splitValue = MedianUtils.EstimateMedian(coordinateValues);

    // Read-only alias for the observed range in the split dimension.
    var observedRange = this.observedBoundary.Coordinates[dimension];

    // The median estimate may have landed on (or beyond) an edge of the observed range.
    bool onOrPastEdge = splitValue >= observedRange.HighCoordinate || splitValue <= observedRange.LowCoordinate;
    if (onOrPastEdge)
    {
        splitValue = (observedRange.LowCoordinate + observedRange.HighCoordinate) / 2;

        // Rounding error can leave even the midpoint on an edge; in that case no split is possible.
        onOrPastEdge = splitValue >= observedRange.HighCoordinate || splitValue <= observedRange.LowCoordinate;
        if (onOrPastEdge)
        {
            this.upperChild = null;
            this.lowerChild = null;
            return;
        }
    }
#else
    double splitValue = (this.observedBoundary.Coordinates[dimension].LowCoordinate + this.observedBoundary.Coordinates[dimension].HighCoordinate) / 2;
    if (splitValue == this.currentBoundary.Coordinates[dimension].HighCoordinate || splitValue == this.currentBoundary.Coordinates[dimension].LowCoordinate)
    {
        splitValue = (this.currentBoundary.Coordinates[dimension].LowCoordinate + this.currentBoundary.Coordinates[dimension].HighCoordinate) / 2;
    }
#endif

    // Lower child: copy of the current boundary, capped at the split value (inclusive).
    var lowerBoundary = new HyperBox<SummaryType>(this.currentBoundary);
    lowerBoundary.Coordinates[dimension].HighCoordinate = splitValue;
    lowerBoundary.Coordinates[dimension].HighInclusive = true;

    // Upper child: copy of the current boundary, starting just above the split value (exclusive).
    var upperBoundary = new HyperBox<SummaryType>(this.currentBoundary);
    upperBoundary.Coordinates[dimension].LowCoordinate = splitValue;
    upperBoundary.Coordinates[dimension].LowInclusive = false;

    // Sort every datapoint into the child whose boundary contains it.
    int expectedPerChild = this.datapoints.Count / 2;
    var pointsBelow = new List<IDatapoint<SummaryType>>(expectedPerChild);
    var pointsAbove = new List<IDatapoint<SummaryType>>(expectedPerChild);
    foreach (IDatapoint<SummaryType> candidate in this.datapoints)
    {
        if (lowerBoundary.Contains(candidate))
        {
            pointsBelow.Add(candidate);
        }
        if (upperBoundary.Contains(candidate))
        {
            pointsAbove.Add(candidate);
        }
    }

    int childDepth = this.depthFromRoot + 1;
    this.lowerChild = new SmartInterpolationBox<SummaryType>(lowerBoundary, this.scoreHandler, childDepth);
    this.upperChild = new SmartInterpolationBox<SummaryType>(upperBoundary, this.scoreHandler, childDepth);

    // While preplanned splits remain, no effort is spent choosing a split dimension:
    // the children are simply told to split the next dimension in cyclic order.
    if (this.numPreplannedSplits > 1)
    {
        int followingDimension = (this.splitDimension + 1) % this.NumDimensions;
        int remainingSplits = this.numPreplannedSplits - 1;
        this.lowerChild.ForceSplits(followingDimension, remainingSplits);
        this.upperChild.ForceSplits(followingDimension, remainingSplits);
    }

    // Each child was constructed all at once from a known set of points and could not have been
    // queried in the meanwhile, so it may use all of its points when deciding where to split
    // (this won't cause inconsistencies across runs, even as more data gets added).
    this.lowerChild.AddDatapoints(pointsBelow);
    this.lowerChild.PermitSplitting();

    this.upperChild.AddDatapoints(pointsAbove);
    this.upperChild.PermitSplitting();

#if false
    this.lowerChild.ApplyPendingPoints();
    this.upperChild.ApplyPendingPoints();
#endif
#if MIN_SPLIT_COUNTS
    this.UpdateSplitCounts();
#endif
}