/// <summary>
/// Searches one feature for the split of <paramref name="node"/> with the lowest size-weighted Gini value.
/// The points are ordered by their value in <paramref name="dim"/> and moved one at a time, starting from
/// the largest value, out of the "left" group (initially every point) into the "right" group.
/// </summary>
/// <param name="dim">Index of the feature (column of <c>_training</c>) to split on.</param>
/// <param name="node">Node being split; its point ids, <c>MgStuff.dicLabelCount</c> and
/// <c>MgStuff.GiniAvg</c> are read.</param>
/// <returns>
/// A two-element array indexed by <c>VALUE</c> and <c>SPLIT_INDEX</c>. On success it holds the best weighted
/// Gini (never below 0) and the <c>_trainingGridIndex</c> entry of the split point for <paramref name="dim"/>.
/// On failure <c>VALUE</c> is <c>double.MaxValue</c>; <c>SPLIT_INDEX</c> is -1 when the feature grid has a
/// single entry or all points share one value, and <c>double.MaxValue</c> when no candidate qualified.
/// </returns>
private double[] GetBestPartionAtSingleDim(int dim, GeoWave node)
{
    var errorNPoint = new double[2];

    // A feature whose grid has a single entry cannot separate anything.
    if (Form1.MainGrid[dim].Count == 1)
    {
        errorNPoint[VALUE] = double.MaxValue;
        errorNPoint[SPLIT_INDEX] = -1;
        return errorNPoint;
    }

    // Order the node's point ids by their coordinate in 'dim'.
    var sortedIds = new List<int>(node.pointsIdArray);
    sortedIds.Sort((c1, c2) => _training[c1][dim].CompareTo(_training[c2][dim]));
    int pointCount = sortedIds.Count;

    // Smallest and largest values coincide: every point has the same value in this feature.
    if (Math.Abs(_training[sortedIds[0]][dim] - _training[sortedIds[pointCount - 1]][dim]) < double.Epsilon)
    {
        errorNPoint[VALUE] = double.MaxValue;
        errorNPoint[SPLIT_INDEX] = -1;
        return errorNPoint;
    }

    // The left side starts with the parent's label counts, the right side starts empty.
    // NOTE(review): GetGiniByAction presumably updates the dictionary it is given and returns the
    // resulting Gini - confirm against GiniHelper.
    var leftLabelCounts = GiniHelper.CloneLabelAmountDic(node.MgStuff.dicLabelCount);
    var rightLabelCounts = GiniHelper.CreateEmptyLabelAmountDic(_labelsDim);

    int bestSplitId = -1;
    double giniLowest = node.MgStuff.GiniAvg; // a split must beat the node's own Gini

    for (int moved = 0; moved < pointCount - 1; moved++)
    {
        int sortedId = sortedIds[pointCount - moved - 1];
        int nextSortedId = sortedIds[pointCount - moved - 2];
        double[] movedLabel = _labels[sortedId];

        int leftSize = pointCount - moved - 1;
        int rightSize = moved + 1;

        var leftGini = GiniHelper.GetGiniByAction(leftLabelCounts, movedLabel, GiniHelper.ActionType.Remove, leftSize);
        var rightGini = GiniHelper.GetGiniByAction(rightLabelCounts, movedLabel, GiniHelper.ActionType.Insert, rightSize);

        double tempGiniPartion = ((double)leftSize / pointCount) * leftGini + ((double)rightSize / pointCount) * rightGini;

        // Points sharing a value must all end up on the same side (the sort order among them is arbitrary),
        // so a boundary only counts once the next point to move has a different value.
        double nowMovedValue = _training[sortedId][dim];
        double nextMovedValue = _training[nextSortedId][dim];

        if (tempGiniPartion < giniLowest
            && nowMovedValue != nextMovedValue
            && (moved + 1) >= _minWaveSize              // right side is large enough
            && (moved + _minWaveSize) < pointCount)     // left side is large enough
        {
            giniLowest = tempGiniPartion;
            bestSplitId = sortedId;
        }
    }

    if (bestSplitId == -1)
    {
        // Kept as in the original: this failure path reports double.MaxValue (not -1) as the split index.
        errorNPoint[VALUE] = double.MaxValue;
        errorNPoint[SPLIT_INDEX] = double.MaxValue;
        return errorNPoint;
    }

    errorNPoint[VALUE] = Math.Max(giniLowest, 0);
    errorNPoint[SPLIT_INDEX] = _trainingGridIndex[bestSplitId][dim];
    return errorNPoint;
}
/// <summary>
/// Computes the Gini impurity of <paramref name="node"/> for every label dimension from the supplied
/// label counts and stores the results on <c>node.MgStuff</c>.
/// </summary>
/// <param name="dicLabelCount">One label-to-amount dictionary per label dimension.</param>
/// <param name="node">Node to update: <c>MgStuff.dicLabelCount</c> (per-dimension copies),
/// <c>MgStuff.GiniVector</c>, <c>MgStuff.GiniAvg</c> and <c>MgStuff.GiniNorm</c> are written.</param>
/// <param name="parent">Optional parent; when absent a parent Gini average of 0 is used.</param>
/// <returns>The squared difference between node and parent Gini averages times the node's point count.</returns>
private double CalculateGini(Dictionary<double, double>[] dicLabelCount, ref GeoWave node, GeoWave parent = null)
{
    var giniPerLabelDim = new double[_labelsDim];

    for (int labelDim = 0; labelDim < _labelsDim; labelDim++)
    {
        Dictionary<double, double> amounts = dicLabelCount[labelDim];

        // Sum p * (1 - p) over the labels seen in this dimension.
        foreach (double amount in amounts.Values)
        {
            double share = amount / node.pointsIdArray.Count();
            giniPerLabelDim[labelDim] += share * (1 - share);
        }

        // The node keeps its own copy of the counts.
        node.MgStuff.dicLabelCount[labelDim] = new Dictionary<double, double>(amounts);
    }

    node.MgStuff.GiniAvg = giniPerLabelDim.Sum() / _labelsDim;
    Array.Copy(giniPerLabelDim, node.MgStuff.GiniVector, _labelsDim);

    double parentGiniAvg = 0;
    if (parent != null)
    {
        parentGiniAvg = parent.MgStuff.GiniAvg;
    }

    double norm = (node.MgStuff.GiniAvg - parentGiniAvg) * (node.MgStuff.GiniAvg - parentGiniAvg) * node.pointsIdArray.Count();
    node.MgStuff.GiniNorm = norm;
    return norm;
}
/// <summary>
/// Reads wavelet nodes from a text file: a header of "key value" lines that ends at a "StartReading"
/// line, followed by one node per line. Fields are split on <c>Form1.seperator</c>.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <param name="rc">Configuration passed to every <see cref="GeoWave"/> that is created.</param>
/// <returns>
/// The nodes in file order, or <c>null</c> (after showing a message box) when <c>Form1.UseS3</c> is false
/// and the file does not exist.
/// </returns>
/// <remarks>
/// Malformed numbers or short lines still surface as the exceptions thrown by <c>int.Parse</c>,
/// <c>double.Parse</c> or the array indexer; the file is closed in every case. Blank lines are skipped.
/// </remarks>
public static List<GeoWave> getConstWaveletsFromFile(string filename, recordConfig rc)
{
    if (!Form1.UseS3 && !File.Exists(filename))//this func was not debugged after modification
    {
        MessageBox.Show(@"the file " + Path.GetFileName(filename) + @" doesnt exist in " + Path.GetFullPath(filename));
        return null;
    }

    /* Disabled S3 path, kept for reference:
     * if (Form1.UseS3)
     * {
     *     string dir_name = Path.GetDirectoryName(filename);
     *     string file_name = Path.GetFileName(filename);
     *
     *     S3DirectoryInfo s3dir = new S3DirectoryInfo(Form1.S3client, Form1.bucketName, dir_name);
     *     S3FileInfo artFile = s3dir.GetFile(file_name);
     *     sr = artFile.OpenText();
     * }
     * else */

    // 'using' releases the file handle even when a malformed line makes parsing throw.
    using (StreamReader sr = new StreamReader(File.OpenRead(filename)))
    {
        string line;
        string[] values;

        // Header values. Only 'dimension' and 'labelDimension' are used below; the others are still
        // parsed so that a malformed header is reported exactly as before.
        string DimensionReductionMatrix = "";
        int numOfWavlets = -1;
        int dimension = -1;
        int labelDimension = -1;
        double approxOrder = -1;

        bool reachedData = false;
        while (!reachedData && !sr.EndOfStream)
        {
            line = sr.ReadLine();
            if (line == null)
            {
                break;
            }

            values = line.Split(Form1.seperator, StringSplitOptions.RemoveEmptyEntries);
            if (values.Length == 0)
            {
                continue; // blank line: nothing to interpret
            }

            switch (values[0])
            {
                case "DimensionReductionMatrix":
                    DimensionReductionMatrix = values[1];
                    break;
                case "numOfWavlets":
                    numOfWavlets = int.Parse(values[1]);
                    break;
                case "approxOrder":
                    approxOrder = int.Parse(values[1]);
                    break;
                case "dimension":
                    dimension = int.Parse(values[1]);
                    break;
                case "labelDimension":
                    labelDimension = int.Parse(values[1]);
                    break;
                case "StartReading":
                    reachedData = true;
                    break;
                default:
                    MessageBox.Show(@"the file " + Path.GetFileName(filename) + @" already exist in " + Path.GetFullPath(filename) + @" might have bad input !");
                    break;
            }
        }

        List<GeoWave> gwArr = new List<GeoWave>();
        while (!sr.EndOfStream)
        {
            line = sr.ReadLine();
            if (line == null)
            {
                break;
            }

            values = line.Split(Form1.seperator, StringSplitOptions.RemoveEmptyEntries);
            if (values.Length == 0)
            {
                continue; // blank line between or after records
            }

            GeoWave gw = new GeoWave(dimension, labelDimension, rc);
            gw.ID = int.Parse(values[0]);
            gw.child0 = int.Parse(values[1]);
            gw.child1 = int.Parse(values[2]);

            // Each dimension occupies four fields; only the first two (grid indices) are read,
            // the other two hold the actual values and are skipped.
            for (int j = 0; j < dimension; j++)
            {
                gw.boubdingBox[0][j] = int.Parse(values[3 + 4 * j]);
                gw.boubdingBox[1][j] = int.Parse(values[4 + 4 * j]);
            }

            // NOTE(review): the original set this fixed offset (12) inside the bounding-box loop, so it
            // does not depend on 'dimension'. Kept as-is; confirm against the code that writes the file.
            int counter = (dimension > 0) ? 4 + 2 * 4 : 0;

            gw.level = int.Parse(values[counter + 1]);
            counter = counter + 2;

            // NOTE(review): the index advances by two per label (counter++ plus j) - kept as in the
            // original; confirm against the file format.
            for (int j = 0; j < labelDimension; j++)
            {
                gw.MeanValue[j] = double.Parse(values[counter + j]);
                counter++;
            }

            gw.norm = double.Parse(values[counter]);
            gw.parentID = int.Parse(values[counter + 1]);
            gwArr.Add(gw);
        }

        return gwArr;
    }
}
/// <summary>
/// Creates a <c>ModifedPca</c> over <paramref name="nodeOriginalData"/> using
/// <c>AnalysisMethod.Standardize</c>, stores it in <c>node.localPca</c> (replacing any previous value)
/// and then calls <c>Compute()</c> on it.
/// </summary>
/// <param name="nodeOriginalData">Data handed to the <c>ModifedPca</c> constructor
/// (presumably one row per point of the node - confirm against callers).</param>
/// <param name="node">Node whose <c>localPca</c> is assigned.</param>
public static void constructNodePcaByOriginalData(double[][] nodeOriginalData, GeoWave node) { node.localPca = new ModifedPca(nodeOriginalData, AnalysisMethod.Standardize); node.localPca.Compute(); }
/// <summary>
/// Looks along one feature for the split of the node's points with the largest Gini gain, using the
/// first label component (<c>_trainingLabel[id][0]</c>) as the category of each point.
/// </summary>
/// <param name="dimIndex">Feature (column of <c>_trainingDt</c>) to split on.</param>
/// <param name="geoWave">Node whose <c>pointsIdArray</c> is partitioned.</param>
/// <returns>
/// <c>[0]</c> = best gain, <c>[1]</c> = <c>_trainingGridIndexDt</c> entry of the split point; or
/// <c>{ double.MinValue, -1 }</c> when no acceptable split exists.
/// </returns>
private double[] getGiniPartitionLargeDb(int dimIndex, GeoWave geoWave)
{
    // Pre-filled with the "no usable split" answer; overwritten only on success.
    double[] error_n_point = { double.MinValue, -1 };

    // Feature grid with a single entry: nothing to split on.
    if (Form1.MainGrid[dimIndex].Count == 1)
    {
        return error_n_point;
    }

    // Point ids ordered by their value in this feature.
    List<int> orderedIds = new List<int>(geoWave.pointsIdArray);
    orderedIds.Sort((a, b) => _trainingDt[a][dimIndex].CompareTo(_trainingDt[b][dimIndex]));
    int lastPos = orderedIds.Count - 1;

    // Smallest equals largest: all points share one value.
    if (_trainingDt[orderedIds[0]][dimIndex] == _trainingDt[orderedIds[lastPos]][dimIndex])
    {
        return error_n_point;
    }

    // Category counts (stored as double so they can be divided directly).
    var leftCounts = new Dictionary<double, double>();
    var rightCounts = new Dictionary<double, double>();
    foreach (int id in orderedIds)
    {
        double category = _trainingLabel[id][0];
        double seen;
        leftCounts.TryGetValue(category, out seen); // 'seen' is 0 for a new category
        leftCounts[category] = seen + 1;
    }

    double totalPoints = Convert.ToDouble(orderedIds.Count);
    double initialGini = calcGini(leftCounts, totalPoints);

    double bestGain = 0;
    int bestId = -1;

    // Walk from the largest value down, moving one point per step from left to right.
    // The point at position 0 never moves, so the left side is never empty.
    for (int pos = lastPos; pos >= 1; pos--)
    {
        int movedId = orderedIds[pos];
        double movedCategory = _trainingLabel[movedId][0];

        if (leftCounts[movedCategory] == 1)
        {
            leftCounts.Remove(movedCategory);
        }
        else
        {
            leftCounts[movedCategory] -= 1;
        }

        double alreadyRight;
        rightCounts.TryGetValue(movedCategory, out alreadyRight);
        rightCounts[movedCategory] = alreadyRight + 1;

        int movedCount = orderedIds.Count - pos; // points now on the right
        double leftSize = pos;                   // points still on the left
        double rightSize = movedCount;

        double leftGini = calcGini(leftCounts, leftSize);
        double rightGini = calcGini(rightCounts, rightSize);
        double gain = (initialGini - leftGini) * (leftSize / totalPoints) + (initialGini - rightGini) * (rightSize / totalPoints);

        if (!(gain > bestGain))
        {
            continue;
        }

        // Points with equal values must stay together (their sort order is arbitrary), so the
        // boundary is only valid when the next point to move has a different value.
        if (_trainingDt[movedId][dimIndex] == _trainingDt[orderedIds[pos - 1]][dimIndex])
        {
            continue;
        }

        // Both sides must respect the minimum node size.
        bool rightLargeEnough = movedCount >= _rc.minWaveSize;
        bool leftLargeEnough = (movedCount - 1 + _rc.minWaveSize) < orderedIds.Count;
        if (!rightLargeEnough || !leftLargeEnough)
        {
            continue;
        }

        // Points listed in trainNaTable for this feature are not accepted as split points.
        if (Form1.trainNaTable.ContainsKey(new Tuple<int, int>(movedId, dimIndex)))
        {
            continue;
        }

        bestId = movedId;
        bestGain = gain;
    }

    if (bestId != -1)
    {
        error_n_point[0] = bestGain;
        error_n_point[1] = _trainingGridIndexDt[bestId][dimIndex];
    }

    return error_n_point;
}
/// <summary>
/// Transforms the node's data according to <paramref name="splitType"/>, evaluates a split along every
/// dimension of the transformed data and returns the properties of the dimension with the lowest error.
/// </summary>
/// <param name="parentNode">Node to split. Its <c>transformedDim</c> is overwritten, and for
/// <c>SplitType.LocalPca</c> its <c>localPca</c> is rebuilt.</param>
/// <param name="error">Current error of the node; the split is only flagged OK when it is strictly lower.</param>
/// <param name="splitType">Transformation to apply before searching for the split.</param>
/// <returns>
/// The split description. <c>isPartitionOk</c> is false when the node has too few points for a diffusion
/// map, the transformation fails or yields no data, the chosen split id is not one of the node's points,
/// or the best error does not improve on <paramref name="error"/>.
/// </returns>
private SplitProps getTransformedPartitionAllDim(GeoWave parentNode, double error, SplitType splitType)
{
    double[][] originalNodeData = parentNode.pointsIdArray.Select(id => _trainingDt[id]).ToArray();

    // TODO (original author, tag 2m0rr0w2): strip categorical columns first, e.g.
    // originalNodeData = Helpers.copyAndRemoveCategoricalColumns(originalNodeData, _rc);

    SplitProps resultProps = new SplitProps();
    double[][] transformedData;

    switch (splitType)
    {
        case SplitType.LocalPca:
            DimReduction.constructNodePcaByOriginalData(originalNodeData, parentNode);
            transformedData = parentNode.localPca.Transform(originalNodeData);
            break;

        case SplitType.DiffMaps5Percent:
        case SplitType.DiffMaps1Percent:
        case SplitType.DiffMapsHalfPercent:
        {
            // Diffusion maps are only attempted when the node has more points than _rc.dim.
            if (originalNodeData.Length <= _rc.dim)
            {
                resultProps.isPartitionOk = false;
                return resultProps;
            }

            double fraction;
            if (splitType == SplitType.DiffMaps5Percent)
            {
                fraction = 0.05;
            }
            else if (splitType == SplitType.DiffMaps1Percent)
            {
                fraction = 0.01;
            }
            else
            {
                fraction = 0.005;
            }

            transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, fraction);
            break;
        }

        case SplitType.MainAxes:
        case SplitType.Categorical:
            transformedData = originalNodeData;
            break;

        default:
            transformedData = null;
            break;
    }

    if (transformedData == null)
    {
        resultProps.isPartitionOk = false;
        Debug.WriteLine("*********Failed transformation");
        Debug.WriteLine("*********Failed node size: " + parentNode.pointsIdArray.Count);
        return resultProps;
    }

    // No rows: there is nothing to split (previously this threw from First()).
    if (transformedData.Length == 0)
    {
        resultProps.isPartitionOk = false;
        return resultProps;
    }

    parentNode.transformedDim = transformedData[0].Length; // remember the dimension of the transformed data
    int transformedDim = parentNode.transformedDim;

    // No columns: no dimension to choose from (previously this threw from Aggregate()).
    if (transformedDim == 0)
    {
        resultProps.isPartitionOk = false;
        return resultProps;
    }

    double[] errorEachDim = new double[transformedDim];
    int[] partitionIdEachDim = new int[transformedDim];

    // NOTE(review): getTransformedDataPartitionSingleDim presumably records its split id in
    // partitionIdEachDim[dim] - confirm against its definition.
    Helpers.applyFor(0, transformedDim, dim =>
    {
        errorEachDim[dim] = getTransformedDataPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
    });

    // Lowest error wins; on a tie (or a NaN comparison) the later dimension is kept, as before.
    int bestDim = 0;
    for (int dim = 1; dim < transformedDim; dim++)
    {
        if (!(errorEachDim[bestDim] < errorEachDim[dim]))
        {
            bestDim = dim;
        }
    }

    resultProps.splitId = partitionIdEachDim[bestDim];

    // Map each point id to its row in transformedData (first occurrence, matching List.IndexOf).
    // A dictionary lookup replaces the linear IndexOf scans previously done inside the sort comparator.
    List<int> idsClone = new List<int>(parentNode.pointsIdArray);
    var rowOfId = new Dictionary<int, int>(idsClone.Count);
    for (int row = 0; row < idsClone.Count; row++)
    {
        if (!rowOfId.ContainsKey(idsClone[row]))
        {
            rowOfId.Add(idsClone[row], row);
        }
    }

    // Ids ordered by their transformed coordinate in the best dimension.
    resultProps.sortedIds = new List<int>(idsClone);
    resultProps.sortedIds.Sort((c1, c2) => transformedData[rowOfId[c1]][bestDim].CompareTo(transformedData[rowOfId[c2]][bestDim]));

    int originalSplitLocation;
    if (!rowOfId.TryGetValue(resultProps.splitId, out originalSplitLocation))
    {
        // The reported split id is not one of this node's points.
        resultProps.isPartitionOk = false;
        return resultProps;
    }

    resultProps.isPartitionOk = (errorEachDim[bestDim] < error);
    resultProps.splitValue = transformedData[originalSplitLocation][bestDim];
    resultProps.error = errorEachDim[bestDim];
    resultProps.type = splitType;
    resultProps.dimIndex = bestDim;

    // TODO (original author, tag 2m0rr0w2): once categorical columns are stripped above, shift
    // resultProps.dimIndex past the entries of _rc.indOfCategorical.

    return resultProps;
}
// Transformed data decomposition
/// <summary>
/// Recursively splits the node at <paramref name="geoWaveId"/>: every requested split type is evaluated,
/// the usable split with the lowest error is applied, and the two children are appended to
/// <paramref name="geoWaveArr"/> and decomposed in turn.
/// </summary>
/// <param name="geoWaveArr">Tree storage; children are appended and linked through their list indices.</param>
/// <param name="geoWaveId">Index of the node to split.</param>
/// <param name="splitTypes">Split types to try, in order.</param>
/// <remarks>
/// Recursion stops when the node's error is below <c>_rc.approxThresh</c>, it has at most
/// <c>_rc.minWaveSize</c> points, its level has reached <c>_rc.boundDepthTree</c>, or no split type
/// produces a usable split.
/// NOTE(review): getTransformedPartitionAllDim writes <c>parentNode.transformedDim</c> (and
/// <c>localPca</c> for LocalPca) on each call, so afterwards those reflect the last split type evaluated,
/// not necessarily the winning one - confirm this is intended.
/// </remarks>
private void recursiveBSP_TransformedData(IList<GeoWave> geoWaveArr, int geoWaveId, List<SplitType> splitTypes)
{
    GeoWave parentNode = geoWaveArr[geoWaveId];
    double error = parentNode.calc_MeanValueReturnError(_trainingLabel, parentNode.pointsIdArray);

    if (error < _rc.approxThresh ||
        parentNode.pointsIdArray.Count() <= _rc.minWaveSize ||
        _rc.boundDepthTree <= parentNode.level)
    {
        return;
    }

    // Evaluate every split type (in the given order) and keep only the usable ones.
    List<SplitProps> usableSplits = splitTypes
        .Select(splitType => getTransformedPartitionAllDim(parentNode, error, splitType))
        .Where(split => split.isPartitionOk)
        .ToList();

    if (usableSplits.Count == 0)
    {
        return; // no split can help
    }

    // Lowest error wins; on a tie the later split type is kept.
    SplitProps bestSplit = usableSplits.Aggregate((a, b) => (a.error < b.error) ? a : b);

    parentNode.typeTransformed = bestSplit.type;

    GeoWave child0 = new GeoWave(_rc.dim, _rc.labelDim, _rc);
    GeoWave child1 = new GeoWave(_rc.dim, _rc.labelDim, _rc);
    child0.dimIndex = bestSplit.dimIndex;
    child1.dimIndex = bestSplit.dimIndex;

    // Points before the split id go to child0; the split point and everything after it go to child1.
    List<int> sortedIds = bestSplit.sortedIds;
    int splitPosition = sortedIds.IndexOf(bestSplit.splitId);
    child0.pointsIdArray = sortedIds.GetRange(0, splitPosition);
    child1.pointsIdArray = sortedIds.GetRange(splitPosition, sortedIds.Count - splitPosition);

    // Only the upper split value is recorded, and only on child0.
    child0.upperSplitValue = bestSplit.splitValue;

    setTransformedChildMeanValue(ref child0);
    setTransformedChildMeanValue(ref child1);

    child0.parentID = geoWaveId;
    child1.parentID = geoWaveId;

    child0.level = parentNode.level + 1;
    child1.level = parentNode.level + 1;

    Debug.WriteLine("************Parent Size:" + parentNode.pointsIdArray.Count);
    Debug.WriteLine("************Level:" + (parentNode.level + 1));
    Debug.WriteLine("************Type Splitted:" + bestSplit.type);
    Debug.WriteLine("***********************************************************");

    /* Disabled debug hook (spiral split visualisation), kept for reference:
     * double[][] child0Data = child0.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
     * double[][] child1Data = child1.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
     * double[][] child0responce = child0.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
     * double[][] child1responce = child1.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
     * int level = child0.level;
     * PrintEngine.debugVisualizeSpiralSplit(child0Data, child1Data,
     *                                       child0responce, child1responce,
     *                                       level, parentNode.typeTransformed, debugAnalysisFolderName); */

    child0.computeNormOfConsts(parentNode);
    child1.computeNormOfConsts(parentNode);
    child0.meanDiffFromParent = child0.MeanValue[0] - parentNode.MeanValue[0];
    child1.meanDiffFromParent = child1.MeanValue[0] - parentNode.MeanValue[0];

    // The children are appended at the end, so their indices are known without searching the list.
    int child0Index = geoWaveArr.Count;
    geoWaveArr.Add(child0);
    geoWaveArr.Add(child1);
    parentNode.child0 = child0Index;
    parentNode.child1 = child0Index + 1;

    // Recurse into both children. The indices were stored before the list grows any further.
    recursiveBSP_TransformedData(geoWaveArr, parentNode.child0, splitTypes);
    recursiveBSP_TransformedData(geoWaveArr, parentNode.child1, splitTypes);
}
/// <summary>
/// Finds, along one feature, the split of the node's points that minimises the summed squared error of
/// the labels around the two group means. Points are moved one at a time, from the largest feature
/// value downwards, from the left group (initially all points) to the right group, and the means and
/// errors of both groups are updated incrementally.
/// </summary>
/// <param name="dimIndex">Feature (column of <c>_trainingDt</c>) to split on.</param>
/// <param name="geoWave">Node whose <c>pointsIdArray</c> is partitioned.</param>
/// <returns>
/// <c>[0]</c> = lowest error (never below 0), <c>[1]</c> = <c>_trainingGridIndexDt</c> entry of the split
/// point. On failure <c>[0]</c> is <c>double.MaxValue</c>; <c>[1]</c> is -1 when the feature grid has one
/// entry or all points share a value, and <c>double.MaxValue</c> when no candidate qualified.
/// </returns>
private double[] getBestPartitionAtSingleDim(int dimIndex, GeoWave geoWave)
{
    double[] error_n_point = new double[2];

    // Feature grid with a single entry: nothing to split on.
    if (Form1.MainGrid[dimIndex].Count == 1)
    {
        error_n_point[0] = double.MaxValue;
        error_n_point[1] = -1;
        return error_n_point;
    }

    // Point ids ordered by their value in this feature.
    List<int> orderedIds = new List<int>(geoWave.pointsIdArray);
    orderedIds.Sort((a, b) => _trainingDt[a][dimIndex].CompareTo(_trainingDt[b][dimIndex]));
    int lastPos = orderedIds.Count - 1;

    // Smallest equals largest: all points share one value.
    if (_trainingDt[orderedIds[0]][dimIndex] == _trainingDt[orderedIds[lastPos]][dimIndex])
    {
        error_n_point[0] = double.MaxValue;
        error_n_point[1] = -1;
        return error_n_point;
    }

    int labelDims = geoWave.MeanValue.Count();
    double[] leftAvg = new double[labelDims];
    double[] rightAvg = new double[labelDims];
    // The left group starts with every point: the call returns the per-label errors and receives
    // leftAvg by reference (presumably filling it with the per-label means - confirm against GeoWave).
    double[] leftErr = geoWave.calc_MeanValueReturnError(_trainingLabel, geoWave.pointsIdArray, ref leftAvg);
    double[] rightErr = new double[labelDims];
    double totalPoints = Convert.ToDouble(orderedIds.Count);

    int best_ID = -1;
    double lowest_err = double.MaxValue;

    // The point at position 0 never moves, so the left group is never empty.
    for (int i = 0; i < lastPos; i++)
    {
        int movedId = orderedIds[lastPos - i];
        double[] movedLabel = _trainingLabel[movedId];

        double leftBefore = totalPoints - i;      // left size before the move
        double leftAfter = totalPoints - i - 1;   // left size after the move
        double rightBefore = Convert.ToDouble(i);
        double rightAfter = Convert.ToDouble(i + 1);

        double tmp_err = 0;
        for (int j = 0; j < labelDims; j++)
        {
            double y = movedLabel[j];

            // Remove y from the left group: error first (it needs the old mean), then the mean.
            double fromLeftMean = y - leftAvg[j];
            leftErr[j] = leftErr[j] - leftBefore * fromLeftMean * fromLeftMean / leftAfter;
            leftAvg[j] = leftBefore * leftAvg[j] / leftAfter - y / leftAfter;

            // Add y to the right group: again error first, then the mean.
            double fromRightMean = y - rightAvg[j];
            rightErr[j] = rightErr[j] + fromRightMean * fromRightMean * rightBefore / rightAfter;
            rightAvg[j] = rightAvg[j] * rightBefore / rightAfter + y / rightAfter;

            tmp_err += leftErr[j] + rightErr[j];
        }

        // A boundary is accepted only if it lowers the error, separates different feature values
        // (equal values must stay together because their sort order is arbitrary), leaves at least
        // _rc.minWaveSize points on each side, and the moved point is not listed in trainNaTable.
        if (lowest_err > tmp_err
            && _trainingDt[movedId][dimIndex] != _trainingDt[orderedIds[lastPos - i - 1]][dimIndex]
            && (i + 1) >= _rc.minWaveSize
            && (i + _rc.minWaveSize) < orderedIds.Count
            && !Form1.trainNaTable.ContainsKey(new Tuple<int, int>(movedId, dimIndex)))
        {
            best_ID = movedId;
            lowest_err = tmp_err;
        }
    }

    if (best_ID == -1)
    {
        // This failure path reports double.MaxValue (not -1) as the split index.
        error_n_point[0] = double.MaxValue;
        error_n_point[1] = double.MaxValue;
        return error_n_point;
    }

    error_n_point[0] = Math.Max(lowest_err, 0);
    error_n_point[1] = _trainingGridIndexDt[best_ID][dimIndex];
    return error_n_point;
}
/// <summary>
/// Recursively splits the node at <paramref name="geoWaveId"/> along one grid-aligned dimension chosen
/// by the strategy in <c>_rc.split_type</c>, appends the two children to <paramref name="geoWaveArr"/>
/// and decomposes them in turn.
/// </summary>
/// <param name="geoWaveArr">Tree storage; children are appended and linked through their list indices.</param>
/// <param name="geoWaveId">Index of the node to split.</param>
/// <param name="seed">Seed used by the random strategies (split types 1, 2 and 4).</param>
/// <remarks>
/// Recursion stops when the node's error is below <c>_rc.approxThresh</c>, it has at most
/// <c>_rc.minWaveSize</c> points, its level has reached <c>_rc.boundDepthTree</c>, the chosen strategy
/// reports no usable partition, or a child's bounding box would be singular.
/// </remarks>
private void recursiveBSP_WaveletsByConsts(List<GeoWave> geoWaveArr, int geoWaveId, int seed = 0)
{
    GeoWave parent = geoWaveArr[geoWaveId];

    // Approximation error of the node as it stands.
    double error = parent.calc_MeanValueReturnError(_trainingLabel, parent.pointsIdArray);

    bool accurateEnough = error < _rc.approxThresh;
    if (accurateEnough ||
        parent.pointsIdArray.Count() <= _rc.minWaveSize ||
        _rc.boundDepthTree <= parent.level)
    {
        return;
    }

    int dimIndex = -1;
    int Maingridindex = -1;
    bool IsPartitionOK = false;

    switch (_rc.split_type)
    {
        case 0: // best split over the globally selected dimensions
            IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
            break;

        case 1: // random split
            IsPartitionOK = getRandPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, seed);
            break;

        case 2: // best split over a per-node selection of dimensions
        case 4: // Gini split over a per-node selection of dimensions
        {
            // The selection is derived deterministically from the seed and the node index.
            int one = new Random(seed).Next(0, int.MaxValue / 10);
            int two = new Random(geoWaveId).Next(0, int.MaxValue / 10);
            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);

            if (_rc.split_type == 2)
            {
                IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
            }
            else
            {
                IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
            }
            break;
        }

        case 3: // Gini split over the globally selected dimensions
            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
            break;

        case 5: // local PCA at each parent node; that routine does its own recursion
            // NOTE(review): index 0 is passed rather than geoWaveId - equivalent only when this is
            // reached for the root; confirm against recursiveBSP_LocalPCA.
            recursiveBSP_LocalPCA(geoWaveArr, 0);
            return;
    }

    if (!IsPartitionOK)
    {
        return;
    }

    int labelDim = _trainingLabel[0].Count();
    GeoWave child0 = new GeoWave(parent.boubdingBox, labelDim, parent.rc);
    GeoWave child1 = new GeoWave(parent.boubdingBox, labelDim, parent.rc);

    // The split plane becomes the upper bound of child0 and the lower bound of child1.
    child0.boubdingBox[1][dimIndex] = Maingridindex;
    child1.boubdingBox[0][dimIndex] = Maingridindex;

    // Record on both children where the split was made.
    var gridValue = Form1.MainGrid[dimIndex][Maingridindex];
    child0.dimIndex = dimIndex;
    child1.dimIndex = dimIndex;
    child0.Maingridindex = Maingridindex;
    child1.Maingridindex = Maingridindex;
    child0.MaingridValue = gridValue;
    child1.MaingridValue = gridValue;

    if (Form1.isBoxSingular(child0.boubdingBox, _rc.dim) || Form1.isBoxSingular(child1.boubdingBox, _rc.dim))
    {
        return;
    }

    // Open question from the original author: should we verify that a child is not identical to its
    // parent (cases where the partition cannot be modified)?
    setChildrensPointsAndMeanValue(ref child0, ref child1, dimIndex, parent.pointsIdArray);

    // Both children start as leaves, one level below the parent.
    int childLevel = parent.level + 1;
    child0.parentID = geoWaveId;
    child1.parentID = geoWaveId;
    child0.child0 = -1;
    child1.child0 = -1;
    child0.child1 = -1;
    child1.child1 = -1;
    child0.level = childLevel;
    child1.level = childLevel;

    child0.computeNormOfConsts(parent);
    child1.computeNormOfConsts(parent);

    geoWaveArr.Add(child0);
    geoWaveArr.Add(child1);
    parent.child0 = geoWaveArr.Count - 2;
    parent.child1 = geoWaveArr.Count - 1;

    // Recurse into both children. The indices were stored before the list grows any further.
    recursiveBSP_WaveletsByConsts(geoWaveArr, parent.child0, seed);
    recursiveBSP_WaveletsByConsts(geoWaveArr, parent.child1, seed);
}