/* private void recursiveBSP_WaveletsByConsts(List<GeoWave> geoWaveArr, int geoWaveId, int seed=0) * { * //CALC APPROX_SOLUTION FOR GEO WAVE * double error = geoWaveArr[geoWaveId].calc_MeanValueReturnError(_trainingLabel, geoWaveArr[geoWaveId].pointsIdArray); * if (error < _rc.approxThresh || * geoWaveArr[geoWaveId].pointsIdArray.Count() <= _rc.minWaveSize || * _rc.boundDepthTree <= geoWaveArr[geoWaveId].level) * return; * * int dimIndex = -1; * int Maingridindex = -1; * * bool IsPartitionOK = false; * switch (_rc.split_type) * { * case 0: * IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take); * break; * case 1: * IsPartitionOK = getRandPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, seed); * break; * case 2: * { * Random ran1 = new Random(seed); * Random ran2 = new Random(geoWaveId); * int one = ran1.Next(0, int.MaxValue / 10); * int two = ran2.Next(0, int.MaxValue / 10); * bool[] Dim2TakeNode = getDim2Take(_rc, one + two); * IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode); * } * break; * case 3: * IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take); * break; * case 4: * { * Random ran1 = new Random(seed); * Random ran2 = new Random(geoWaveId); * int one = ran1.Next(0, int.MaxValue / 10); * int two = ran2.Next(0, int.MaxValue / 10); * bool[] Dim2TakeNode = getDim2Take(_rc, one + two); * IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode); * } * break; * * * } * * * * * if (!IsPartitionOK) * return; * * * GeoWave child0 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc); * GeoWave child1 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc); * * //set partition * 
child0.boubdingBox[1][dimIndex] = Maingridindex; * child1.boubdingBox[0][dimIndex] = Maingridindex; * * //DOCUMENT ON CHILDREN * child0.dimIndex = dimIndex; * child0.Maingridindex = Maingridindex; * child1.dimIndex = dimIndex; * child1.Maingridindex = Maingridindex; * * child0.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex]; * child1.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex]; * * //calc norm * //calc mean value * * if (Form1.isBoxSingular(child0.boubdingBox, _rc.dim) || Form1.isBoxSingular(child1.boubdingBox, _rc.dim)) * return; * * //SHOULD I VERIFY THAT THE CHILD IS NOT ITS PARENT ? (IN CASES WHERE CAN'T MODEFY THE PARTITION) * * setChildrensPointsAndMeanValue(ref child0, ref child1, dimIndex, geoWaveArr[geoWaveId].pointsIdArray); * //SET TWO CHILDS * child0.parentID = child1.parentID = geoWaveId; * child0.child0 = child1.child0 = -1; * child0.child1 = child1.child1 = -1; * child0.level = child1.level = geoWaveArr[geoWaveId].level + 1; * * child0.computeNormOfConsts(geoWaveArr[geoWaveId]); * child1.computeNormOfConsts(geoWaveArr[geoWaveId]); * geoWaveArr.Add(child0); * geoWaveArr.Add(child1); * geoWaveArr[geoWaveId].child0 = geoWaveArr.Count - 2; * geoWaveArr[geoWaveId].child1 = geoWaveArr.Count - 1; * * * * * //RECURSION STEP !!! 
* recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child0, seed);
* recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child1, seed);
* } */

/// <summary>
/// Transforms the training points of <paramref name="parentNode"/> according to
/// <paramref name="splitType"/>, evaluates a candidate partition along every dimension of
/// the transformed data, and reports the dimension with the lowest partition error.
/// </summary>
/// <param name="parentNode">
/// Node whose points are examined. Side effects: <c>transformedDim</c> is overwritten with the
/// width of the transformed data; for <c>SplitType.LocalPca</c> the node is also passed to
/// <c>DimReduction.constructNodePcaByOriginalData</c> (presumably populating <c>localPca</c>).
/// </param>
/// <param name="error">Error of the unsplit node; the split is accepted only if strictly lower.</param>
/// <param name="splitType">Which transformation to apply before searching for a split.</param>
/// <returns>
/// A <c>SplitProps</c> describing the best split. <c>isPartitionOk</c> is false when the
/// transformation is unavailable or yields no data, when the node has too few points for a
/// diffusion-map split, when the chosen split id is not one of the node's ids, or when the
/// best error does not improve on <paramref name="error"/>.
/// </returns>
private SplitProps getTransformedPartitionAllDim(GeoWave parentNode, double error, SplitType splitType)
{
    double[][] originalNodeData = parentNode.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
    double[][] transformedData;

    //clean columns of categorical variables 2m0rr0w2
    // originalNodeData = Helpers.copyAndRemoveCategoricalColumns(originalNodeData, _rc);

    //result struct
    SplitProps resultProps = new SplitProps();

    switch (splitType)
    {
        case SplitType.LocalPca:
            DimReduction.constructNodePcaByOriginalData(originalNodeData, parentNode);
            transformedData = parentNode.localPca.Transform(originalNodeData);
            break;

        case SplitType.DiffMaps5Percent:
        case SplitType.DiffMaps1Percent:
        case SplitType.DiffMapsHalfPercent:
            // Diffusion-map splits are skipped unless the node holds more points than _rc.dim.
            if (originalNodeData.Length <= _rc.dim)
            {
                resultProps.isPartitionOk = false;
                return resultProps;
            }

            // The three variants differ only in the second argument handed to DiffusionMaps.
            double diffMapsParam;
            if (splitType == SplitType.DiffMaps5Percent)
            {
                diffMapsParam = 0.05;
            }
            else if (splitType == SplitType.DiffMaps1Percent)
            {
                diffMapsParam = 0.01;
            }
            else
            {
                diffMapsParam = 0.005;
            }

            transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, diffMapsParam);
            break;

        case SplitType.MainAxes:
        case SplitType.Categorical:
            // No transformation: split directly on the original coordinates.
            transformedData = originalNodeData;
            break;

        default:
            transformedData = null;
            break;
    }

    // A missing or empty matrix cannot be split; previously an empty matrix threw in First().
    if (transformedData == null || transformedData.Length == 0)
    {
        //throw new Exception("******TRANSFORMATION ERROR!!!");
        resultProps.isPartitionOk = false;
        Debug.WriteLine("*********Failed transformation");
        Debug.WriteLine("*********Failed node size: " + parentNode.pointsIdArray.Count);
        return resultProps;
    }

    //save dim of transformed data
    int transformedDim = transformedData[0].Length;
    parentNode.transformedDim = transformedDim;

    // With zero dimensions there is nothing to choose from (Aggregate would throw on an empty range).
    if (transformedDim == 0)
    {
        resultProps.isPartitionOk = false;
        return resultProps;
    }

    // _rc.dim replaced by transformedDim dimention
    double[] errorEachDim = new double[transformedDim];
    int[] partitionIdEachDim = new int[transformedDim];

    Helpers.applyFor(0, transformedDim, dim =>
    {
        errorEachDim[dim] = getTransformedDataPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
    });

    // On equal errors the later dimension wins (same tie-breaking as before).
    int bestDim = Enumerable.Range(0, transformedDim)
                  .Aggregate((a, b) => (errorEachDim[a] < errorEachDim[b]) ? a : b);

    resultProps.splitId = partitionIdEachDim[bestDim];

    // id's in original position; row i of transformedData belongs to idsInOriginalOrder[i]
    List<int> idsInOriginalOrder = new List<int>(parentNode.pointsIdArray);

    // Map each id to the row of its FIRST occurrence (what IndexOf returned), so the comparator
    // below is O(1) per lookup instead of scanning the list on every comparison.
    Dictionary<int, int> rowOfId = new Dictionary<int, int>(idsInOriginalOrder.Count);
    for (int row = 0; row < idsInOriginalOrder.Count; row++)
    {
        int id = idsInOriginalOrder[row];
        if (!rowOfId.ContainsKey(id))
        {
            rowOfId.Add(id, row);
        }
    }

    //save id's order in transformed data at best dimention
    resultProps.sortedIds = new List<int>(idsInOriginalOrder);
    resultProps.sortedIds.Sort((c1, c2) =>
        transformedData[rowOfId[c1]][bestDim].CompareTo(transformedData[rowOfId[c2]][bestDim]));

    //save partition value
    int originalSplitLocation;
    if (!rowOfId.TryGetValue(resultProps.splitId, out originalSplitLocation))
    {
        // The reported split id is not one of this node's points.
        resultProps.isPartitionOk = false;
        return resultProps;
    }

    resultProps.isPartitionOk = (errorEachDim[bestDim] < error);
    resultProps.splitValue = transformedData[originalSplitLocation][bestDim];
    resultProps.error = errorEachDim[bestDim];
    resultProps.type = splitType;
    resultProps.dimIndex = bestDim;

    //shift dimention if it was not categorical split 2m0rr0w2

    /* foreach (int categoricalInd in _rc.indOfCategorical)
     * {
     *     resultProps.dimIndex = (resultProps.dimIndex == categoricalInd)
     *         ? resultProps.dimIndex++
     *         : resultProps.dimIndex;
     * }*/
    return resultProps;
}
// Transformed data decomposition
/// <summary>
/// Recursively splits the node at <paramref name="geoWaveId"/> into two children, choosing
/// among the supplied split types the accepted split with the lowest error. Children are
/// appended to <paramref name="geoWaveArr"/> and their indices stored on the parent.
/// </summary>
/// <param name="geoWaveArr">Flat list holding every node of the tree; grows as nodes are split.</param>
/// <param name="geoWaveId">Index in <paramref name="geoWaveArr"/> of the node to split.</param>
/// <param name="splitTypes">Split types to try on every node.</param>
/// <remarks>
/// Recursion stops when the node's error is below <c>_rc.approxThresh</c>, when it has at most
/// <c>_rc.minWaveSize</c> points, when its level reaches <c>_rc.boundDepthTree</c>, when no
/// split type yields an accepted partition, or when the chosen split would leave the first
/// child empty.
/// </remarks>
private void recursiveBSP_TransformedData(IList<GeoWave> geoWaveArr, int geoWaveId, List<SplitType> splitTypes)
{
    GeoWave parentNode = geoWaveArr[geoWaveId];
    double error = parentNode.calc_MeanValueReturnError(_trainingLabel, parentNode.pointsIdArray);

    if (error < _rc.approxThresh ||
        parentNode.pointsIdArray.Count <= _rc.minWaveSize ||
        _rc.boundDepthTree <= parentNode.level)
    {
        return;
    }

    // Evaluate every split type (each call may mutate parentNode) and keep the accepted ones.
    List<SplitProps> acceptedSplits = splitTypes
        .Select(splitType => getTransformedPartitionAllDim(parentNode, error, splitType))
        .Where(split => split.isPartitionOk)
        .ToList();

    //not exist split that may help
    if (acceptedSplits.Count == 0)
    {
        return;
    }

    // On equal errors the later split type wins (same tie-breaking as before).
    SplitProps bestSplit = acceptedSplits.Aggregate((a, b) => (a.error < b.error) ? a : b);

    List<int> sortedIds = bestSplit.sortedIds;
    int splitPosition = sortedIds.IndexOf(bestSplit.splitId);

    // A position of 0 would give an empty child0 and a child1 identical to the parent;
    // -1 would make GetRange throw. Either way the node is left as a leaf.
    if (splitPosition <= 0)
    {
        return;
    }

    parentNode.typeTransformed = bestSplit.type;

    GeoWave child0 = new GeoWave(_rc.dim, _rc.labelDim, _rc);
    GeoWave child1 = new GeoWave(_rc.dim, _rc.labelDim, _rc);

    child0.dimIndex = bestSplit.dimIndex;
    child1.dimIndex = bestSplit.dimIndex;

    //set childs id's: child0 gets the ids sorted before the split id, child1 the rest
    child0.pointsIdArray = sortedIds.GetRange(0, splitPosition);
    child1.pointsIdArray = sortedIds.GetRange(splitPosition, sortedIds.Count - splitPosition);

    // set upper split value only
    child0.upperSplitValue = bestSplit.splitValue;

    //set mean values
    setTransformedChildMeanValue(ref child0);
    setTransformedChildMeanValue(ref child1);

    //set parent id
    child0.parentID = geoWaveId;
    child1.parentID = geoWaveId;

    //set level
    int childLevel = parentNode.level + 1;
    child0.level = childLevel;
    child1.level = childLevel;

    //debug writelines
    Debug.WriteLine("************Parent Size:" + parentNode.pointsIdArray.Count);
    Debug.WriteLine("************Level:" + (parentNode.level + 1));
    Debug.WriteLine("************Type Splitted:" + bestSplit.type);
    Debug.WriteLine("***********************************************************");

    //!!! START DEBUG VISULIZE SPIRAL SPLIT

    /* double[][] child0Data = child0.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
     * double[][] child1Data = child1.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
     * double[][] child0responce = child0.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
     * double[][] child1responce = child1.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
     * int level = child0.level;
     * PrintEngine.debugVisualizeSpiralSplit(child0Data, child1Data,
     *                                       child0responce, child1responce,
     *                                       level, parentNode.typeTransformed, debugAnalysisFolderName);*/
    //!!! END DEBUG VISUALIZE SPIRAL SPLIT'

    //compute norms
    child0.computeNormOfConsts(parentNode);
    child1.computeNormOfConsts(parentNode);
    child0.meanDiffFromParent = child0.MeanValue[0] - parentNode.MeanValue[0];
    child1.meanDiffFromParent = child1.MeanValue[0] - parentNode.MeanValue[0];

    // Record each child's index at the moment it is appended, instead of searching the
    // whole list with IndexOf (linear per node and dependent on GeoWave equality).
    parentNode.child0 = geoWaveArr.Count;
    geoWaveArr.Add(child0);
    parentNode.child1 = geoWaveArr.Count;
    geoWaveArr.Add(child1);

    //RECURSION STEP !!!
    recursiveBSP_TransformedData(geoWaveArr, parentNode.child0, splitTypes);
    recursiveBSP_TransformedData(geoWaveArr, parentNode.child1, splitTypes);
}