Esempio n. 1
0
        /*  private void recursiveBSP_WaveletsByConsts(List<GeoWave> geoWaveArr, int geoWaveId, int seed=0)
         * {
         *    //CALC APPROX_SOLUTION FOR GEO WAVE
         *    double error = geoWaveArr[geoWaveId].calc_MeanValueReturnError(_trainingLabel, geoWaveArr[geoWaveId].pointsIdArray);
         *    if (error < _rc.approxThresh ||
         *        geoWaveArr[geoWaveId].pointsIdArray.Count() <= _rc.minWaveSize ||
         *        _rc.boundDepthTree <=  geoWaveArr[geoWaveId].level)
         *    return;
         *
         *    int dimIndex = -1;
         *    int Maingridindex = -1;
         *
         *    bool IsPartitionOK = false;
         *    switch (_rc.split_type)
         *    {
         *        case 0:
         *           IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
         *            break;
         *        case 1:
         *            IsPartitionOK = getRandPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, seed);
         *            break;
         *        case 2:
         *        {
         *            Random ran1 = new Random(seed);
         *            Random ran2 = new Random(geoWaveId);
         *            int one = ran1.Next(0, int.MaxValue / 10);
         *            int two = ran2.Next(0, int.MaxValue / 10);
         *            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
         *            IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
         *        }
         *            break;
         *        case 3:
         *            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
         *            break;
         *        case 4:
         *        {
         *            Random ran1 = new Random(seed);
         *            Random ran2 = new Random(geoWaveId);
         *            int one = ran1.Next(0, int.MaxValue / 10);
         *            int two = ran2.Next(0, int.MaxValue / 10);
         *            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
         *            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
         *        }
         *            break;
         *
         *
         *    }
         *
         *
         *
         *
         *    if (!IsPartitionOK)
         *        return;
         *
         *
         *    GeoWave child0 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
         *    GeoWave child1 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
         *
         *    //set partition
         *    child0.boubdingBox[1][dimIndex] = Maingridindex;
         *    child1.boubdingBox[0][dimIndex] = Maingridindex;
         *
         *    //DOCUMENT ON CHILDREN
         *    child0.dimIndex = dimIndex;
         *    child0.Maingridindex = Maingridindex;
         *    child1.dimIndex = dimIndex;
         *    child1.Maingridindex = Maingridindex;
         *
         *    child0.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
         *    child1.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
         *
         *    //calc norm
         *    //calc mean value
         *
         *    if (Form1.isBoxSingular(child0.boubdingBox, _rc.dim) || Form1.isBoxSingular(child1.boubdingBox, _rc.dim))
         *        return;
         *
         *    //SHOULD I VERIFY THAT THE CHILD IS NOT ITS PARENT ? (IN CASES WHERE CAN'T MODEFY THE PARTITION)
         *
         *    setChildrensPointsAndMeanValue(ref child0, ref child1, dimIndex, geoWaveArr[geoWaveId].pointsIdArray);
         *    //SET TWO CHILDS
         *    child0.parentID = child1.parentID = geoWaveId;
         *    child0.child0 = child1.child0 = -1;
         *    child0.child1 = child1.child1 = -1;
         *    child0.level = child1.level = geoWaveArr[geoWaveId].level + 1;
         *
         *    child0.computeNormOfConsts(geoWaveArr[geoWaveId]);
         *    child1.computeNormOfConsts(geoWaveArr[geoWaveId]);
         *    geoWaveArr.Add(child0);
         *    geoWaveArr.Add(child1);
         *    geoWaveArr[geoWaveId].child0 = geoWaveArr.Count - 2;
         *    geoWaveArr[geoWaveId].child1 = geoWaveArr.Count - 1;
         *
         *
         *
         *
         *    //RECURSION STEP !!!
         *    recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child0, seed);
         *    recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child1, seed);
         * }
         */
        /// <summary>
        /// Evaluates a single split type on a node: transforms the node's training rows
        /// according to <paramref name="splitType"/>, asks
        /// <c>getTransformedDataPartitionSingleDim</c> for a split in every transformed
        /// dimension, and returns the properties of the dimension with the lowest error.
        /// </summary>
        /// <param name="parentNode">
        /// Node to split. Its <c>transformedDim</c> is overwritten here, and for
        /// <c>SplitType.LocalPca</c> the node is also handed to
        /// <c>DimReduction.constructNodePcaByOriginalData</c>.
        /// </param>
        /// <param name="error">Current error of the node; the split is flagged as OK only if its error is strictly lower.</param>
        /// <param name="splitType">Transformation to apply before searching for a split.</param>
        /// <returns>
        /// A <c>SplitProps</c> with <c>isPartitionOk == false</c> when no usable transformation or
        /// split exists; otherwise the split id, split value, error, type, best dimension and the
        /// node's point ids sorted along that dimension.
        /// </returns>
        private SplitProps getTransformedPartitionAllDim(GeoWave parentNode, double error, SplitType splitType)
        {
            double[][] originalNodeData = parentNode.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
            double[][] transformedData;
            // Removal of categorical columns is currently disabled (marker: 2m0rr0w2):
            // originalNodeData = Helpers.copyAndRemoveCategoricalColumns(originalNodeData, _rc);

            // Result object; stays "not OK" on every early return below.
            SplitProps resultProps = new SplitProps();
            resultProps.isPartitionOk = false;

            switch (splitType)
            {
                case SplitType.LocalPca:
                    DimReduction.constructNodePcaByOriginalData(originalNodeData, parentNode);
                    transformedData = parentNode.localPca.Transform(originalNodeData);
                    break;

                case SplitType.DiffMaps5Percent:
                case SplitType.DiffMaps1Percent:
                case SplitType.DiffMapsHalfPercent:
                    // Diffusion maps are skipped unless the node holds more rows than _rc.dim.
                    if (originalNodeData.Length <= _rc.dim)
                    {
                        return resultProps;
                    }
                    // The three variants differ only in the second argument (5%, 1%, 0.5%).
                    double diffMapsFraction = (splitType == SplitType.DiffMaps5Percent) ? 0.05
                                            : (splitType == SplitType.DiffMaps1Percent) ? 0.01
                                            : 0.005;
                    transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, diffMapsFraction);
                    break;

                case SplitType.MainAxes:
                case SplitType.Categorical:
                    // No transformation: split directly on the original coordinates.
                    transformedData = originalNodeData;
                    break;

                default:
                    transformedData = null;
                    break;
            }

            // A missing or empty matrix cannot be split; report it and bail out
            // (an empty matrix would otherwise throw when its first row is read).
            if (transformedData == null || transformedData.Length == 0)
            {
                //throw new Exception("******TRANSFORMATION ERROR!!!");
                Debug.WriteLine("*********Failed transformation");
                Debug.WriteLine("*********Failed node size: " + parentNode.pointsIdArray.Count);
                return resultProps;
            }

            // Save the dimension of the transformed data on the node.
            // NOTE(review): the caller in this file evaluates several split types on the same node,
            // so the value left here comes from the last type evaluated - confirm that is intended.
            parentNode.transformedDim = transformedData[0].Length;
            int transformedDim = parentNode.transformedDim;
            if (transformedDim == 0)
            {
                // No dimension to search; picking a best dimension would throw.
                return resultProps;
            }

            double[] errorEachDim = new double[transformedDim];
            int[] partitionIdEachDim = new int[transformedDim];
            // Iterates over the transformed dimensions (not _rc.dim).
            // NOTE(review): if Helpers.applyFor runs iterations concurrently, each iteration must
            // only touch its own slot of partitionIdEachDim - verify in the helper.
            Helpers.applyFor(0, transformedDim, dim =>
            {
                errorEachDim[dim] = getTransformedDataPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
            });

            // Lowest error wins; on ties (or NaN comparisons) the later dimension is kept.
            int bestDim = 0;
            for (int dim = 1; dim < transformedDim; dim++)
            {
                bestDim = (errorEachDim[bestDim] < errorEachDim[dim]) ? bestDim : dim;
            }

            resultProps.splitId = partitionIdEachDim[bestDim];

            // Map every point id to its row in transformedData (first occurrence wins, matching
            // List.IndexOf) so the sort below does not rescan the id list on every comparison.
            List<int> idsClone = new List<int>(parentNode.pointsIdArray); // ids in original position
            Dictionary<int, int> rowById = new Dictionary<int, int>(idsClone.Count);
            for (int row = 0; row < idsClone.Count; row++)
            {
                if (!rowById.ContainsKey(idsClone[row]))
                {
                    rowById[idsClone[row]] = row;
                }
            }

            // Save the ids ordered by their value along the best dimension.
            resultProps.sortedIds = new List<int>(idsClone);
            resultProps.sortedIds.Sort((c1, c2) =>
                transformedData[rowById[c1]][bestDim].CompareTo(transformedData[rowById[c2]][bestDim]));

            // Locate the split point's row; an id that is not part of this node is not usable.
            int originalSplitLocation;
            if (!rowById.TryGetValue(resultProps.splitId, out originalSplitLocation))
            {
                return resultProps;
            }

            resultProps.isPartitionOk = (errorEachDim[bestDim] < error);
            resultProps.splitValue = transformedData[originalSplitLocation][bestDim];
            resultProps.error = errorEachDim[bestDim];
            resultProps.type = splitType;
            resultProps.dimIndex = bestDim;

            // Shifting the dimension for non-categorical splits is currently disabled (marker: 2m0rr0w2):
            /*    foreach (int categoricalInd in _rc.indOfCategorical)
             *  {
             *      resultProps.dimIndex = (resultProps.dimIndex == categoricalInd)
             *          ? resultProps.dimIndex++
             *          : resultProps.dimIndex;
             *  }*/
            return resultProps;
        }
Esempio n. 2
0
        // Transformed data decomposition
        /// <summary>
        /// Recursively grows the tree below node <paramref name="geoWaveId"/>: every split type in
        /// <paramref name="splitTypes"/> is evaluated with <c>getTransformedPartitionAllDim</c>, the
        /// accepted split with the lowest error is applied, two children are appended to
        /// <paramref name="geoWaveArr"/>, and the method recurses into both of them.
        /// </summary>
        /// <param name="geoWaveArr">Node list of the tree; new children are appended to it.</param>
        /// <param name="geoWaveId">Index in <paramref name="geoWaveArr"/> of the node to split.</param>
        /// <param name="splitTypes">Split types to try on this node and on all descendants.</param>
        private void recursiveBSP_TransformedData(IList<GeoWave> geoWaveArr, int geoWaveId, List<SplitType> splitTypes)
        {
            GeoWave parentNode = geoWaveArr[geoWaveId];
            double error = parentNode.calc_MeanValueReturnError(_trainingLabel, parentNode.pointsIdArray);

            // Stop when the node is accurate enough, too small, or at the depth bound.
            if (error < _rc.approxThresh ||
                parentNode.pointsIdArray.Count <= _rc.minWaveSize ||
                _rc.boundDepthTree <= parentNode.level)
            {
                return;
            }

            // Evaluate every split type (in the given order) and keep only the accepted ones.
            List<SplitProps> usableSplits = splitTypes
                .Select(splitType => getTransformedPartitionAllDim(parentNode, error, splitType))
                .Where(split => split.isPartitionOk)
                .ToList();

            // No split exists that may help.
            if (usableSplits.Count == 0)
            {
                return;
            }

            // Lowest error wins; on ties the later split type is kept.
            SplitProps bestSplit = usableSplits.Aggregate((a, b) => (a.error < b.error) ? a : b);

            List<int> sortedIds = bestSplit.sortedIds;
            int splitIndex = sortedIds.IndexOf(bestSplit.splitId);

            // A missing split id cannot be applied, and a split at position 0 would leave
            // child0 empty and child1 identical to the parent - treat both as "no split".
            if (splitIndex <= 0)
            {
                return;
            }

            parentNode.typeTransformed = bestSplit.type;
            GeoWave child0 = new GeoWave(_rc.dim, _rc.labelDim, _rc);
            GeoWave child1 = new GeoWave(_rc.dim, _rc.labelDim, _rc);

            child0.dimIndex = bestSplit.dimIndex;
            child1.dimIndex = bestSplit.dimIndex;

            // Set the children's ids: child0 gets the ids before the split point, child1 the rest.
            child0.pointsIdArray = sortedIds.GetRange(0, splitIndex);
            child1.pointsIdArray = sortedIds.GetRange(splitIndex, sortedIds.Count - splitIndex);
            // Set the upper split value only (on child0).
            child0.upperSplitValue = bestSplit.splitValue;
            // Set mean values.
            setTransformedChildMeanValue(ref child0);
            setTransformedChildMeanValue(ref child1);
            // Set parent id.
            child0.parentID = geoWaveId;
            child1.parentID = geoWaveId;
            // Set level.
            child0.level = parentNode.level + 1;
            child1.level = parentNode.level + 1;

            // Debug trace of the applied split.
            Debug.WriteLine("************Parent Size:" + parentNode.pointsIdArray.Count);
            Debug.WriteLine("************Level:" + (parentNode.level + 1));
            Debug.WriteLine("************Type Splitted:" + bestSplit.type);
            Debug.WriteLine("***********************************************************");

            //!!! START DEBUG VISUALIZE SPIRAL SPLIT

            /*         double[][] child0Data = child0.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
             *       double[][] child1Data = child1.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
             *       double[][] child0responce = child0.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
             *       double[][] child1responce = child1.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
             *       int level = child0.level;
             *       PrintEngine.debugVisualizeSpiralSplit(child0Data, child1Data,
             *           child0responce, child1responce,
             *           level, parentNode.typeTransformed, debugAnalysisFolderName);*/

            //!!! END DEBUG VISUALIZE SPIRAL SPLIT

            // Compute norms relative to the parent.
            child0.computeNormOfConsts(parentNode);
            child1.computeNormOfConsts(parentNode);

            child0.meanDiffFromParent = child0.MeanValue[0] - parentNode.MeanValue[0];
            child1.meanDiffFromParent = child1.MeanValue[0] - parentNode.MeanValue[0];

            // Append the children and record their indices. The index is read from Count just
            // before each Add instead of scanning the list afterwards with IndexOf; this assumes
            // Add appends at the end, as List<T> does.
            parentNode.child0 = geoWaveArr.Count;
            geoWaveArr.Add(child0);
            parentNode.child1 = geoWaveArr.Count;
            geoWaveArr.Add(child1);

            // Recursion step.
            recursiveBSP_TransformedData(geoWaveArr, parentNode.child0, splitTypes);
            recursiveBSP_TransformedData(geoWaveArr, parentNode.child1, splitTypes);
        }