예제 #1
0
        /// <summary>
        /// Scans a single feature dimension for the split of the node's points that gives the
        /// lowest size-weighted Gini value.
        /// </summary>
        /// <param name="dim">Index of the feature dimension to scan.</param>
        /// <param name="node">Node whose <c>pointsIdArray</c> is being partitioned.</param>
        /// <returns>
        /// A two-element array: <c>[VALUE]</c> is the best weighted Gini found (clamped to be
        /// non-negative) and <c>[SPLIT_INDEX]</c> is the grid index of the split point, read from
        /// <c>_trainingGridIndex</c>. When no admissible split exists <c>[VALUE]</c> is
        /// <c>double.MaxValue</c>.
        /// </returns>
        private double[] GetBestPartionAtSingleDim(int dim, GeoWave node)
        {
            var errorNPoint = new double[2];//error index

            if (Form1.MainGrid[dim].Count == 1)//empty feature
            {
                errorNPoint[VALUE]       = double.MaxValue;
                errorNPoint[SPLIT_INDEX] = -1;
                return(errorNPoint);
            }
            //sort the point ids by their value in dimension 'dim'
            var sortedIds = new List <int>(node.pointsIdArray);

            sortedIds.Sort((c1, c2) => _training[c1][dim].CompareTo(_training[c2][dim]));

            // sortedIds is a copy of node.pointsIdArray, so its Count is the parent size.
            int parentSize = sortedIds.Count;

            if (Math.Abs(_training[sortedIds[0]][dim] - _training[sortedIds[parentSize - 1]][dim]) < double.Epsilon)//all values are the same
            {
                errorNPoint[VALUE]       = double.MaxValue;
                errorNPoint[SPLIT_INDEX] = -1;
                return(errorNPoint);
            }
            var    leftDicClone = GiniHelper.CloneLabelAmountDic(node.MgStuff.dicLabelCount); //start with parent data at left
            var    rightDic     = GiniHelper.CreateEmptyLabelAmountDic(_labelsDim);
            int    bestSplitId  = -1;
            double giniLowest   = node.MgStuff.GiniAvg; // a split must beat the parent's own Gini average

            // Walk from the largest value downwards, moving one point per step from the left
            // side to the right side.
            for (var i = 0; i < parentSize - 1; i++)
            {
                int      movedId    = sortedIds[parentSize - i - 1];
                int      nextId     = sortedIds[parentSize - i - 2];
                double[] movedLabel = _labels[movedId];
                int      leftSize   = parentSize - i - 1;
                int      rightSize  = i + 1;

                // Both calls must run on every iteration, left before right.
                // NOTE(review): presumably GetGiniByAction updates the dictionary it is given - confirm.
                var leftGini  = GiniHelper.GetGiniByAction(leftDicClone, movedLabel, GiniHelper.ActionType.Remove, leftSize);
                var rightGini = GiniHelper.GetGiniByAction(rightDic, movedLabel, GiniHelper.ActionType.Insert, rightSize);

                double weightedGini = ((double)leftSize / parentSize) * leftGini + ((double)rightSize / parentSize) * rightGini;

                //in case some points have the same value, a split is only considered after all of
                //them have moved to the right (the sort is not unique for ties)
                double nowMovedValue  = _training[movedId][dim];
                double nextMovedValue = _training[nextId][dim];
                if (weightedGini < giniLowest && nowMovedValue != nextMovedValue &&
                    (i + 1) >= _minWaveSize && (i + _minWaveSize) < parentSize)
                {
                    giniLowest  = weightedGini;
                    bestSplitId = movedId;
                }
            }
            if (bestSplitId == -1)
            {
                // NOTE(review): this failure path reports SPLIT_INDEX as double.MaxValue while the
                // two early exits above use -1; kept as-is because callers are not visible here.
                errorNPoint[VALUE]       = double.MaxValue;
                errorNPoint[SPLIT_INDEX] = double.MaxValue;
                return(errorNPoint);
            }

            errorNPoint[VALUE]       = Math.Max(giniLowest, 0);
            errorNPoint[SPLIT_INDEX] = _trainingGridIndex[bestSplitId][dim];
            return(errorNPoint);
        }
예제 #2
0
        /// <summary>
        /// Computes the Gini value of <paramref name="node"/> for every label dimension from the
        /// supplied label counts, stores the results on <c>node.MgStuff</c> and returns the Gini norm.
        /// </summary>
        /// <param name="dicLabelCount">One label-to-count dictionary per label dimension.</param>
        /// <param name="node">Node being evaluated; its <c>MgStuff</c> members are overwritten.</param>
        /// <param name="parent">Optional parent node; when <c>null</c> the parent Gini average is taken as 0.</param>
        /// <returns>
        /// The squared difference between the node's and the parent's Gini average, multiplied by
        /// the number of points in the node.
        /// </returns>
        private double CalculateGini(Dictionary <double, double>[] dicLabelCount, ref GeoWave node, GeoWave parent = null)
        {
            double[] giniPerDim = new double[_labelsDim];

            for (int d = 0; d < _labelsDim; d++)
            {
                Dictionary <double, double> counts = dicLabelCount[d];

                foreach (KeyValuePair <double, double> entry in counts)
                {
                    // probability of this label inside the node
                    double p = entry.Value / node.pointsIdArray.Count();
                    giniPerDim[d] += p * (1 - p);
                }
                // keep a private copy of the <label, amount> dictionary on the node
                node.MgStuff.dicLabelCount[d] = new Dictionary <double, double>(counts);
            }

            // average over the label dimensions
            node.MgStuff.GiniAvg = giniPerDim.Sum() / _labelsDim;
            // per-dimension values
            Array.Copy(giniPerDim, node.MgStuff.GiniVector, _labelsDim);

            double parentAvg;
            if (parent != null)
            {
                parentAvg = parent.MgStuff.GiniAvg;
            }
            else
            {
                parentAvg = 0;
            }

            double delta = node.MgStuff.GiniAvg - parentAvg;
            double norm  = delta * delta * node.pointsIdArray.Count();

            node.MgStuff.GiniNorm = norm;
            return(norm);
        }
예제 #3
0
        /// <summary>
        /// Reads a list of wavelet nodes from a text file. The file starts with header lines of the
        /// form "key value" and a "StartReading" line; every following line describes one node.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <param name="rc">Configuration handed to every <see cref="GeoWave"/> that is created.</param>
        /// <returns>
        /// The parsed nodes, or <c>null</c> when S3 is not in use and the file does not exist
        /// (a message box is shown in that case).
        /// </returns>
        /// <remarks>
        /// Malformed numeric fields still throw from <c>int.Parse</c> / <c>double.Parse</c>; the
        /// reader is now disposed when that happens, and blank lines are skipped instead of
        /// crashing on <c>values[0]</c>.
        /// </remarks>
        public static List <GeoWave> getConstWaveletsFromFile(string filename, recordConfig rc)
        {
            if (!Form1.UseS3 && !File.Exists(filename))//this func was not debugged after modification
            {
                MessageBox.Show(@"the file " + Path.GetFileName(filename) + @" doesnt exist  in " + Path.GetFullPath(filename));
                return(null);
            }

            string[] values;
            string   line;
            // The next three header values are parsed (so bad input still fails loudly) but are
            // not used any further by this method.
            string   DimensionReductionMatrix = "";
            int      numOfWavlets             = -1;
            double   approxOrder    = -1;
            int      dimension      = -1;
            int      labelDimension = -1;

            List <GeoWave> gwArr = new List <GeoWave>();

            // Reading from S3 is currently disabled; the file is always opened from the local
            // file system. 'using' guarantees the reader is closed even if parsing throws.
            using (StreamReader sr = new StreamReader(File.OpenRead(filename)))
            {
                // ---- header: "key value" lines up to and including "StartReading" ----
                bool headerDone = false;
                while (!headerDone && !sr.EndOfStream)
                {
                    line = sr.ReadLine();
                    if (line == null)
                    {
                        break;
                    }
                    values = line.Split(Form1.seperator, StringSplitOptions.RemoveEmptyEntries);
                    if (values.Length == 0)
                    {
                        continue; // blank line
                    }

                    switch (values[0])
                    {
                    case "DimensionReductionMatrix":
                        DimensionReductionMatrix = values[1];
                        break;

                    case "numOfWavlets":
                        numOfWavlets = int.Parse(values[1]);
                        break;

                    case "approxOrder":
                        approxOrder = int.Parse(values[1]);
                        break;

                    case "dimension":
                        dimension = int.Parse(values[1]);
                        break;

                    case "labelDimension":
                        labelDimension = int.Parse(values[1]);
                        break;

                    case "StartReading":
                        headerDone = true;
                        break;

                    default:
                        MessageBox.Show(@"the file " + Path.GetFileName(filename) + @" already exist in " + Path.GetFullPath(filename) + @" might have bad input !");
                        break;
                    }
                }

                // ---- body: one node per line ----
                while (!sr.EndOfStream)
                {
                    line = sr.ReadLine();
                    if (line == null)
                    {
                        break;
                    }
                    values = line.Split(Form1.seperator, StringSplitOptions.RemoveEmptyEntries);
                    if (values.Length == 0)
                    {
                        continue; // blank line
                    }

                    GeoWave gw = new GeoWave(dimension, labelDimension, rc);
                    gw.ID     = int.Parse(values[0]);
                    gw.child0 = int.Parse(values[1]);
                    gw.child1 = int.Parse(values[2]);
                    int counter = 0;
                    for (int j = 0; j < dimension; j++)
                    {
                        //each dimension occupies 4 columns; only the first two (the grid indices) are read
                        gw.boubdingBox[0][j] = int.Parse(values[3 + 4 * j]);
                        gw.boubdingBox[1][j] = int.Parse(values[4 + 4 * j]);
                        // NOTE(review): this offset is the constant 12 regardless of 'dimension',
                        // which only lines up with the column layout above for one specific
                        // dimension count - kept unchanged, confirm against the file writer.
                        counter = 4 + 2 * 4;
                    }
                    gw.level = int.Parse(values[counter + 1]);
                    counter  = counter + 2;
                    for (int j = 0; j < labelDimension; j++)
                    {
                        // NOTE(review): both 'j' and 'counter' advance, so the column index moves
                        // by two per label - kept unchanged, confirm against the file writer.
                        gw.MeanValue[j] = double.Parse(values[counter + j]);
                        counter++;
                    }
                    gw.norm     = double.Parse(values[counter]);
                    gw.parentID = int.Parse(values[counter + 1]);
                    gwArr.Add(gw);
                }
            }

            return(gwArr);
        }
예제 #4
0
 /// <summary>
 /// Creates a <c>ModifedPca</c> over the node's original data, stores it in
 /// <c>node.localPca</c> and calls <c>Compute()</c> on it.
 /// </summary>
 /// <param name="nodeOriginalData">Data rows handed to the <c>ModifedPca</c> constructor.</param>
 /// <param name="node">Node that receives the PCA object in its <c>localPca</c> member.</param>
 public static void constructNodePcaByOriginalData(double[][] nodeOriginalData, GeoWave node)
 {
     // The analysis is created with AnalysisMethod.Standardize and attached to the node first.
     node.localPca = new ModifedPca(nodeOriginalData, AnalysisMethod.Standardize);
     // presumably fits the PCA model so that it can be used afterwards - confirm in ModifedPca
     node.localPca.Compute();
 }
예제 #5
0
        /// <summary>
        /// Looks for the split along one feature dimension that gives the largest Gini gain for
        /// the points of <paramref name="geoWave"/>, using the first label column only.
        /// </summary>
        /// <param name="dimIndex">Index of the feature dimension to scan.</param>
        /// <param name="geoWave">Node whose <c>pointsIdArray</c> is being partitioned.</param>
        /// <returns>
        /// A two-element array: element 0 is the best gain and element 1 the grid index of the
        /// split point (from <c>_trainingGridIndexDt</c>). When no admissible split exists the
        /// result is <c>{ double.MinValue, -1 }</c>.
        /// </returns>
        private double[] getGiniPartitionLargeDb(int dimIndex, GeoWave geoWave)
        {
            if (Form1.MainGrid[dimIndex].Count == 1) //empty feature
            {
                return(new double[] { double.MinValue, -1 });
            }

            //order the point ids by their value in the scanned dimension
            List <int> orderedIds = new List <int>(geoWave.pointsIdArray);
            orderedIds.Sort((a, b) => _trainingDt[a][dimIndex].CompareTo(_trainingDt[b][dimIndex]));
            int total = orderedIds.Count;

            if (_trainingDt[orderedIds[0]][dimIndex] == _trainingDt[orderedIds[total - 1]][dimIndex]) //all values are the same
            {
                return(new double[] { double.MinValue, -1 });
            }

            //label -> amount; amounts are doubles so they can be divided directly
            var leftCounts  = new Dictionary <double, double>();
            var rightCounts = new Dictionary <double, double>();
            double seen;

            //every point starts on the left side
            foreach (int id in orderedIds)
            {
                double label = _trainingLabel[id][0];
                leftCounts.TryGetValue(label, out seen); //seen is 0 when the label is new
                leftCounts[label] = seen + 1;
            }

            double totalCount = Convert.ToDouble(total);
            double parentGini = calcGini(leftCounts, totalCount);
            double topGain    = 0;
            int    chosenId   = -1;

            //move points one by one from the left side to the right side, largest value first;
            //position 0 is never moved, so the left side is never empty
            for (int pos = total - 1; pos > 0; pos--)
            {
                int    step       = total - 1 - pos; //number of points moved before this one
                int    movedId    = orderedIds[pos];
                double movedLabel = _trainingLabel[movedId][0];

                double leftAfter = leftCounts[movedLabel] - 1;
                if (leftAfter == 0)
                {
                    leftCounts.Remove(movedLabel);
                }
                else
                {
                    leftCounts[movedLabel] = leftAfter;
                }

                rightCounts.TryGetValue(movedLabel, out seen);
                rightCounts[movedLabel] = seen + 1;

                double leftSize  = pos;
                double rightSize = step + 1;

                double giniLeft  = calcGini(leftCounts, leftSize);
                double giniRight = calcGini(rightCounts, rightSize);

                double splitGain = (parentGini - giniLeft) * (leftSize / totalCount) + (parentGini - giniRight) * (rightSize / totalCount);

                //a split between two points with the same value is not allowed (the sort is not
                //unique for ties), both sides must respect the minimal node size, and the moved
                //point must not be listed in the NA table for this dimension
                bool admissible =
                    splitGain > topGain &&
                    _trainingDt[movedId][dimIndex] != _trainingDt[orderedIds[pos - 1]][dimIndex] &&
                    (step + 1) >= _rc.minWaveSize && (step + _rc.minWaveSize) < total &&
                    !Form1.trainNaTable.ContainsKey(new Tuple <int, int>(movedId, dimIndex));

                if (admissible)
                {
                    chosenId = movedId;
                    topGain  = splitGain;
                }
            }

            if (chosenId == -1)
            {
                return(new double[] { double.MinValue, -1 });
            }

            return(new double[] { topGain, _trainingGridIndexDt[chosenId][dimIndex] });
        }
예제 #6
0
        /*  private void recursiveBSP_WaveletsByConsts(List<GeoWave> geoWaveArr, int geoWaveId, int seed=0)
         * {
         *    //CALC APPROX_SOLUTION FOR GEO WAVE
         *    double error = geoWaveArr[geoWaveId].calc_MeanValueReturnError(_trainingLabel, geoWaveArr[geoWaveId].pointsIdArray);
         *    if (error < _rc.approxThresh ||
         *        geoWaveArr[geoWaveId].pointsIdArray.Count() <= _rc.minWaveSize ||
         *        _rc.boundDepthTree <=  geoWaveArr[geoWaveId].level)
         *    return;
         *
         *    int dimIndex = -1;
         *    int Maingridindex = -1;
         *
         *    bool IsPartitionOK = false;
         *    switch (_rc.split_type)
         *    {
         *        case 0:
         *           IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
         *            break;
         *        case 1:
         *            IsPartitionOK = getRandPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, seed);
         *            break;
         *        case 2:
         *        {
         *            Random ran1 = new Random(seed);
         *            Random ran2 = new Random(geoWaveId);
         *            int one = ran1.Next(0, int.MaxValue / 10);
         *            int two = ran2.Next(0, int.MaxValue / 10);
         *            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
         *            IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
         *        }
         *            break;
         *        case 3:
         *            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
         *            break;
         *        case 4:
         *        {
         *            Random ran1 = new Random(seed);
         *            Random ran2 = new Random(geoWaveId);
         *            int one = ran1.Next(0, int.MaxValue / 10);
         *            int two = ran2.Next(0, int.MaxValue / 10);
         *            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
         *            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
         *        }
         *            break;
         *
         *
         *    }
         *
         *
         *
         *
         *    if (!IsPartitionOK)
         *        return;
         *
         *
         *    GeoWave child0 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
         *    GeoWave child1 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
         *
         *    //set partition
         *    child0.boubdingBox[1][dimIndex] = Maingridindex;
         *    child1.boubdingBox[0][dimIndex] = Maingridindex;
         *
         *    //DOCUMENT ON CHILDREN
         *    child0.dimIndex = dimIndex;
         *    child0.Maingridindex = Maingridindex;
         *    child1.dimIndex = dimIndex;
         *    child1.Maingridindex = Maingridindex;
         *
         *    child0.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
         *    child1.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
         *
         *    //calc norm
         *    //calc mean value
         *
         *    if (Form1.isBoxSingular(child0.boubdingBox, _rc.dim) || Form1.isBoxSingular(child1.boubdingBox, _rc.dim))
         *        return;
         *
         *    //SHOULD I VERIFY THAT THE CHILD IS NOT ITS PARENT ? (IN CASES WHERE CAN'T MODEFY THE PARTITION)
         *
         *    setChildrensPointsAndMeanValue(ref child0, ref child1, dimIndex, geoWaveArr[geoWaveId].pointsIdArray);
         *    //SET TWO CHILDS
         *    child0.parentID = child1.parentID = geoWaveId;
         *    child0.child0 = child1.child0 = -1;
         *    child0.child1 = child1.child1 = -1;
         *    child0.level = child1.level = geoWaveArr[geoWaveId].level + 1;
         *
         *    child0.computeNormOfConsts(geoWaveArr[geoWaveId]);
         *    child1.computeNormOfConsts(geoWaveArr[geoWaveId]);
         *    geoWaveArr.Add(child0);
         *    geoWaveArr.Add(child1);
         *    geoWaveArr[geoWaveId].child0 = geoWaveArr.Count - 2;
         *    geoWaveArr[geoWaveId].child1 = geoWaveArr.Count - 1;
         *
         *
         *
         *
         *    //RECURSION STEP !!!
         *    recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child0, seed);
         *    recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child1, seed);
         * }
         */
        /// <summary>
        /// Transforms the node's data according to <paramref name="splitType"/>, evaluates the best
        /// split in every transformed dimension and returns the properties of the best one.
        /// </summary>
        /// <param name="parentNode">Node to split; its <c>transformedDim</c> (and, for local PCA, its <c>localPca</c>) is updated.</param>
        /// <param name="error">Current error of the node; the split is accepted only when its error is lower.</param>
        /// <param name="splitType">Kind of transformation to apply before searching for a split.</param>
        /// <returns>
        /// The split description. <c>isPartitionOk</c> is <c>false</c> when the transformation is not
        /// available, yields no rows, the split id cannot be located, or the split does not lower the error.
        /// </returns>
        private SplitProps getTransformedPartitionAllDim(GeoWave parentNode, double error, SplitType splitType)
        {
            double[][] originalNodeData = parentNode.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
            double[][] transformedData;
            //TODO (kept from the original): remove the categorical columns before transforming
            SplitProps resultProps = new SplitProps();

            switch (splitType)
            {
            case SplitType.LocalPca:
                DimReduction.constructNodePcaByOriginalData(originalNodeData, parentNode);
                transformedData = parentNode.localPca.Transform(originalNodeData);
                break;

            case SplitType.DiffMaps5Percent:
            case SplitType.DiffMaps1Percent:
            case SplitType.DiffMapsHalfPercent:
            {
                //the diffusion-map variants need more rows than _rc.dim
                if (originalNodeData.Length <= _rc.dim)
                {
                    resultProps.isPartitionOk = false;
                    return(resultProps);
                }
                double ratio = (splitType == SplitType.DiffMaps5Percent) ? 0.05
                             : (splitType == SplitType.DiffMaps1Percent) ? 0.01
                             : 0.005;
                transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, ratio);
                break;
            }

            case SplitType.MainAxes:
            case SplitType.Categorical:
                //no transformation: split directly on the original axes
                transformedData = originalNodeData;
                break;

            default:
                transformedData = null;
                break;
            }

            //an empty result is treated like a failed transformation (it used to throw in First())
            if (transformedData == null || transformedData.Length == 0)
            {
                resultProps.isPartitionOk = false;
                Debug.WriteLine("*********Failed transformation");
                Debug.WriteLine("*********Failed node size: " + parentNode.pointsIdArray.Count);
                return(resultProps);
            }
            //save dim of transformed data
            parentNode.transformedDim = transformedData[0].Length;
            int transformedDim = parentNode.transformedDim;

            double[] errorEachDim       = new double[transformedDim];
            int[]    partitionIdEachDim = new int[transformedDim];
            //evaluate every transformed dimension; partitionIdEachDim is handed to the helper,
            //presumably so it can record the split id of each dimension - confirm in the helper
            Helpers.applyFor(0, transformedDim, dim =>
            {
                errorEachDim[dim] = getTransformedDataPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
            });
            //dimension with the lowest error (the first one wins on ties, as before)
            int bestDim = Enumerable.Range(0, transformedDim)
                          .Aggregate((a, b) => (errorEachDim[a] < errorEachDim[b]) ? a : b);

            resultProps.splitId = partitionIdEachDim[bestDim];

            //ids in their original order; row r of transformedData belongs to idsClone[r]
            List <int> idsClone = new List <int>(parentNode.pointsIdArray);

            //id -> row lookup, first occurrence wins (the same answer IndexOf gave); this replaces
            //a linear IndexOf scan on every comparison of the sort below
            var rowOfId = new Dictionary <int, int>(idsClone.Count);
            for (int row = 0; row < idsClone.Count; row++)
            {
                if (!rowOfId.ContainsKey(idsClone[row]))
                {
                    rowOfId.Add(idsClone[row], row);
                }
            }

            //save the ids ordered by their transformed value in the best dimension
            resultProps.sortedIds = new List <int>(idsClone);
            resultProps.sortedIds.Sort((c1, c2) =>
                                       transformedData[rowOfId[c1]][bestDim].CompareTo(transformedData[rowOfId[c2]][bestDim]));

            //locate the split point in the original order
            int originalSplitLocation;
            if (!rowOfId.TryGetValue(resultProps.splitId, out originalSplitLocation))
            {
                resultProps.isPartitionOk = false;
                return(resultProps);
            }
            resultProps.isPartitionOk = (errorEachDim[bestDim] < error);
            resultProps.splitValue    = transformedData[originalSplitLocation][bestDim];
            resultProps.error         = errorEachDim[bestDim];
            resultProps.type          = splitType;
            resultProps.dimIndex      = bestDim;
            //TODO (kept from the original): shift dimIndex past the categorical columns when the
            //split was not categorical
            return(resultProps);
        }
예제 #7
0
        /// <summary>
        /// Recursively decomposes a node on transformed data: evaluates every requested split
        /// type, applies the one with the lowest error, appends the two children to
        /// <paramref name="geoWaveArr"/> and recurses into them.
        /// </summary>
        /// <param name="geoWaveArr">List of all nodes of the tree; children are appended to it.</param>
        /// <param name="geoWaveId">Index in <paramref name="geoWaveArr"/> of the node to split.</param>
        /// <param name="splitTypes">Split types to try at every node.</param>
        private void recursiveBSP_TransformedData(IList <GeoWave> geoWaveArr, int geoWaveId, List <SplitType> splitTypes)
        {
            GeoWave parentNode = geoWaveArr[geoWaveId];
            double  error      = parentNode.calc_MeanValueReturnError(_trainingLabel, parentNode.pointsIdArray);

            //stop: good enough, too small, or too deep
            if (error < _rc.approxThresh ||
                parentNode.pointsIdArray.Count <= _rc.minWaveSize ||
                _rc.boundDepthTree <= parentNode.level)
            {
                return;
            }

            //evaluate each split type and keep only those that lower the error
            List <SplitProps> usableSplits = splitTypes
                                             .Select(splitType => getTransformedPartitionAllDim(parentNode, error, splitType))
                                             .Where(x => x.isPartitionOk)
                                             .ToList();

            //not exist split that may help
            if (usableSplits.Count == 0)
            {
                return;
            }
            //every remaining candidate has isPartitionOk set, so no further check is needed
            SplitProps bestSplit = usableSplits.Aggregate((a, b) => (a.error < b.error) ? a : b);

            parentNode.typeTransformed = bestSplit.type;
            GeoWave child0 = new GeoWave(_rc.dim, _rc.labelDim, _rc);
            GeoWave child1 = new GeoWave(_rc.dim, _rc.labelDim, _rc);

            child0.dimIndex = bestSplit.dimIndex;
            child1.dimIndex = bestSplit.dimIndex;
            List <int> sortedIds = bestSplit.sortedIds;

            //points before the split id go to child0, the split id and everything after it to child1
            int splitPosition = sortedIds.IndexOf(bestSplit.splitId);
            child0.pointsIdArray = sortedIds.GetRange(0, splitPosition);
            child1.pointsIdArray = sortedIds.GetRange(splitPosition, sortedIds.Count - splitPosition);
            // set upper split value only
            child0.upperSplitValue = bestSplit.splitValue;
            //set mean values
            setTransformedChildMeanValue(ref child0);
            setTransformedChildMeanValue(ref child1);
            //set parent id
            child0.parentID = geoWaveId;
            child1.parentID = geoWaveId;
            //set level
            child0.level = parentNode.level + 1;
            child1.level = parentNode.level + 1;
            //debug writelines
            Debug.WriteLine("************Parent Size:" + parentNode.pointsIdArray.Count);
            Debug.WriteLine("************Level:" + (parentNode.level + 1));
            Debug.WriteLine("************Type Splitted:" + bestSplit.type);
            Debug.WriteLine("***********************************************************");

            //!!! START DEBUG VISULIZE SPIRAL SPLIT

            /*         double[][] child0Data = child0.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
             *       double[][] child1Data = child1.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
             *       double[][] child0responce = child0.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
             *       double[][] child1responce = child1.pointsIdArray.Select(id => _trainingLabel[id]).ToArray();
             *       int level = child0.level;
             *       PrintEngine.debugVisualizeSpiralSplit(child0Data, child1Data,
             *           child0responce, child1responce,
             *           level, parentNode.typeTransformed, debugAnalysisFolderName);*/

            //!!! END DEBUG VISUALIZE SPIRAL SPLIT'

            //compute norms
            child0.computeNormOfConsts(parentNode);
            child1.computeNormOfConsts(parentNode);

            child0.meanDiffFromParent = child0.MeanValue[0] - parentNode.MeanValue[0];
            child1.meanDiffFromParent = child1.MeanValue[0] - parentNode.MeanValue[0];

            //children are appended, so their index is the list size just before each Add; this
            //avoids a linear IndexOf scan per split and does not depend on GeoWave equality
            parentNode.child0 = geoWaveArr.Count;
            geoWaveArr.Add(child0);
            parentNode.child1 = geoWaveArr.Count;
            geoWaveArr.Add(child1);

            //RECURSION STEP !!!
            recursiveBSP_TransformedData(geoWaveArr, parentNode.child0, splitTypes);
            recursiveBSP_TransformedData(geoWaveArr, parentNode.child1, splitTypes);
        }
예제 #8
0
        /// <summary>
        /// Scans a single feature dimension for the split of the node's points with the lowest
        /// total error (left error + right error, summed over all label dimensions).
        /// </summary>
        /// <param name="dimIndex">Index of the feature dimension to scan.</param>
        /// <param name="geoWave">Node whose <c>pointsIdArray</c> is being partitioned.</param>
        /// <returns>
        /// A two-element array: element 0 is the lowest error found (clamped to be non-negative)
        /// and element 1 the grid index of the split point, read from <c>_trainingGridIndexDt</c>.
        /// Element 0 is <c>double.MaxValue</c> when no admissible split exists.
        /// </returns>
        private double[] getBestPartitionAtSingleDim(int dimIndex, GeoWave geoWave)
        {
            double[] error_n_point = new double[2];  //error index
            if (Form1.MainGrid[dimIndex].Count == 1) //empty feature
            {
                error_n_point[0] = double.MaxValue;
                error_n_point[1] = -1;
                return(error_n_point);
            }
            //sort the point ids by their value in dimension 'dimIndex'
            List <int> tmpIDs = new List <int>(geoWave.pointsIdArray);

            tmpIDs.Sort((c1, c2) => _trainingDt[c1][dimIndex].CompareTo(_trainingDt[c2][dimIndex]));

            if (_trainingDt[tmpIDs[0]][dimIndex] == _trainingDt[tmpIDs[tmpIDs.Count - 1]][dimIndex])//all values are the same
            {
                error_n_point[0] = double.MaxValue;
                error_n_point[1] = -1;
                return(error_n_point);
            }

            int    best_ID    = -1;
            double lowest_err = double.MaxValue;

            // Running per-label-dimension averages and errors of the two sides. The left side
            // starts with all points (its average is filled in through the ref argument); the
            // right side starts empty (all zeros).
            double[] leftAvg  = new double[geoWave.MeanValue.Count()];
            double[] rightAvg = new double[geoWave.MeanValue.Count()];
            double[] leftErr  = geoWave.calc_MeanValueReturnError(_trainingLabel, geoWave.pointsIdArray, ref leftAvg);//contains all points at the beginning
            double[] rightErr = new double[geoWave.MeanValue.Count()];


            double N_points = Convert.ToDouble(tmpIDs.Count);
            double tmp_err;


            // Step i moves the point at sorted position Count - i - 1 (largest value first) from
            // the left side to the right side.
            for (int i = 0; i < tmpIDs.Count - 1; i++)//the last (leftmost) point is never moved, so the left side is never empty
            {
                tmp_err = 0;
                for (int j = 0; j < geoWave.MeanValue.Count(); j++)
                {
                    // Incremental updates: before this step the left side holds N_points - i
                    // points and the right side holds i points. Each error is updated with the
                    // average from before the move, then the average itself is updated.
                    leftErr[j]  = leftErr[j] - (N_points - i) * (_trainingLabel[tmpIDs[tmpIDs.Count - i - 1]][j] - leftAvg[j]) * (_trainingLabel[tmpIDs[tmpIDs.Count - i - 1]][j] - leftAvg[j]) / (N_points - i - 1);
                    leftAvg[j]  = (N_points - i) * leftAvg[j] / (N_points - i - 1) - _trainingLabel[tmpIDs[tmpIDs.Count - i - 1]][j] / (N_points - i - 1);
                    rightErr[j] = rightErr[j] + (_trainingLabel[tmpIDs[tmpIDs.Count - i - 1]][j] - rightAvg[j]) * (_trainingLabel[tmpIDs[tmpIDs.Count - i - 1]][j] - rightAvg[j]) * Convert.ToDouble(i) / Convert.ToDouble(i + 1);
                    rightAvg[j] = rightAvg[j] * Convert.ToDouble(i) / Convert.ToDouble(i + 1) + _trainingLabel[tmpIDs[tmpIDs.Count - i - 1]][j] / Convert.ToDouble(i + 1);
                    tmp_err    += leftErr[j] + rightErr[j];
                }
                //in case some points have the same value, a split is only considered after all of them have moved to the right
                //(the sort is not unique for ties, so a split between two equal values is not allowed);
                //both sides must also respect _rc.minWaveSize and the moved point must not be in the NA table for this dimension
                if (lowest_err > tmp_err && _trainingDt[tmpIDs[tmpIDs.Count - i - 1]][dimIndex] != _trainingDt[tmpIDs[tmpIDs.Count - i - 2]][dimIndex] &&
                    (i + 1) >= _rc.minWaveSize && (i + _rc.minWaveSize) < tmpIDs.Count && !Form1.trainNaTable.ContainsKey(new Tuple <int, int>(tmpIDs[tmpIDs.Count - i - 1], dimIndex)))
                {
                    best_ID    = tmpIDs[tmpIDs.Count - i - 1];
                    lowest_err = tmp_err;
                }
            }


            //errorPointsArr[tmpIDs.Count - 1] = errorPointsArr[0];//we dont calc the last (rightmost) boundary - it equal to the left most

            ////search lowest error
            //int minIndex = Enumerable.Range(0, errorPointsArr.Length).Aggregate((a, b) => (errorPointsArr[a] < errorPointsArr[b]) ? a : b);

            if (best_ID == -1)
            {
                // NOTE(review): this failure path reports the split index as double.MaxValue
                // while the two early exits above use -1 - confirm which one callers expect.
                error_n_point[0] = double.MaxValue;
                error_n_point[1] = double.MaxValue;
                return(error_n_point);
            }

            // clamp: the incremental updates can leave a tiny negative value
            error_n_point[0] = Math.Max(lowest_err, 0);
            error_n_point[1] = _trainingGridIndexDt[best_ID][dimIndex];
            //if (best_ID == tmpIDs[0] || best_ID == tmpIDs[tmpIDs.Count() - 1])//
            //{
            //    long stop = 0;
            //    stop++;
            //}
            //=getMaingridIndex(geoWave.boubdingBox[0][dimIndex], Form1.MainGrid[dimIndex], training_dt[best_ID][dimIndex]);//MaingridIndex
            return(error_n_point);
        }
예제 #9
0
        /// <summary>
        /// Recursively partitions the node stored at <paramref name="geoWaveId"/> into two children.
        /// The children are appended to <paramref name="geoWaveArr"/>, linked to the parent by list
        /// index, and then partitioned in turn.
        /// </summary>
        /// <remarks>
        /// Recursion stops at a node when its mean-value error is below <c>_rc.approxThresh</c>, when it
        /// holds at most <c>_rc.minWaveSize</c> points, when its level has reached
        /// <c>_rc.boundDepthTree</c>, when the selected split strategy reports no usable partition,
        /// or when either resulting bounding box is singular.
        /// </remarks>
        /// <param name="geoWaveArr">Flat list of nodes; new children are added to its end.</param>
        /// <param name="geoWaveId">Index inside <paramref name="geoWaveArr"/> of the node to split.</param>
        /// <param name="seed">Seed forwarded to the randomised split strategies (split types 1, 2 and 4).</param>
        private void recursiveBSP_WaveletsByConsts(List <GeoWave> geoWaveArr, int geoWaveId, int seed = 0)
        {
            GeoWave parent = geoWaveArr[geoWaveId];

            // Approximate this node first; the returned error feeds both the stop rule and the split search.
            double error = parent.calc_MeanValueReturnError(_trainingLabel, parent.pointsIdArray);

            // Stop conditions, checked in the same order as a short-circuited OR.
            if (error < _rc.approxThresh)
            {
                return;
            }
            if (parent.pointsIdArray.Count() <= _rc.minWaveSize)
            {
                return;
            }
            if (_rc.boundDepthTree <= parent.level)
            {
                return;
            }

            int  splitDim       = -1;
            int  splitGridIndex = -1;
            bool partitionFound = false;
            var  splitType      = _rc.split_type;

            switch (splitType)
            {
            case 0:
                partitionFound = getBestPartitionResult(ref splitDim, ref splitGridIndex, geoWaveArr, geoWaveId, error, _dime2Take);
                break;

            case 1:
                partitionFound = getRandPartitionResult(ref splitDim, ref splitGridIndex, geoWaveArr, geoWaveId, error, seed);
                break;

            case 3:
                partitionFound = getGiniPartitionResult(ref splitDim, ref splitGridIndex, geoWaveArr, geoWaveId, error, _dime2Take);
                break;

            case 2:
            case 4:
            {
                // Per-node dimension subset: the selector seed is the sum of one draw seeded by 'seed'
                // and one draw seeded by the node index, so it is reproducible for a given (seed, node).
                int    seedDraw = new Random(seed).Next(0, int.MaxValue / 10);
                int    nodeDraw = new Random(geoWaveId).Next(0, int.MaxValue / 10);
                bool[] nodeDims = getDim2Take(_rc, seedDraw + nodeDraw);

                if (splitType == 2)
                {
                    partitionFound = getBestPartitionResult(ref splitDim, ref splitGridIndex, geoWaveArr, geoWaveId, error, nodeDims);
                }
                else
                {
                    partitionFound = getGiniPartitionResult(ref splitDim, ref splitGridIndex, geoWaveArr, geoWaveId, error, nodeDims);
                }
                break;
            }

            case 5:
                // Local PCA variant performs its own recursion, so nothing more is done here.
                // NOTE(review): index 0 is passed rather than geoWaveId - confirm this is intended.
                recursiveBSP_LocalPCA(geoWaveArr, 0);
                return;
            }

            if (!partitionFound)
            {
                return;
            }

            // Both children start from the parent's bounding box and configuration.
            // NOTE(review): this presumably relies on the GeoWave constructor copying the box - confirm.
            var     labelDim = _trainingLabel[0].Count();
            GeoWave left     = new GeoWave(parent.boubdingBox, labelDim, parent.rc);
            GeoWave right    = new GeoWave(parent.boubdingBox, labelDim, parent.rc);

            // Apply the cut: it becomes entry [1] of the first child's box and entry [0] of the second's.
            left.boubdingBox[1][splitDim]  = splitGridIndex;
            right.boubdingBox[0][splitDim] = splitGridIndex;

            // Record on each child which dimension / grid index produced it.
            left.dimIndex       = splitDim;
            left.Maingridindex  = splitGridIndex;
            right.dimIndex      = splitDim;
            right.Maingridindex = splitGridIndex;

            var gridValue = Form1.MainGrid[splitDim][splitGridIndex];
            left.MaingridValue  = gridValue;
            right.MaingridValue = gridValue;

            // A singular box on either side means the cut is unusable; the children are dropped.
            if (Form1.isBoxSingular(left.boubdingBox, _rc.dim) || Form1.isBoxSingular(right.boubdingBox, _rc.dim))
            {
                return;
            }

            // Open question kept from the original author: should we also verify that a child is not
            // identical to its parent (cases where the partition could not be modified)?

            setChildrensPointsAndMeanValue(ref left, ref right, splitDim, parent.pointsIdArray);

            // Initialise tree links: both children are leaves one level below the parent.
            var childLevel = parent.level + 1;

            right.parentID = geoWaveId;
            left.parentID  = geoWaveId;
            right.child0   = -1;
            left.child0    = -1;
            right.child1   = -1;
            left.child1    = -1;
            right.level    = childLevel;
            left.level     = childLevel;

            left.computeNormOfConsts(parent);
            right.computeNormOfConsts(parent);

            // Append the pair and store their list positions on the parent.
            int firstChildIndex = geoWaveArr.Count;
            geoWaveArr.Add(left);
            geoWaveArr.Add(right);
            parent.child0 = firstChildIndex;
            parent.child1 = firstChildIndex + 1;

            // Recurse into each child.
            recursiveBSP_WaveletsByConsts(geoWaveArr, parent.child0, seed);
            recursiveBSP_WaveletsByConsts(geoWaveArr, parent.child1, seed);
        }