Ejemplo n.º 1
0
        // 2m0rr0w2, local PCA partition, check all dims [START]
        /// <summary>
        /// Evaluates a split along every local-PCA dimension of <paramref name="parentNode"/> and
        /// reports the dimension with the smallest error through the ref parameters.
        /// </summary>
        /// <param name="dimIndex">Receives the index of the transformed dimension with the smallest error.</param>
        /// <param name="partitionId">Receives the entry of the per-dimension partition-id array for that dimension.</param>
        /// <param name="partitionValue">Receives the transformed coordinate, along the best dimension, of the point whose id equals <paramref name="partitionId"/>.</param>
        /// <param name="parentNode">Node being split; its <c>localPca</c> is used for the transform and its <c>pcaDim</c> is updated.</param>
        /// <param name="error">Threshold the best per-dimension error must be strictly below for the split to be accepted.</param>
        /// <param name="sortedIds">Receives the node's point ids ordered by their coordinate along the best dimension.</param>
        /// <returns>
        /// <c>true</c> when the best error is strictly smaller than <paramref name="error"/>;
        /// <c>false</c> otherwise, when the PCA has no components or as many components as there are
        /// points, or when <paramref name="partitionId"/> is not one of the node's point ids.
        /// </returns>
        private bool getPcaPartitionAllDim(ref int dimIndex, ref int partitionId, ref double partitionValue
                                           , GeoWave parentNode, double error, ref List <int> sortedIds)
        {
            //construct PCA to parent node and save original (before transformation) data matrix
            double[][] originalNodeData = DimReduction.constructNodePca(_trainingDt, parentNode);
            double[][] transformedData  = parentNode.localPca.Transform(originalNodeData);

            //save dim of transformed data (read once, reused below)
            int pcaDim = parentNode.localPca.Components.Count;
            parentNode.pcaDim = pcaDim;

            //break if dimension equivalent to num of points; also break when there are no
            //components at all, because Aggregate below throws on an empty range
            if (pcaDim == 0 || pcaDim == transformedData.Length)
            {
                return(false);
            }

            double[] errorEachDim       = new double[pcaDim];
            int[]    partitionIdEachDim = new int[pcaDim];
            // _rc.dim replaced by index of last component
            // NOTE(review): partitionIdEachDim is presumably filled per dimension by the callee - confirm
            Helpers.applyFor(0, pcaDim, dim =>
            {
                errorEachDim[dim] = getPcaPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
            });

            //pick the dimension with the smallest error (on ties the later index wins, as before)
            dimIndex = Enumerable.Range(0, pcaDim)
                       .Aggregate((a, b) => (errorEachDim[a] < errorEachDim[b]) ? a : b);
            partitionId = partitionIdEachDim[dimIndex];
            int bestDim = dimIndex;

            sortedIds = new List <int>(parentNode.pointsIdArray); // will be sorted at best split dimension

            //map each id to its row in transformedData (first occurrence, same as IndexOf) so the
            //comparator below is a constant-time lookup instead of a linear scan per comparison
            Dictionary <int, int> rowOfId = new Dictionary <int, int>(sortedIds.Count);
            for (int row = 0; row < sortedIds.Count; row++)
            {
                if (!rowOfId.ContainsKey(sortedIds[row]))
                {
                    rowOfId.Add(sortedIds[row], row);
                }
            }

            //save id's order in transformed data at best dimension
            sortedIds.Sort((c1, c2) =>
                           transformedData[rowOfId[c1]][bestDim].CompareTo(transformedData[rowOfId[c2]][bestDim]));

            //save partition value; fail if the chosen id does not belong to this node
            int originalSplitLocation;
            if (!rowOfId.TryGetValue(partitionId, out originalSplitLocation))
            {
                return(false);
            }
            partitionValue = transformedData[originalSplitLocation][bestDim];
            return(errorEachDim[dimIndex] < error);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the whole experiment script: reads the UI settings, loads the training/testing/validation
        /// data and labels from disk, truncates them to the requested training percentage, optionally
        /// applies global PCA, builds the bounding box and main grid, fills one record configuration per
        /// loop/fold from the text boxes and runs an <c>Analizer</c> for each configuration.
        /// </summary>
        /// <param name="sender">Event source; not used by this method.</param>
        /// <param name="e">Event arguments; not used by this method.</param>
        /// <param name="statusWriter">Writer that receives one "fold i ready!!!!" line after each configuration has been analyzed.</param>
        private void btnScript_Click(object sender, EventArgs e, StreamWriter statusWriter)
        {
            set2Config();
            Refresh();
            u_config.printConfig(@"C:\Wavelets decomposition\config.txt", null);
            AmazonS3Client client = Helpers.configAmazonS3ClientS3Client();

            //copy the check-box states into the run flags
            UseS3                   = UseS3CB.Checked;
            rumPrallel              = rumPrallelCB.Checked;
            runBoosting             = runBoostingCB.Checked;
            runProoning             = runProoningCB.Checked;
            runBoostingProoning     = runBoostingProoningCB.Checked;
            runRFProoning           = runRFProoningCB.Checked;
            runRf                   = runRfCB.Checked;
            runBoostingLearningRate = runBoostingLearningRateCB.Checked;

            bucketName = bucketTB.Text;
            string results_path = @ResultsTB.Text;
            string db_path      = @DBTB.Text + "\\";//@"C:\Users\Administrator\Dropbox\ADA\ada_valid\"; //"D:\\Phd\\Shai\\code\\tests\\helix tests\\noise_5\\noise_5\\"; // "C:\\reasearch\\tests\\lena\\";


            //get dir
            MainFolderName = results_path;
            Helpers.createMainDirectoryOrResultPath(results_path, bucketName, client);
            //READ DATA
            DB db = new DB();

            db.training_dt   = db.getDataTable(db_path + "trainingData.txt");
            db.testing_dt    = db.getDataTable(db_path + "testingData.txt");
            db.validation_dt = db.getDataTable(db_path + "ValidData.txt");

            db.training_label   = db.getDataTable(db_path + "trainingLabel.txt");
            db.testing_label    = db.getDataTable(db_path + "testingLabel.txt");
            db.validation_label = db.getDataTable(db_path + "ValidLabel.txt");

            upper_label = db.training_label.Max();
            lower_label = db.training_label.Min();

            double trainingPercent = double.Parse(trainingPercentTB.Text);      // 0.02;

            //number of leading rows to keep
            // NOTE(review): the limit is derived from the training set size but is also applied to the
            // testing and validation sets - confirm this is intended
            long rowToRemoveFrom = Convert.ToInt64(db.training_dt.Count() * trainingPercent);

            db.training_dt      = db.training_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.training_label   = db.training_label.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.testing_dt       = db.testing_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.testing_label    = db.testing_label.Where((el, i) => i < rowToRemoveFrom).ToArray();
            //truncate the validation rows themselves (previously the training rows were copied here,
            //which left the validation features out of step with validation_label)
            db.validation_dt    = db.validation_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.validation_label = db.validation_label.Where((el, i) => i < rowToRemoveFrom).ToArray();


            //REDUCE DIM, GLOBAL PCA
            if (usePCA.Checked)
            {
                DimReduction dimreduction = new DimReduction(db.training_dt);
                db.PCAtraining_dt   = dimreduction.getGlobalPca(db.training_dt);
                db.PCAtesting_dt    = dimreduction.getGlobalPca(db.testing_dt);
                db.PCAvalidation_dt = dimreduction.getGlobalPca(db.validation_dt);
            }
            else
            {
                //de-activate pca for dbg
                db.PCAtraining_dt   = db.training_dt;
                db.PCAtesting_dt    = db.testing_dt;
                db.PCAvalidation_dt = db.validation_dt;
            }

            //allocate the grid-index table with the same shape as the training matrix
            db.PCAtraining_GridIndex_dt = new long[db.PCAtraining_dt.Count()][];
            for (int i = 0; i < db.PCAtraining_dt.Count(); i++)
            {
                db.PCAtraining_GridIndex_dt[i] = new long[db.PCAtraining_dt[i].Count()];
            }

            //BOUNDING BOX AND MAIN GRID
            boundingBox = db.getboundingBox(db.PCAtraining_dt);
            MainGrid    = db.getMainGrid(db.PCAtraining_dt, boundingBox, ref db.PCAtraining_GridIndex_dt);


            //READ CONFIG
            methodConfig mc     = new methodConfig(true);
            int          Nloops = int.Parse(NloopsTB.Text) - 1;
            int          Kfolds = 0;

            //a parsable fold count overrides the loop count; the result is reused for fold creation below
            bool hasKfolds = int.TryParse(croosValidTB.Text, out Kfolds);

            if (hasKfolds)
            {
                Nloops = Kfolds - 1;
            }

            for (int k = 0; k < Nloops; k++)
            {
                mc.boostlamda_0.Add(3.8);    // - create variant in number of pixels
            }
            //mc.boostlamda_0.Add(1500);// - create variant in number of pixels
            //mc.boostlamda_0.Add(2500);// - create variant in number of pixels
            //mc.boostlamda_0.Add(3000);// - create variant in number of pixels

            mc.generateRecordConfigArr();
            //fill every record configuration from the text boxes; evaluateString receives the record index k
            for (int k = 0; k < mc.recArr.Count(); k++)
            {
                mc.recArr[k].dim               = NfeaturesTB.Text == @"all" ? db.PCAtraining_dt[0].Count() : int.Parse(evaluateString(NfeaturesTB.Text, k));
                mc.recArr[k].approxThresh      = double.Parse(evaluateString(approxThreshTB.Text, k));               // 0.1;
                mc.recArr[k].partitionErrType  = int.Parse(evaluateString(partitionTypeTB.Text, k));                 //2;
                mc.recArr[k].minWaveSize       = int.Parse(evaluateString(minNodeSizeTB.Text, k));                   //1;//CHANGE AFTER DBG
                mc.recArr[k].hopping_size      = int.Parse(evaluateString(waveletsSkipEstimationTB.Text, k));        //25;// 10 + 5 * (k + 1);// +5 * (k % 10);// 1;//25;
                mc.recArr[k].test_error_size   = double.Parse(evaluateString(waveletsPercentEstimationTB.Text, k));  // +0.05 * (k % 10);// 1;// 0.1;//percent of waves to check
                mc.recArr[k].NskipsinKfunc     = double.Parse(evaluateString(boostingKfuncPercentTB.Text, k));       // 0.0025;
                mc.recArr[k].rfBaggingPercent  = double.Parse(evaluateString(bagginPercentTB.Text, k));              // 0.6;
                mc.recArr[k].rfNum             = int.Parse(evaluateString(NrfTB.Text, k));                           // k + 1;//10 + k*10;// 100 / (k + 46) * 2;// int.Parse(Math.Pow(10, k + 1).ToString());
                mc.recArr[k].boostNum          = int.Parse(evaluateString(NboostTB.Text, k));                        // 10;
                mc.recArr[k].boostProoning_0   = int.Parse(evaluateString(NfirstPruninginBoostingTB.Text, k));       //13
                mc.recArr[k].boostlamda_0      = double.Parse(evaluateString(boostingLamda0TB.Text, k));             // 0.01 - (k + 1) * 0.001; //0.05;// 0.0801 + k * 0.001;// Math.Pow(0.1, k);// 0.22 + k*0.005;
                mc.recArr[k].NwaveletsBoosting = int.Parse(evaluateString(NfirstwaveletsBoostingTB.Text, k));        //  4;// k + 1;
                //mc.recArr[k].learningRate = 0;// 0.01;
                mc.recArr[k].boostNumLearningRate  = int.Parse(evaluateString(NboostingLearningRateTB.Text, k));     // 55;// 18;
                mc.recArr[k].percent_training_db   = trainingPercent;
                mc.recArr[k].BoundLevel            = int.Parse(evaluateString(boundLevelTB.Text, k));                //1024;
                mc.recArr[k].NDimsinRF             = NfeaturesrfTB.Text == @"all" ? db.PCAtraining_dt[0].Count() : int.Parse(evaluateString(NfeaturesrfTB.Text, k));
                mc.recArr[k].split_type            = int.Parse(evaluateString(splitTypeTB.Text, k));                 //0
                mc.recArr[k].NormLPType            = int.Parse(evaluateString(errTypeEstimationTB.Text, k));
                mc.recArr[k].RFpruningTestRange[1] = int.Parse(evaluateString(RFpruningEstimationRange1TB.Text, k)); // 12;// k + 9;
                mc.recArr[k].boundDepthTree        = int.Parse(evaluateString(boundDepthTB.Text, k));                //1024;
                mc.recArr[k].CrossValidFold        = k;
                // 2m0rr0w2 save labels dim in config
                mc.recArr[k].labelDim = db.training_label[0].Count();
                //mc.recArr[k].boostNum =  t ;// tmp to delete !!!!!!!

                //mc.recArr[k].RFwaveletsTestRange[0] = 25;
                //mc.recArr[k].RFwaveletsTestRange[1] = 50;
            }
            Helpers.createOutputDirectories(mc.recArr, client, u_config, bucketName, results_path);
            //SET ID ARRAY LIST
            List <int> trainingID = Enumerable.Range(0, db.PCAtraining_dt.Count()).ToList();
            List <int> testingID  = Enumerable.Range(0, db.PCAtesting_dt.Count()).ToList();

            //cross validation
            List <List <int> > trainingFoldId = new List <List <int> >();
            List <List <int> > testingFoldId  = new List <List <int> >();

            //fixed seed keeps the shuffled order reproducible between runs
            Random     ran           = new Random(2);
            List <int> training_rand = trainingID.OrderBy(x => ran.Next()).ToList().GetRange(0, trainingID.Count);

            //THE LARGEST GROUP IS TRAINING
            if (hasKfolds)
            {
                createCrossValid(Kfolds, training_rand, trainingFoldId, testingFoldId);
            }

            //bounding intervals
            int[][] BB = new int[2][];
            BB[0] = new int[boundingBox[0].Count()];
            BB[1] = new int[boundingBox[0].Count()];
            for (int i = 0; i < boundingBox[0].Count(); i++)
            {
                BB[1][i] = MainGrid[i].Count() - 1;    //set last index in each dim
            }


            for (int i = 0; i < mc.recArr.Count; i++)
            {
                Analizer Analizer = new Analizer(MainFolderName + "\\" + mc.recArr[i].getShortName(), MainGrid, db, mc.recArr[i]);
                if (!croosValidCB.Checked)
                {
                    Analizer.analize(trainingID, testingID, BB);
                }
                else
                {
                    // NOTE(review): the fold lists are only populated when croosValidTB.Text parses as an
                    // integer; with the box checked and an unparsable fold count this indexer will throw
                    Analizer.analize(trainingFoldId[i], testingFoldId[i], BB);    //cross validation
                }
                statusWriter.WriteLine("fold " + i + " ready!!!!");
            }

            //btnScript.BackColor = Color.Green;
        }
Ejemplo n.º 3
0
        /*  private void recursiveBSP_WaveletsByConsts(List<GeoWave> geoWaveArr, int geoWaveId, int seed=0)
         * {
         *    //CALC APPROX_SOLUTION FOR GEO WAVE
         *    double error = geoWaveArr[geoWaveId].calc_MeanValueReturnError(_trainingLabel, geoWaveArr[geoWaveId].pointsIdArray);
         *    if (error < _rc.approxThresh ||
         *        geoWaveArr[geoWaveId].pointsIdArray.Count() <= _rc.minWaveSize ||
         *        _rc.boundDepthTree <=  geoWaveArr[geoWaveId].level)
         *    return;
         *
         *    int dimIndex = -1;
         *    int Maingridindex = -1;
         *
         *    bool IsPartitionOK = false;
         *    switch (_rc.split_type)
         *    {
         *        case 0:
         *           IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
         *            break;
         *        case 1:
         *            IsPartitionOK = getRandPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, seed);
         *            break;
         *        case 2:
         *        {
         *            Random ran1 = new Random(seed);
         *            Random ran2 = new Random(geoWaveId);
         *            int one = ran1.Next(0, int.MaxValue / 10);
         *            int two = ran2.Next(0, int.MaxValue / 10);
         *            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
         *            IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
         *        }
         *            break;
         *        case 3:
         *            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
         *            break;
         *        case 4:
         *        {
         *            Random ran1 = new Random(seed);
         *            Random ran2 = new Random(geoWaveId);
         *            int one = ran1.Next(0, int.MaxValue / 10);
         *            int two = ran2.Next(0, int.MaxValue / 10);
         *            bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
         *            IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
         *        }
         *            break;
         *
         *
         *    }
         *
         *
         *
         *
         *    if (!IsPartitionOK)
         *        return;
         *
         *
         *    GeoWave child0 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
         *    GeoWave child1 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
         *
         *    //set partition
         *    child0.boubdingBox[1][dimIndex] = Maingridindex;
         *    child1.boubdingBox[0][dimIndex] = Maingridindex;
         *
         *    //DOCUMENT ON CHILDREN
         *    child0.dimIndex = dimIndex;
         *    child0.Maingridindex = Maingridindex;
         *    child1.dimIndex = dimIndex;
         *    child1.Maingridindex = Maingridindex;
         *
         *    child0.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
         *    child1.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
         *
         *    //calc norm
         *    //calc mean value
         *
         *    if (Form1.isBoxSingular(child0.boubdingBox, _rc.dim) || Form1.isBoxSingular(child1.boubdingBox, _rc.dim))
         *        return;
         *
         *    //SHOULD I VERIFY THAT THE CHILD IS NOT ITS PARENT ? (IN CASES WHERE CAN'T MODEFY THE PARTITION)
         *
         *    setChildrensPointsAndMeanValue(ref child0, ref child1, dimIndex, geoWaveArr[geoWaveId].pointsIdArray);
         *    //SET TWO CHILDS
         *    child0.parentID = child1.parentID = geoWaveId;
         *    child0.child0 = child1.child0 = -1;
         *    child0.child1 = child1.child1 = -1;
         *    child0.level = child1.level = geoWaveArr[geoWaveId].level + 1;
         *
         *    child0.computeNormOfConsts(geoWaveArr[geoWaveId]);
         *    child1.computeNormOfConsts(geoWaveArr[geoWaveId]);
         *    geoWaveArr.Add(child0);
         *    geoWaveArr.Add(child1);
         *    geoWaveArr[geoWaveId].child0 = geoWaveArr.Count - 2;
         *    geoWaveArr[geoWaveId].child1 = geoWaveArr.Count - 1;
         *
         *
         *
         *
         *    //RECURSION STEP !!!
         *    recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child0, seed);
         *    recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child1, seed);
         * }
         */
        /// <summary>
        /// Transforms the points of <paramref name="parentNode"/> according to <paramref name="splitType"/>,
        /// evaluates a split along every dimension of the transformed data and returns the properties
        /// of the dimension with the smallest error.
        /// </summary>
        /// <param name="parentNode">Node being split; its <c>transformedDim</c> (and, for <c>SplitType.LocalPca</c>, its <c>localPca</c>) is used and updated.</param>
        /// <param name="error">Threshold the best per-dimension error must be strictly below for the split to be accepted.</param>
        /// <param name="splitType">Selects the transformation applied before the search; an unrecognized value yields a failed result.</param>
        /// <returns>
        /// A <c>SplitProps</c> whose <c>isPartitionOk</c> is <c>true</c> only when the best error is strictly
        /// below <paramref name="error"/>. On the early-exit paths (too few points for diffusion maps,
        /// failed or empty transformation, split id not found in the node) <c>isPartitionOk</c> is
        /// <c>false</c> and the remaining fields are only partially filled.
        /// </returns>
        private SplitProps getTransformedPartitionAllDim(GeoWave parentNode, double error, SplitType splitType)
        {
            double[][] originalNodeData = parentNode.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
            double[][] transformedData;
            //clean columns of categorical variables 2m0rr0w2
            // originalNodeData = Helpers.copyAndRemoveCategoricalColumns(originalNodeData, _rc);
            //result struct
            SplitProps resultProps = new SplitProps();

            switch (splitType)
            {
            case SplitType.LocalPca:
                DimReduction.constructNodePcaByOriginalData(originalNodeData, parentNode);
                transformedData = parentNode.localPca.Transform(originalNodeData);
                break;

            case SplitType.DiffMaps5Percent:
                //diffusion maps are skipped unless the node has more points than _rc.dim
                if (originalNodeData.Length <= _rc.dim)
                {
                    resultProps.isPartitionOk = false;
                    return(resultProps);
                }
                transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, 0.05);
                break;

            case SplitType.DiffMaps1Percent:
                if (originalNodeData.Length <= _rc.dim)
                {
                    resultProps.isPartitionOk = false;
                    return(resultProps);
                }
                transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, 0.01);
                break;

            case SplitType.DiffMapsHalfPercent:
                if (originalNodeData.Length <= _rc.dim)
                {
                    resultProps.isPartitionOk = false;
                    return(resultProps);
                }
                transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, 0.005);
                break;

            case SplitType.MainAxes:
                transformedData = originalNodeData;
                break;

            case SplitType.Categorical:
                transformedData = originalNodeData;
                break;

            default:
                transformedData = null;
                break;
            }

            if (transformedData == null)
            {
                //throw new Exception("******TRANSFORMATION ERROR!!!");
                resultProps.isPartitionOk = false;
                Debug.WriteLine("*********Failed transformation");
                Debug.WriteLine("*********Failed node size: " + parentNode.pointsIdArray.Count);
                return(resultProps);
            }

            //a node without points has no first row to measure; report a failed split instead of throwing
            if (transformedData.Length == 0)
            {
                resultProps.isPartitionOk = false;
                return(resultProps);
            }
            parentNode.transformedDim = transformedData[0].Length;
            //save dim of transformed data
            int transformedDim = parentNode.transformedDim;

            //Aggregate below throws on an empty range, so a zero-width transform is a failed split too
            if (transformedDim == 0)
            {
                resultProps.isPartitionOk = false;
                return(resultProps);
            }

            double[] errorEachDim       = new double[transformedDim];
            int[]    partitionIdEachDim = new int[transformedDim];
            // _rc.dim replaced by transformedDim dimension
            // NOTE(review): partitionIdEachDim is presumably filled per dimension by the callee - confirm
            Helpers.applyFor(0, transformedDim, dim =>
            {
                errorEachDim[dim] = getTransformedDataPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
            });

            //pick the dimension with the smallest error (on ties the later index wins, as before)
            int bestDim = Enumerable.Range(0, transformedDim)
                          .Aggregate((a, b) => (errorEachDim[a] < errorEachDim[b]) ? a : b);

            resultProps.splitId = partitionIdEachDim[bestDim];
            resultProps.sortedIds = new List <int>(parentNode.pointsIdArray); // will be sorted at best split dimension

            //map each id to its row in transformedData (first occurrence, same as IndexOf) so the
            //comparator below is a constant-time lookup instead of a linear scan per comparison
            Dictionary <int, int> rowOfId = new Dictionary <int, int>(resultProps.sortedIds.Count);
            for (int row = 0; row < resultProps.sortedIds.Count; row++)
            {
                if (!rowOfId.ContainsKey(resultProps.sortedIds[row]))
                {
                    rowOfId.Add(resultProps.sortedIds[row], row);
                }
            }

            //save id's order in transformed data at best dimension
            resultProps.sortedIds.Sort((c1, c2) =>
                                       transformedData[rowOfId[c1]][bestDim].CompareTo(transformedData[rowOfId[c2]][bestDim]));

            //save partition value; fail if the chosen id does not belong to this node
            int originalSplitLocation;
            if (!rowOfId.TryGetValue(resultProps.splitId, out originalSplitLocation))
            {
                resultProps.isPartitionOk = false;
                return(resultProps);
            }
            resultProps.isPartitionOk = (errorEachDim[bestDim] < error);
            resultProps.splitValue    = transformedData[originalSplitLocation][bestDim];
            resultProps.error         = errorEachDim[bestDim];
            resultProps.type          = splitType;
            resultProps.dimIndex      = bestDim;
            //shift dimension if it was not categorical split 2m0rr0w2

            /*    foreach (int categoricalInd in _rc.indOfCategorical)
             *  {
             *      resultProps.dimIndex = (resultProps.dimIndex == categoricalInd)
             *          ? resultProps.dimIndex++
             *          : resultProps.dimIndex;
             *  }*/
            return(resultProps);
        }