Esempio n. 1
0
        /// <summary>
        /// For every entry of <paramref name="table"/> (presumably one entry per tree, judging by
        /// the method name) computes the population variance of one label column over the row
        /// indices listed in that entry, then hands the resulting list to
        /// <c>PrintEngine.printList</c> together with <paramref name="filename"/>.
        /// </summary>
        /// <param name="table">Array of index lists; each index selects a row of <paramref name="label"/>.</param>
        /// <param name="label">Jagged array of label rows.</param>
        /// <param name="labelindex">Column of <paramref name="label"/> whose variance is computed.</param>
        /// <param name="filename">Passed through unchanged to <c>PrintEngine.printList</c>.</param>
        public static void calcTreesVariance(List <int>[] table, double[][] label, int labelindex, string filename)
        {
            List <double> averaged_variance = new List <double>();

            int groupCount = table.Count();
            for (int g = 0; g < groupCount; g++)
            {
                List <int> rowIds = table[g];
                int        size   = rowIds.Count();

                // First pass: mean of the selected label column over this group.
                double sum = 0;
                foreach (int row in rowIds)
                {
                    sum += label[row][labelindex];
                }
                double mean = sum / size;

                // Second pass: mean squared deviation from that mean.
                // An empty group yields 0.0 / 0 == NaN, exactly as a plain division would.
                double squaredDeviation = 0;
                foreach (int row in rowIds)
                {
                    double delta = label[row][labelindex] - mean;
                    squaredDeviation += delta * delta;
                }
                averaged_variance.Add(squaredDeviation / size);
            }

            PrintEngine.printList(averaged_variance, filename);
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a <c>ModifedPca</c> over <paramref name="trainingMatrix"/>, runs its
 /// <c>Compute()</c> step, and prints the resulting eigenvalues to "eigvalues.txt"
 /// appended to <c>Form1.MainFolderName</c>.
 /// </summary>
 /// <param name="trainingMatrix">Matrix handed to the <c>ModifedPca</c> constructor.</param>
 public DimReduction(double[][] trainingMatrix)
 {
     // Build and compute the principal component analysis.
     _pca = new ModifedPca(trainingMatrix);
     _pca.Compute();

     // Dump the eigenvalues next to the other results of this run.
     var    eigenvalues     = _pca.Eigenvalues.ToList();
     string eigenvaluesFile = Form1.MainFolderName + "eigvalues.txt";
     PrintEngine.printList(eigenvalues, eigenvaluesFile);
 }
 /// <summary>
 /// Writes the wavelet properties of every tree in <paramref name="rfTreeArr"/> to its own
 /// text file inside the "archive" subfolder of <paramref name="analysisFolderName"/>,
 /// creating that subfolder when it is missing. Returns immediately unless
 /// <c>Form1.u_config.saveTressCB</c> equals "1".
 /// </summary>
 /// <param name="rfTreeArr">One wavelet list per tree.</param>
 /// <param name="analysisFolderName">Folder under which the "archive" directory is placed.</param>
 public static void printForestProperties(List <GeoWave>[] rfTreeArr, string analysisFolderName)
 {
     // Saving the trees is opt-in through the user configuration.
     if (Form1.u_config.saveTressCB != "1")
     {
         return;
     }

     string archiveFolder = analysisFolderName + "\\archive";
     bool   archiveExists = Directory.Exists(archiveFolder);
     if (!archiveExists)
     {
         Directory.CreateDirectory(archiveFolder);
     }

     // One output file per tree, numbered by the tree's position in the array.
     int treeCount = rfTreeArr.Count();
     for (int tree = 0; tree < treeCount; tree++)
     {
         string treeFile = archiveFolder + "\\waveletsPropertiesTree_" + tree.ToString() + ".txt";
         PrintEngine.printWaveletsProperties(rfTreeArr[tree], treeFile);
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Click handler of the script button. Saves the current configuration, loads the
        /// training/testing/validation data and labels from the folder given in the UI,
        /// optionally projects them with a global PCA, builds the bounding box and main grid,
        /// fills one <c>recordConfig</c> per run from the text boxes, and finally runs the
        /// analyzer for each configuration while excluding features one at a time.
        /// The button is coloured green once the whole script has finished.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void btnScript_Click(object sender, EventArgs e)
        {
            // set2Config presumably copies the UI state into u_config before it is saved - TODO confirm.
            set2Config();
            Refresh();
            u_config.printConfig(@"C:\Wavelets decomposition\config.txt");

            // Snapshot the run switches from their check boxes.
            UseS3                   = UseS3CB.Checked;
            rumPrallel              = rumPrallelCB.Checked;
            runBoosting             = runBoostingCB.Checked;
            runProoning             = runProoningCB.Checked;
            runBoostingProoning     = runBoostingProoningCB.Checked;
            runRFProoning           = runRFProoningCB.Checked;
            runRf                   = runRfCB.Checked;
            runBoostingLearningRate = runBoostingLearningRateCB.Checked;

            bucketName = bucketTB.Text;
            string results_path = ResultsTB.Text;
            string db_path      = DBTB.Text + "\\";

            // Results folder.
            MainFolderName = results_path;
            Helpers.createMainDirectoryOrResultPath(results_path, bucketName);

            // Read data and labels.
            DB db = new DB();

            db.training_dt   = db.getDataTable(db_path + "trainingData.txt");
            db.testing_dt    = db.getDataTable(db_path + "testingData.txt");
            db.validation_dt = db.getDataTable(db_path + "ValidData.txt");

            db.training_label   = db.getDataTable(db_path + "trainingLabel.txt");
            db.testing_label    = db.getDataTable(db_path + "testingLabel.txt");
            db.validation_label = db.getDataTable(db_path + "ValidLabel.txt");

            upper_label = db.training_label.Max();
            lower_label = db.training_label.Min();

            // Keep only the leading fraction of rows requested in the UI.
            // NOTE(review): the row limit is derived from the training set size but is also
            // applied to the testing and validation sets - confirm this is intended.
            double trainingPercent = double.Parse(trainingPercentTB.Text);

            long rowToRemoveFrom = Convert.ToInt64(db.training_dt.Count() * trainingPercent);

            db.training_dt      = db.training_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.training_label   = db.training_label.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.testing_dt       = db.testing_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.testing_label    = db.testing_label.Where((el, i) => i < rowToRemoveFrom).ToArray();
            // Fix: the validation features must be trimmed from the validation set itself.
            // Reading from training_dt here replaced them with training rows, which no longer
            // matched validation_label.
            db.validation_dt    = db.validation_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
            db.validation_label = db.validation_label.Where((el, i) => i < rowToRemoveFrom).ToArray();

            // Reduce dimension with a global PCA fitted on the training data, or pass the
            // raw data through when PCA is switched off.
            if (usePCA.Checked)
            {
                DimReduction dimreduction = new DimReduction(db.training_dt);
                db.PCAtraining_dt   = dimreduction.getGlobalPca(db.training_dt);
                db.PCAtesting_dt    = dimreduction.getGlobalPca(db.testing_dt);
                db.PCAvalidation_dt = dimreduction.getGlobalPca(db.validation_dt);
            }
            else
            {
                db.PCAtraining_dt   = db.training_dt;
                db.PCAtesting_dt    = db.testing_dt;
                db.PCAvalidation_dt = db.validation_dt;
            }

            // Allocate the grid-index table with the same jagged shape as the training data.
            db.PCAtraining_GridIndex_dt = new long[db.PCAtraining_dt.Count()][];
            for (int i = 0; i < db.PCAtraining_dt.Count(); i++)
            {
                db.PCAtraining_GridIndex_dt[i] = new long[db.PCAtraining_dt[i].Count()];
            }

            // Bounding box and main grid.
            boundingBox = db.getboundingBox(db.PCAtraining_dt);
            MainGrid    = db.getMainGrid(db.PCAtraining_dt, boundingBox, ref db.PCAtraining_GridIndex_dt);

            // Read config. A parsable cross-validation fold count overrides the loop count.
            methodConfig mc     = new methodConfig(true);
            int          Nloops = int.Parse(NloopsTB.Text) - 1;
            int          Kfolds = 0;

            if (int.TryParse(croosValidTB.Text, out Kfolds))
            {
                Nloops = Kfolds - 1;
            }

            for (int k = 0; k < Nloops; k++)
            {
                // The value is a placeholder; each record's boostlamda_0 is overwritten below.
                // NOTE(review): presumably only the number of entries matters here - confirm
                // against generateRecordConfigArr.
                mc.boostlamda_0.Add(3.8);
            }
            mc.generateRecordConfigArr();

            // Fill every record configuration from the UI. Trailing numbers are the sample
            // values noted by the original author.
            for (int k = 0; k < mc.recArr.Count(); k++)
            {
                mc.recArr[k].learningType = (recordConfig.LearnigType)comboLearningType.SelectedIndex;      // regression, binary class, multi class
                if (mc.recArr[k].learningType == recordConfig.LearnigType.BinaryClassification)
                {
                    mc.recArr[k].binaryMinClass = lower_label;
                    mc.recArr[k].binaryMaxClass = upper_label;
                    mc.recArr[k].midClassValue  = 0.5 * (lower_label + upper_label);
                }
                // "all" selects every column of the (possibly PCA-projected) training data.
                mc.recArr[k].dim               = NfeaturesTB.Text == "all" ? db.PCAtraining_dt[0].Count() : int.Parse(evaluateString(NfeaturesTB.Text, k));
                mc.recArr[k].approxThresh      = double.Parse(evaluateString(approxThreshTB.Text, k));               // 0.1
                mc.recArr[k].partitionErrType  = int.Parse(evaluateString(partitionTypeTB.Text, k));                 // 2
                mc.recArr[k].minWaveSize       = int.Parse(evaluateString(minNodeSizeTB.Text, k));                   // 1
                mc.recArr[k].hopping_size      = int.Parse(evaluateString(waveletsSkipEstimationTB.Text, k));        // 25
                mc.recArr[k].test_error_size   = double.Parse(evaluateString(waveletsPercentEstimationTB.Text, k));  // percent of waves to check
                mc.recArr[k].NskipsinKfunc     = double.Parse(evaluateString(boostingKfuncPercentTB.Text, k));       // 0.0025
                mc.recArr[k].rfBaggingPercent  = double.Parse(evaluateString(bagginPercentTB.Text, k));              // 0.6
                mc.recArr[k].rfNum             = int.Parse(evaluateString(NrfTB.Text, k));
                mc.recArr[k].boostNum          = int.Parse(evaluateString(NboostTB.Text, k));                        // 10
                mc.recArr[k].boostProoning_0   = int.Parse(evaluateString(NfirstPruninginBoostingTB.Text, k));       // 13
                mc.recArr[k].boostlamda_0      = double.Parse(evaluateString(boostingLamda0TB.Text, k));
                mc.recArr[k].NwaveletsBoosting = int.Parse(evaluateString(NfirstwaveletsBoostingTB.Text, k));        // 4
                mc.recArr[k].boostNumLearningRate  = int.Parse(evaluateString(NboostingLearningRateTB.Text, k));     // 55
                mc.recArr[k].percent_training_db   = trainingPercent;
                mc.recArr[k].BoundLevel            = int.Parse(evaluateString(boundLevelTB.Text, k));                // 1024
                mc.recArr[k].NDimsinRF             = NfeaturesrfTB.Text == "all" ? db.PCAtraining_dt[0].Count() : int.Parse(evaluateString(NfeaturesrfTB.Text, k));
                mc.recArr[k].split_type            = int.Parse(evaluateString(splitTypeTB.Text, k));                 // 0
                mc.recArr[k].NormLPType            = int.Parse(evaluateString(errTypeEstimationTB.Text, k));
                mc.recArr[k].RFpruningTestRange[1] = int.Parse(evaluateString(RFpruningEstimationRange1TB.Text, k)); // 12
                mc.recArr[k].boundDepthTree        = int.Parse(evaluateString(boundDepthTB.Text, k));                // 1024
                mc.recArr[k].CrossValidFold        = k;
                // Store the label dimension in the config.
                mc.recArr[k].labelDim = db.training_label[0].Count();
            }
            Helpers.createOutputDirectories(mc.recArr, u_config, bucketName, results_path);

            // Row-id lists for the full training and testing sets.
            List <int> trainingID = Enumerable.Range(0, db.PCAtraining_dt.Count()).ToList();
            List <int> testingID  = Enumerable.Range(0, db.PCAtesting_dt.Count()).ToList();

            // Cross validation: shuffle the training ids with a fixed seed so that the folds
            // are reproducible between runs.
            List <List <int> > trainingFoldId = new List <List <int> >();
            List <List <int> > testingFoldId  = new List <List <int> >();

            Random     ran           = new Random(2);
            List <int> training_rand = trainingID.OrderBy(x => ran.Next()).ToList();

            // The largest group is training.
            if (int.TryParse(croosValidTB.Text, out Kfolds))
            {
                createCrossValid(Kfolds, training_rand, trainingFoldId, testingFoldId);
            }

            // Bounding intervals: BB[0] keeps the default 0 for every dimension,
            // BB[1] holds the last grid index of every dimension.
            int[][] BB = new int[2][];
            BB[0] = new int[boundingBox[0].Count()];
            BB[1] = new int[boundingBox[0].Count()];
            for (int i = 0; i < boundingBox[0].Count(); i++)
            {
                BB[1][i] = MainGrid[i].Count() - 1;
            }

            // Loop over the record configurations (folds).
            // NOTE(review): the loop starts at index 3, so configurations 0-2 are skipped.
            // The original comment says "DEBUG i=2", so this looks like a debugging leftover -
            // confirm before relying on the results.
            for (int i = 3; i < mc.recArr.Count; i++)
            {
                recordConfig rc       = mc.recArr[i];
                Analizer     analizer = new Analizer(MainFolderName + "\\" + rc.getShortName(), MainGrid, db, rc);

                // Exclude variables one by one, recording the prediction result at each step.
                double[] numFeachuresVSerror  = new double[rc.dim];
                int      indFeachureToExclude = -1;
                double   predResult           = 0;

                for (int j = rc.dim - 1; j >= 0; j--)
                {
                    if (!croosValidCB.Checked)
                    {
                        analizer.analize(trainingID, testingID, BB, ref indFeachureToExclude, ref predResult);
                    }
                    else
                    {
                        // Cross validation: use the id lists of fold i.
                        // NOTE(review): the fold lists are only filled when croosValidTB parses
                        // as an integer; otherwise this lookup throws - verify the UI enforces it.
                        analizer.analize(trainingFoldId[i], testingFoldId[i], BB, ref indFeachureToExclude, ref predResult);
                    }
                    numFeachuresVSerror[j] = predResult;    // index j corresponds to j + 1 features in use
                    analizer.excludeFeatureFromDb(indFeachureToExclude);
                }
                PrintEngine.printBestErrorByNumberOfFeatchures(analizer._analysisFolderName, numFeachuresVSerror);
            }

            // Signal completion in the UI.
            btnScript.BackColor = Color.Green;
        }