// 2m0rr0w2, local pca partition, check all dims [START]
private bool getPcaPartitionAllDim(ref int dimIndex, ref int partitionId, ref double partitionValue,
                                   GeoWave parentNode, double error, ref List<int> sortedIds)
{
    //construct PCA for the parent node and save the original (pre-transformation) data matrix
    double[][] originalNodeData = DimReduction.constructNodePca(_trainingDt, parentNode);
    double[][] transformedData = parentNode.localPca.Transform(originalNodeData);
    parentNode.pcaDim = parentNode.localPca.Components.Count;

    //break if the PCA dimension equals the number of points
    if (parentNode.pcaDim == transformedData.Count())
    {
        return false;
    }

    //save dim of transformed data
    int pcaDim = parentNode.localPca.Components.Count;
    double[] errorEachDim = new double[pcaDim];
    int[] partitionIdEachDim = new int[pcaDim];

    // _rc.dim replaced by index of last component
    Helpers.applyFor(0, pcaDim, dim =>
    {
        errorEachDim[dim] = getPcaPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
    });

    //pick the dimension with the smallest partition error (argmin)
    dimIndex = Enumerable.Range(0, pcaDim)
                         .Aggregate((a, b) => (errorEachDim[a] < errorEachDim[b]) ? a : b);
    partitionId = partitionIdEachDim[dimIndex];

    //save the ids' order in the transformed data at the best dimension
    int bestDim = dimIndex;
    sortedIds = new List<int>(parentNode.pointsIdArray);   // will be sorted at the best split dimension
    List<int> idsClone = new List<int>(sortedIds);         // ids in original position
    sortedIds.Sort((c1, c2) => transformedData[idsClone.IndexOf(c1)][bestDim]
                   .CompareTo(transformedData[idsClone.IndexOf(c2)][bestDim]));

    //save partition value; guard: the partition id must exist among the original ids
    int originalSplitLocation = idsClone.IndexOf(partitionId);
    if (originalSplitLocation == -1)
    {
        return false;
    }
    partitionValue = transformedData[originalSplitLocation][bestDim];

    return errorEachDim[dimIndex] < error;
}
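// A minimal standalone sketch (illustration only, never called by the pipeline) of the
// argmin-by-Aggregate pattern used above to choose the best split dimension. It assumes
// System.Linq, which this file already relies on; the method name is hypothetical.
private static int argMinSketch(double[] errorEachDim)
{
    // Aggregate keeps whichever index carries the smaller error, i.e. the argmin,
    // e.g. for { 0.4, 0.1, 0.7 } it returns 1.
    return Enumerable.Range(0, errorEachDim.Length)
                     .Aggregate((a, b) => errorEachDim[a] < errorEachDim[b] ? a : b);
}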
private void btnScript_Click(object sender, EventArgs e, StreamWriter statusWriter)
{
    set2Config();
    Refresh();
    u_config.printConfig(@"C:\Wavelets decomposition\config.txt", null);
    AmazonS3Client client = Helpers.configAmazonS3ClientS3Client();

    UseS3 = UseS3CB.Checked;
    rumPrallel = rumPrallelCB.Checked;
    runBoosting = runBoostingCB.Checked;
    runProoning = runProoningCB.Checked;
    runBoostingProoning = runBoostingProoningCB.Checked;
    runRFProoning = runRFProoningCB.Checked;
    runRf = runRfCB.Checked;
    runBoostingLearningRate = runBoostingLearningRateCB.Checked;
    bucketName = bucketTB.Text;

    string results_path = ResultsTB.Text;
    string db_path = DBTB.Text + "\\";
    // e.g. @"C:\Users\Administrator\Dropbox\ADA\ada_valid\", "D:\\Phd\\Shai\\code\\tests\\helix tests\\noise_5\\noise_5\\", "C:\\reasearch\\tests\\lena\\"

    //get dir
    MainFolderName = results_path;
    Helpers.createMainDirectoryOrResultPath(results_path, bucketName, client);

    //READ DATA
    DB db = new DB();
    db.training_dt = db.getDataTable(db_path + "trainingData.txt");
    db.testing_dt = db.getDataTable(db_path + "testingData.txt");
    db.validation_dt = db.getDataTable(db_path + "ValidData.txt");
    db.training_label = db.getDataTable(db_path + "trainingLabel.txt");
    db.testing_label = db.getDataTable(db_path + "testingLabel.txt");
    db.validation_label = db.getDataTable(db_path + "ValidLabel.txt");

    upper_label = db.training_label.Max();
    lower_label = db.training_label.Min();

    //keep only the requested fraction of rows in each set
    double trainingPercent = double.Parse(trainingPercentTB.Text);  // 0.02;
    long rowToRemoveFrom = Convert.ToInt64(db.training_dt.Count() * trainingPercent);
    db.training_dt = db.training_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
    db.training_label = db.training_label.Where((el, i) => i < rowToRemoveFrom).ToArray();
    db.testing_dt = db.testing_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
    db.testing_label = db.testing_label.Where((el, i) => i < rowToRemoveFrom).ToArray();
    db.validation_dt = db.validation_dt.Where((el, i) => i < rowToRemoveFrom).ToArray();
    db.validation_label = db.validation_label.Where((el, i) => i < rowToRemoveFrom).ToArray();

    //REDUCE DIM, GLOBAL PCA
    if (usePCA.Checked)
    {
        DimReduction dimreduction = new DimReduction(db.training_dt);
        db.PCAtraining_dt = dimreduction.getGlobalPca(db.training_dt);
        db.PCAtesting_dt = dimreduction.getGlobalPca(db.testing_dt);
        db.PCAvalidation_dt = dimreduction.getGlobalPca(db.validation_dt);
    }
    else
    {
        //de-activate pca for dbg
        db.PCAtraining_dt = db.training_dt;
        db.PCAtesting_dt = db.testing_dt;
        db.PCAvalidation_dt = db.validation_dt;
    }

    db.PCAtraining_GridIndex_dt = new long[db.PCAtraining_dt.Count()][];
    for (int i = 0; i < db.PCAtraining_dt.Count(); i++)
    {
        db.PCAtraining_GridIndex_dt[i] = new long[db.PCAtraining_dt[i].Count()];
    }

    //BOUNDING BOX AND MAIN GRID
    boundingBox = db.getboundingBox(db.PCAtraining_dt);
    MainGrid = db.getMainGrid(db.PCAtraining_dt, boundingBox, ref db.PCAtraining_GridIndex_dt);

    //READ CONFIG
    methodConfig mc = new methodConfig(true);
    int Nloops = int.Parse(NloopsTB.Text) - 1;
    int Kfolds = 0;
    if (int.TryParse(croosValidTB.Text, out Kfolds))
    {
        Nloops = Kfolds - 1;
    }
    for (int k = 0; k < Nloops; k++)
    {
        mc.boostlamda_0.Add(3.8);  // create variant in number of pixels
    }
    //mc.boostlamda_0.Add(1500);// create variant in number of pixels
    //mc.boostlamda_0.Add(2500);// create variant in number of pixels
    //mc.boostlamda_0.Add(3000);// create variant in number of pixels
    mc.generateRecordConfigArr();

    for (int k = 0; k < mc.recArr.Count(); k++)
    {
        mc.recArr[k].dim = NfeaturesTB.Text == @"all"
            ? db.PCAtraining_dt[0].Count()
            : int.Parse(evaluateString(NfeaturesTB.Text, k));
        mc.recArr[k].approxThresh = double.Parse(evaluateString(approxThreshTB.Text, k));                 // 0.1;
        mc.recArr[k].partitionErrType = int.Parse(evaluateString(partitionTypeTB.Text, k));               // 2;
        mc.recArr[k].minWaveSize = int.Parse(evaluateString(minNodeSizeTB.Text, k));                      // 1; CHANGE AFTER DBG
        mc.recArr[k].hopping_size = int.Parse(evaluateString(waveletsSkipEstimationTB.Text, k));          // 25;
        mc.recArr[k].test_error_size = double.Parse(evaluateString(waveletsPercentEstimationTB.Text, k)); // percent of waves to check
        mc.recArr[k].NskipsinKfunc = double.Parse(evaluateString(boostingKfuncPercentTB.Text, k));        // 0.0025;
        mc.recArr[k].rfBaggingPercent = double.Parse(evaluateString(bagginPercentTB.Text, k));            // 0.6;
        mc.recArr[k].rfNum = int.Parse(evaluateString(NrfTB.Text, k));
        mc.recArr[k].boostNum = int.Parse(evaluateString(NboostTB.Text, k));                              // 10;
        mc.recArr[k].boostProoning_0 = int.Parse(evaluateString(NfirstPruninginBoostingTB.Text, k));      // 13
        mc.recArr[k].boostlamda_0 = double.Parse(evaluateString(boostingLamda0TB.Text, k));
        mc.recArr[k].NwaveletsBoosting = int.Parse(evaluateString(NfirstwaveletsBoostingTB.Text, k));     // 4;
        //mc.recArr[k].learningRate = 0;// 0.01;
        mc.recArr[k].boostNumLearningRate = int.Parse(evaluateString(NboostingLearningRateTB.Text, k));   // 55;
        mc.recArr[k].percent_training_db = trainingPercent;
        mc.recArr[k].BoundLevel = int.Parse(evaluateString(boundLevelTB.Text, k));                        // 1024;
        mc.recArr[k].NDimsinRF = NfeaturesrfTB.Text == @"all"
            ? db.PCAtraining_dt[0].Count()
            : int.Parse(evaluateString(NfeaturesrfTB.Text, k));
        mc.recArr[k].split_type = int.Parse(evaluateString(splitTypeTB.Text, k));                         // 0
        mc.recArr[k].NormLPType = int.Parse(evaluateString(errTypeEstimationTB.Text, k));
        mc.recArr[k].RFpruningTestRange[1] = int.Parse(evaluateString(RFpruningEstimationRange1TB.Text, k)); // 12;
        mc.recArr[k].boundDepthTree = int.Parse(evaluateString(boundDepthTB.Text, k));                    // 1024;
        mc.recArr[k].CrossValidFold = k;
        // 2m0rr0w2 save labels dim in config
        mc.recArr[k].labelDim = db.training_label[0].Count();
        //mc.recArr[k].boostNum = t;// tmp to delete
        //mc.recArr[k].RFwaveletsTestRange[0] = 25;
        //mc.recArr[k].RFwaveletsTestRange[1] = 50;
    }

    Helpers.createOutputDirectories(mc.recArr, client, u_config, bucketName, results_path);

    //SET ID ARRAY LIST
    List<int> trainingID = Enumerable.Range(0, db.PCAtraining_dt.Count()).ToList();
    List<int> testingID = Enumerable.Range(0, db.PCAtesting_dt.Count()).ToList();

    //cross validation
    List<List<int>> trainingFoldId = new List<List<int>>();
    List<List<int>> testingFoldId = new List<List<int>>();
    Random ran = new Random(2);
    List<int> training_rand = trainingID.OrderBy(x => ran.Next()).ToList().GetRange(0, trainingID.Count); //THE LARGEST GROUP IS TRAINING
    if (int.TryParse(croosValidTB.Text, out Kfolds))
    {
        createCrossValid(Kfolds, training_rand, trainingFoldId, testingFoldId);
    }

    //bounding intervals
    int[][] BB = new int[2][];
    BB[0] = new int[boundingBox[0].Count()];
    BB[1] = new int[boundingBox[0].Count()];
    for (int i = 0; i < boundingBox[0].Count(); i++)
    {
        BB[1][i] = MainGrid[i].Count() - 1;  //set last index in each dim
    }

    for (int i = 0; i < mc.recArr.Count; i++)
    {
        Analizer analizer = new Analizer(MainFolderName + "\\" + mc.recArr[i].getShortName(), MainGrid, db, mc.recArr[i]);
        if (!croosValidCB.Checked)
        {
            analizer.analize(trainingID, testingID, BB);
        }
        else
        {
            analizer.analize(trainingFoldId[i], testingFoldId[i], BB);  //cross validation
        }
        statusWriter.WriteLine("fold " + i + " ready!");
    }
    //btnScript.BackColor = Color.Green;
}
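// Hedged sketch only: createCrossValid's body is defined elsewhere and is not shown in
// this file. A typical k-fold split over the pre-shuffled ids could look like the code
// below; the name and every detail here are assumptions, not the method's actual code.
private static void createCrossValidSketch(int kFolds, List<int> shuffledIds,
    List<List<int>> trainingFoldId, List<List<int>> testingFoldId)
{
    int foldSize = shuffledIds.Count / kFolds;
    for (int fold = 0; fold < kFolds; fold++)
    {
        // hold out the fold-th contiguous block of shuffled ids for testing and train
        // on the rest (ids are unique, so Except is safe); any remainder rows beyond
        // kFolds * foldSize stay in every training fold in this simplified version
        List<int> test = shuffledIds.Skip(fold * foldSize).Take(foldSize).ToList();
        List<int> train = shuffledIds.Except(test).ToList();
        trainingFoldId.Add(train);
        testingFoldId.Add(test);
    }
}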
/* private void recursiveBSP_WaveletsByConsts(List<GeoWave> geoWaveArr, int geoWaveId, int seed = 0)
 * {
 *     //CALC APPROX_SOLUTION FOR GEO WAVE
 *     double error = geoWaveArr[geoWaveId].calc_MeanValueReturnError(_trainingLabel, geoWaveArr[geoWaveId].pointsIdArray);
 *     if (error < _rc.approxThresh ||
 *         geoWaveArr[geoWaveId].pointsIdArray.Count() <= _rc.minWaveSize ||
 *         _rc.boundDepthTree <= geoWaveArr[geoWaveId].level)
 *         return;
 *
 *     int dimIndex = -1;
 *     int Maingridindex = -1;
 *
 *     bool IsPartitionOK = false;
 *     switch (_rc.split_type)
 *     {
 *         case 0:
 *             IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
 *             break;
 *         case 1:
 *             IsPartitionOK = getRandPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, seed);
 *             break;
 *         case 2:
 *         {
 *             Random ran1 = new Random(seed);
 *             Random ran2 = new Random(geoWaveId);
 *             int one = ran1.Next(0, int.MaxValue / 10);
 *             int two = ran2.Next(0, int.MaxValue / 10);
 *             bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
 *             IsPartitionOK = getBestPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
 *             break;
 *         }
 *         case 3:
 *             IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, _dime2Take);
 *             break;
 *         case 4:
 *         {
 *             Random ran1 = new Random(seed);
 *             Random ran2 = new Random(geoWaveId);
 *             int one = ran1.Next(0, int.MaxValue / 10);
 *             int two = ran2.Next(0, int.MaxValue / 10);
 *             bool[] Dim2TakeNode = getDim2Take(_rc, one + two);
 *             IsPartitionOK = getGiniPartitionResult(ref dimIndex, ref Maingridindex, geoWaveArr, geoWaveId, error, Dim2TakeNode);
 *             break;
 *         }
 *     }
 *
 *     if (!IsPartitionOK)
 *         return;
 *
 *     GeoWave child0 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
 *     GeoWave child1 = new GeoWave(geoWaveArr[geoWaveId].boubdingBox, _trainingLabel[0].Count(), geoWaveArr[geoWaveId].rc);
 *
 *     //set partition
 *     child0.boubdingBox[1][dimIndex] = Maingridindex;
 *     child1.boubdingBox[0][dimIndex] = Maingridindex;
 *
 *     //DOCUMENT ON CHILDREN
 *     child0.dimIndex = dimIndex;
 *     child0.Maingridindex = Maingridindex;
 *     child1.dimIndex = dimIndex;
 *     child1.Maingridindex = Maingridindex;
 *
 *     child0.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
 *     child1.MaingridValue = Form1.MainGrid[dimIndex][Maingridindex];
 *
 *     //calc norm, calc mean value
 *     if (Form1.isBoxSingular(child0.boubdingBox, _rc.dim) || Form1.isBoxSingular(child1.boubdingBox, _rc.dim))
 *         return;
 *
 *     //SHOULD I VERIFY THAT THE CHILD IS NOT ITS PARENT? (IN CASES WHERE THE PARTITION CAN'T BE MODIFIED)
 *     setChildrensPointsAndMeanValue(ref child0, ref child1, dimIndex, geoWaveArr[geoWaveId].pointsIdArray);
 *
 *     //SET TWO CHILDREN
 *     child0.parentID = child1.parentID = geoWaveId;
 *     child0.child0 = child1.child0 = -1;
 *     child0.child1 = child1.child1 = -1;
 *     child0.level = child1.level = geoWaveArr[geoWaveId].level + 1;
 *
 *     child0.computeNormOfConsts(geoWaveArr[geoWaveId]);
 *     child1.computeNormOfConsts(geoWaveArr[geoWaveId]);
 *     geoWaveArr.Add(child0);
 *     geoWaveArr.Add(child1);
 *     geoWaveArr[geoWaveId].child0 = geoWaveArr.Count - 2;
 *     geoWaveArr[geoWaveId].child1 = geoWaveArr.Count - 1;
 *
 *     //RECURSION STEP !!!
 *     recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child0, seed);
 *     recursiveBSP_WaveletsByConsts(geoWaveArr, geoWaveArr[geoWaveId].child1, seed);
 * } */

private SplitProps getTransformedPartitionAllDim(GeoWave parentNode, double error, SplitType splitType)
{
    double[][] originalNodeData = parentNode.pointsIdArray.Select(id => _trainingDt[id]).ToArray();
    double[][] transformedData;

    //clean columns of categorical variables 2m0rr0w2
    // originalNodeData = Helpers.copyAndRemoveCategoricalColumns(originalNodeData, _rc);

    //result struct
    SplitProps resultProps = new SplitProps();

    switch (splitType)
    {
        case SplitType.LocalPca:
            DimReduction.constructNodePcaByOriginalData(originalNodeData, parentNode);
            transformedData = parentNode.localPca.Transform(originalNodeData);
            break;
        case SplitType.DiffMaps5Percent:
            if (originalNodeData.Count() <= _rc.dim)
            {
                resultProps.isPartitionOk = false;
                return resultProps;
            }
            transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, 0.05);
            break;
        case SplitType.DiffMaps1Percent:
            if (originalNodeData.Count() <= _rc.dim)
            {
                resultProps.isPartitionOk = false;
                return resultProps;
            }
            transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, 0.01);
            break;
        case SplitType.DiffMapsHalfPercent:
            if (originalNodeData.Count() <= _rc.dim)
            {
                resultProps.isPartitionOk = false;
                return resultProps;
            }
            transformedData = DiffusionMaps.getTransformedMatrix(originalNodeData, 0.005);
            break;
        case SplitType.MainAxes:
        case SplitType.Categorical:
            transformedData = originalNodeData;
            break;
        default:
            transformedData = null;
            break;
    }

    if (transformedData == null)
    {
        //throw new Exception("******TRANSFORMATION ERROR!!!");
        resultProps.isPartitionOk = false;
        Debug.WriteLine("*********Failed transformation");
        Debug.WriteLine("*********Failed node size: " + parentNode.pointsIdArray.Count);
        return resultProps;
    }

    parentNode.transformedDim = transformedData.First().Length;

    //save dim of transformed data
    int transformedDim = parentNode.transformedDim;
    double[] errorEachDim = new double[transformedDim];
    int[] partitionIdEachDim = new int[transformedDim];

    // _rc.dim replaced by transformedDim
    Helpers.applyFor(0, transformedDim, dim =>
    {
        errorEachDim[dim] = getTransformedDataPartitionSingleDim(dim, transformedData, parentNode, partitionIdEachDim);
    });

    int bestDim = Enumerable.Range(0, transformedDim)
                            .Aggregate((a, b) => (errorEachDim[a] < errorEachDim[b]) ? a : b);
    resultProps.splitId = partitionIdEachDim[bestDim];

    //save the ids' order in the transformed data at the best dimension
    resultProps.sortedIds = new List<int>(parentNode.pointsIdArray);  // will be sorted at the best split dimension
    List<int> idsClone = new List<int>(resultProps.sortedIds);        // ids in original position
    resultProps.sortedIds.Sort((c1, c2) => transformedData[idsClone.IndexOf(c1)][bestDim]
                               .CompareTo(transformedData[idsClone.IndexOf(c2)][bestDim]));

    //save partition value; guard: the split id must exist among the original ids
    int originalSplitLocation = idsClone.IndexOf(resultProps.splitId);
    if (originalSplitLocation == -1)
    {
        resultProps.isPartitionOk = false;
        return resultProps;
    }

    resultProps.isPartitionOk = (errorEachDim[bestDim] < error);
    resultProps.splitValue = transformedData[originalSplitLocation][bestDim];
    resultProps.error = errorEachDim[bestDim];
    resultProps.type = splitType;
    resultProps.dimIndex = bestDim;

    //shift dimension if it was not a categorical split 2m0rr0w2
    /* foreach (int categoricalInd in _rc.indOfCategorical)
     * {
     *     resultProps.dimIndex = (resultProps.dimIndex == categoricalInd)
     *         ? resultProps.dimIndex + 1
     *         : resultProps.dimIndex;
     * } */

    return resultProps;
}
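// Illustrative alternative (not used by the pipeline): the Sort calls above run
// idsClone.IndexOf inside the comparer, which costs O(n) per comparison. Precomputing
// each id's row once yields the same ordering in O(n log n). The method name is
// hypothetical; it assumes, as the code above does, that the rows of transformedData
// follow the original order of the node's point ids.
private static List<int> sortIdsByCoordinateSketch(List<int> pointIds,
    double[][] transformedData, int bestDim)
{
    // map each id to its row in transformedData (rows follow pointIds' original order)
    var position = new Dictionary<int, int>();
    for (int row = 0; row < pointIds.Count; row++)
    {
        position[pointIds[row]] = row;
    }
    // sort ids by their coordinate along the chosen dimension
    return pointIds.OrderBy(id => transformedData[position[id]][bestDim]).ToList();
}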