Exemple #1
0
        /// <summary>
        /// Runs information-gain feature selection on <paramref name="data"/>, logs how many
        /// attributes were selected, and serializes the selected attribute indices to an XML
        /// file inside the current work directory.
        /// </summary>
        /// <param name="data">
        /// Instances handed to the information-gain selection. It is passed by reference only
        /// to keep the existing call sites compiling; this method never reassigns it.
        /// </param>
        public static void selectIGSerialize(ref Instances data)
        {
            // Run IG on the supplied data. The outcome is read back from
            // Preferences.Instance.attsel below (presumably populated by this call).
            WekaTrainingMethods.useLowLevelInformationGainFeatureSelection(data);

            // Query the selection once so the logged count and the serialized array
            // are guaranteed to describe the same result.
            int[] selectedIndices = Preferences.Instance.attsel.selectedAttributes();
            GuiPreferences.Instance.setLog(selectedIndices.Length.ToString() + " features above zero value selected (including the Class feature)");

            // Serialize (save) the selected IG indices, in the order IG returned them.
            string indicesFile = GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS_filteredIG_indices.xml";
            XMLSerializer.serializeArrayToFile<int[]>(indicesFile, selectedIndices);

            // Only one file is written by this method, so the log says exactly that.
            GuiPreferences.Instance.setLog("saved top IG indices to an XML file (in the same order as IG gave it)");
        }
Exemple #2
0
        /// <summary>
        /// Rebuilds <c>Preferences.Instance.configFile</c> from the current GUI preference
        /// values and serializes it to <c>config.xml</c> in the work directory.
        /// </summary>
        public static void saveConfigFile()
        {
            var gui      = GuiPreferences.Instance;
            var settings = new Dictionary<string, string>();

            // Publish the (still empty) dictionary first, then fill it entry by entry.
            Preferences.Instance.configFile = settings;

            // Locations and inputs.
            settings["WorkDirectory"] = gui.WorkDirectory;
            settings["ProtocolFile"]  = gui.ProtocolFile;
            settings["dirList"]       = string.Join(",", Preferences.Instance.dirList.Select(u => u));
            settings["dataType.rawValue"] = gui.FileType.ToString();

            // Numeric up/down values, stored using their default string form.
            settings["NudThreshold"]          = gui.NudThreshold.ToString();
            settings["NudExtractFromTR"]      = gui.NudExtractFromTR.ToString();
            settings["NudExtractToTR"]        = gui.NudExtractToTR.ToString();
            settings["NudClassifyUsingTR"]    = gui.NudClassifyUsingTR.ToString();
            settings["NudIGThreshold"]        = gui.NudIGThreshold.ToString();
            settings["NudIGVoxelAmount"]      = gui.NudIGVoxelAmount.ToString();
            settings["NudFilterEyeSlices"]    = gui.NudFilterEyeSlices.ToString();
            settings["NudEyeSliceFirstLines"] = gui.NudEyeSliceFirstLines.ToString();

            // Flags and mode selections.
            settings["CbPeekHigherTRsIGChecked"] = gui.CbPeekHigherTRsIGChecked.ToString();
            settings["NormalizationType"]        = gui.NormalizedType.ToString();   // normalization formula
            settings["IgSelectionType"]          = gui.IgSelectionType.ToString();  // IG by threshold or by voxel amount
            settings["NudMovingWindow"]          = gui.NudMovingWindow.ToString();  // moving window

            string configPath = gui.WorkDirectory + "config.xml";
            XMLSerializer.serializeArrayToFile<Dictionary<string, string>>(configPath, Preferences.Instance.configFile);
        }
Exemple #3
0
        /// <summary>
        /// Prepares the final data directory for a multi-run data set: derives the directory
        /// name from the current preferences, creates it, concatenates the per-run vectors,
        /// merges the per-run min/max scaling parameters into one parameter file, and computes
        /// and saves a per-feature range around the training baseline.
        /// </summary>
        /// <param name="directoryList">
        /// Run directories. Each entry is used as a path prefix for that run's
        /// <c>TrainSet_{TR}th_vectors_scale_paramCS.libsvm</c> file, so entries are expected to
        /// end with a directory separator.
        /// </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="directoryList"/> is null.</exception>
        /// <exception cref="System.ArgumentException"><paramref name="directoryList"/> is empty.</exception>
        public static void WekaProcessingPipelineForMultiRuns(List<string> directoryList)
        {
            // Fail before touching any global state or the file system; with no runs the
            // merge below would otherwise die on feature_max[0] after the directory was created.
            if (directoryList == null)
            {
                throw new System.ArgumentNullException("directoryList");
            }
            if (directoryList.Count == 0)
            {
                throw new System.ArgumentException("At least one run directory is required.", "directoryList");
            }

            string trLabel = GuiPreferences.Instance.NudClassifyUsingTR.ToString();

            // Compose the final directory name locally and publish it once, instead of
            // mutating the shared WorkDirectory preference piece by piece.
            string finalDirectory = @"C:\FinalData_" +
                                    "TR" + trLabel + "_" +
                                    GuiPreferences.Instance.NormalizedType.ToString() +
                                    GuiPreferences.Instance.NudMovingWindow.ToString() + "_";

            if (GuiPreferences.Instance.IgSelectionType == IGType.Threshold)
            {
                finalDirectory += "IG_Thr" + GuiPreferences.Instance.NudIGThreshold.ToString() + "_";
            }
            else if (GuiPreferences.Instance.IgSelectionType == IGType.Voxels)
            {
                finalDirectory += "IG_Vox" + GuiPreferences.Instance.NudIGVoxelAmount.ToString() + "_";
            }

            finalDirectory += Preferences.Instance.events.EventListLastTr.ToString();

            if (GuiPreferences.Instance.CbPeekHigherTRsIGChecked == true)
            {
                finalDirectory += "_Peeking";
            }

            GuiPreferences.Instance.WorkDirectory = finalDirectory;
            GuiPreferences.Instance.setLog(@"Creating Final Directory in: " + GuiPreferences.Instance.WorkDirectory);
            FileDirectoryOperations.CreateDirectory(GuiPreferences.Instance.WorkDirectory);
            // From here on WorkDirectory is used as a path prefix, so it must end with a separator.
            GuiPreferences.Instance.WorkDirectory += @"\";

            ConcatenateLibsvmVectorizedPerTR(directoryList);

            // NOTE: min/max values are taken from the param file of each run, which yields N max
            // values and N min values per feature. A median over those N values would not be a
            // real median of the data; if one is needed it should be computed in the
            // normalization class over the concatenated values of all runs.
            //
            // NOTE2: this code takes MAX(maxes) and MIN(mins) over the N runs. The results are
            // saved to be used as the initial min/max values for the testing stage.
            int runCount = directoryList.Count;
            double[][] feature_max = new double[runCount][];
            double[][] feature_min = new double[runCount][];
            int max_index = -1; // required by the ref signature; not used afterwards

            // The current TR's param file is used (an earlier variant used the previous TR, i.e. TR - 1).
            string paramFileName = "TrainSet_" + trLabel + "th_vectors_scale_paramCS.libsvm";
            for (int run = 0; run < runCount; run++)
            {
                TrainingTesting_SharedVariables._svmscaleTraining.getConfigFileMinMaxValues(
                    directoryList[run] + paramFileName,
                    ref feature_max[run], ref feature_min[run], ref max_index);
            }

            // All per-feature arrays are sized from the first run's max array.
            int featureCount = feature_max[0].Length;
            double[] finalFeature_max       = new double[featureCount];
            double[] finalFeature_min       = new double[featureCount];
            double[] finalFeature_medianMax = new double[featureCount];
            double[] finalFeature_medianMin = new double[featureCount];

            // Scratch buffers holding one feature's value from every run.
            double[] perRunMax = new double[runCount];
            double[] perRunMin = new double[runCount];

            for (int j = 0; j < featureCount; j++)
            {
                for (int run = 0; run < runCount; run++)
                {
                    perRunMax[run] = feature_max[run][j];
                    perRunMin[run] = feature_min[run][j];
                }

                finalFeature_max[j] = perRunMax.Max();
                finalFeature_min[j] = perRunMin.Min();
            }

            // Save the merged min/max param file.
            TrainingTesting_SharedVariables._svmscaleTraining.saveConfigMinMax_CSharp(
                GuiPreferences.Instance.WorkDirectory + "TrainSet_" + trLabel + "th_vectors_scale_paramCS.libsvm",
                finalFeature_min, finalFeature_max, 204801, 0.0f, 1.0f);

            int lowRangeMinus  = 0;
            int highRangeMinus = 0;

            // getSecondLowest is handed List<double> instances, as in the original code.
            // Every slot is overwritten for each feature before the lists are read.
            var rangeAboveBaseline = new List<double>(new double[runCount]);
            var rangeBelowBaseline = new List<double>(new double[runCount]);

            // Ranges relative to the training baseline. The first and last feature slots are
            // skipped and the baseline array is indexed with j - 1.
            // NOTE(review): presumably slot 0 is unused and the last slot is the padding
            // feature -- confirm against the param-file layout.
            for (int j = 1; j < featureCount - 1; j++)
            {
                for (int run = 0; run < runCount; run++)
                {
                    double baseline = Preferences.Instance.TrainingBaselineMedians[run].median[j - 1];

                    // high range = training max - training baseline median
                    // low range  = training baseline median - training min
                    rangeAboveBaseline[run] = feature_max[run][j] - baseline;
                    rangeBelowBaseline[run] = baseline - feature_min[run][j];
                }

                // The second-lowest range across runs is chosen (alternatives tried earlier:
                // second-highest, and the median of the ranges).
                double chosenLowRange  = getSecondLowest(rangeBelowBaseline);
                double chosenHighRange = getSecondLowest(rangeAboveBaseline);

                finalFeature_medianMin[j] = chosenLowRange;
                finalFeature_medianMax[j] = chosenHighRange;

                if (chosenLowRange <= 0)
                {
                    lowRangeMinus++;
                }

                if (chosenHighRange <= 0)
                {
                    highRangeMinus++;
                }
            }

            // For verification only: save the ranges in param-file form (this file is not to be used!).
            TrainingTesting_SharedVariables._svmscaleTraining.saveConfigMinMax_CSharp(
                GuiPreferences.Instance.WorkDirectory + "TrainSet_" + trLabel +
                "th_vectors_scale_MedianRangeFromBaseline.params.txt", finalFeature_medianMin, finalFeature_medianMax, 204801, 0.0f, 1.0f);

            Preferences.Instance.medianRange = new MinMax(finalFeature_medianMin, finalFeature_medianMax);

            XMLSerializer.serializeArrayToFile<MinMax>(GuiPreferences.Instance.WorkDirectory + "TrainSet_" +
                                                       trLabel + "th_vectors_MedianRangeFromBaseline.xml", Preferences.Instance.medianRange);

            GuiPreferences.Instance.setLog("out of 204K features, low range <= 0: " + lowRangeMinus.ToString() + " && high range <= 0: " + highRangeMinus.ToString());
        }
Exemple #4
0
        /// <summary>
        /// Exports the given problem to a LibSVM file in the work directory, splits it per TR,
        /// normalizes and converts the TR3/TR4 files to ARFF, runs information-gain feature
        /// selection on the TR4 data, and returns the data set of the TR chosen for
        /// classification reduced to the selected features.
        /// </summary>
        /// <param name="_trialProblem">Problem to export; it must contain samples.</param>
        /// <returns>
        /// The filtered instances with the class index set, or <c>null</c> when
        /// <paramref name="_trialProblem"/> is null or has no samples.
        /// </returns>
        public static Instances WekaPipeline_Unprocessed(libSVM_ExtendedProblem _trialProblem)
        {
            // Nothing to export: treat a missing problem the same way as a problem without samples.
            if (_trialProblem == null || _trialProblem.samples == null)
            {
                GuiPreferences.Instance.setLog("Export Failed: Problem has no samples!");
                return null;
            }

            string workDir       = GuiPreferences.Instance.WorkDirectory;
            string trainFileName = workDir + "TrainSet";

            // todo: add proper names to saved files.
            _trialProblem.Save(trainFileName + ".libsvm");
            GuiPreferences.Instance.setLog("saved Original Problem LibSVM file: " + trainFileName + ".libsvm");

            // Separate the data set into 3rd and 4th TR files.
            KthExtractionManager.ExecuteSelectKthVectorScript("TrainSet", workDir);
            GuiPreferences.Instance.setLog("Created TR3 & TR4 files");

            // Normalize the 3rd and 4th TR files.
            NormalizationManager.ScaleTrFiles(workDir);
            GuiPreferences.Instance.setLog("Normalized TR3 & TR4 files");

            // Convert TR3 and TR4 to ARFF. The 204800 argument goes with the fake feature 204801
            // that was placed to make sure the ARFF file gets 204800 features.
            string[] scaledFiles = { "TrainSet_3th_vectors_scaledCS.libsvm", "TrainSet_4th_vectors_scaledCS.libsvm" };
            foreach (string scaledFile in scaledFiles)
            {
                if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(workDir + scaledFile, 204800))
                {
                    GuiPreferences.Instance.setLog("Converted to ARFF: " + scaledFile);
                }
                else
                {
                    // Previously ignored silently; the load below would then fail without any hint why.
                    GuiPreferences.Instance.setLog("Conversion to ARFF failed: " + scaledFile);
                }
            }

            //---------------------------------- select top IG features from TR4 (the trick) -----------------------------
            // Load TR4.
            ConverterUtils.DataSource source = new ConverterUtils.DataSource(workDir + "TrainSet_4th_vectors_scaledCS.libsvm.arff");
            Instances data = source.getDataSet();

            // Use the last attribute as the class when none is set.
            if (data.classIndex() == -1)
            {
                data.setClassIndex(data.numAttributes() - 1);
            }

            // If the class attribute is not nominal, convert it.
            if (!data.classAttribute().isNominal())
            {
                var filter = new weka.filters.unsupervised.attribute.NumericToNominal();

                filter.setOptions(weka.core.Utils.splitOptions("-R last"));
                filter.setInputFormat(data);
                data = Filter.useFilter(data, filter);
            }

            // Run IG on TR4; the selection is read back from Preferences.Instance.attsel.
            WekaTrainingMethods.useLowLevelInformationGainFeatureSelection(data);

            TrainingTesting_SharedVariables._trainTopIGFeatures = Preferences.Instance.attsel.selectedAttributes();
            int selectedCount = TrainingTesting_SharedVariables._trainTopIGFeatures.Length;

            // This should be done ONCE.
            Preferences.Instance.fastvector = RealTimeProcessing.CreateFastVector(selectedCount);
            GuiPreferences.Instance.setLog("created fast vector of length " + selectedCount.ToString());

            // Common prefix of every file that belongs to the TR chosen for classification.
            string classifyTrPrefix = workDir + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS";

            // Serialize (save) the top IG indices to a file.
            XMLSerializer.serializeArrayToFile(classifyTrPrefix + "_filteredIG_indices.xml", TrainingTesting_SharedVariables._trainTopIGFeatures);
            GuiPreferences.Instance.setLog("saved IG indices to a file (in the same order as IG gave it)");

            GuiPreferences.Instance.setLog(selectedCount.ToString() + " features above zero value selected (including the Class feature)");

            // Load the TR chosen for classification. The file name follows NudClassifyUsingTR;
            // it is not hard-wired to TR3.
            source = new ConverterUtils.DataSource(classifyTrPrefix + ".libsvm.arff");
            data   = source.getDataSet();

            // Filter using the top IG features selected above.
            data = WekaTrainingMethods.useRemoveFilter(data, TrainingTesting_SharedVariables._trainTopIGFeatures, true);

            // After filtering, the last attribute needs to be the class.
            if (data.classIndex() == -1)
            {
                data.setClassIndex(data.numAttributes() - 1);
            }

            // Save the filtered data to a file.
            WekaCommonFileOperation.SaveLIBSVM(data, classifyTrPrefix + "_filteredIG");

            return data;
        }