Exemplo n.º 1
0
        /// <summary>
        /// saves ProblemOriginal to a file, separates into tr3 and tr4, normalizes, and converts the libsvm files into arff.
        /// </summary>
        public static void VectorizeAndNormalize(libSVM_ExtendedProblem problem)
        {
            string trainFileName = GuiPreferences.Instance.WorkDirectory /*+ GuiPreferences.Instance.FileName*/ + "TrainSet";

            //todo add proper named to saved files, check if null is logical at all.
            //if ((Preferences.Instance.ProblemOriginal.samples != null))
            //{
            problem.Save(trainFileName + ".libsvm", 80 * 80 * 32 + 1);
            GuiPreferences.Instance.setLog("saved Original Problem LibSVM file: " + trainFileName + ".libsvm");
            //}

            //separate DS to 3rd and 4th TR
            ////example: ExecuteSelectKthVectorScript(@"TrainSet", @"H:\My_Dropbox\VERE\MRI_data\Tirosh\20120508.Rapid+NullClass.day2\4\rtp\");
            KthExtractionManager.ExecuteSelectKthVectorScript(/*GuiPreferences.Instance.FileName +*/ "TrainSet", GuiPreferences.Instance.WorkDirectory);
            //GuiPreferences.Instance.setLog("Created TR3, TR4, TR5, TR6 files (5+6 depends if added to the python script)");

            //NORMALIZING all TR Files
            NormalizationManager.ScaleTrFiles(GuiPreferences.Instance.WorkDirectory);

            //CONVERTING all TR files
            WekaCommonFileOperation.ConvertToArff(GuiPreferences.Instance.WorkDirectory);
        }
Exemplo n.º 2
0
        public static Instances WekaPipeline_Unprocessed(libSVM_ExtendedProblem _trialProblem)
        {
            //export to libsvm file
            if (_trialProblem.samples == null)
            {
                GuiPreferences.Instance.setLog("Export Failed: Problem has no samples!");
                return(null);
            }

            string trainFileName = GuiPreferences.Instance.WorkDirectory /*+ GuiPreferences.Instance.FileName*/ + "TrainSet";


            //todo add proper named to saved files, check if null is logical at all.
            if ((_trialProblem.samples != null))
            {
                _trialProblem.Save(trainFileName + ".libsvm");
                GuiPreferences.Instance.setLog("saved Original Problem LibSVM file: " + trainFileName + ".libsvm");
            }

            //separate DS to 3rd and 4th TR
            ////example: ExecuteSelectKthVectorScript(@"TrainSet", @"H:\My_Dropbox\VERE\MRI_data\Tirosh\20120508.Rapid+NullClass.day2\4\rtp\");
            KthExtractionManager.ExecuteSelectKthVectorScript(/*GuiPreferences.Instance.FileName +*/ "TrainSet", GuiPreferences.Instance.WorkDirectory);
            GuiPreferences.Instance.setLog("Created TR3 & TR4 files");

            //normalize 3rd and 4th TR files.
            NormalizationManager.ScaleTrFiles(GuiPreferences.Instance.WorkDirectory);
            GuiPreferences.Instance.setLog("Normalized TR3 & TR4 files");

            //convert tr4 and tr3 to arff + REMOVE 204801 FAKE FEATURE, THAT WAS PLACES TO MAKE SURE WE GET 204800 FEATURES IN THE ARFF FILE.
            if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(GuiPreferences.Instance.WorkDirectory + "TrainSet_3th_vectors_scaledCS.libsvm", 204800))
            {
                GuiPreferences.Instance.setLog("Converted to ARFF: TrainSet_3th_vectors_scaledCS.libsvm");
            }
            if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(GuiPreferences.Instance.WorkDirectory + "TrainSet_4th_vectors_scaledCS.libsvm", 204800))
            {
                GuiPreferences.Instance.setLog("Converted to ARFF: TrainSet_4th_vectors_scaledCS.libsvm");
            }

            //---------------------------------- filter tr3 based on top 1000 from tr4 (the trick) -----------------------------
            //load TR4
            ConverterUtils.DataSource source = new ConverterUtils.DataSource(GuiPreferences.Instance.WorkDirectory + "TrainSet_4th_vectors_scaledCS.libsvm.arff");
            Instances data = source.getDataSet();

            //assign last as index.
            if (data.classIndex() == -1)
            {
                data.setClassIndex(data.numAttributes() - 1);
            }

            //if class not nominal, convert to
            if (!data.classAttribute().isNominal())
            {
                var filter = new weka.filters.unsupervised.attribute.NumericToNominal();

                filter.setOptions(weka.core.Utils.splitOptions("-R last"));
                //filter.setAttributeIndices("last");
                filter.setInputFormat(data);
                data = Filter.useFilter(data, filter);
            }

            //run ig and get top 1000 or up to 1000 bigger than zero, from tr4
            WekaTrainingMethods.useLowLevelInformationGainFeatureSelection(data);

            TrainingTesting_SharedVariables._trainTopIGFeatures = Preferences.Instance.attsel.selectedAttributes();

            //this should be done ONCE
            Preferences.Instance.fastvector = RealTimeProcessing.CreateFastVector(TrainingTesting_SharedVariables._trainTopIGFeatures.Length);
            GuiPreferences.Instance.setLog("created fast vector of length " + TrainingTesting_SharedVariables._trainTopIGFeatures.Length.ToString());

            //serialize (save) topIG indices to file.
            XMLSerializer.serializeArrayToFile(GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS_filteredIG_indices.xml", TrainingTesting_SharedVariables._trainTopIGFeatures);
            GuiPreferences.Instance.setLog("saved IG indices to a file (in the same order as IG gave it)");
            //int [] _trainTopIGFeatures_loaded = DeserializeArrayToFile(GuiPreferences.Instance.WorkDirectory + "TrainSet_3th_vectors_scaledCS_filteredIG_indices.xml");

            GuiPreferences.Instance.setLog(TrainingTesting_SharedVariables._trainTopIGFeatures.Length.ToString() + " features above zero value selected (including the Class feature)");

            //load tr3
            source = new ConverterUtils.DataSource(GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS.libsvm.arff");
            data   = source.getDataSet();

            //filter top IG
            data = WekaTrainingMethods.useRemoveFilter(data, TrainingTesting_SharedVariables._trainTopIGFeatures, true);

            //after filtering last feature needs to be the class
            if (data.classIndex() == -1)
            {
                data.setClassIndex(data.numAttributes() - 1);
            }

            //save filtered to a file
            WekaCommonFileOperation.SaveLIBSVM(data, GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS_filteredIG");

            return(data);
        }