/// <summary>
/// Runs the low-level information-gain feature selection on <paramref name="data"/> and
/// serializes the selected attribute indices to an XML file in the current work directory.
/// </summary>
/// <param name="data">Instances handed to the information-gain selection routine.</param>
public static void selectIGSerialize(ref Instances data)
{
    // Run IG on the supplied data; the selection is read back from Preferences.Instance.attsel
    // (presumably populated by this call — confirm in WekaTrainingMethods).
    WekaTrainingMethods.useLowLevelInformationGainFeatureSelection(data);

    int[] selectedIndices = Preferences.Instance.attsel.selectedAttributes();
    GuiPreferences.Instance.setLog(selectedIndices.Length.ToString() + " features above zero value selected (including the Class feature)");

    // Persist the selected indices, in the order the selection returned them.
    string indicesFile = GuiPreferences.Instance.WorkDirectory
        + "TrainSet_"
        + GuiPreferences.Instance.NudClassifyUsingTR.ToString()
        + "th_vectors_scaledCS_filteredIG_indices.xml";
    XMLSerializer.serializeArrayToFile<int[]>(indicesFile, selectedIndices);

    // NOTE(review): only one file is written by this method, yet the message mentions two — confirm.
    GuiPreferences.Instance.setLog("saved top + All IG indices to TWO XML files (in the same order as IG gave it)");
}
/// <summary>
/// Snapshots the current preferences into a fresh string dictionary, publishes it as
/// Preferences.Instance.configFile and serializes it to "config.xml" in the work directory.
/// </summary>
public static void saveConfigFile()
{
    var gui = GuiPreferences.Instance;

    // Publish the (still empty) dictionary first, then fill it entry by entry.
    var config = new Dictionary<string, string>();
    Preferences.Instance.configFile = config;

    // Locations and input description.
    config.Add("WorkDirectory", gui.WorkDirectory);
    config.Add("ProtocolFile", gui.ProtocolFile);
    config.Add("dirList", string.Join(",", Preferences.Instance.dirList));
    config.Add("dataType.rawValue", gui.FileType.ToString());

    // Numeric settings (values are stored via their default ToString()).
    config.Add("NudThreshold", gui.NudThreshold.ToString());
    config.Add("NudExtractFromTR", gui.NudExtractFromTR.ToString());
    config.Add("NudExtractToTR", gui.NudExtractToTR.ToString());
    config.Add("NudClassifyUsingTR", gui.NudClassifyUsingTR.ToString());
    config.Add("NudIGThreshold", gui.NudIGThreshold.ToString());
    config.Add("NudIGVoxelAmount", gui.NudIGVoxelAmount.ToString());
    config.Add("NudFilterEyeSlices", gui.NudFilterEyeSlices.ToString());
    config.Add("NudEyeSliceFirstLines", gui.NudEyeSliceFirstLines.ToString());

    // Flags and mode selectors.
    config.Add("CbPeekHigherTRsIGChecked", gui.CbPeekHigherTRsIGChecked.ToString());
    config.Add("NormalizationType", gui.NormalizedType.ToString());  // normalization formula
    config.Add("IgSelectionType", gui.IgSelectionType.ToString());   // IG by threshold or by voxel amount
    config.Add("NudMovingWindow", gui.NudMovingWindow.ToString());   // moving window

    XMLSerializer.serializeArrayToFile<Dictionary<string, string>>(gui.WorkDirectory + "config.xml", Preferences.Instance.configFile);
}
/// <summary>
/// Prepares the final multi-run data set: names and creates the final work directory from the
/// current preferences, concatenates the per-run vectors into it, and derives the combined
/// scaling parameters from the per-run param files. Two results are saved:
/// the per-feature MAX-of-maxes / MIN-of-mins over all runs, and the per-feature
/// "range from baseline median" values (file + XML + Preferences.Instance.medianRange).
/// </summary>
/// <param name="directoryList">
/// Run directories. Each entry is used as a path prefix for that run's
/// "TrainSet_{TR}th_vectors_scale_paramCS.libsvm" file, so it must end with a directory separator.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="directoryList"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="directoryList"/> is empty.</exception>
public static void WekaProcessingPipelineForMultiRuns(List<string> directoryList)
{
    // Fail before touching WorkDirectory or the file system. Previously an empty list only
    // failed later, with an index error, after the directory had already been created.
    if (directoryList == null)
    {
        throw new System.ArgumentNullException("directoryList");
    }
    if (directoryList.Count == 0)
    {
        throw new System.ArgumentException("At least one run directory is required.", "directoryList");
    }

    // Value passed to saveConfigMinMax_CSharp for both param files.
    // NOTE(review): presumably the max feature index (204800 features + 1 fake feature) — confirm.
    const int scaleParamMaxIndex = 204801;

    var gui = GuiPreferences.Instance;

    // ---- name the final directory after the settings that produced it ----
    gui.WorkDirectory = @"C:\FinalData_" + "TR" + gui.NudClassifyUsingTR.ToString() + "_"
        + gui.NormalizedType.ToString() + gui.NudMovingWindow.ToString() + "_";

    if (gui.IgSelectionType == IGType.Threshold)
    {
        gui.WorkDirectory += "IG_Thr" + gui.NudIGThreshold.ToString() + "_";
    }
    else if (gui.IgSelectionType == IGType.Voxels)
    {
        gui.WorkDirectory += "IG_Vox" + gui.NudIGVoxelAmount.ToString() + "_";
    }

    gui.WorkDirectory += Preferences.Instance.events.EventListLastTr.ToString();

    if (gui.CbPeekHigherTRsIGChecked == true)
    {
        gui.WorkDirectory += "_Peeking";
    }

    gui.setLog(@"Creating Final Directory in: " + gui.WorkDirectory);
    FileDirectoryOperations.CreateDirectory(gui.WorkDirectory);

    // From here on WorkDirectory is used as a path prefix, so it needs the trailing separator.
    gui.WorkDirectory += @"\";

    ConcatenateLibsvmVectorizedPerTR(directoryList);

    // ---- read the min/max param file of every run ----
    // NOTE: each run contributes one max and one min per feature, i.e. N maxes and N mins.
    // A median over these N values would not be a true median of the data; if a real median is
    // ever needed it belongs in the normalization class, computed over all samples of all runs.
    int runCount = directoryList.Count;
    double[][] feature_max = new double[runCount][];
    double[][] feature_min = new double[runCount][];
    int max_index = -1;
    string paramFileName = "TrainSet_" + gui.NudClassifyUsingTR.ToString() + "th_vectors_scale_paramCS.libsvm";

    for (int run = 0; run < runCount; run++)
    {
        // Uses the param file of the TR currently selected for classification.
        TrainingTesting_SharedVariables._svmscaleTraining.getConfigFileMinMaxValues(
            directoryList[run] + paramFileName,
            ref feature_max[run],
            ref feature_min[run],
            ref max_index);
    }

    int slotCount = feature_max[0].Length;
    double[] finalFeature_max = new double[slotCount];
    double[] finalFeature_min = new double[slotCount];
    double[] finalFeature_medianMax = new double[slotCount];
    double[] finalFeature_medianMin = new double[slotCount];

    // ---- MAX(maxes) and MIN(mins) per feature over all runs ----
    // These become the initial min/max values for the testing stage.
    double[] perRunMax = new double[runCount];
    double[] perRunMin = new double[runCount];
    for (int j = 0; j < slotCount; j++)
    {
        for (int k = 0; k < runCount; k++)
        {
            perRunMax[k] = feature_max[k][j];
            perRunMin[k] = feature_min[k][j];
        }
        finalFeature_max[j] = perRunMax.Max();
        finalFeature_min[j] = perRunMin.Min();
    }

    string outputPrefix = gui.WorkDirectory + "TrainSet_" + gui.NudClassifyUsingTR.ToString();

    // Save the combined min/max param file.
    TrainingTesting_SharedVariables._svmscaleTraining.saveConfigMinMax_CSharp(
        outputPrefix + "th_vectors_scale_paramCS.libsvm",
        finalFeature_min, finalFeature_max, scaleParamMaxIndex, 0.0f, 1.0f);

    // ---- ranges measured from the training baseline median ----
    // low range  = 2nd smallest over runs of (training baseline median - training min)
    // high range = 2nd smallest over runs of (training max - training baseline median)
    // At test time: min = test baseline median - low range, max = test baseline median + high range.
    var baselineMedians = Preferences.Instance.TrainingBaselineMedians;
    var values_medianMax = new List<double>(new double[runCount]);
    var values_medianMin = new List<double>(new double[runCount]);
    int lowRangeMinus = 0;
    int highRangeMinus = 0;

    // Slot 0 and the last slot are skipped, and the baseline medians are offset by one (median[j - 1]).
    // NOTE(review): presumably slot 0 is unused (1-based indices) and the last slot is the fake feature — confirm.
    for (int j = 1; j < slotCount - 1; j++)
    {
        for (int k = 0; k < runCount; k++)
        {
            var baseline = baselineMedians[k].median[j - 1];
            values_medianMax[k] = feature_max[k][j] - baseline;
            values_medianMin[k] = baseline - feature_min[k][j];
        }

        double chosenLowRange = getSecondLowest(values_medianMin);
        double chosenHighRange = getSecondLowest(values_medianMax);

        finalFeature_medianMin[j] = chosenLowRange;
        finalFeature_medianMax[j] = chosenHighRange;

        // Count non-positive ranges for the summary log below.
        if (chosenLowRange <= 0)
        {
            lowRangeMinus++;
        }
        if (chosenHighRange <= 0)
        {
            highRangeMinus++;
        }
    }

    // For verification only: this text file is not meant to be consumed by the pipeline.
    TrainingTesting_SharedVariables._svmscaleTraining.saveConfigMinMax_CSharp(
        outputPrefix + "th_vectors_scale_MedianRangeFromBaseline.params.txt",
        finalFeature_medianMin, finalFeature_medianMax, scaleParamMaxIndex, 0.0f, 1.0f);

    Preferences.Instance.medianRange = new MinMax(finalFeature_medianMin, finalFeature_medianMax);
    XMLSerializer.serializeArrayToFile<MinMax>(
        outputPrefix + "th_vectors_MedianRangeFromBaseline.xml",
        Preferences.Instance.medianRange);

    gui.setLog("out of 204K features, low range <= 0: " + lowRangeMinus.ToString() + " && high range <= 0: " + highRangeMinus.ToString());
}
/// <summary>
/// Exports the problem to a LibSVM file in the work directory, splits it into per-TR files,
/// scales them, converts the TR3/TR4 scaled files to ARFF, runs information-gain selection on the
/// TR4 ARFF, and then filters the ARFF of the TR chosen for classification down to the selected
/// features. The selected indices and the filtered data set are saved to the work directory.
/// </summary>
/// <param name="_trialProblem">Problem to export. When it is null or has no samples the export is aborted.</param>
/// <returns>The filtered instances, or null when there was nothing to export.</returns>
public static Instances WekaPipeline_Unprocessed(libSVM_ExtendedProblem _trialProblem)
{
    var gui = GuiPreferences.Instance;

    // Nothing to export: a missing problem is handled like a problem without samples.
    if (_trialProblem == null || _trialProblem.samples == null)
    {
        gui.setLog("Export Failed: Problem has no samples!");
        return null;
    }

    // Export the original problem to a LibSVM file.
    string trainFileName = gui.WorkDirectory + "TrainSet";
    _trialProblem.Save(trainFileName + ".libsvm");
    gui.setLog("saved Original Problem LibSVM file: " + trainFileName + ".libsvm");

    // Split the data set into the 3rd and 4th TR files.
    KthExtractionManager.ExecuteSelectKthVectorScript("TrainSet", gui.WorkDirectory);
    gui.setLog("Created TR3 & TR4 files");

    // Normalize the 3rd and 4th TR files.
    NormalizationManager.ScaleTrFiles(gui.WorkDirectory);
    gui.setLog("Normalized TR3 & TR4 files");

    // Convert TR3 and TR4 to ARFF with 204800 features. Per the original note, this drops the
    // fake feature 204801 that was added to guarantee 204800 features in the ARFF file.
    foreach (string scaledFile in new[] { "TrainSet_3th_vectors_scaledCS.libsvm", "TrainSet_4th_vectors_scaledCS.libsvm" })
    {
        if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(gui.WorkDirectory + scaledFile, 204800))
        {
            gui.setLog("Converted to ARFF: " + scaledFile);
        }
    }

    // ---- select the top IG features on TR4 ("the trick": they are then applied to another TR) ----
    ConverterUtils.DataSource source = new ConverterUtils.DataSource(gui.WorkDirectory + "TrainSet_4th_vectors_scaledCS.libsvm.arff");
    Instances data = source.getDataSet();

    // Use the last attribute as the class when none is set.
    if (data.classIndex() == -1)
    {
        data.setClassIndex(data.numAttributes() - 1);
    }

    // IG needs a nominal class: convert the last attribute when it is not nominal.
    if (!data.classAttribute().isNominal())
    {
        var filter = new weka.filters.unsupervised.attribute.NumericToNominal();
        filter.setOptions(weka.core.Utils.splitOptions("-R last"));
        filter.setInputFormat(data);
        data = Filter.useFilter(data, filter);
    }

    WekaTrainingMethods.useLowLevelInformationGainFeatureSelection(data);
    TrainingTesting_SharedVariables._trainTopIGFeatures = Preferences.Instance.attsel.selectedAttributes();
    var topIGFeatures = TrainingTesting_SharedVariables._trainTopIGFeatures;

    // The fast vector is sized after the selection; this should be done once.
    Preferences.Instance.fastvector = RealTimeProcessing.CreateFastVector(topIGFeatures.Length);
    gui.setLog("created fast vector of length " + topIGFeatures.Length.ToString());

    // NOTE(review): selection runs on the hard-coded TR4 file, while everything below uses
    // NudClassifyUsingTR — confirm this is intended for every NudClassifyUsingTR value.
    string classifyTrPrefix = gui.WorkDirectory + "TrainSet_" + gui.NudClassifyUsingTR.ToString();

    // Save the selected indices, in the order IG returned them.
    XMLSerializer.serializeArrayToFile(classifyTrPrefix + "th_vectors_scaledCS_filteredIG_indices.xml", topIGFeatures);
    gui.setLog("saved IG indices to a file (in the same order as IG gave it)");
    gui.setLog(topIGFeatures.Length.ToString() + " features above zero value selected (including the Class feature)");

    // ---- filter the classification TR down to the selected features ----
    source = new ConverterUtils.DataSource(classifyTrPrefix + "th_vectors_scaledCS.libsvm.arff");
    data = source.getDataSet();
    data = WekaTrainingMethods.useRemoveFilter(data, topIGFeatures, true);

    // After filtering, the last attribute has to be the class again.
    if (data.classIndex() == -1)
    {
        data.setClassIndex(data.numAttributes() - 1);
    }

    // Save the filtered data set.
    WekaCommonFileOperation.SaveLIBSVM(data, classifyTrPrefix + "th_vectors_scaledCS_filteredIG");

    return data;
}