/// <summary>
/// Convert the input case to continuous space. This consists of two steps:
/// 1. Convert all discrete columns to continuous input space
/// 2. Scale the input columns to lie between 0 and 1
/// </summary>
/// <param name="inputCase">An input example</param>
/// <param name="doubleValues">The output in continuous n-dimensional space</param>
/// <param name="label">The label of the example</param>
/// <param name="attributeSize">The size of the n-dimensional space</param>
public void getValues(MiningCase inputCase, out double[] doubleValues, out uint label, out int attributeSize)
{
    // Save the input example in a dictionary keyed by attribute.
    SortedDictionary<uint, double> dict = new SortedDictionary<uint, double>();
    bool bContinue = inputCase.MoveFirst();
    while (bContinue)
    {
        if (inputCase.Value.IsDouble)
        {
            dict.Add(inputCase.Attribute, inputCase.DoubleValue);
        }
        else
        {
            dict.Add(inputCase.Attribute, inputCase.Value.Index);
        }
        bContinue = inputCase.MoveNext();
    }

    // The linked list collects all transformed values.
    LinkedList<double> values = new LinkedList<double>();
    label = 0;
    attributeSize = 0;

    // Loop through the dictionary, scale each input and store it in the linked list.
    SortedDictionary<uint, double>.Enumerator enumerator = dict.GetEnumerator();
    while (enumerator.MoveNext())
    {
        uint attribute = enumerator.Current.Key;
        double value = enumerator.Current.Value;
        if (!isNominal(attribute))
        {
            // Min-max scaling to [0, 1].
            double max = MarginalStats.GetAttributeStats(attribute).Max;
            double min = MarginalStats.GetAttributeStats(attribute).Min;
            value = (value - min) / (max - min);
            if (Double.IsNaN(value) || Double.IsInfinity(value))
            {
                // A constant column (max == min) divides by zero; map it to 0.
                value = 0;
            }
            // Every continuous attribute contributes exactly one dimension,
            // so the increment belongs outside the NaN/Infinity guard.
            attributeSize++;
            values.AddLast(value);
        }
        else
        {
            if (isTarget(attribute))
            {
                label = (uint)value;
            }
            else
            {
                // One-hot encode the nominal attribute: one dimension per state.
                for (uint i = 0; i < AttributeSet.GetAttributeStateCount(attribute); i++)
                {
                    values.AddLast(i == (uint)value ? 1 : 0);
                    attributeSize++;
                }
            }
        }
    }
    this.attributeSize = attributeSize;
    doubleValues = new double[values.Count];
    values.CopyTo(doubleValues, 0);
}
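// Illustrative sketch (not part of the plug-in itself): the two transformations that
// getValues applies, shown in isolation on plain values. The helper names
// ScaleToUnitInterval and OneHotEncode are hypothetical and introduced here only to
// make the mapping concrete; they rely on nothing beyond the System namespace.
private static double ScaleToUnitInterval(double value, double min, double max)
{
    // Min-max scaling to [0, 1]; a constant column (max == min) divides by zero,
    // which getValues maps to 0 via its NaN/Infinity guard.
    double scaled = (value - min) / (max - min);
    return (Double.IsNaN(scaled) || Double.IsInfinity(scaled)) ? 0 : scaled;
}

private static double[] OneHotEncode(uint stateIndex, uint stateCount)
{
    // A nominal attribute with k states becomes k binary dimensions,
    // exactly one of which is set to 1.
    double[] encoded = new double[stateCount];
    encoded[stateIndex] = 1;
    return encoded;
}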
/// <summary>
/// The actual training algorithm. Outline:
/// 1. Parse the parameters.
/// 2. Randomly determine which individuals are used for training.
/// 3. Read the individuals and store them in instances[].
/// 4. Learn all the classifiers with the one-vs-one method:
///    for each possible predicted value, learn a classifier against
///    every other possible predicted value.
/// </summary>
/// <param name="caseSet">The set of training examples</param>
/// <param name="trainingParams">The parameters passed to the algorithm</param>
protected override void InsertCases(PushCaseSet caseSet, MiningParameterCollection trainingParams)
{
    // Parse the parameters.
    parseParameters(trainingParams);

    // Determine the target attribute.
    TargetAttribute = getTargetAttribute();

    // Read the training examples and store them in the local cache.
    MyCaseProcessor processor = new MyCaseProcessor(this);
    caseSet.StartCases(processor);
    Instances[] instances = processor.instances; // the local caches, ordered by class

    // Determine the individuals that will be used for learning.
    if (maximumInput != 0 && (int)MarginalStats.GetTotalCasesCount() - maximumInput > 0)
    {
        // Count the classes with no examples so they don't dilute the per-class budget.
        int[] scatter = new int[instances.Length];
        int zeros = 0;
        for (int i = 0; i < scatter.Length; i++)
        {
            scatter[i] = instances[i].instances.Length;
            if (scatter[i] == 0)
            {
                zeros++;
            }
        }

        // The average number of examples per non-empty class.
        int average = (int)(maximumInput / (instances.Length - zeros));

        // Tracks whether all examples of a class are used for learning.
        bool[] fullUse = new bool[instances.Length];

        // The examples that will be fed to the SMO algorithm.
        Instances[] newInstances = new Instances[instances.Length];

        // The total number of training examples.
        int total = 0;

        // Subsample each class down to at most 'average' examples.
        for (int i = 0; i < newInstances.Length; i++)
        {
            // Create the training set.
            newInstances[i] = new Instances(new Instance[] { }, instances[i].labels, instances[i].attributeSize);

            // Randomize the examples before taking the first 'average' of them.
            instances[i].randomizeInstances(500);
            for (int j = 0; j < average && j < instances[i].instances.Length; j++)
            {
                newInstances[i].addInstance(instances[i].instances[j]);
                total++;
            }
            fullUse[i] = instances[i].instances.Length <= average;
        }
        instances = newInstances;
    }
    this.instances = instances;

    // Determine the number of labels; a dataset is built for each pair of labels.
    uint numberOfLabels = AttributeSet.GetAttributeStateCount(getTargetAttribute());
    int[] labels = getLabels();

    // For every pair of labels, learn a binary classifier.
    classifiers = new SMO.SMO[numberOfLabels][];
    for (int i = 0; i < numberOfLabels; i++)
    {
        classifiers[i] = new SMO.SMO[numberOfLabels];

        // Apply the binary learning against every later label.
        for (int j = i + 1; j < numberOfLabels; j++)
        {
            // Construct the data from the two class caches and randomize its order.
            Instances data = new Instances(new Instance[] { }, labels, instances[i].attributeSize);
            foreach (Instance instance in instances[i].instances)
            {
                data.addInstance(instance);
            }
            foreach (Instance instance in instances[j].instances)
            {
                data.addInstance(instance);
            }
            data.randomizeInstances(new Random().Next(500));
            data.label = j;

            // Create the kernel.
            Kernel kernel = getKernel(data);
            if (kernel == null)
            {
                throw new ApplicationException("Kernel not found");
            }

            // Create and train the classifier.
            classifiers[i][j] = new SMO.SMO();
            classifiers[i][j].C = C;
            classifiers[i][j].buildClassifier(data, i, j, kernel);
        }
    }
}
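// Illustrative sketch (not part of the training code above): with k labels, one-vs-one
// learning yields k*(k-1)/2 pairwise classifiers, stored here in the upper triangle of
// classifiers[i][j]. A common way to predict with them is majority voting, sketched
// below. The method is hypothetical: it assumes each pairwise SMO exposes some
// classifyInstance(example) call returning the winning label i or j, which is not
// confirmed by the code above.
private int predictByVoting(Instance example, uint numberOfLabels)
{
    int[] votes = new int[numberOfLabels];
    for (int i = 0; i < numberOfLabels; i++)
    {
        for (int j = i + 1; j < numberOfLabels; j++)
        {
            // Each pairwise classifier votes for one of its two labels.
            int winner = classifiers[i][j].classifyInstance(example); // hypothetical API
            votes[winner]++;
        }
    }

    // Return the label that won the most pairwise contests.
    int best = 0;
    for (int l = 1; l < numberOfLabels; l++)
    {
        if (votes[l] > votes[best])
        {
            best = l;
        }
    }
    return best;
}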