/// <summary>
/// Creates a node with no parent and immediately grows its subtree from the
/// supplied training data by delegating to <c>BuildTree</c>.
/// </summary>
/// <param name="attributes">Attribute matrix, forwarded unchanged to <c>BuildTree</c>.</param>
/// <param name="labels">Class labels, forwarded unchanged to <c>BuildTree</c>.</param>
public DecisionTreeImplementation(string[,] attributes, List<string> labels)
{
    // A freshly constructed node starts detached; BuildTree re-points the
    // parent of every child it creates once that child has been constructed.
    this.parent = null;
    this.BuildTree(attributes, labels);
}
/// <summary>
/// Recursively grows the subtree rooted at this node: records the majority class,
/// picks the attribute column with the highest gain ratio, and creates one child
/// node per distinct value of that column.
/// </summary>
/// <param name="attributes">
/// Attribute matrix; the second dimension is iterated as the candidate attribute columns.
/// NOTE(review): rows are presumably instances aligned index-for-index with
/// <paramref name="labels"/> - confirm against the helper implementations.
/// </param>
/// <param name="labels">Class label of each instance.</param>
private void BuildTree(string[,] attributes, List<string> labels)
{
    int numInstances = labels.Count;
    double nodeInformation = numInstances * CalcularEntropia(labels);
    this.majorityClass = ObtenerValorMasFrecuente(labels);

    // Stop when the labels carry no information or the matrix has no rows/columns left.
    if (nodeInformation == 0 || attributes.GetLength(0) == 0 || attributes.GetLength(1) == 0)
    {
        isLeaf = true;
        return;
    }

    int bestAttributeId = -1;
    string bestAttribute = "";
    double bestInformationGain = -1;
    double bestGainRatio = -1;

    int columnCount = attributes.GetLength(1);
    for (int column = 0; column < columnCount; column++)
    {
        // Fetched once per column instead of once per distinct value
        // (assumes the helper is a pure read of the matrix).
        List<string> columnValues = ObtenerValoresColumna(attributes, column);
        string[] uniqueValues = ObtenerValoresUnicos(attributes, column);

        // Sum of (subset size * subset label entropy) over every value of this column.
        double conditionalInfo = 0;
        foreach (string value in uniqueValues)
        {
            // Ids of all instances whose value in this column equals `value`.
            List<int> ids = IdentificarOcurrencias(columnValues, value);
            conditionalInfo += ids.Count * CalcularEntropia(ObtenerListaValoresFiltrada(labels, ids));
        }

        double attributeInformationGain = nodeInformation - conditionalInfo;

        // When the column entropy is 0 this yields NaN (0/0); NaN never compares
        // greater than the current best, so such a column is simply not selected.
        double gainRatio = attributeInformationGain / CalcularEntropia(columnValues);

        if (gainRatio > bestGainRatio)
        {
            bestInformationGain = attributeInformationGain;
            bestGainRatio = gainRatio;
            // NOTE(review): this reads row 0 of the column as the attribute's name - confirm
            // that row 0 really holds names, since child matrices are row-filtered.
            bestAttribute = attributes[0, column];
            bestAttributeId = column;
        }
    }

    // If no attribute provides any gain, this node cannot be split further.
    // This also covers the case where no column was selected at all (every ratio was NaN),
    // which previously fell through and indexed column -1, and slightly negative ratios
    // caused by floating-point rounding.
    if (bestAttributeId < 0 || bestGainRatio <= 0)
    {
        isLeaf = true;
        return;
    }

    // Otherwise split by the best attribute.
    this.bestAttributeId = bestAttributeId;
    this.bestAttribute = bestAttribute;
    this.nodeGainRatio = bestGainRatio;
    this.nodeInformationGain = bestInformationGain;

    // Both are identical for every child, so compute them once outside the loop.
    List<string> bestColumnValues = ObtenerValoresColumna(attributes, bestAttributeId);
    string[,] remainingAttributes = EliminarColumna(attributes, bestAttributeId);

    foreach (string value in ObtenerValoresUnicos(attributes, bestAttributeId))
    {
        List<int> ids = IdentificarOcurrencias(bestColumnValues, value);

        // Each child sees only the matching instances, without the column just used.
        string[,] childAttributes = ObtenerMatrizValoresFiltrada(remainingAttributes, ids);
        List<string> childLabels = ObtenerListaValoresFiltrada(labels, ids);

        DecisionTreeImplementation child = new DecisionTreeImplementation(childAttributes, childLabels);
        child.parent = this;
        this.children.Add(value, child);
    }
}