/// <summary>
/// Recursively fills <paramref name="currentNode"/> and its descendants with a decision tree
/// built from <paramref name="rows"/>.
/// </summary>
/// <remarks>
/// The last column of <c>Dt</c> is used as the decision column. The node becomes a decision leaf
/// when <c>CalculateGain</c> returns exactly 0 for the decision counts of the matching rows, or
/// when the number of fixed attributes equals the number of columns returned by
/// <c>GetColumnsWithoutLast</c>. Otherwise the not-yet-fixed column with the highest computed gain
/// becomes the node's attribute, and one child per distinct value of that column is built recursively.
/// </remarks>
/// <param name="currentNode">Node to populate; its Attribute, IsDecision and LinkedNodes are written.</param>
/// <param name="searchedColumns">Column name / value pairs already fixed on the path to this node.</param>
/// <param name="rows">The unfiltered rows; the same list is handed on to every recursive call.</param>
public static void BulidTree(DecisionTreeNode currentNode, Dictionary<string, string> searchedColumns, List<DataRow> rows)
{
    string decColumn = Dt.Columns[Dt.Columns.Count - 1].ColumnName;

    // Every decision value that occurs in the unfiltered rows.
    List<string> decisions = rows.Select(d => d.Field<string>(decColumn)).Distinct().ToList();

    // Filter the table according to the attributes fixed by the ancestors (single pass, one copy).
    List<DataRow> tempRows = rows
        .Where(row => searchedColumns.All(data => (string)row[data.Key] == data.Value))
        .ToList();

    // Compute I(X) for the rows that reach this node.
    var decisionCount = CountRowsPerDecision(tempRows, decColumn, decisions);
    double iValue = CalculateGain(decisionCount);

    // Stop condition 1: I(X) is exactly 0 -> take the first decision that has a non-zero count.
    // NOTE(review): if no row matched the filter every count may be zero and the decision is null - confirm intended.
    if (iValue == 0)
    {
        currentNode.Attribute = decisionCount.Where(i => i.Value != 0).Select(i => i.Key).FirstOrDefault();
        currentNode.IsDecision = true;
        return;
    }

    // Presumably the attribute (non-decision) columns; fetched once instead of once per use.
    var attributeColumns = GetColumnsWithoutLast();

    // Stop condition 2: every attribute is already fixed -> take the first decision with the highest count.
    if (attributeColumns.Count == searchedColumns.Count)
    {
        string majority = null;
        int majorityCount = 0;
        bool found = false;
        foreach (var pair in decisionCount)
        {
            // Strictly greater keeps the first entry among ties.
            if (!found || pair.Value > majorityCount)
            {
                majority = pair.Key;
                majorityCount = pair.Value;
                found = true;
            }
        }

        currentNode.Attribute = majority;
        currentNode.IsDecision = true;
        return;
    }

    // Attribute selection: compute the gain of every column that is not fixed yet.
    Dictionary<string, double> atrrGains = new Dictionary<string, double>();
    foreach (string column in attributeColumns.Where(i => !searchedColumns.ContainsKey(i)))
    {
        // Candidate values come from the unfiltered rows, so some may not occur in tempRows.
        var values = rows.Select(d => d.Field<string>(column)).Distinct();

        List<GainData> gainWithTotalValues = new List<GainData>();
        foreach (var value in values)
        {
            var decisionCountForAttributes = CountRowsPerDecision(
                tempRows.Where(i => (string)i[column] == value), decColumn, decisions);

            double maxCount = decisionCountForAttributes.Sum(i => i.Value);
            gainWithTotalValues.Add(new GainData
            {
                TotalValues = maxCount,
                Gain = CalculateGain(decisionCountForAttributes)
            });
        }

        // The grand total is loop-invariant, so it is summed once rather than once per partition.
        var grandTotal = gainWithTotalValues.Sum(i => i.TotalValues);

        double weightedGain = 0;
        foreach (var val in gainWithTotalValues)
        {
            weightedGain += (val.TotalValues / grandTotal) * val.Gain;
        }

        atrrGains.Add(column, iValue - weightedGain);
    }

    // First column holding the maximum gain. NaN gains are never selected while a comparable gain exists.
    // NOTE(review): if every gain is NaN bestAttribute stays null and the row lookup below will throw -
    // confirm CalculateGain cannot return NaN.
    string bestAttribute = null;
    double bestGain = double.NaN;
    foreach (var pair in atrrGains)
    {
        bool better = bestAttribute == null ? !double.IsNaN(pair.Value) : pair.Value > bestGain;
        if (better)
        {
            bestAttribute = pair.Key;
            bestGain = pair.Value;
        }
    }

    currentNode.Attribute = bestAttribute;
    currentNode.IsDecision = false;

    // One child per value of the chosen attribute, taken from the unfiltered rows.
    // NOTE(review): a value that does not occur among the rows matching this node still gets a child - confirm intended.
    foreach (var value in rows.Select(d => d.Field<string>(bestAttribute)).Distinct())
    {
        var tempSearched = new Dictionary<string, string>(searchedColumns) { { bestAttribute, value } };

        DecisionTreeNode child = new DecisionTreeNode();
        currentNode.LinkedNodes.Add(value, child);
        BulidTree(child, tempSearched, rows);
    }
}

/// <summary>
/// Counts the given rows per value of the decision column and passes the result through
/// <c>NormalizeDictionary</c> together with the full list of decision values.
/// </summary>
/// <remarks>
/// NOTE(review): NormalizeDictionary presumably adds a zero entry for every decision that is absent
/// from <paramref name="source"/> - confirm against its definition.
/// </remarks>
private static Dictionary<string, int> CountRowsPerDecision(IEnumerable<DataRow> source, string decColumn, List<string> decisions)
{
    var counts = source
        .GroupBy(row => row.Field<string>(decColumn))
        .ToDictionary(g => g.Key, g => g.Count());
    NormalizeDictionary(counts, decisions);
    return counts;
}
/// <summary>
/// Replaces <c>DecisionTreeRoot</c> with a new node and builds the tree into it,
/// starting with no fixed attributes and every row of <c>Dt</c>.
/// </summary>
public static void BeginBulidTree()
{
    DecisionTreeRoot = new DecisionTreeNode();

    // No attribute is fixed at the root.
    var noFixedAttributes = new Dictionary<string, string>();

    // Snapshot of all table rows; this list is passed to BulidTree.
    List<DataRow> allRows = Dt.Rows.Cast<DataRow>().ToList();

    BulidTree(DecisionTreeRoot, noFixedAttributes, allRows);
}