/// <summary>
/// Recursively builds the tree node for the given rows: stores the target-class
/// probabilities and their entropy on the node and, when the entropy is positive and
/// an unused feature is available, splits the rows by that feature's values and
/// builds one child per distinct value.
/// </summary>
/// <param name="data">Rows as (feature values, target class) pairs.</param>
/// <param name="UsedFeatures">
/// Feature indices already chosen higher up in the tree. The index chosen for this
/// node is added while its children are built and removed again afterwards.
/// </param>
/// <returns>
/// The node for <paramref name="data"/>. Children is only assigned when the chosen
/// feature takes more than one distinct value in the rows.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public ID3Node GetID3Node(IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // Materialize once. The rows are enumerated many times below (probabilities, one
    // gain computation per feature, partitioning); with a deferred LINQ query every
    // one of those passes would re-run the whole upstream pipeline.
    ICollection<Tuple<List<String>, String>> rows =
        data as ICollection<Tuple<List<String>, String>> ?? data.ToList();

    ID3Node node = new ID3Node();
    node.TargetClassProbabilities = TargetClassProbabilities(rows);
    node.Entropy = Entropy(node.TargetClassProbabilities);

    // Written as a negated '>' so that a NaN entropy is treated like the original
    // 'Entropy > 0' test (no split).
    if (!(node.Entropy > 0))
    {
        return node;
    }

    node.DecisionVariableIndex = GetDecisionVariable(node, rows, UsedFeatures);
    if (node.DecisionVariableIndex == -1)
    {
        return node;
    }

    int splitIndex = node.DecisionVariableIndex;

    // One pass partitions the rows by the chosen feature's value. GroupBy yields the
    // groups in order of first appearance of each key and keeps source order inside
    // each group, which matches the previous Distinct + per-value Where filtering.
    var partitions = rows.GroupBy(row => row.Item1[splitIndex]).ToList();
    if (partitions.Count <= 1)
    {
        return node;
    }

    node.Children = new Dictionary<string, ID3Node>();
    UsedFeatures.Add(splitIndex);
    try
    {
        foreach (var partition in partitions)
        {
            // Hand a concrete list to the recursion so deeper levels do not stack
            // lazy filters on top of each other.
            node.Children[partition.Key] = GetID3Node(partition.ToList(), UsedFeatures);
        }
    }
    finally
    {
        // Restore the caller's list even if building a child throws.
        UsedFeatures.Remove(splitIndex);
    }

    return node;
}
/// <summary>
/// Picks the feature to split on: the feature index not listed in
/// <paramref name="UsedFeatures"/> for which InformationGain returns the largest value.
/// </summary>
/// <param name="node">Node being split; its Entropy is passed to InformationGain.</param>
/// <param name="data">
/// Rows as (feature values, target class) pairs. The number of features is taken from
/// the first row.
/// </param>
/// <param name="UsedFeatures">Feature indices that must not be chosen.</param>
/// <returns>
/// The index of the feature with the largest gain (the lowest index wins ties), or -1
/// when there is no first row, when UsedFeatures has at least as many entries as there
/// are features, or when no gain is comparable (every gain is NaN).
/// </returns>
public int GetDecisionVariable(ID3Node node, IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
{
    // FirstOrDefault instead of First: an empty data set means "nothing to split on"
    // rather than an InvalidOperationException.
    Tuple<List<String>, String> firstRow = data.FirstOrDefault();
    if (firstRow == null)
    {
        return -1;
    }

    int NumFeatures = firstRow.Item1.Count;
    if (UsedFeatures.Count >= NumFeatures)
    {
        return -1;
    }

    // Single pass keeping the best candidate so far. The strict '>' keeps the first
    // (lowest) index on ties, so the result no longer depends on Dictionary key
    // enumeration order, which is documented as undefined.
    int bestIndex = -1;
    double bestGain = double.NaN;
    for (int i = 0; i < NumFeatures; i++)
    {
        if (UsedFeatures.Contains(i))
        {
            continue;
        }

        double gain = InformationGain(node.Entropy, data, i);

        // NaN never compares greater, so it can only be "chosen" if nothing else is
        // available, in which case bestIndex stays -1, as with the old Max/== scan.
        bool isFirstComparable = bestIndex == -1 && !double.IsNaN(gain);
        if (isFirstComparable || gain > bestGain)
        {
            bestIndex = i;
            bestGain = gain;
        }
    }

    return bestIndex;
}
/// <summary>
/// Returns the index of the not-yet-used feature whose InformationGain over
/// <paramref name="data"/> is largest.
/// </summary>
/// <param name="node">Node being split; its Entropy is forwarded to InformationGain.</param>
/// <param name="data">
/// Rows as (feature values, target class) pairs; the first row determines how many
/// features there are.
/// </param>
/// <param name="UsedFeatures">Indices of features that are skipped.</param>
/// <returns>
/// The winning feature index (ties go to the lowest index), or -1 if there is no first
/// row, if UsedFeatures already holds as many entries as there are features, or if all
/// gains are NaN.
/// </returns>
public int GetDecisionVariable(ID3Node node, IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
{
    // An empty data set yields -1 instead of the InvalidOperationException that
    // First() would raise.
    Tuple<List<String>, String> firstRow = data.FirstOrDefault();
    if (firstRow == null)
    {
        return -1;
    }

    int NumFeatures = firstRow.Item1.Count;
    if (UsedFeatures.Count >= NumFeatures)
    {
        return -1;
    }

    int bestIndex = -1;
    double bestGain = double.NaN;

    // Track the running maximum directly. Only a strictly larger gain replaces the
    // current winner, so equal gains resolve to the lowest index deterministically
    // instead of relying on the (undefined) enumeration order of Dictionary keys.
    for (int i = 0; i < NumFeatures; i++)
    {
        if (UsedFeatures.Contains(i))
        {
            continue;
        }

        double gain = InformationGain(node.Entropy, data, i);

        // The first non-NaN gain always becomes the initial winner; NaN gains are
        // never selected, which leaves the result at -1 when every gain is NaN.
        if ((bestIndex == -1 && !double.IsNaN(gain)) || gain > bestGain)
        {
            bestIndex = i;
            bestGain = gain;
        }
    }

    return bestIndex;
}
/// <summary>
/// Builds a tree node for <paramref name="data"/> and, recursively, its children.
/// The node receives the target-class probabilities and their entropy; if the entropy
/// is positive and GetDecisionVariable returns a feature index, the rows are split by
/// that feature's values and a child is built for each distinct value.
/// </summary>
/// <param name="data">Rows as (feature values, target class) pairs.</param>
/// <param name="UsedFeatures">
/// Indices of features already used on the way down to this node. The index chosen
/// here is appended for the duration of the child construction and then removed.
/// </param>
/// <returns>
/// The constructed node. Children is left unassigned unless the chosen feature has
/// more than one distinct value among the rows.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public ID3Node GetID3Node(IEnumerable<Tuple<List<String>, String>> data, List<int> UsedFeatures)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // The rows are walked repeatedly in this method and its helpers. Snapshot them
    // once so a deferred query is not re-evaluated on each pass.
    ICollection<Tuple<List<String>, String>> rows =
        data as ICollection<Tuple<List<String>, String>> ?? data.ToList();

    ID3Node node = new ID3Node();
    node.TargetClassProbabilities = TargetClassProbabilities(rows);
    node.Entropy = Entropy(node.TargetClassProbabilities);

    if (node.Entropy > 0)
    {
        node.DecisionVariableIndex = GetDecisionVariable(node, rows, UsedFeatures);
        if (node.DecisionVariableIndex != -1)
        {
            int featureIndex = node.DecisionVariableIndex;

            // GroupBy splits the rows in a single pass; groups come out in order of
            // each value's first appearance, the same order Distinct() produced.
            var groups = rows.GroupBy(row => row.Item1[featureIndex]).ToList();
            if (groups.Count > 1)
            {
                node.Children = new Dictionary<string, ID3Node>();
                UsedFeatures.Add(featureIndex);
                try
                {
                    foreach (var group in groups)
                    {
                        // Pass a materialized subset so nested levels do not chain
                        // lazy filters back to the root.
                        node.Children[group.Key] = GetID3Node(group.ToList(), UsedFeatures);
                    }
                }
                finally
                {
                    // Undo the temporary mark even when a child fails to build.
                    UsedFeatures.Remove(featureIndex);
                }
            }
        }
    }

    return node;
}
/// <summary>
/// Reads a delimited text file, turns each line into a (features, target) row and
/// assigns the tree built from those rows to Root.
/// </summary>
/// <param name="TrainingDataFile">Path of the file to read.</param>
/// <param name="FieldSeparator">Character separating the columns within a line.</param>
/// <param name="ContainsHeader">When true, the first line of the file is skipped.</param>
/// <param name="TargetVariablePositionZeroBased">
/// Zero-based index of the column holding the target value; the same index is passed
/// to GetFeaturesAfterPruning together with the columns.
/// </param>
public void Train(String TrainingDataFile, char FieldSeparator, bool ContainsHeader, int TargetVariablePositionZeroBased)
{
    // Build the rows eagerly. Passing a deferred query to GetID3Node would repeat the
    // Split and GetFeaturesAfterPruning work for every row each time the tree builder
    // enumerates the data.
    List<Tuple<List<String>, String>> data = new List<Tuple<List<String>, String>>();

    foreach (String line in File.ReadAllLines(TrainingDataFile).Skip(ContainsHeader ? 1 : 0))
    {
        // An empty line (typically a trailing newline) carries no record; skipping it
        // avoids a bogus row or an out-of-range target index.
        if (line.Length == 0)
        {
            continue;
        }

        List<String> columns = line.Split(new[] { FieldSeparator }).ToList<String>();

        // Read the target before calling GetFeaturesAfterPruning, as the original
        // query did.
        String target = columns[TargetVariablePositionZeroBased];
        List<String> features = GetFeaturesAfterPruning(columns, TargetVariablePositionZeroBased);

        data.Add(new Tuple<List<String>, String>(features, target));
    }

    List<int> UsedFeatures = new List<int>();
    Root = GetID3Node(data, UsedFeatures);
}
/// <summary>
/// Loads the training file, converts every non-empty line into a (features, target)
/// row and stores the tree built from the rows in Root.
/// </summary>
/// <param name="TrainingDataFile">Path of the file to read.</param>
/// <param name="FieldSeparator">Character used to split each line into columns.</param>
/// <param name="ContainsHeader">When true, the first line of the file is skipped.</param>
/// <param name="TargetVariablePositionZeroBased">
/// Zero-based column index of the target value; also forwarded to
/// GetFeaturesAfterPruning along with the columns.
/// </param>
public void Train(String TrainingDataFile, char FieldSeparator, bool ContainsHeader, int TargetVariablePositionZeroBased)
{
    // ToList() at the end runs the projection exactly once. Without it the query is
    // deferred and GetID3Node would re-split and re-prune every line on each of its
    // many passes over the data.
    List<Tuple<List<String>, String>> data =
        (from line in File.ReadAllLines(TrainingDataFile).Skip(ContainsHeader ? 1 : 0)
         // Empty lines (e.g. a trailing newline) hold no record and are ignored.
         where line.Length > 0
         let columns = line.Split(new[] { FieldSeparator }).ToList<String>()
         let target = columns[TargetVariablePositionZeroBased]
         let features = GetFeaturesAfterPruning(columns, TargetVariablePositionZeroBased)
         select new Tuple<List<String>, String>(features, target)).ToList();

    List<int> UsedFeatures = new List<int>();
    Root = GetID3Node(data, UsedFeatures);
}