/// <summary>
/// Picks the feature to split on: the feature with the highest information
/// gain, or — when every gain is zero — the highest-indexed feature that has
/// not been taken yet (the same choice the original full ascending scan made).
/// </summary>
private void DetermineFeature()
{
    const int featureCount = 67692; // feature indices run 1..featureCount inclusive

    if (InformationGains.All(x => x.Value == 0))
    {
        // No feature is informative; fall back to an unused feature.
        // Scanning downward and stopping at the first hit returns the same
        // feature as the original ascending scan (which kept overwriting
        // Feature and ended on the last untaken index), but exits early.
        for (int i = featureCount; i >= 1; i--)
        {
            if (!FeaturesTaken.Contains(i))
            {
                Feature = i;
                return;
            }
        }
    }

    // Note: ties on the maximum gain resolve to whichever key the
    // dictionary happens to enumerate first.
    Feature = InformationGains.OrderByDescending(x => x.Value).First().Key;
}
// Feature indices are 1-based and run up to this count; an example's sparse
// Vector stores only the indices whose feature value is +1.
private const int FeatureCount = 67692;

/// <summary>
/// Tallies, per feature, how often each (true label, feature value) pair
/// occurs in <c>TrainingData</c>. In decision-tree mode (<c>Naive_Bayes</c>
/// false) it fills <c>InformationGains</c>; otherwise it trains a smoothed
/// Naive Bayes model and evaluates it, setting <c>Labels</c>/<c>Accuracy</c>
/// on the training data and <c>Test_Labels</c>/<c>Test_Accuracy</c> on the
/// test data.
/// </summary>
private void SetInformationGain()
{
    // countsXY: X = true label (1 = +1, 0 = -1), Y = feature value
    // (1 = index present in the sparse vector, 0 = absent). Keyed by feature.
    Dictionary<int, double> counts11 = new Dictionary<int, double>();
    Dictionary<int, double> counts10 = new Dictionary<int, double>();
    Dictionary<int, double> counts01 = new Dictionary<int, double>();
    Dictionary<int, double> counts00 = new Dictionary<int, double>();
    double positiveLabels = 0;
    double negativeLabels = 0;

    foreach (var example in TrainingData)
    {
        bool isPositive = example.Sign == 1;
        Dictionary<int, double> presentCounts = isPositive ? counts11 : counts01;
        Dictionary<int, double> absentCounts = isPositive ? counts10 : counts00;

        for (int i = 1; i <= FeatureCount; i++)
        {
            IncrementCount(example.Vector.ContainsKey(i) ? presentCounts : absentCounts, i);
        }

        if (isPositive) { positiveLabels++; } else { negativeLabels++; }
    }

    if (!Naive_Bayes)
    {
        // Decision-tree mode: score every feature by its information gain.
        // Argument order matches the original call: (P, N, P&+f, N&+f, P&-f, N&-f).
        for (int i = 1; i <= FeatureCount; i++)
        {
            InformationGains.Add(i, CalculateInformationGain(
                positiveLabels, negativeLabels,
                CountOrZero(counts11, i), CountOrZero(counts01, i),
                CountOrZero(counts10, i), CountOrZero(counts00, i)));
        }
        return;
    }

    // ----- Naive Bayes -----
    double probYes = positiveLabels / TrainingData.Count;
    double probNo = negativeLabels / TrainingData.Count;

    // Laplace smoothing denominators; every feature is binary, so Si = 2.
    double bottomPos = positiveLabels + (2 * Smoothing_Term);
    double bottomNeg = negativeLabels + (2 * Smoothing_Term);

    // Convert raw counts into smoothed conditional probabilities in place.
    for (int i = 1; i <= FeatureCount; i++)
    {
        ApplySmoothing(counts11, i, Smoothing_Term, bottomPos);
        ApplySmoothing(counts10, i, Smoothing_Term, bottomPos);
        // BUG FIX: the original divided the never-seen branch of both
        // negative-class tables by bottom_Pos; the negative-class
        // denominator (bottomNeg) is the correct normalizer here.
        ApplySmoothing(counts01, i, Smoothing_Term, bottomNeg);
        ApplySmoothing(counts00, i, Smoothing_Term, bottomNeg);
    }

    // Training-set evaluation.
    int correct = 0;
    foreach (var example in TrainingData)
    {
        var vector = example.Vector;
        int guess = PredictSign(i => vector.ContainsKey(i), probYes, probNo,
            counts11, counts10, counts01, counts00);
        Labels.Add(guess);
        if (guess == example.Sign) { correct++; }
    }
    Accuracy = correct / Convert.ToDouble(TrainingData.Count);

    // Test-set evaluation.
    correct = 0;
    foreach (var example in TestData)
    {
        var vector = example.Vector;
        int guess = PredictSign(i => vector.ContainsKey(i), probYes, probNo,
            counts11, counts10, counts01, counts00);
        Test_Labels.Add(guess);
        if (guess == example.Sign) { correct++; }
    }
    Test_Accuracy = correct / Convert.ToDouble(TestData.Count);
}

/// <summary>Adds 1 to <paramref name="counts"/>[<paramref name="feature"/>], starting from 0 if missing.</summary>
private static void IncrementCount(Dictionary<int, double> counts, int feature)
{
    double current;
    counts.TryGetValue(feature, out current);
    counts[feature] = current + 1;
}

/// <summary>Returns the stored count for <paramref name="feature"/>, or 0 when absent (single lookup).</summary>
private static double CountOrZero(Dictionary<int, double> counts, int feature)
{
    double value;
    return counts.TryGetValue(feature, out value) ? value : 0;
}

/// <summary>
/// Replaces the raw count at <paramref name="feature"/> with its Laplace-smoothed
/// probability: (count + smoothing) / denominator, treating a missing entry as 0.
/// </summary>
private static void ApplySmoothing(Dictionary<int, double> counts, int feature, double smoothing, double denominator)
{
    counts[feature] = (CountOrZero(counts, feature) + smoothing) / denominator;
}

/// <summary>
/// Naive Bayes prediction in log space: sums log10 of the class prior and of
/// each feature's conditional probability, returning +1 when the positive
/// class scores at least as high as the negative class, otherwise -1.
/// </summary>
private static int PredictSign(
    Func<int, bool> hasFeature,
    double probYes, double probNo,
    Dictionary<int, double> probPosPresent, Dictionary<int, double> probPosAbsent,
    Dictionary<int, double> probNegPresent, Dictionary<int, double> probNegAbsent)
{
    double pos = Math.Log10(probYes);
    double neg = Math.Log10(probNo);
    for (int i = 1; i <= FeatureCount; i++)
    {
        if (hasFeature(i))
        {
            pos += Math.Log10(probPosPresent[i]);
            neg += Math.Log10(probNegPresent[i]);
        }
        else
        {
            pos += Math.Log10(probPosAbsent[i]);
            neg += Math.Log10(probNegAbsent[i]);
        }
    }
    return pos >= neg ? 1 : -1;
}