Example 1
        private void DetermineFeature()
        {
            // Pick the feature with the highest information gain.
            Feature = InformationGains.OrderByDescending(x => x.Value).First().Key;

            // If every gain is zero, fall back to a feature that has not been taken yet;
            // the loop below leaves Feature set to the last untaken index.
            if (InformationGains.All(x => x.Value == 0))
            {
                for (int i = 1; i < 67693; i++)
                {
                    if (!FeaturesTaken.Contains(i))
                    {
                        Feature = i;
                    }
                }
            }
        }
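If the intent of the zero-gain fallback above is simply to pick any feature that has not been used yet, a shorter version could look like the sketch below. FirstUntakenFeature is a hypothetical helper that is not part of the original code; it assumes FeaturesTaken is a collection of int, that using System.Linq; is in scope, and that at least one of the 67692 feature indices is still untaken. Note that it returns the first such index, whereas the loop above keeps the last.

        // Hypothetical alternative to the zero-gain fallback: return the first
        // feature index in 1..67692 that is not in FeaturesTaken, instead of
        // scanning the whole range and keeping the last one.
        private int FirstUntakenFeature()
        {
            return Enumerable.Range(1, 67692).First(i => !FeaturesTaken.Contains(i));
        }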
Example 2
        private void SetInformationGain()
        {
            // Counts<true label><feature value> over the 67692 features,
            // e.g. Counts10 = true label +1, feature value -1.
            Dictionary<int, double> Counts11 = new Dictionary<int, double>();
            Dictionary<int, double> Counts10 = new Dictionary<int, double>();
            Dictionary<int, double> Counts01 = new Dictionary<int, double>();
            Dictionary<int, double> Counts00 = new Dictionary<int, double>();
            double Positive_Labels            = 0;
            double Negative_Labels            = 0;

            foreach (var example in TrainingData)
            {
                if (example.Sign == 1)
                {
                    for (int i = 1; i < 67693; i++)
                    {
                        if (example.Vector.ContainsKey(i)) //This means that the feature is +1 and the true label is +1
                        {
                            if (Counts11.ContainsKey(i))
                            {
                                Counts11[i] = Counts11[i] + 1;
                            }
                            else
                            {
                                Counts11.Add(i, 1);
                            }
                        }
                        else //This means that the feature is -1 and the true label is +1
                        {
                            if (Counts10.ContainsKey(i))
                            {
                                Counts10[i] = Counts10[i] + 1;
                            }
                            else
                            {
                                Counts10.Add(i, 1);
                            }
                        }
                    }
                    Positive_Labels++;
                }
                else
                {
                    for (int i = 1; i < 67693; i++)
                    {
                        if (example.Vector.ContainsKey(i)) //This means that the feature is +1 and the true label is -1
                        {
                            if (Counts01.ContainsKey(i))
                            {
                                Counts01[i] = Counts01[i] + 1;
                            }
                            else
                            {
                                Counts01.Add(i, 1);
                            }
                        }
                        else //This means that the feature is -1 and the true label is -1
                        {
                            if (Counts00.ContainsKey(i))
                            {
                                Counts00[i] = Counts00[i] + 1;
                            }
                            else
                            {
                                Counts00.Add(i, 1);
                            }
                        }
                    }
                    Negative_Labels++;
                }
            }

            if (!Naive_Bayes) // Decision tree: compute the information gain of every feature
            {
                for (int i = 1; i < 67693; i++)
                {
                    double PosLabel_PosFeature = Counts11.ContainsKey(i) ? Counts11[i] : 0;
                    double NegLabel_PosFeature = Counts01.ContainsKey(i) ? Counts01[i] : 0;
                    double PosLabel_NegFeature = Counts10.ContainsKey(i) ? Counts10[i] : 0;
                    double NegLabel_NegFeature = Counts00.ContainsKey(i) ? Counts00[i] : 0;
                    InformationGains.Add(i, CalculateInformationGain(Positive_Labels, Negative_Labels, PosLabel_PosFeature, NegLabel_PosFeature, PosLabel_NegFeature, NegLabel_NegFeature));
                }
            }
            #region Naive Bayes
            else // Naive Bayes: smooth the counts into conditional probabilities and classify
            {
                double Prob_Yes = Positive_Labels / TrainingData.Count;
                double Prob_No  = Negative_Labels / TrainingData.Count;
                // Each feature takes two values (S_i = 2), hence the factor of 2
                // in the Laplace-smoothing denominators.
                double bottom_Pos = Positive_Labels + (2 * Smoothing_Term);
                double bottom_Neg = Negative_Labels + (2 * Smoothing_Term);
                // Convert the raw co-occurrence counts into Laplace-smoothed
                // conditional probabilities P(feature value | label).
                for (int i = 1; i < 67693; i++)
                {
                    if (Counts11.ContainsKey(i))
                    {
                        Counts11[i] = (Counts11[i] + Smoothing_Term) / bottom_Pos;
                    }
                    else
                    {
                        Counts11.Add(i, Smoothing_Term / bottom_Pos);
                    }

                    if (Counts10.ContainsKey(i))
                    {
                        Counts10[i] = (Counts10[i] + Smoothing_Term) / bottom_Pos;
                    }
                    else
                    {
                        Counts10.Add(i, Smoothing_Term / bottom_Pos);
                    }

                    if (Counts01.ContainsKey(i))
                    {
                        Counts01[i] = (Counts01[i] + Smoothing_Term) / bottom_Neg;
                    }
                    else
                    {
                        Counts01.Add(i, Smoothing_Term / bottom_Neg); // negative-label count: use the negative-label denominator
                    }

                    if (Counts00.ContainsKey(i))
                    {
                        Counts00[i] = (Counts00[i] + Smoothing_Term) / bottom_Neg;
                    }
                    else
                    {
                        Counts00.Add(i, Smoothing_Term / bottom_Neg); // negative-label count: use the negative-label denominator
                    }
                }
                int correct_values = 0;
                int poss           = 0;
                int inff           = 0;
                // Score the training data with the smoothed model; summing logs avoids underflow.
                foreach (var example in TrainingData)
                {
                    double Pos = Math.Log10(Prob_Yes); // * double.MaxValue * 1.5;
                    double Neg = Math.Log10(Prob_No);  // * double.MaxValue * 1.5;
                    for (int i = 1; i < 67693; i++)
                    {
                        if (example.Vector.ContainsKey(i))       // Feature value is +1
                        {
                            Pos = Pos + Math.Log10(Counts11[i]); // * 1.022;
                            Neg = Neg + Math.Log10(Counts01[i]); // * 1.022;
                        }
                        else                                     // Feature value is -1
                        {
                            Pos = Pos + Math.Log10(Counts10[i]); // * 1.022;
                            Neg = Neg + Math.Log10(Counts00[i]); // * 1.022;
                        }
                    }
                    int yguess;
                    if (Pos == 0)
                    {
                        poss++;
                    }
                    if (Neg == 0)
                    {
                        poss++;
                    }
                    if (double.IsInfinity(Pos))
                    {
                        inff++;
                    }
                    if (double.IsInfinity(Neg))
                    {
                        inff++;
                    }
                    if (Pos >= Neg)
                    {
                        yguess = 1;
                    }
                    else
                    {
                        yguess = -1;
                    }
                    Labels.Add(yguess);
                    if (yguess == example.Sign)
                    {
                        correct_values++;
                    }
                }
                //Console.WriteLine("0: \t" + poss);
                //Console.WriteLine("inf: \t" + inff);

                Accuracy = correct_values / Convert.ToDouble(TrainingData.Count);

                // Score the test data with the same smoothed model.
                correct_values = 0;
                foreach (var example in TestData)
                {
                    double Pos = Math.Log10(Prob_Yes);
                    double Neg = Math.Log10(Prob_No);
                    for (int i = 1; i < 67693; i++)
                    {
                        if (example.Vector.ContainsKey(i)) // Feature value is +1
                        {
                            Pos = Pos + Math.Log10(Counts11[i]);
                            Neg = Neg + Math.Log10(Counts01[i]);
                        }
                        else // Feature value is -1
                        {
                            Pos = Pos + Math.Log10(Counts10[i]);
                            Neg = Neg + Math.Log10(Counts00[i]);
                        }
                    }
                    int yguess;
                    if (Pos >= Neg)
                    {
                        yguess = 1;
                    }
                    else
                    {
                        yguess = -1;
                    }
                    Test_Labels.Add(yguess);
                    if (yguess == example.Sign)
                    {
                        correct_values++;
                    }
                }
                Test_Accuracy = correct_values / Convert.ToDouble(TestData.Count);
            }
            #endregion
        }
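Example 2 calls CalculateInformationGain without showing its body. The sketch below is an assumption about what that helper typically computes: the standard entropy-based information gain built from the four label/feature counts, with parameters matching the argument order at the call site. Entropy is likewise a hypothetical helper; neither method is taken from the original code.

        // Assumed implementation: IG = H(label) - P(f=+1) * H(label | f=+1) - P(f=-1) * H(label | f=-1).
        private double CalculateInformationGain(double positiveLabels, double negativeLabels,
                                                double posLabelPosFeature, double negLabelPosFeature,
                                                double posLabelNegFeature, double negLabelNegFeature)
        {
            double total      = positiveLabels + negativeLabels;
            double posFeature = posLabelPosFeature + negLabelPosFeature; // examples where the feature is +1
            double negFeature = posLabelNegFeature + negLabelNegFeature; // examples where the feature is -1

            double baseEntropy = Entropy(positiveLabels, negativeLabels);
            double condEntropy = (posFeature / total) * Entropy(posLabelPosFeature, negLabelPosFeature)
                               + (negFeature / total) * Entropy(posLabelNegFeature, negLabelNegFeature);
            return baseEntropy - condEntropy;
        }

        // Binary entropy of a (positive, negative) split; 0 * log2(0) is treated as 0.
        private static double Entropy(double pos, double neg)
        {
            double total = pos + neg;
            if (total == 0) return 0;
            double pPos = pos / total;
            double pNeg = neg / total;
            return (pPos > 0 ? -pPos * Math.Log(pPos, 2) : 0)
                 + (pNeg > 0 ? -pNeg * Math.Log(pNeg, 2) : 0);
        }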