/// <summary>
        /// Scores one test example against the trained bucket counts and returns a
        /// log-domain score for outcome zero and for outcome one.
        /// </summary>
        /// <param name="testValues">
        /// Feature values of the example. Each value is combined with its position
        /// through <c>NaiveBayesDataTransform.BuildKey</c> to form the lookup key.
        /// </param>
        /// <param name="trainingEmails">Per-key zero/one counts collected during training.</param>
        /// <param name="totalProbabilityOne">
        /// Prior probability of outcome one; the prior of outcome zero is taken as
        /// <c>1 - totalProbabilityOne</c>.
        /// </param>
        /// <returns>
        /// A tuple whose <c>Item1</c> is the accumulated log-score for outcome zero and
        /// whose <c>Item2</c> is the accumulated log-score for outcome one.
        /// </returns>
        public static Tuple <double, double> ObtainProbabilityOfZeroAndOne(List <int> testValues, Dictionary <string, BucketCount> trainingEmails, double totalProbabilityOne)
        {
            // Additive (Laplace) smoothing constant. Feel free to change this #
            const double smoothingNum = 0.1;
            // Number of outcomes estimated per bucket (zero and one).
            const int outcomeCount = 2;

            double probabilityZero = 0, probabilityOne = 0;
            double totalProbabilityZero = 1 - totalProbabilityOne;

            for (int index = 0; index < testValues.Count; index++)
            {
                var    testValue = testValues[index];
                string key       = NaiveBayesDataTransform.BuildKey(index, testValue);

                // Look up how often this (position, value) pair was seen with each outcome.
                BucketCount bucketCount;
                if (!trainingEmails.TryGetValue(key, out bucketCount))
                {
                    // Key never seen in training: fall back to a fresh BucketCount
                    // (presumably zero counts -- confirm against BucketCount) so that
                    // only the smoothing term contributes.
                    bucketCount = new BucketCount();
                }

                // The smoothing constant is added once per outcome in the denominator so
                // the two estimates form a proper distribution (they sum to 1). Both
                // outcomes share this denominator, so the difference between the two
                // log-scores is the same as with any other common denominator.
                double smoothedTotal = bucketCount.ZeroCount + bucketCount.OneCount + outcomeCount * smoothingNum;

                double probabilityOfExampleBeingZero = (bucketCount.ZeroCount + smoothingNum) / smoothedTotal;
                double probabilityOfExampleBeingOne  = (bucketCount.OneCount + smoothingNum) / smoothedTotal;

                // Sum logs instead of multiplying probabilities to avoid floating-point underflow.
                probabilityZero += Math.Log(probabilityOfExampleBeingZero);
                probabilityOne  += Math.Log(probabilityOfExampleBeingOne);
            }

            // Fold in the class priors.
            probabilityZero += Math.Log(totalProbabilityZero);
            probabilityOne  += Math.Log(totalProbabilityOne);

            return(new Tuple <double, double>(probabilityZero, probabilityOne));
        }
// Example no. 2
// 0
        /// <summary>
        /// Builds the per-key outcome tallies for a set of training rows.
        /// </summary>
        /// <param name="rows">Training rows; each row exposes its <c>Values</c> and a boolean <c>Output</c>.</param>
        /// <returns>
        /// A dictionary mapping each key produced by <c>BuildKey(position, value)</c> to the
        /// <c>BucketCount</c> that received the <c>Output</c> of every row containing that pair.
        /// </returns>
        public static Dictionary <string, BucketCount> CountSamples(List <DataSetValue> rows)
        {
            var tallies = new Dictionary <string, BucketCount>();

            foreach (var sample in rows)
            {
                bool label = sample.Output;

                for (int position = 0; position < sample.Values.Count; position++)
                {
                    string key = BuildKey(position, sample.Values[position]);

                    // Fetch the tally for this key, creating and registering it on first sight.
                    BucketCount tally;
                    if (tallies.TryGetValue(key, out tally) == false)
                    {
                        tally = new BucketCount();
                        tallies.Add(key, tally);
                    }

                    // Record this row's outcome against the (position, value) pair.
                    tally.Add(label);
                }
            }

            return tallies;
        }