/// <summary>
/// Computes log-space scores for class "zero" and class "one" for one test example.
/// </summary>
/// <param name="testValues">Feature values of the example; each value is passed together with its index to <c>NaiveBayesDataTransform.BuildKey</c> to form a lookup key.</param>
/// <param name="trainingEmails">Per-key class counts looked up for every feature of the example.</param>
/// <param name="totalProbabilityOne">Prior probability of class "one"; the prior of class "zero" is taken as <c>1 - totalProbabilityOne</c>.</param>
/// <returns>A tuple whose first item is the summed log score for class zero and whose second item is the summed log score for class one.</returns>
public static Tuple<double, double> ObtainProbabilityOfZeroAndOne(List<int> testValues, Dictionary<string, BucketCount> trainingEmails, double totalProbabilityOne)
{
    const double smoothingNum = 0.1; // Feel free to change this #

    double priorZero = 1 - totalProbabilityOne;
    double logScoreZero = 0;
    double logScoreOne = 0;

    for (int position = 0; position < testValues.Count; position++)
    {
        string key = NaiveBayesDataTransform.BuildKey(position, testValues[position]);

        // Keys never seen in training fall back to a fresh BucketCount so that only the
        // smoothing term contributes (presumably zero counts -- confirm against BucketCount).
        BucketCount found;
        BucketCount counts = trainingEmails.TryGetValue(key, out found) ? found : new BucketCount();

        // NOTE(review): the smoothing constant is added once to the denominator, so the two
        // ratios below do not sum to exactly 1. Both share the same denominator.
        double denominator = counts.ZeroCount + counts.OneCount + smoothingNum;
        double ratioZero = (1.0 * counts.ZeroCount + smoothingNum) / denominator;
        double ratioOne = (1.0 * counts.OneCount + smoothingNum) / denominator;

        // Sum logarithms instead of multiplying the ratios directly.
        logScoreZero += Math.Log(ratioZero);
        logScoreOne += Math.Log(ratioOne);
    }

    // Fold in the class priors last.
    logScoreZero += Math.Log(priorZero);
    logScoreOne += Math.Log(totalProbabilityOne);

    return Tuple.Create(logScoreZero, logScoreOne);
}
/// <summary>
/// Tallies, for every (index, value) key produced by <c>BuildKey</c>, the outputs of the rows in which that key occurs.
/// </summary>
/// <param name="rows">Rows to tally; each row supplies an <c>Output</c> flag and an indexable <c>Values</c> collection.</param>
/// <returns>A dictionary mapping each key built from the rows to its <c>BucketCount</c>.</returns>
public static Dictionary<string, BucketCount> CountSamples(List<DataSetValue> rows)
{
    var tallies = new Dictionary<string, BucketCount>();

    foreach (var sample in rows)
    {
        // Read the row's output once; it is recorded against every key of this row.
        bool label = sample.Output;

        for (int column = 0; column < sample.Values.Count; column++)
        {
            string key = BuildKey(column, sample.Values[column]);

            BucketCount tally;
            if (!tallies.TryGetValue(key, out tally))
            {
                // First time this key is seen: register a new bucket before counting.
                tally = new BucketCount();
                tallies[key] = tally;
            }

            tally.Add(label);
        }
    }

    return tallies;
}