Beispiel #1
0
 /// <summary>
 /// Returns the analysis for this instance, building it on the first call and
 /// reusing the instance cached in <c>_analysis</c> on later calls.
 /// </summary>
 /// <returns>The cached analysis.</returns>
 public IDataTableAnalysis GetAnalysis()
 {
     // Fast path: the analysis has already been computed and cached.
     if (_analysis != null)
     {
         return _analysis;
     }

     // The cache is only assigned after processing has finished, so a
     // failure inside Process leaves the cache empty.
     var freshAnalysis = new DataTableAnalysis(this);
     Process(freshAnalysis);
     _analysis = freshAnalysis;

     return _analysis;
 }
Beispiel #2
0
        /// <summary>
        /// Builds a <see cref="NaiveBayes"/> model from the supplied table.
        /// </summary>
        /// <remarks>
        /// The table is processed twice: once to find the distinct values of the target
        /// column, and once to collect a <see cref="FrequencyAnalysis"/> per class value.
        /// For each class, every column reported by that analysis becomes either
        /// a categorical summary (per-category probability = frequency / total) or a
        /// continuous Gaussian summary (mean and variance). Categorical columns with a
        /// zero total and columns without a variance are left out of the summary.
        /// </remarks>
        /// <param name="table">The table to train on; its <c>TargetColumnIndex</c> identifies the class column.</param>
        /// <returns>A model holding one class summary (prior plus column summaries) per class label.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="table"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">
        /// The analysis exposes no distinct values for the class column (reported as "Too many class values").
        /// </exception>
        public static NaiveBayes Train(IDataTable table)
        {
            if (table == null)
            {
                throw new ArgumentNullException(nameof(table));
            }

            // analyse the table to get the set of class values
            var classColumnIndex = table.TargetColumnIndex;
            var analysis = new DataTableAnalysis(table, classColumnIndex);

            table.Process(analysis);

            var classInfo = analysis.ColumnInfo.Single();

            if (classInfo.DistinctValues == null)
            {
                throw new InvalidOperationException("Too many class values");
            }

            // analyse the data per class
            // The projection creates stateful FrequencyAnalysis objects, so it is materialised
            // once: each class label then maps to exactly one instance, however many times
            // the consumer enumerates the sequence.
            var classBasedFrequency = classInfo.DistinctValues
                .Select(cv => Tuple.Create<string, IRowProcessor>(cv.ToString(), new FrequencyAnalysis(table, classColumnIndex)))
                .ToList();
            var frequencyAnalysis = new ClassificationBasedRowProcessor(classBasedFrequency, classColumnIndex);

            table.Process(frequencyAnalysis);

            // create the per-class summaries from the frequency table
            var classList = new List<NaiveBayes.ClassSummary>();

            foreach (var classSummary in frequencyAnalysis.All)
            {
                var classLabel = classSummary.Item1;
                var frequency = (FrequencyAnalysis)classSummary.Item2;
                var columnList = new List<NaiveBayes.Column>();

                foreach (var column in frequency.ColumnInfo)
                {
                    if (column is FrequencyCollector categorical)
                    {
                        var total = (double)categorical.Total;

                        // a column with no observations for this class has no usable probabilities
                        if (total > 0)
                        {
                            var list = new List<NaiveBayes.CategorialProbability>();
                            foreach (var item in categorical.Frequency)
                            {
                                var categoryProbability = item.Value / total;
                                list.Add(new NaiveBayes.CategorialProbability
                                {
                                    Category = item.Key,
                                    LogProbability = Math.Log(categoryProbability),
                                    Probability = categoryProbability
                                });
                            }

                            columnList.Add(new NaiveBayes.Column
                            {
                                Type = NaiveBayes.ColumnType.Categorical,
                                ColumnIndex = categorical.ColumnIndex,
                                Probability = list
                            });
                        }
                    }
                    else
                    {
                        // anything that is not a NumberCollector yields a null variance and is skipped
                        var continuous = column as NumberCollector;
                        var variance = continuous?.Variance;
                        if (variance != null)
                        {
                            var mean = continuous.Mean;
                            columnList.Add(new NaiveBayes.Column
                            {
                                Type = NaiveBayes.ColumnType.ContinuousGaussian,
                                ColumnIndex = continuous.ColumnIndex,
                                Mean = mean,
                                Variance = variance.Value
                            });
                        }
                    }
                }

                var probability = frequencyAnalysis.GetProbability(classLabel);
                classList.Add(new NaiveBayes.ClassSummary
                {
                    Label = classLabel,
                    ColumnSummary = columnList,
                    LogPrior = Math.Log(probability),
                    Prior = probability
                });
            }

            return new NaiveBayes
            {
                Class = classList
            };
        }