Ejemplo n.º 1
0
        /// <summary>
        /// For every record in <paramref name="elements"/> whose value for <paramref name="attribute"/> is not
        /// one of <c>attribute.Values</c>, overwrites that value with one returned by a
        /// <c>ProbabilityPicker</c> built from the counts of the declared values found in the same records.
        /// </summary>
        /// <remarks>
        /// Original author's note: this method is called by multiple threads on the same list of elements and is
        /// not synchronized; it relies on each invocation writing only to the slot of its own attribute in each
        /// record. The only explicit synchronization here is the lock taken on <paramref name="outliers"/>.
        /// </remarks>
        /// <param name="attribute">Attribute whose values are counted and, where unknown, replaced.</param>
        /// <param name="elements">Records that are read and modified in place.</param>
        /// <param name="outliers">
        /// Optional list that receives <paramref name="attribute"/> when the picker reports that it is not
        /// significant; ignored when null.
        /// </param>
        public static void AssignProbabilities(DiscreteAttribute attribute, List <Record> elements, List <DiscreteAttribute> outliers = null)
        {
            var tally = new Dictionary <string /*value*/, double /*count*/>();

            // Start every declared value at zero; Add throws if a value is declared twice.
            Array.ForEach(attribute.Values, declared => tally.Add(declared, 0));

            // Count only the values that were declared; anything else is left for the second pass.
            foreach (var record in elements)
            {
                var observed = record[attribute];
                double seen;
                if (!tally.TryGetValue(observed, out seen))
                {
                    continue;
                }

                tally[observed] = seen + 1;
            }

            var picker = new ProbabilityPicker(ConvertToProbabilities(tally));

            // IsSignificant() is evaluated before the null check, exactly once per call.
            bool significant = picker.IsSignificant();
            if (!significant && outliers != null)
            {
                lock (outliers)
                {
                    outliers.Add(attribute);
                }
            }

            // Second pass: replace every value that is not among the declared ones.
            foreach (var record in elements)
            {
                if (tally.ContainsKey(record[attribute]))
                {
                    continue;
                }

                record[attribute] = picker.Pick();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Splits <paramref name="trainingSet"/> into groups by each record's <c>IsPositive</c> value and calls
        /// <c>AssignProbabilities</c> once per group for the given attribute.
        /// </summary>
        /// <param name="attribute">Attribute passed through to <c>AssignProbabilities</c>.</param>
        /// <param name="trainingSet">Records to group; each group is copied into its own list.</param>
        public static void AssignProbabilitiesByClass(DiscreteAttribute attribute, List <Record> trainingSet)
        {
            foreach (var sameClass in trainingSet.GroupBy(record => record.IsPositive))
            {
                List <Record> members = sameClass.ToList();

                // A fresh outliers list is supplied per group and never read afterwards.
                AssignProbabilities(attribute, members, new List <DiscreteAttribute>());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Groups the records by their value for <paramref name="header"/> and sums
        /// <c>CalcPartEntropy(groupSize, totalRecordCount)</c> over all groups.
        /// </summary>
        /// <param name="recordsSet">Records to partition.</param>
        /// <param name="header">Attribute whose value is the grouping key.</param>
        /// <returns>The accumulated sum; 0.0 when there are no records.</returns>
        static double CalculateRatio(List <Record> recordsSet, DiscreteAttribute header)
        {
            var partitions = recordsSet.GroupBy(record => record[header]);
            double total = 0.0d;

            foreach (var partition in partitions)
            {
                total += CalcPartEntropy(partition.Count(), recordsSet.Count);
            }

            return total;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Computes the entropy of the whole sample set minus the size-weighted sum of the entropies of the
        /// groups obtained by splitting the samples on <paramref name="attribute"/>.
        /// </summary>
        /// <param name="samplesSet">Samples to evaluate.</param>
        /// <param name="attribute">Attribute whose value is the grouping key.</param>
        /// <returns>Entropy before the split minus the weighted entropy after it.</returns>
        static double CalculateGain(List <Record> samplesSet, DiscreteAttribute attribute)
        {
            double weightedEntropy = 0;

            // Groups are keyed by whatever value the samples carry; values are not
            // filtered against attribute.Values.
            foreach (var bucket in samplesSet.GroupBy(sample => sample[attribute]))
            {
                int size          = bucket.Count();
                int positiveCount = CountPositiveExamples(bucket);
                var bucketEntropy = CalculateEntropy(positiveCount, size - positiveCount);

                // Weight each group's entropy by its share of the sample set.
                weightedEntropy += bucketEntropy * size / samplesSet.Count;
            }

            int totalPositives = CountPositiveExamples(samplesSet);

            return CalculateEntropy(totalPositives, samplesSet.Count - totalPositives) - weightedEntropy;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns the attribute from <paramref name="headersSet"/> with the largest
        /// <c>CalculateGain</c> value over <paramref name="recordsSet"/>.
        /// </summary>
        /// <param name="recordsSet">Records the gain is computed over.</param>
        /// <param name="headersSet">Candidate attributes.</param>
        /// <returns>
        /// The first attribute with the strictly highest gain, or null when the candidate list is empty or
        /// no candidate has a gain greater than zero.
        /// </returns>
        public static DiscreteAttribute GetBestAttribute(List <Record> recordsSet, List <DiscreteAttribute> headersSet)
        {
            if (!headersSet.Any())
            {
                return null;
            }

            DiscreteAttribute winner      = null;
            double            highestGain = 0;

            foreach (var candidate in headersSet)
            {
                var candidateGain = CalculateGain(recordsSet, candidate);

                // Strict comparison: ties keep the earlier candidate, and a gain of
                // zero (or NaN) never displaces the initial null.
                if (candidateGain > highestGain)
                {
                    highestGain = candidateGain;
                    winner      = candidate;
                }
            }

            return winner;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a tree node for the given records and, unless one of the early checks ends construction,
        /// selects a split attribute and builds the child nodes.
        /// </summary>
        /// <param name="parent">Node stored as this node's parent.</param>
        /// <param name="value">Stored in <c>Value</c>.</param>
        /// <param name="attributes">Candidate attributes passed to <c>DecisionTree.GetBestAttribute</c>.</param>
        /// <param name="records">Records this node is built from.</param>
        public TreeNode(TreeNode parent, string value, List <DiscreteAttribute> attributes, List <Record> records)
        {
            this.parent = parent;
            Value       = value;
            children    = new ConcurrentDictionary <string, TreeNode>();

            // DecideTrue is tried first; DecideFalse only runs when DecideTrue returned false.
            // Either one returning true ends construction with no split attribute and no children.
            if (DecideTrue(records) || DecideFalse(records))
            {
                return;
            }

            DecideWithProbability(records);
            splitAttribute = DecisionTree.GetBestAttribute(records, attributes);

            // IsLeafNode is consulted only after splitAttribute has been assigned.
            if (!IsLeafNode())
            {
                BuildChildNodes(attributes, records);
            }
        }
Ejemplo n.º 7
0
Archivo: Record.cs Proyecto: volend/ML
 /// <summary>
 /// Reads or writes the value for the given attribute by delegating to this type's
 /// other indexer, using <c>attr.Index</c> as the position.
 /// </summary>
 /// <param name="attr">Attribute whose <c>Index</c> selects the slot.</param>
 public string this[DiscreteAttribute attr]
 {
     get
     {
         var position = attr.Index;
         return this[position];
     }
     set
     {
         var position = attr.Index;
         this[position] = value;
     }
 }