Пример #1
0
        /// <summary>
        /// Replaces, for every record in <paramref name="elements"/>, a value of <paramref name="attribute"/>
        /// that is not listed in <c>attribute.Values</c> with a value drawn from a <c>ProbabilityPicker</c>
        /// built from the frequencies of the listed values.
        /// </summary>
        /// <remarks>
        /// Author's note: this method is called by multiple threads on the same list of elements without
        /// synchronization. It is considered thread safe because each invocation only touches the slot
        /// of its own <paramref name="attribute"/> in every record (mutation slicing).
        /// </remarks>
        /// <param name="attribute">Attribute whose unlisted values are filled in.</param>
        /// <param name="elements">Records to count and to patch in place.</param>
        /// <param name="outliers">
        /// Optional list that receives <paramref name="attribute"/> when the picker reports it is not
        /// significant. Access is guarded by a lock on the list itself.
        /// </param>
        public static void AssignProbabilities(DiscreteAttribute attribute, List <Record> elements, List <DiscreteAttribute> outliers = null)
        {
            // Occurrence tally, seeded with zero for every value the attribute declares.
            var tally = new Dictionary <string /*value*/, double /*count*/>();
            Array.ForEach(attribute.Values, known => tally.Add(known, 0));

            foreach (var record in elements)
            {
                var current = record[attribute];
                double seen;
                if (!tally.TryGetValue(current, out seen))
                {
                    // Not a declared value: it is not counted here and gets replaced below.
                    continue;
                }

                tally[current] = seen + 1;
            }

            var picker = new ProbabilityPicker(ConvertToProbabilities(tally));

            // IsSignificant() is evaluated first, regardless of whether outliers was supplied.
            bool significant = picker.IsSignificant();
            if (!significant && outliers != null)
            {
                lock (outliers)
                {
                    outliers.Add(attribute);
                }
            }

            foreach (var record in elements)
            {
                if (tally.ContainsKey(record[attribute]))
                {
                    continue;
                }

                record[attribute] = picker.Pick();
            }
        }
Пример #2
0
        /// <summary>
        /// Runs <c>AssignProbabilities</c> for <paramref name="attribute"/> separately on each group of
        /// <paramref name="trainingSet"/> records that share the same <c>IsPositive</c> value.
        /// </summary>
        /// <param name="attribute">Attribute passed through to <c>AssignProbabilities</c>.</param>
        /// <param name="trainingSet">Records to partition by <c>IsPositive</c>.</param>
        public static void AssignProbabilitiesByClass(DiscreteAttribute attribute, List <Record> trainingSet)
        {
            foreach (var classGroup in trainingSet.GroupBy(record => record.IsPositive))
            {
                var members = classGroup.ToList();

                // The outlier list is created per group and never read afterwards.
                var discardedOutliers = new List <DiscreteAttribute>();

                AssignProbabilities(attribute, members, discardedOutliers);
            }
        }
Пример #3
0
        /// <summary>
        /// Groups <paramref name="recordsSet"/> by each record's value for <paramref name="header"/> and
        /// sums <c>CalcPartEntropy(groupSize, totalCount)</c> over the groups.
        /// </summary>
        /// <param name="recordsSet">Records to group.</param>
        /// <param name="header">Attribute whose values define the groups.</param>
        /// <returns>The accumulated sum; 0 when there are no groups.</returns>
        static double CalculateRatio(List <Record> recordsSet, DiscreteAttribute header)
        {
            // Ordered left-to-right fold, starting from 0.
            return(recordsSet
                   .GroupBy(record => record[header])
                   .Aggregate(0.0d, (sum, bucket) => sum + CalcPartEntropy(bucket.Count(), recordsSet.Count)));
        }
Пример #4
0
        /// <summary>
        /// Returns the attribute from <paramref name="headersSet"/> with the largest gain as reported by
        /// <c>CalculateGain</c>, and removes from <paramref name="headersSet"/> every attribute whose gain
        /// is flagged as not significant.
        /// </summary>
        /// <remarks>
        /// Gains are computed in parallel, but selection and pruning run sequentially in list order.
        /// When several attributes share the top gain, the one that appears first in
        /// <paramref name="headersSet"/> wins, so the result does not depend on thread scheduling.
        /// </remarks>
        /// <param name="recordsSet">Records the gain is computed over.</param>
        /// <param name="headersSet">Candidate attributes; pruned in place (side effect).</param>
        /// <returns>
        /// The best attribute, or <c>null</c> when <paramref name="headersSet"/> is empty or no candidate
        /// has a significant gain greater than 0.
        /// </returns>
        public static DiscreteAttribute GetBestAttribute(List <Record> recordsSet, List <DiscreteAttribute> headersSet)
        {
            if (!headersSet.Any())
            {
                return(null);
            }

            // Snapshot the candidates so that pruning headersSet below cannot disturb iteration.
            DiscreteAttribute[] candidates = headersSet.ToArray();
            var gains = new Tuple <bool /*significant*/, double>[candidates.Length];

            // Each iteration writes only its own slot, so no lock is needed and, in particular,
            // no lock on typeof(DecisionTree), which any other code could also acquire.
            Parallel.For(0, candidates.Length, i =>
            {
                gains[i] = CalculateGain(recordsSet, candidates[i]);
            });

            double            bestGain      = 0;
            DiscreteAttribute bestAttribute = null;

            for (int i = 0; i < candidates.Length; i++)
            {
                if (gains[i].Item1)
                {
                    if (gains[i].Item2 > bestGain)
                    {
                        bestGain      = gains[i].Item2;
                        bestAttribute = candidates[i];
                    }
                }
                else
                {
                    // Kept under the same lock as before in case other code synchronizes on this list.
                    lock (headersSet)
                    {
                        headersSet.Remove(candidates[i]);
                    }
                }
            }

            return(bestAttribute);
        }
Пример #5
0
        /// <summary>
        /// Creates a node and, unless one of the early-exit checks applies, selects a split attribute
        /// and builds the child nodes.
        /// </summary>
        /// <param name="parent">Stored as this node's parent.</param>
        /// <param name="value">Stored in <c>Value</c>.</param>
        /// <param name="attributes">Candidate attributes handed to <c>DecisionTree.GetBestAttribute</c> and <c>BuildChildNodes</c>.</param>
        /// <param name="records">Records this node is built from.</param>
        /// <param name="fracCertainty">Forwarded unchanged to <c>BuildChildNodes</c>.</param>
        public TreeNode(TreeNode parent, string value, List <DiscreteAttribute> attributes, List <Record> records, double fracCertainty)
        {
            this.parent = parent;
            Value       = value;
            children    = new ConcurrentDictionary <string, TreeNode>();

            // Short-circuit: DecideFalse is only consulted when DecideTrue returned false.
            bool decided = DecideTrue(records) || DecideFalse(records);
            if (decided)
            {
                return;
            }

            DecideWithProbability(records);

            // Choose the split attribute before asking whether this node is a leaf.
            splitAttribute = DecisionTree.GetBestAttribute(records, attributes);

            if (!IsLeafNode())
            {
                BuildChildNodes(attributes, records, fracCertainty);
            }
        }
Пример #6
0
 /// <summary>
 /// Reads or writes the value for <paramref name="attr"/> by delegating to the indexer
 /// that takes <c>attr.Index</c>.
 /// </summary>
 /// <param name="attr">Attribute whose <c>Index</c> selects the slot.</param>
 public string this[DiscreteAttribute attr]
 {
     get
     {
         var slot = attr.Index;
         return(this[slot]);
     }
     set
     {
         var slot = attr.Index;
         this[slot] = value;
     }
 }
Пример #7
0
        /// <summary>
        /// Compares the entropy of <paramref name="samplesSet"/> as a whole with the size-weighted sum
        /// of the entropies of the groups obtained by splitting on <paramref name="attribute"/>.
        /// </summary>
        /// <param name="samplesSet">Samples to evaluate.</param>
        /// <param name="attribute">Attribute whose values define the groups.</param>
        /// <returns>
        /// Item1 is <c>true</c> when the entropy after the split is strictly lower than before it;
        /// Item2 is the difference (entropy before minus entropy after).
        /// </returns>
        static Tuple <bool /*significant*/, double> CalculateGain(List <Record> samplesSet, DiscreteAttribute attribute)
        {
            double weightedEntropy = 0;

            foreach (var bucket in samplesSet.GroupBy(sample => sample[attribute]))
            {
                int size          = bucket.Count();
                int positiveCount = CountPositiveExamples(bucket);
                int negativeCount = size - positiveCount;

                // Weight the bucket's entropy by its share of all samples.
                var bucketEntropy = CalculateEntropy(positiveCount, negativeCount);
                weightedEntropy += bucketEntropy * size / samplesSet.Count;
            }

            int    totalPositives = CountPositiveExamples(samplesSet);
            double baseEntropy    = CalculateEntropy(totalPositives, samplesSet.Count - totalPositives);

            bool   significant = weightedEntropy < baseEntropy;
            double gain        = baseEntropy - weightedEntropy;

            return(new Tuple <bool, double>(significant, gain));
        }