/// <summary>
        /// Builds a distribution over the values found in the last column of
        /// <paramref name="data"/>, which this method treats as the classifier column.
        /// </summary>
        /// <param name="data">
        /// Table whose last column (index <c>Columns.Count - 1</c>) is read.
        /// The helpers used here are named "assume index in range" and "assume total positive",
        /// so presumably the table must have at least one column and one row — confirm against those helpers.
        /// </param>
        /// <returns>
        /// The sequence of doubles produced by <c>_FroOccursX._Dbls_assumeTotalPositive</c> from the
        /// bag of last-column texts; presumably one relative frequency per distinct value — confirm.
        /// </returns>
        public static /*List<int[,]>*/ IEnumerable <double> _GetDistOfClassifier(DataTable data)
        {
            // The classifier is taken to be the right-most column.
            var classifierColIndex = data.Columns.Count - 1;

            // Text of every cell in that column.
            var classifierTexts = nilnul.data.tbl.col.cels._ValsX._Txts_assumeIndexInRange(data, classifierColIndex);

            // Collect the texts into a bag (a multiset) before turning it into a distribution.
            var occurrences = new nilnul.txt.Bag1(classifierTexts);

            return nilnul.stat.dist_.finite._FroOccursX._Dbls_assumeTotalPositive(occurrences, nilnul.txt.Comp.Singleton);
        }
        /// <summary>
        /// Computes the gain ratio of a candidate column: the information gain
        /// (<paramref name="entropyOfDataset"/> minus the conditional entropy of the class given the
        /// candidate column) divided by the entropy of the candidate column's own value distribution.
        /// </summary>
        /// <param name="data">
        /// The dataset. Assumed: the class entropy is not nil and the candidate column's entropy is not nil
        /// (presumably "non-zero"). No guard is applied here, so a zero candidate entropy flows straight
        /// into the final division.
        /// </param>
        /// <param name="candidateColIndex">Index of the column being evaluated as a split candidate.</param>
        /// <param name="entropyOfDataset">The entropy of the whole dataset with respect to the classifier.</param>
        /// <returns>The gain ratio of the candidate column.</returns>
        public static double GetGainRatioForCol(DataTable data, int candidateColIndex, double entropyOfDataset)
        {
            // Per candidate value: the statistics of the class restricted to rows having that value
            // (as produced by _GetClassStatByCandidateVal, which is defined elsewhere).
            var candidate_andClassStat = _GetClassStatByCandidateVal(data, candidateColIndex);

            // Bag of candidate values, each weighted by the cardinality of its class statistics.
            var candidateStat = new nilnul.txt.Bag1();

            candidate_andClassStat.Select(kv => new KeyValuePair <string, Num1>(kv.Key, kv.Value.cardinality)).ForEach(
                x => candidateStat.Add(x.Key, x.Value)
                );

            // Probability of each candidate value.
            var candidateDistribution = nilnul.stat.dist_.finite._FroBagX._ProbInVowedDbl_assumeTotalPositive(
                candidateStat
                );

            // Entropy of the candidate column itself; this is the denominator of the gain ratio.
            var candidateEntropy = nilnul.stat.dist_.finite._EntropyX.Dbl_ofAssumeDistribution(
                candidateDistribution.Values.Cast <ProbDbl>()
                );

            // Entropy of the class statistics attached to each individual candidate value.
            var eachCandidate_with_ClassEntropy = candidate_andClassStat.Select(
                d => new KeyValuePair <string, double>(
                    d.Key,
                    nilnul.stat.dist_.finite._EntropyX.Entropy_ofAssumeDistribution(
                        nilnul.stat.dist_.finite._FroBagX._ProbInDbl_assumeTotalPositive(
                            d.Value
                            ).Values.Cast <double>()
                        )
                    )
                );

            // Conditional entropy: sum over candidate values of P(value) * entropy of the class for that value.
            var conditionalEntropyOfClassOnCandidate = eachCandidate_with_ClassEntropy.Select(
                candidate => candidateDistribution[candidate.Key] //prob of candidate
                *
                candidate.Value                                   //entropy
                ).Sum();

            // Information gain of splitting on the candidate column.
            var gain = entropyOfDataset - conditionalEntropyOfClassOnCandidate;

            // Normalise the gain by the candidate's own entropy.
            // The earlier binary-class gain computation and its unreachable return were removed:
            // they never influenced the value returned here.
            var gainRatio = gain / candidateEntropy;

            return(gainRatio);
        }
// Example no. 3
// 0
        /// <summary>
        /// Mines association rules from <paramref name="observations"/>.
        /// <para>
        /// Items whose occurrence count reaches the minimum support (<c>observation count * _support</c>)
        /// seed the search. The item-set cardinality is then increased level by level: for each observation,
        /// every combination of the current cardinality drawn from the items still under consideration is
        /// counted, and item sets below the minimum support are discarded. The search stops at the first
        /// level that yields no item set.
        /// </para>
        /// <para>
        /// Rules are derived only from the item sets of the last non-empty level: each such set is split
        /// into every non-empty proper subset and its complement, and the pair is kept when its confidence
        /// is at least <c>_confidence</c>.
        /// </para>
        /// </summary>
        /// <param name="observations">
        /// The observations to mine; each observation is enumerated as a sequence of item names.
        /// The sequence is copied once on entry, so a lazily evaluated source is enumerated only once.
        /// </param>
        /// <returns>
        /// Pairs of (association, confidence) whose confidence is at least <c>_confidence</c>.
        /// The result is a deferred filter over an internal list.
        /// </returns>
        public IEnumerable <(nilnul.data.mining._associater.Association <string>, double)> getRules(
            IEnumerable <Observation> observations
            )
        {
            // Snapshot the input: it is walked once for counting, once per cardinality level and once per
            // generated rule, which would re-run (or break) a lazy or one-shot source.
            // Declared as IEnumerable<Observation> so calls into the helpers resolve exactly as before.
            IEnumerable <Observation> observed = observations.ToList();

            var minSupport = (observed.Count() * _support);

            // Creates an empty bag of item sets that compares item sets with the project's sequence equality.
            nilnul.obj.Bag1 <IEnumerable <string> > newItemSetBag()
            {
                return new nilnul.obj.Bag1 <IEnumerable <string> >(
                    new NotNull2 <IEqualityComparer <IEnumerable <string> > >(
                        new nilnul.obj.str_.seq.Eq <string>()
                        )
                    );
            }

            // Occurrence count of every single item across all observations.
            var itemCountS = new nilnul.txt.Bag1(
                observed.SelectMany(s => s)
                );

            // Keep only the items that reach the minimum support.
            var supportedItems = new nilnul.txt.Bag1(
                itemCountS.Where(x => (double)x.Value.en >= minSupport)
                );

            // Level 1: each supported item as a one-element item set.
            var frequentItemSetS = newItemSetBag();

            supportedItems.Each(
                component =>
            {
                frequentItemSetS.add(
                    new[] { component.Key }
                    );
            }
                );

            var itemSetCardinality = 1;

            while (true)
            {
                // Only items that survived the previous level can take part in larger item sets.
                var itemsInConsideration = new nilnul.txt.Set(frequentItemSetS.Keys.SelectMany(x => x));

                var newFreqItemSets = newItemSetBag();

                itemSetCardinality++;

                observed.Each(
                    observation =>
                {
                    var intersected = nilnul.set.op_.binary_._IntersectX.Intersect(
                        itemsInConsideration
                        ,
                        observation
                        );

                    // Every combination of the current cardinality present in this observation.
                    var combinated = nilnul.set.family.op_.of_.set_.combinate_._ByIndexsX._Cord_assumeDistinct(
                        intersected,
                        (itemSetCardinality)
                        );

                    combinated.Each(
                        combinatedInstance =>
                        newFreqItemSets.add(
                            combinatedInstance
                            )
                        );
                }
                    );

                // Drop candidates below the minimum support. The keys are copied to an array first so the
                // bag is not modified while it is being enumerated.
                newFreqItemSets.removeKeys_ofFinite(
                    newFreqItemSets.Where(x => (double)x.Value.en < minSupport).Select(y => y.Key).ToArray()
                    );

                if (newFreqItemSets.None())
                {
                    // The algorithm terminates when the frequent item sets cannot be extended further.
                    break;
                }

                frequentItemSetS = newFreqItemSets;
            }

            var rules = new List <(nilnul.data.mining._associater.Association <string>, double)>();

            // frequentItemSetS now holds the item sets of the last non-empty level;
            // extract the rules from each of them.
            foreach (var frequentSet in frequentItemSetS)
            {
                // Evaluate the size once rather than on every loop test.
                var itemSetSize = frequentSet.Key.Count();

                // i ranges over the sizes of non-empty proper subsets, so both sides of a rule are non-empty.
                for (int i = 1; i < itemSetSize; i++)
                {
                    foreach (
                        var combinated in nilnul.set.family.op_.of_.set_.combinate_._ByIndexsX._Cord_assumeDistinct(
                            frequentSet.Key
                            ,
                            i
                            )
                        )
                    {
                        var complement =
                            frequentSet.Key.Except(combinated)
                        ;
                        rules.Add(
                            (
                                new mining._associater.Association <string>(
                                    combinated
                                    ,
                                    complement
                                    )
                                ,
                                // NOTE(review): fresh sets are built per call on purpose; Confidence is not
                                // visible here, so shared instances are avoided in case it mutates its input.
                                nilnul.stat.dist_.finite_.multivar_.binary.observation.str._ConfidenceX.Confidence(
                                    observed.Select(s => new HashSet <string>(s))
                                    ,
                                    combinated,
                                    complement
                                    )
                            )
                            );
                    }
                }
            }

            // Keep only the rules that reach the configured confidence.
            return(rules.Where(x => x.Item2 >= this._confidence));
        }