Exemplo n.º 1
0
        /// <summary>
        /// Generates the decision rules for the specified FP-tree, keeping only the rules
        /// whose confidence reaches <paramref name="minConfidence"/>.
        /// </summary>
        /// <remarks>
        /// <c>FpGrowth</c> is invoked on <paramref name="tree"/> first and <c>FrequentItemSets</c>
        /// is read afterwards; presumably that call is what fills the collection (its body is
        /// not visible here - confirm). Every frequent itemset with at least two items is then
        /// split into a left side and a single-item right side that together make up the whole
        /// itemset, and the rule confidence is support(itemset) / support(left side).
        /// </remarks>
        /// <param name="tree">FP-tree</param>
        /// <param name="minConfidence">The minimal confidence (inclusive lower bound)</param>
        /// <returns>The rules that reached the confidence threshold; empty when there are none.</returns>
        public List<DecisionRule<T>> GenerateRuleSet(FpTree<T> tree, double minConfidence)
        {
            FpGrowth(tree, new List<T>());

            var decisionRules = new List<DecisionRule<T>>();

            foreach (var frequentItemSet in FrequentItemSets.Keys)
            {
                var itemCount = frequentItemSet.ItemSet.Count;
                if (itemCount < 2)
                {
                    // A rule needs at least one item on each side.
                    continue;
                }

                var itemSetSupport = FrequentItemSets[frequentItemSet];
                var subSets = EnumerableHelper.GetSubsets(frequentItemSet.ItemSet);

                foreach (var t in subSets)
                {
                    var leftSide = new FrequentItemSet<T>(t);

                    // The support/confidence below describe the WHOLE itemset, so they are only
                    // valid for a rule whose two sides partition it. The right side is always a
                    // single item, hence the left side must hold all the remaining items.
                    // (Previously e.g. A -> B was also emitted from {A,B,C} with the confidence
                    // of {A,B,C}; the correct A -> B rule is produced from the itemset {A,B}.)
                    if (leftSide.ItemSet.Count + 1 != itemCount)
                    {
                        continue;
                    }

                    if (!FrequentItemSets.ContainsKey(leftSide))
                    {
                        continue;
                    }

                    // Independent of the right side, so computed once per left side.
                    var confidence = (double)itemSetSupport / FrequentItemSets[leftSide];
                    if (confidence >= minConfidence)
                    {
                        for (var j = 0; j < subSets.Count; j++)
                        {
                            var rightSide = new FrequentItemSet<T>(subSets[j]);
                            if (rightSide.ItemSet.Count != 1 || !FrequentItemSet<T>.SetsSeparated(rightSide, leftSide))
                            {
                                continue;
                            }

                            var rule = new DecisionRule<T>(leftSide.ItemSet, rightSide.ItemSet, itemSetSupport, confidence);
                            decisionRules.Add(rule);
                        }
                    }
                }
            }

            return decisionRules;
        }
        /// <summary>
        /// Mines frequent itemsets level by level from the data built for
        /// <paramref name="executionSettings"/>, derives single-consequent decision rules from
        /// the last non-empty level and optionally prints them to the console.
        /// </summary>
        /// <param name="executionSettings">
        /// Supplies the data source as well as the <c>MinSup</c> and <c>MinConf</c> thresholds.
        /// </param>
        /// <param name="printRules">
        /// When <c>false</c> the method returns right after the rules were computed; when
        /// <c>true</c> the output of <c>PrintRules</c> is written to the console.
        /// </param>
        public override void Run(ExecutionSettings executionSettings, bool printRules)
        {
            builder = new MsDataBuilder();
            var data = builder.BuildInstance(executionSettings);

            // Level 1: one single-item set per element, keeping only the frequent ones.
            // The previous AsParallel() calls were placed after the sequential Select/Where,
            // so they parallelized no work and only gave up the ordering guarantee.
            var frequentSets = data.Elements.Keys
                               .Select(element => new List<int> { element })
                               .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
                               .ToList();

            // Support of every frequent itemset found so far (all levels).
            var frequentItemSets = frequentSets.AsParallel().ToDictionary(set => new FrequentItemSet<int>(set),
                                                                          set => set.GetSupport(data.Transactions));
            List<List<int>> candidates;

            while ((candidates = GenerateCandidates(frequentSets)).Count > 0)
            {
                // TODO: check whether any (k-1)-element subset of a candidate is missing from
                // frequentSets => discard that candidate!

                // leave only these sets which are frequent
                candidates = candidates
                             .Where(set => set.IsFrequentParallel(data.Transactions, executionSettings.MinSup))
                             .ToList();

                if (candidates.Count == 0)
                {
                    // we don't have any more candidates
                    break;
                }

                frequentSets = candidates;
                foreach (var candidate in candidates)
                {
                    frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupportParallel(data.Transactions));
                }
            }

            // Build the rules from the last non-empty level.
            // NOTE(review): frequent itemsets of the earlier levels are only used for left-side
            // support lookups and are not expanded into rules themselves - confirm this is intended.
            var decisionRules = new List<DecisionRule<int>>();

            foreach (var frequentSet in frequentSets)
            {
                var subSets = EnumerableHelper.GetSubsets(frequentSet);

                // Both values are invariant for this itemset, so look them up once.
                var itemSetKey = new FrequentItemSet<int>(frequentSet);
                var itemSetSupport = frequentItemSets[itemSetKey];

                foreach (var t in subSets)
                {
                    var leftSide = new FrequentItemSet<int>(t);

                    // The support/confidence below describe the WHOLE itemset, so they are only
                    // valid when both sides partition it: with a single-item right side the left
                    // side has to hold all the remaining items. (Previously e.g. A -> B was
                    // emitted from {A,B,C} carrying the confidence of {A,B,C}.)
                    if (leftSide.ItemSet.Count + 1 != frequentSet.Count)
                    {
                        continue;
                    }

                    if (!frequentItemSets.ContainsKey(leftSide))
                    {
                        continue;
                    }

                    // Independent of the right side, so computed once per left side.
                    var confidence = (double)itemSetSupport / frequentItemSets[leftSide];
                    if (confidence >= executionSettings.MinConf)
                    {
                        for (var j = 0; j < subSets.Count; j++)
                        {
                            var rightSide = new FrequentItemSet<int>(subSets[j]);
                            if (rightSide.ItemSet.Count != 1 || !FrequentItemSet<int>.SetsSeparated(rightSide, leftSide))
                            {
                                continue;
                            }

                            var rule = new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, itemSetSupport, confidence);
                            decisionRules.Add(rule);
                        }
                    }
                }
            }

            if (!printRules)
            {
                return;
            }

            var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);

            Console.WriteLine(result);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Bitmap-based variant of the level-wise frequent itemset mining: element frequencies
        /// come from a bitmap wrapper, candidates are filtered through <c>GetFrequentSets</c>,
        /// and single-consequent decision rules are derived from the last non-empty level and
        /// optionally printed to the console.
        /// </summary>
        /// <param name="executionSettings">
        /// Supplies the data source as well as the <c>MinSup</c> and <c>MinConf</c> thresholds.
        /// </param>
        /// <param name="printRules">
        /// When <c>false</c> the method returns right after the rules were computed; when
        /// <c>true</c> the output of <c>PrintRules</c> is written to the console. Nothing is
        /// printed at all when no single element is frequent (early return).
        /// </param>
        public override void Run(ExecutionSettings executionSettings, bool printRules)
        {
            builder = new MsDataBuilder();
            var data                = builder.BuildInstance(executionSettings);
            var elementsList        = data.Elements.Keys.ToList();
            var transactionsList    = data.Transactions.Keys.ToList();
            var bitmapWrapper       = PrepareBitmapWrapper(data, elementsList, transactionsList);
            var elementsFrequencies = CalculateElementsFrequencies(bitmapWrapper);

            // element id -> position in elementsList; replaces the repeated linear
            // elementsList.IndexOf(...) scans (ids are dictionary keys, hence unique).
            var elementPositions = new Dictionary<int, int>(elementsList.Count);
            for (var i = 0; i < elementsList.Count; i++)
            {
                elementPositions[elementsList[i]] = i;
            }

            var minSupportCount = executionSettings.MinSup * transactionsList.Count;
            var frequentSets    = elementsList
                                  .Where(e => elementsFrequencies[elementPositions[e]] >= minSupportCount)
                                  .Select(element => new List<int> { element })
                                  .ToList();

            // Support of every frequent itemset found so far (all levels).
            var frequentItemSets = frequentSets.ToDictionary(set => new FrequentItemSet<int>(set),
                                                             set => elementsFrequencies[elementPositions[set[0]]]);
            List<List<int>> candidates;

            if (frequentSets.Count == 0)
            {
                return;
            }

            // Copy the columns of the frequent elements into a second bitmap with swapped axes:
            // x = transaction index, y = compact index of the frequent element.
            // NOTE(review): this Bitmap is never disposed; whether BitmapWrapper.ConvertBitmap
            // keeps a reference to it is not visible here - confirm before adding a using/Dispose.
            var bitmapTransposed = new Bitmap(transactionsList.Count, frequentSets.Count);
            var newElementsList  = new List<int>(frequentSets.Count);

            // element id -> compact index (position in newElementsList)
            var compactPositions = new Dictionary<int, int>(frequentSets.Count);

            foreach (var set in frequentSets)
            {
                var element      = set[0];
                var compactIndex = newElementsList.Count;
                var sourceColumn = elementPositions[element]; // invariant for the whole pixel loop

                newElementsList.Add(element);
                compactPositions[element] = compactIndex;

                for (var i = 0; i < transactionsList.Count; i++)
                {
                    var pixel = bitmapWrapper.Bitmap.GetPixel(sourceColumn, i);

                    bitmapTransposed.SetPixel(i, compactIndex, pixel);
                }
            }
            var newBitmapWrapper = BitmapWrapper.ConvertBitmap(bitmapTransposed);

            while ((candidates = GenerateCandidates(frequentSets)).Count > 0)
            {
                // 1. translate element ids into compact indices (in place).
                //    Candidates are expected to consist of frequent elements only; an unknown
                //    id now fails with KeyNotFoundException instead of silently becoming -1.
                foreach (var candidate in candidates)
                {
                    for (var i = 0; i < candidate.Count; i++)
                    {
                        candidate[i] = compactPositions[candidate[i]];
                    }
                }

                // 2. execute CUDA counting
                candidates = GetFrequentSets(candidates, executionSettings.MinSup, newBitmapWrapper, transactionsList.Count);

                // 3. translate compact indices back into element ids
                foreach (var candidate in candidates)
                {
                    for (var i = 0; i < candidate.Count; i++)
                    {
                        candidate[i] = newElementsList[candidate[i]];
                    }
                }

                if (candidates.Count == 0)
                {
                    // we don't have any more candidates
                    break;
                }

                frequentSets = candidates;
                foreach (var candidate in candidates)
                {
                    frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupport(data.Transactions));
                }
            }

            // Build the rules from the last non-empty level.
            // NOTE(review): frequent itemsets of the earlier levels are only used for left-side
            // support lookups and are not expanded into rules themselves - confirm this is intended.
            var decisionRules = new List<DecisionRule<int>>();

            foreach (var frequentSet in frequentSets)
            {
                var subSets = EnumerableHelper.GetSubsets(frequentSet);

                // Both values are invariant for this itemset, so look them up once.
                var itemSetKey = new FrequentItemSet<int>(frequentSet);
                var itemSetSupport = frequentItemSets[itemSetKey];

                foreach (var t in subSets)
                {
                    var leftSide = new FrequentItemSet<int>(t);

                    // The support/confidence below describe the WHOLE itemset, so they are only
                    // valid when both sides partition it: with a single-item right side the left
                    // side has to hold all the remaining items. (Previously e.g. A -> B was
                    // emitted from {A,B,C} carrying the confidence of {A,B,C}.)
                    if (leftSide.ItemSet.Count + 1 != frequentSet.Count)
                    {
                        continue;
                    }

                    if (!frequentItemSets.ContainsKey(leftSide))
                    {
                        continue;
                    }

                    // Independent of the right side, so computed once per left side.
                    var confidence = (double)itemSetSupport / frequentItemSets[leftSide];
                    if (confidence >= executionSettings.MinConf)
                    {
                        for (var j = 0; j < subSets.Count; j++)
                        {
                            var rightSide = new FrequentItemSet<int>(subSets[j]);
                            if (rightSide.ItemSet.Count != 1 || !FrequentItemSet<int>.SetsSeparated(rightSide, leftSide))
                            {
                                continue;
                            }

                            var rule = new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, itemSetSupport, confidence);
                            decisionRules.Add(rule);
                        }
                    }
                }
            }

            if (!printRules)
            {
                return;
            }

            var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);

            Console.WriteLine(result);
        }