Exemple #1
0
        /// <summary>
        /// Generates the frequent item sets for k = 3 and above.
        /// </summary>
        /// <remarks>
        /// Candidates come from <c>GenerateCandidateSet.GenerateCandidate</c>. A candidate is kept when
        /// the count returned by <c>GetSupportCount</c> is at least <paramref name="support"/>.
        /// </remarks>
        /// <param name="frequentItem">Frequent item sets handed to the candidate generator.</param>
        /// <param name="hashedTransactionSet">Transactions used for candidate generation and support counting.</param>
        /// <param name="currentTupple">Tuple size of the level being generated.</param>
        /// <param name="support">Minimum count a candidate must reach to be kept.</param>
        /// <returns>The candidates that reached the threshold, each with its values sorted.</returns>
        public List<FrequentItemSet> GenerateFrequentItemSet(List<FrequentItemSet> frequentItem, List<TransactionSet> hashedTransactionSet, int currentTupple, int support)
        {
            var survivors = new List<FrequentItemSet>();
            var generator = new GenerateCandidateSet();
            List<CandidateItemSet> candidates = generator.GenerateCandidate(frequentItem, hashedTransactionSet, currentTupple, support);

            // Nothing was generated for this level: hand back the empty result.
            if (candidates.Count() <= 0)
            {
                return survivors;
            }

            foreach (var candidate in candidates)
            {
                int occurrences = GetSupportCount(candidate, hashedTransactionSet, currentTupple);
                if (occurrences < support)
                {
                    continue;
                }

                // Copy the candidate's values so the result does not share the candidate's list.
                var kept = new FrequentItemSet();
                kept.Values = new List<int>(candidate.Values);
                kept.Count = occurrences;
                kept.Values.Sort();
                survivors.Add(kept);
            }

            return survivors;
        }
Exemple #2
0
        /// <summary>
        /// Fp-growth procedure for the specified tree and the conditional base pattern.
        /// </summary>
        /// <remarks>
        /// Every item set found is added to <c>FrequentItemSets</c> together with its support.
        /// For a single-path tree each non-empty combination of path items is emitted directly;
        /// otherwise the method recurses into a conditional tree per item.
        /// </remarks>
        /// <param name="tree">FP-tree</param>
        /// <param name="alpha">The conditional base pattern</param>
        private void FpGrowth(FpTree<T> tree, IEnumerable<T> alpha)
        {
            if (tree.HasSinglePath())
            {
                var path = tree.GetSinglePath();

                var elements = path.Select(node => node.Value).ToList();

                var values = EnumerableHelper.GetSubsets(elements);
                foreach (var list in values)
                {
                    if (list.Count <= 0)
                    {
                        continue;
                    }

                    // The support of a combination is the smallest counter among the path nodes
                    // that belong to that combination. Taking the minimum over the whole path
                    // would give every combination the support of the least frequent node.
                    var support = int.MaxValue;
                    foreach (var node in path)
                    {
                        if (list.Contains(node.Value) && node.TransactionCounter < support)
                        {
                            support = node.TransactionCounter;
                        }
                    }

                    // The emitted pattern is the combination followed by the conditional base pattern.
                    var itemSet = new List<T>(list);
                    itemSet.AddRange(alpha);
                    var frequentItemSet = new FrequentItemSet<T>(itemSet);
                    FrequentItemSets.Add(frequentItemSet, support);
                }
                return;
            }

            var sortedFrequencyList = tree.GetFrequencyList();

            foreach (var list in sortedFrequencyList)
            {
                foreach (var value in list.Value)
                {
                    // beta = value followed by alpha.
                    var beta = new List<T>(alpha);
                    beta.Insert(0, value);
                    var patternBase = tree.GetCondPatternBase(value);
                    FrequentItemSets.Add(new FrequentItemSet<T>(beta), tree.GetSupport(value));

                    var baseGenerator = new CondPatternBaseGenerator<T>();
                    var database      = baseGenerator.Generate(patternBase, _minSupport);
                    if (database.Count > 0)
                    {
                        // Recurse only when the generated database is non-empty.
                        var newTree = new FpTree<T>();
                        newTree.BuildConditionalTree(database);
                        FpGrowth(newTree, beta);
                    }
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Generates the frequent item sets for k = 2.
        /// </summary>
        /// <remarks>
        /// The logic for k = 2 is different because no subset needs to be checked: every item set
        /// returned by <c>GenerateCandidateSet.GenerateCandidate</c> is copied into the result
        /// without a further support check in this method.
        /// </remarks>
        /// <param name="hashitems">Hashed items handed to the candidate generator.</param>
        /// <param name="hashedTransactionSet">Transactions handed to the candidate generator.</param>
        /// <param name="currentTupple">Tuple size of the level being generated.</param>
        /// <param name="support">Support threshold handed to the candidate generator.</param>
        /// <returns>One frequent item set per generated item set, each with its values sorted.</returns>
        public List<FrequentItemSet> GenerateFrequentItemSet(List<HashMap> hashitems, List<TransactionSet> hashedTransactionSet, int currentTupple, int support)
        {
            var result    = new List<FrequentItemSet>();
            var generator = new GenerateCandidateSet();
            List<CandidateItemSet> generated = generator.GenerateCandidate(hashitems, hashedTransactionSet, currentTupple, support);

            foreach (var source in generated)
            {
                // Count and transaction id are carried over unchanged from the generated item set.
                var copy = new FrequentItemSet
                {
                    Values        = new List<int>(source.Values),
                    Count         = source.Count,
                    TransactionID = source.TransactionID
                };
                copy.Values.Sort();
                result.Add(copy);
            }

            return result;
        }
Exemple #4
0
        /// <summary>
        /// Generates the decision rules for the specified FP-tree that reach the minimal confidence.
        /// </summary>
        /// <remarks>
        /// Runs <c>FpGrowth</c> on <paramref name="tree"/> first, then walks every recorded frequent
        /// item set with at least two items and pairs each of its subsets (left side) with each
        /// single-item subset that is separated from it (right side).
        /// </remarks>
        /// <param name="tree">FP-tree</param>
        /// <param name="minConfidence">The minimal confidence</param>
        /// <returns>The rules whose confidence is at least <paramref name="minConfidence"/>.</returns>
        public List<DecisionRule<T>> GenerateRuleSet(FpTree<T> tree, double minConfidence)
        {
            FpGrowth(tree, new List<T>());

            var rules = new List<DecisionRule<T>>();

            foreach (var itemSet in FrequentItemSets.Keys)
            {
                if (itemSet.ItemSet.Count < 2)
                {
                    continue;
                }

                var subsets = EnumerableHelper.GetSubsets(itemSet.ItemSet);

                foreach (var leftItems in subsets)
                {
                    var leftSide = new FrequentItemSet<T>(leftItems);

                    foreach (var rightItems in subsets)
                    {
                        var rightSide = new FrequentItemSet<T>(rightItems);

                        // Only single-item right sides that are separated from the left side qualify.
                        var qualifies = rightSide.ItemSet.Count == 1 && FrequentItemSet<T>.SetsSeparated(rightSide, leftSide);
                        if (!qualifies)
                        {
                            continue;
                        }

                        // The left side needs a recorded support of its own to compute a confidence.
                        if (!FrequentItemSets.ContainsKey(leftSide))
                        {
                            continue;
                        }

                        // NOTE(review): the support of the whole item set is used even when the left
                        // and right sides together do not cover it -- confirm this is intended.
                        var setSupport  = FrequentItemSets[itemSet];
                        var leftSupport = FrequentItemSets[leftSide];
                        var confidence  = (double)setSupport / leftSupport;
                        if (confidence < minConfidence)
                        {
                            continue;
                        }

                        rules.Add(new DecisionRule<T>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence));
                    }
                }
            }

            return rules;
        }
        /// <summary>
        /// Mines frequent item sets level by level, derives decision rules from the last level that
        /// produced frequent sets and optionally prints them to the console.
        /// </summary>
        /// <param name="executionSettings">Supplies the data source settings, <c>MinSup</c> and <c>MinConf</c>.</param>
        /// <param name="printRules">When <c>false</c>, the method returns without printing the rules.</param>
        public override void Run(ExecutionSettings executionSettings, bool printRules)
        {
            builder = new MsDataBuilder();
            var data = builder.BuildInstance(executionSettings);

            // The lists are materialized sequentially. A trailing AsParallel() right before ToList()
            // does not parallelize the Select/Where that precede it, and PLINQ does not promise to
            // keep the source order unless AsOrdered() is used.
            var frequentSets = data.Elements.Keys
                               .Select(element => new List<int> { element })
                               .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
                               .ToList();

            // The support of every single-item set is computed in parallel; order is irrelevant for a dictionary.
            var frequentItemSets = frequentSets.AsParallel().ToDictionary(set => new FrequentItemSet<int>(set),
                                                                          set => set.GetSupport(data.Transactions));
            List<List<int>> candidates;

            while ((candidates = GenerateCandidates(frequentSets)).Count > 0)
            {
                // TODO: check whether any (k-1)-element subset of a candidate is missing from
                // frequentSets => discard that candidate!

                // leave only these sets which are frequent
                candidates = candidates
                             .Where(set => set.IsFrequentParallel(data.Transactions, executionSettings.MinSup))
                             .ToList();

                if (candidates.Count == 0)
                {
                    // we don't have any more candidates
                    break;
                }

                frequentSets = candidates;
                foreach (var candidate in candidates)
                {
                    frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupportParallel(data.Transactions));
                }
            }

            // Rules are derived only from the sets of the last level that yielded frequent sets.
            var decisionRules = new List<DecisionRule<int>>();

            foreach (var frequentSet in frequentSets)
            {
                var subSets = EnumerableHelper.GetSubsets(frequentSet);

                foreach (var t in subSets)
                {
                    var leftSide = new FrequentItemSet<int>(t);
                    for (var j = 0; j < subSets.Count; j++)
                    {
                        var rightSide = new FrequentItemSet<int>(subSets[j]);
                        if (rightSide.ItemSet.Count != 1 || !FrequentItemSet<int>.SetsSeparated(rightSide, leftSide))
                        {
                            continue;
                        }

                        if (!frequentItemSets.ContainsKey(leftSide))
                        {
                            continue;
                        }

                        // Look the support of the whole set up once and reuse it for the rule.
                        // NOTE(review): this support is used even when the left and right sides
                        // together do not cover the whole set -- confirm this is intended.
                        var setSupport = frequentItemSets[new FrequentItemSet<int>(frequentSet)];
                        var confidence = (double)setSupport / frequentItemSets[leftSide];
                        if (confidence >= executionSettings.MinConf)
                        {
                            var rule = new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence);
                            decisionRules.Add(rule);
                        }
                    }
                }
            }

            if (!printRules)
            {
                return;
            }

            var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);

            Console.WriteLine(result);
        }
        /// <summary>
        /// Console entry point: asks for an input file, a support count and a tuple length, mines
        /// the frequent item sets and writes them to an output file chosen by the user.
        /// </summary>
        /// <remarks>
        /// Item sets of every level from the requested tuple length upwards are written, one per
        /// line, followed by their count in parentheses.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Please enter your text file path");
            String a = Console.ReadLine();

            Console.WriteLine("Please enter support count");
            string b = Console.ReadLine();

            Console.WriteLine("Please enter tuple length");
            String tupp = Console.ReadLine();

            if (!File.Exists(a))
            {
                // Report the problem instead of ending without any output.
                Console.Error.WriteLine("Input file not found: " + a);
                return;
            }

            // Reject non-numeric input up front instead of failing with an unhandled exception.
            int supportCount;
            int targetTuppleLenght;
            if (!int.TryParse(b, out supportCount) || !int.TryParse(tupp, out targetTuppleLenght))
            {
                Console.Error.WriteLine("Support count and tuple length must be whole numbers.");
                return;
            }

            Console.WriteLine("Processing Started.....");
            TransactionSetGenerator ts               = new TransactionSetGenerator();
            GenerateFrequentSet     gfs              = new GenerateFrequentSet();
            List<FrequentItemSet>   fisATlevelk      = new List<FrequentItemSet>();
            List<FrequentItemSet>   finalFrequentSet = new List<FrequentItemSet>();
            int currTuppleLenght = 1;

            // Read the input file and break it at each line end to identify the transactions.
            // The reader is disposed as soon as the content has been read.
            List<string> source;
            using (StreamReader input = File.OpenText(a))
            {
                source = input.ReadToEnd().Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries).ToList();
            }

            // This generates the string-to-integer hash and also gets the count of each individual item.
            // This represents itemSet 'I'
            List<HashMap> hashedUniqueItems = ts.GenerateHashMap(source);

            // This represents itemset 'T', all transactions
            List<TransactionSet> hashedTransactionSet = ts.GenerateHashedTransactionSet(hashedUniqueItems, source);

            // Initial pass, k = 1: the items selected by InitialFrequentItemSetK1 for the given support count.
            List<HashMap> frequentItemAtLevel1 = gfs.InitialFrequentItemSetK1(hashedUniqueItems, supportCount);

            // Level 1 is part of the output only when a tuple length of exactly 1 was requested.
            if (targetTuppleLenght == 1)
            {
                foreach (var c in frequentItemAtLevel1)
                {
                    FrequentItemSet fs = new FrequentItemSet();
                    fs.Values = new List<int> { c.Order };
                    fs.Count  = c.Count;
                    finalFrequentSet.Add(fs);
                }
            }

            // Generate level after level until a level yields no frequent item set.
            while (true)
            {
                currTuppleLenght++;

                GenerateFrequentSet cg = new GenerateFrequentSet();

                // k = 2 starts from the single items, every later level from the previous level's result.
                fisATlevelk = currTuppleLenght == 2
                              ? cg.GenerateFrequentItemSet(frequentItemAtLevel1, hashedTransactionSet, currTuppleLenght, supportCount)
                              : cg.GenerateFrequentItemSet(fisATlevelk, hashedTransactionSet, currTuppleLenght, supportCount);

                if (fisATlevelk.Count == 0)
                {
                    break;
                }

                if (currTuppleLenght >= targetTuppleLenght)
                {
                    finalFrequentSet.AddRange(fisATlevelk);
                }
            }

            // this section takes the final frequent set and writes it to a text file
            StringBuilder fileContents = new StringBuilder();
            foreach (var c in finalFrequentSet)
            {
                StringBuilder final = new StringBuilder();
                foreach (var str in c.Values)
                {
                    // Translate the integer back to the original item string.
                    string orderValue = hashedUniqueItems.Where(h => h.Order == str).Select(h => h.UniqueString).FirstOrDefault();
                    final.Append(orderValue).Append(" ");
                }
                final.Append("(").Append(c.Count).Append(")");
                fileContents.AppendLine(final.ToString());
            }

            Console.WriteLine("Please enter your output text file path");
            String outputpath = Console.ReadLine();

            // 'using' closes the file even when writing fails.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter(outputpath))
            {
                file.WriteLine(fileContents);
            }
        }
Exemple #7
0
        /// <summary>
        /// Mines frequent item sets using bitmap-based support counting, derives decision rules from
        /// the last level that produced frequent sets and optionally prints them to the console.
        /// </summary>
        /// <param name="executionSettings">Supplies the data source settings, <c>MinSup</c> and <c>MinConf</c>.</param>
        /// <param name="printRules">When <c>false</c>, the method returns without printing the rules.</param>
        public override void Run(ExecutionSettings executionSettings, bool printRules)
        {
            builder = new MsDataBuilder();
            var data                = builder.BuildInstance(executionSettings);
            var elementsList        = data.Elements.Keys.ToList();
            var transactionsList    = data.Transactions.Keys.ToList();
            var bitmapWrapper       = PrepareBitmapWrapper(data, elementsList, transactionsList);
            var elementsFrequencies = CalculateElementsFrequencies(bitmapWrapper);

            // Index lookups replace the repeated List.IndexOf scans inside the loops below.
            var elementPositions = BuildPositionLookup(elementsList);
            var minCount         = executionSettings.MinSup * transactionsList.Count;

            var frequentSets = elementsList
                               .Where(e => elementsFrequencies[elementPositions[e]] >= minCount)
                               .Select(element => new List<int> { element })
                               .ToList();
            var frequentItemSets = frequentSets.ToDictionary(set => new FrequentItemSet<int>(set),
                                                             set => elementsFrequencies[elementPositions[set[0]]]);
            List<List<int>> candidates;

            if (frequentSets.Count == 0)
            {
                return;
            }

            // Build a second bitmap that only holds the frequent elements: for the jj-th frequent
            // element, pixel (elementIndex, i) of the original is copied to pixel (i, jj).
            // NOTE(review): neither bitmap is disposed here -- confirm who owns them.
            var bitmapTransposed = new Bitmap(transactionsList.Count, frequentSets.Count);
            var newElementsList  = new List<int>(frequentSets.Count);
            var jj = 0;

            foreach (var set in frequentSets)
            {
                newElementsList.Add(set[0]);
                var sourceIndex = elementPositions[set[0]];

                for (var i = 0; i < transactionsList.Count; i++)
                {
                    var pixel = bitmapWrapper.Bitmap.GetPixel(sourceIndex, i);

                    bitmapTransposed.SetPixel(i, jj, pixel);
                }
                jj++;
            }
            var newBitmapWrapper    = BitmapWrapper.ConvertBitmap(bitmapTransposed);
            var newElementPositions = BuildPositionLookup(newElementsList);

            while ((candidates = GenerateCandidates(frequentSets)).Count > 0)
            {
                // 1. translate the elements into their positions in newElementsList
                foreach (var candidate in candidates)
                {
                    for (var i = 0; i < candidate.Count; i++)
                    {
                        // -1 for an unknown element, exactly what List.IndexOf would have produced.
                        int position;
                        candidate[i] = newElementPositions.TryGetValue(candidate[i], out position) ? position : -1;
                    }
                }

                // 2. execute CUDA counting
                candidates = GetFrequentSets(candidates, executionSettings.MinSup, newBitmapWrapper, transactionsList.Count);

                // 3. translate the positions back into elements
                foreach (var candidate in candidates)
                {
                    for (var i = 0; i < candidate.Count; i++)
                    {
                        candidate[i] = newElementsList[candidate[i]];
                    }
                }

                if (candidates.Count == 0)
                {
                    // we don't have any more candidates
                    break;
                }

                frequentSets = candidates;
                foreach (var candidate in candidates)
                {
                    frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupport(data.Transactions));
                }
            }

            // Rules are derived only from the sets of the last level that yielded frequent sets.
            var decisionRules = new List<DecisionRule<int>>();

            foreach (var frequentSet in frequentSets)
            {
                var subSets = EnumerableHelper.GetSubsets(frequentSet);

                foreach (var t in subSets)
                {
                    var leftSide = new FrequentItemSet<int>(t);
                    for (var j = 0; j < subSets.Count; j++)
                    {
                        var rightSide = new FrequentItemSet<int>(subSets[j]);
                        if (rightSide.ItemSet.Count != 1 || !FrequentItemSet<int>.SetsSeparated(rightSide, leftSide))
                        {
                            continue;
                        }

                        if (!frequentItemSets.ContainsKey(leftSide))
                        {
                            continue;
                        }

                        // Look the support of the whole set up once and reuse it for the rule.
                        // NOTE(review): this support is used even when the left and right sides
                        // together do not cover the whole set -- confirm this is intended.
                        var setSupport = frequentItemSets[new FrequentItemSet<int>(frequentSet)];
                        var confidence = (double)setSupport / frequentItemSets[leftSide];
                        if (confidence >= executionSettings.MinConf)
                        {
                            var rule = new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence);
                            decisionRules.Add(rule);
                        }
                    }
                }
            }

            if (!printRules)
            {
                return;
            }

            var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);

            Console.WriteLine(result);
        }

        /// <summary>
        /// Maps every value of <paramref name="items"/> to the index of its first occurrence,
        /// i.e. the index <c>List.IndexOf</c> returns for that value.
        /// </summary>
        private static Dictionary<int, int> BuildPositionLookup(List<int> items)
        {
            var positions = new Dictionary<int, int>(items.Count);
            for (var i = 0; i < items.Count; i++)
            {
                if (!positions.ContainsKey(items[i]))
                {
                    positions.Add(items[i], i);
                }
            }
            return positions;
        }
Exemple #8
0
        /// <summary>
        /// Mines frequent item sets level by level, derives decision rules from the last level that
        /// produced frequent sets and prints them to the console.
        /// </summary>
        /// <param name="executionSettings">Supplies the data source settings, <c>MinSup</c> and <c>MinConf</c>.</param>
        public override void Run(ExecutionSettings executionSettings)
        {
            builder = new MsDataBuilder();
            var data = builder.BuildInstance(executionSettings);

            // Level 1: one single-item set per element, reduced to the frequent ones.
            var frequentSets = data.Elements.Keys
                .Select(element => new List<int> { element })
                .ToList()
                .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
                .ToList();

            var frequentItemSets = frequentSets.ToDictionary(
                set => new FrequentItemSet<int>(set),
                set => set.GetSupport(data.Transactions));

            List<List<int>> candidates;
            while ((candidates = GenerateCandidates(frequentSets)).Count > 0)
            {
                // TODO: check whether any (k-1)-element subset of a candidate is missing from
                // frequentSets => discard that candidate!

                // keep only the candidates that are frequent
                var frequentCandidates = candidates
                    .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
                    .ToList();

                if (frequentCandidates.Count <= 0)
                {
                    // no candidate survived, so the previous level stays the last one
                    break;
                }

                frequentSets = frequentCandidates;
                foreach (var survivor in frequentCandidates)
                {
                    frequentItemSets.Add(new FrequentItemSet<int>(survivor), survivor.GetSupport(data.Transactions));
                }
            }

            // Rules are derived only from the sets of the last level that yielded frequent sets.
            var rules = new List<DecisionRule<int>>();

            foreach (var frequentSet in frequentSets)
            {
                var subsets = EnumerableHelper.GetSubsets(frequentSet);

                foreach (var leftItems in subsets)
                {
                    var leftSide = new FrequentItemSet<int>(leftItems);

                    foreach (var rightItems in subsets)
                    {
                        var rightSide = new FrequentItemSet<int>(rightItems);

                        // Only single-item right sides that are separated from the left side qualify.
                        var qualifies = rightSide.ItemSet.Count == 1 && FrequentItemSet<int>.SetsSeparated(rightSide, leftSide);
                        if (!qualifies || !frequentItemSets.ContainsKey(leftSide))
                        {
                            continue;
                        }

                        // NOTE(review): the support of the whole set is used even when the left and
                        // right sides together do not cover it -- confirm this is intended.
                        var setSupport = frequentItemSets[new FrequentItemSet<int>(frequentSet)];
                        var confidence = (double)setSupport / frequentItemSets[leftSide];
                        if (confidence < executionSettings.MinConf)
                        {
                            continue;
                        }

                        rules.Add(new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence));
                    }
                }
            }

            var result = PrintRules(rules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);
            Console.WriteLine(result);
        }