/// <summary>
/// Generates the frequent item sets for k = 3 and above.
/// Candidates come from <c>GenerateCandidateSet.GenerateCandidate</c>; a candidate is kept
/// when the count returned by <c>GetSupportCount</c> is at least <paramref name="support"/>.
/// </summary>
/// <param name="frequentItem">Frequent item sets handed to the candidate generator.</param>
/// <param name="hashedTransactionSet">Transactions used for candidate generation and support counting.</param>
/// <param name="currentTupple">Current tuple size (k).</param>
/// <param name="support">Minimum support count a candidate must reach.</param>
/// <returns>One <c>FrequentItemSet</c> per accepted candidate, with its values copied and sorted.</returns>
public List<FrequentItemSet> GenerateFrequentItemSet(List<FrequentItemSet> frequentItem, List<TransactionSet> hashedTransactionSet, int currentTupple, int support)
{
    var accepted = new List<FrequentItemSet>();
    var generator = new GenerateCandidateSet();
    List<CandidateItemSet> candidates = generator.GenerateCandidate(frequentItem, hashedTransactionSet, currentTupple, support);

    // Nothing was generated for this level: return the empty result.
    if (!candidates.Any())
    {
        return accepted;
    }

    foreach (var candidate in candidates)
    {
        int occurrences = GetSupportCount(candidate, hashedTransactionSet, currentTupple);
        if (occurrences < support)
        {
            continue;
        }

        // Copy the candidate's values so sorting does not touch the candidate itself.
        var sortedValues = new List<int>();
        sortedValues.AddRange(candidate.Values);
        sortedValues.Sort();

        accepted.Add(new FrequentItemSet { Values = sortedValues, Count = occurrences });
    }

    return accepted;
}
/// <summary>
/// Fp-growth procedure for the specified tree and the conditional base pattern.
/// Every discovered item set is added to <c>FrequentItemSets</c> together with its support.
/// </summary>
/// <param name="tree">FP-tree</param>
/// <param name="alpha">The conditional base pattern</param>
private void FpGrowth(FpTree<T> tree, IEnumerable<T> alpha)
{
    if (tree.HasSinglePath())
    {
        var path = tree.GetSinglePath();
        var elements = path.Select(node => node.Value).ToList();
        var values = EnumerableHelper.GetSubsets(elements);

        // The smallest transaction counter on the path does not depend on the subset
        // being processed, so it is computed once instead of once per subset.
        // NOTE(review): every subset receives the minimum over the WHOLE path, not over
        // the nodes that make up that subset - confirm this is the intended support.
        var support = int.MaxValue;
        foreach (var node in path)
        {
            if (node.TransactionCounter < support)
            {
                support = node.TransactionCounter;
            }
        }

        foreach (var list in values)
        {
            // Skip the empty subset.
            if (list.Count <= 0)
            {
                continue;
            }

            var itemSet = new List<T>(list);
            itemSet.AddRange(alpha);
            FrequentItemSets.Add(new FrequentItemSet<T>(itemSet), support);
        }

        return;
    }

    var sortedFrequencyList = tree.GetFrequencyList();
    foreach (var list in sortedFrequencyList)
    {
        foreach (var value in list.Value)
        {
            // beta = value followed by the current pattern alpha.
            var beta = new List<T>(alpha);
            beta.Insert(0, value);

            var patternBase = tree.GetCondPatternBase(value);
            FrequentItemSets.Add(new FrequentItemSet<T>(beta), tree.GetSupport(value));

            var baseGenerator = new CondPatternBaseGenerator<T>();
            var database = baseGenerator.Generate(patternBase, _minSupport);
            if (database.Count > 0)
            {
                // Recurse into the conditional tree built from the generated database.
                var newTree = new FpTree<T>();
                newTree.BuildConditionalTree(database);
                FpGrowth(newTree, beta);
            }
        }
    }
}
/// <summary>
/// Generates the frequent item sets for k = 2. The logic for this level differs from the
/// higher levels because no subset check is needed: every item set returned by
/// <c>GenerateCandidateSet.GenerateCandidate</c> is converted directly.
/// </summary>
/// <param name="hashitems">Hashed items handed to the candidate generator.</param>
/// <param name="hashedTransactionSet">Transactions handed to the candidate generator.</param>
/// <param name="currentTupple">Current tuple size (k).</param>
/// <param name="support">Support count handed to the candidate generator.</param>
/// <returns>One <c>FrequentItemSet</c> per generated item set, with sorted values.</returns>
public List<FrequentItemSet> GenerateFrequentItemSet(List<HashMap> hashitems, List<TransactionSet> hashedTransactionSet, int currentTupple, int support)
{
    var generator = new GenerateCandidateSet();
    List<CandidateItemSet> generated = generator.GenerateCandidate(hashitems, hashedTransactionSet, currentTupple, support);

    var levelSets = new List<FrequentItemSet>();
    foreach (var candidate in generated)
    {
        // Work on a copy so sorting leaves the generated item set untouched.
        var sortedValues = new List<int>();
        sortedValues.AddRange(candidate.Values);
        sortedValues.Sort();

        levelSets.Add(new FrequentItemSet
        {
            Values = sortedValues,
            Count = candidate.Count,
            TransactionID = candidate.TransactionID
        });
    }

    return levelSets;
}
/// <summary>
/// Generates the decision rules based on the specified FP-tree and the minimal confidence.
/// Runs <c>FpGrowth</c> on the tree first, then derives rules from every recorded item set
/// that has at least two items.
/// </summary>
/// <param name="tree">FP-tree</param>
/// <param name="minConfidence">The minimal confidence</param>
/// <returns>The rules whose confidence is at least <paramref name="minConfidence"/>.</returns>
public List<DecisionRule<T>> GenerateRuleSet(FpTree<T> tree, double minConfidence)
{
    FpGrowth(tree, new List<T>());

    var rules = new List<DecisionRule<T>>();
    foreach (var itemSet in FrequentItemSets.Keys)
    {
        if (itemSet.ItemSet.Count < 2)
        {
            continue;
        }

        var subSets = EnumerableHelper.GetSubsets(itemSet.ItemSet);
        foreach (var leftItems in subSets)
        {
            var leftSide = new FrequentItemSet<T>(leftItems);
            foreach (var rightItems in subSets)
            {
                var rightSide = new FrequentItemSet<T>(rightItems);

                // The right side must be a single item that is separated from the left side,
                // and the left side must have a recorded support.
                var usablePair = rightSide.ItemSet.Count == 1
                                 && FrequentItemSet<T>.SetsSeparated(rightSide, leftSide);
                if (!usablePair || !FrequentItemSets.ContainsKey(leftSide))
                {
                    continue;
                }

                var itemSetSupport = FrequentItemSets[itemSet];
                var confidence = (double)itemSetSupport / FrequentItemSets[leftSide];
                if (confidence >= minConfidence)
                {
                    rules.Add(new DecisionRule<T>(leftSide.ItemSet, rightSide.ItemSet, itemSetSupport, confidence));
                }
            }
        }
    }

    return rules;
}
/// <summary>
/// Builds the data instance, grows frequent item sets level by level using the parallel
/// helpers, derives decision rules from the last non-empty level and optionally prints them.
/// </summary>
/// <param name="executionSettings">Settings supplying the data source, MinSup and MinConf.</param>
/// <param name="printRules">When false, the method returns without printing the rules.</param>
public override void Run(ExecutionSettings executionSettings, bool printRules)
{
    builder = new MsDataBuilder();
    var data = builder.BuildInstance(executionSettings);

    // Level 1: one single-element set per element, filtered by frequency.
    var singletons = data.Elements.Keys
        .Select(element => new List<int> { element })
        .AsParallel()
        .ToList();
    var frequentSets = singletons
        .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
        .AsParallel()
        .ToList();

    // Support of every frequent set found so far.
    var frequentItemSets = frequentSets
        .AsParallel()
        .ToDictionary(set => new FrequentItemSet<int>(set), set => set.GetSupport(data.Transactions));

    while (true)
    {
        List<List<int>> candidates = GenerateCandidates(frequentSets);
        if (candidates.Count <= 0)
        {
            break;
        }

        // TODO (original note, translated): check whether any (k-1)-element subset of a
        // candidate is missing from frequentSets => discard that candidate!
        // Leave only the sets which are frequent.
        candidates = candidates
            .Where(set => set.IsFrequentParallel(data.Transactions, executionSettings.MinSup))
            .AsParallel()
            .ToList();
        if (candidates.Count <= 0)
        {
            // We don't have any more candidates.
            break;
        }

        frequentSets = candidates;
        foreach (var candidate in candidates)
        {
            frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupportParallel(data.Transactions));
        }
    }

    // Rule generation from the sets of the last non-empty level.
    var decisionRules = new List<DecisionRule<int>>();
    foreach (var frequentSet in frequentSets)
    {
        var subSets = EnumerableHelper.GetSubsets(frequentSet);
        foreach (var leftItems in subSets)
        {
            var leftSide = new FrequentItemSet<int>(leftItems);
            foreach (var rightItems in subSets)
            {
                var rightSide = new FrequentItemSet<int>(rightItems);

                // The right side must be a single item separated from the left side,
                // and the left side needs a recorded support.
                var usablePair = rightSide.ItemSet.Count == 1
                                 && FrequentItemSet<int>.SetsSeparated(rightSide, leftSide);
                if (!usablePair || !frequentItemSets.ContainsKey(leftSide))
                {
                    continue;
                }

                var setSupport = frequentItemSets[new FrequentItemSet<int>(frequentSet)];
                var confidence = (double)setSupport / frequentItemSets[leftSide];
                if (confidence >= executionSettings.MinConf)
                {
                    decisionRules.Add(new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence));
                }
            }
        }
    }

    if (printRules)
    {
        var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);
        Console.WriteLine(result);
    }
}
/// <summary>
/// Console entry point. Prompts for an input file path, a support count and a tuple length,
/// computes the frequent item sets level by level, then prompts for an output path and
/// writes one line per item set in the form "item item ... (count)".
/// Nothing is processed when the input file does not exist.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter your text file path");
    String inputPath = Console.ReadLine();
    Console.WriteLine("Please enter support count");
    string supportText = Console.ReadLine();
    Console.WriteLine("Please enter tuple length");
    String tupleText = Console.ReadLine();

    if (!File.Exists(inputPath))
    {
        return;
    }

    Console.WriteLine("Processing Started.....");
    TransactionSetGenerator ts = new TransactionSetGenerator();
    GenerateFrequentSet gfs = new GenerateFrequentSet();
    List<FrequentItemSet> fisATlevelk = new List<FrequentItemSet>();
    List<FrequentItemSet> finalFrequentSet = new List<FrequentItemSet>();
    int supportCount = Convert.ToInt32(supportText);
    int targetTuppleLenght = Convert.ToInt32(tupleText);

    // Read the input file and break it at each line end to identify the transactions.
    // The reader is wrapped in a using statement so the file handle is always released.
    List<string> source;
    using (StreamReader input = File.OpenText(inputPath))
    {
        source = input.ReadToEnd()
            .Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
            .ToList();
    }

    // Generates the string-to-integer hash and the count of each individual item.
    // This represents item set 'I'.
    List<HashMap> hashedUniqueItems = ts.GenerateHashMap(source);
    // This represents item set 'T': all transactions.
    List<TransactionSet> hashedTransactionSet = ts.GenerateHashedTransactionSet(hashedUniqueItems, source);
    // Initial pass, K = 1: gives the items whose count passes the support count.
    List<HashMap> frequentItemAtLevel1 = gfs.InitialFrequentItemSetK1(hashedUniqueItems, supportCount);

    // Level-1 sets are reported only when a tuple length of exactly 1 was requested.
    if (targetTuppleLenght == 1)
    {
        foreach (var item in frequentItemAtLevel1)
        {
            finalFrequentSet.Add(new FrequentItemSet
            {
                Values = new List<int> { item.Order },
                Count = item.Count
            });
        }
    }

    // Loop until a level produces no frequent item sets; every non-empty level whose
    // size is at least the requested tuple length is collected.
    for (int currTuppleLenght = 2; ; currTuppleLenght++)
    {
        GenerateFrequentSet cg = new GenerateFrequentSet();
        // Level 2 is built from the level-1 items; higher levels from the previous level.
        fisATlevelk = currTuppleLenght == 2
            ? cg.GenerateFrequentItemSet(frequentItemAtLevel1, hashedTransactionSet, currTuppleLenght, supportCount)
            : cg.GenerateFrequentItemSet(fisATlevelk, hashedTransactionSet, currTuppleLenght, supportCount);

        if (fisATlevelk.Count == 0)
        {
            break;
        }

        if (currTuppleLenght >= targetTuppleLenght)
        {
            finalFrequentSet.AddRange(fisATlevelk);
        }
    }

    // Take the final frequent set and render it as text, mapping each hashed value
    // back to its original string.
    StringBuilder fileContents = new StringBuilder();
    foreach (var itemSet in finalFrequentSet)
    {
        StringBuilder line = new StringBuilder();
        foreach (var order in itemSet.Values)
        {
            string orderValue = hashedUniqueItems
                .Where(h => h.Order == order)
                .Select(h => h.UniqueString)
                .FirstOrDefault();
            line.Append(orderValue).Append(" ");
        }

        line.Append("(").Append(itemSet.Count).Append(")");
        fileContents.AppendLine(line.ToString());
    }

    Console.WriteLine("Please enter your output text file path");
    String outputpath = Console.ReadLine();

    // The using statement disposes (and flushes) the writer even if the write throws.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(outputpath))
    {
        file.WriteLine(fileContents);
    }
}
/// <summary>
/// Builds the data instance, computes single-element frequencies from the bitmap wrapper,
/// grows frequent item sets level by level via <c>GetFrequentSets</c>, derives decision rules
/// from the last non-empty level and optionally prints them.
/// Returns early, without printing, when no single element is frequent.
/// </summary>
/// <param name="executionSettings">Settings supplying the data source, MinSup and MinConf.</param>
/// <param name="printRules">When false, the method returns without printing the rules.</param>
public override void Run(ExecutionSettings executionSettings, bool printRules)
{
    builder = new MsDataBuilder();
    var data = builder.BuildInstance(executionSettings);
    var elementsList = data.Elements.Keys.ToList();
    var transactionsList = data.Transactions.Keys.ToList();
    var bitmapWrapper = PrepareBitmapWrapper(data, elementsList, transactionsList);
    var elementsFrequencies = CalculateElementsFrequencies(bitmapWrapper);

    // Level 1: elements whose frequency reaches MinSup * number of transactions.
    var frequentSets = elementsList
        .Where(e => elementsFrequencies[elementsList.IndexOf(e)] >= executionSettings.MinSup * transactionsList.Count)
        .Select(element => new List<int> { element })
        .ToList();
    var frequentItemSets = frequentSets.ToDictionary(
        set => new FrequentItemSet<int>(set),
        set => elementsFrequencies[elementsList.IndexOf(set[0])]);

    if (frequentSets.Count == 0)
    {
        return;
    }

    // Copy the pixels of the frequent elements into a new bitmap with swapped coordinates:
    // x = transaction index, y = position of the element among the frequent ones.
    var bitmapTransposed = new Bitmap(transactionsList.Count, frequentSets.Count);
    var newElementsList = new List<int>(frequentSets.Count);
    for (var row = 0; row < frequentSets.Count; row++)
    {
        var element = frequentSets[row][0];
        newElementsList.Add(element);

        // The element's position in the original bitmap is the same for every transaction,
        // so the linear IndexOf search runs once per element instead of once per pixel.
        var sourceColumn = elementsList.IndexOf(element);
        for (var i = 0; i < transactionsList.Count; i++)
        {
            var pixel = bitmapWrapper.Bitmap.GetPixel(sourceColumn, i);
            bitmapTransposed.SetPixel(i, row, pixel);
        }
    }

    var newBitmapWrapper = BitmapWrapper.ConvertBitmap(bitmapTransposed);

    while (true)
    {
        List<List<int>> candidates = GenerateCandidates(frequentSets);
        if (candidates.Count <= 0)
        {
            break;
        }

        // 1. Translate element values into their positions in newElementsList (in place).
        foreach (var candidate in candidates)
        {
            for (var i = 0; i < candidate.Count; i++)
            {
                candidate[i] = newElementsList.IndexOf(candidate[i]);
            }
        }

        // 2. Execute CUDA counting.
        candidates = GetFrequentSets(candidates, executionSettings.MinSup, newBitmapWrapper, transactionsList.Count);

        // 3. Translate the positions back into element values (in place).
        foreach (var candidate in candidates)
        {
            for (var i = 0; i < candidate.Count; i++)
            {
                candidate[i] = newElementsList[candidate[i]];
            }
        }

        if (candidates.Count <= 0)
        {
            // We don't have any more candidates.
            break;
        }

        frequentSets = candidates;
        foreach (var candidate in candidates)
        {
            frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupport(data.Transactions));
        }
    }

    // Rule generation from the sets of the last non-empty level.
    var decisionRules = new List<DecisionRule<int>>();
    foreach (var frequentSet in frequentSets)
    {
        var subSets = EnumerableHelper.GetSubsets(frequentSet);
        foreach (var t in subSets)
        {
            var leftSide = new FrequentItemSet<int>(t);
            for (var j = 0; j < subSets.Count; j++)
            {
                var rightSide = new FrequentItemSet<int>(subSets[j]);

                // The right side must be a single item separated from the left side.
                if (rightSide.ItemSet.Count != 1 || !FrequentItemSet<int>.SetsSeparated(rightSide, leftSide))
                {
                    continue;
                }

                if (!frequentItemSets.ContainsKey(leftSide))
                {
                    continue;
                }

                var setSupport = frequentItemSets[new FrequentItemSet<int>(frequentSet)];
                var confidence = (double)setSupport / frequentItemSets[leftSide];
                if (confidence >= executionSettings.MinConf)
                {
                    decisionRules.Add(new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence));
                }
            }
        }
    }

    if (!printRules)
    {
        return;
    }

    var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);
    Console.WriteLine(result);
}
/// <summary>
/// Builds the data instance, grows frequent item sets level by level, derives decision
/// rules from the last non-empty level and prints the result of <c>PrintRules</c>.
/// </summary>
/// <param name="executionSettings">Settings supplying the data source, MinSup and MinConf.</param>
public override void Run(ExecutionSettings executionSettings)
{
    builder = new MsDataBuilder();
    var data = builder.BuildInstance(executionSettings);

    // Level 1: one single-element set per element, filtered by frequency.
    var singletons = data.Elements.Keys
        .Select(element => new List<int> { element })
        .ToList();
    var frequentSets = singletons
        .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
        .ToList();

    // Support of every frequent set found so far.
    var frequentItemSets = frequentSets.ToDictionary(
        set => new FrequentItemSet<int>(set),
        set => set.GetSupport(data.Transactions));

    while (true)
    {
        List<List<int>> candidates = GenerateCandidates(frequentSets);
        if (candidates.Count <= 0)
        {
            break;
        }

        // TODO (original note, translated): check whether any (k-1)-element subset of a
        // candidate is missing from frequentSets => discard that candidate!
        // Leave only the sets which are frequent.
        candidates = candidates
            .Where(set => set.IsFrequent(data.Transactions, executionSettings.MinSup))
            .ToList();
        if (candidates.Count <= 0)
        {
            // We don't have any more candidates.
            break;
        }

        frequentSets = candidates;
        foreach (var candidate in candidates)
        {
            frequentItemSets.Add(new FrequentItemSet<int>(candidate), candidate.GetSupport(data.Transactions));
        }
    }

    // Rule generation from the sets of the last non-empty level.
    var decisionRules = new List<DecisionRule<int>>();
    foreach (var frequentSet in frequentSets)
    {
        var subSets = EnumerableHelper.GetSubsets(frequentSet);
        foreach (var leftItems in subSets)
        {
            var leftSide = new FrequentItemSet<int>(leftItems);
            foreach (var rightItems in subSets)
            {
                var rightSide = new FrequentItemSet<int>(rightItems);

                // The right side must be a single item separated from the left side,
                // and the left side needs a recorded support.
                var usablePair = rightSide.ItemSet.Count == 1
                                 && FrequentItemSet<int>.SetsSeparated(rightSide, leftSide);
                if (!usablePair || !frequentItemSets.ContainsKey(leftSide))
                {
                    continue;
                }

                var setSupport = frequentItemSets[new FrequentItemSet<int>(frequentSet)];
                var confidence = (double)setSupport / frequentItemSets[leftSide];
                if (confidence >= executionSettings.MinConf)
                {
                    decisionRules.Add(new DecisionRule<int>(leftSide.ItemSet, rightSide.ItemSet, setSupport, confidence));
                }
            }
        }
    }

    var result = PrintRules(decisionRules, executionSettings.DataSourcePath, executionSettings.MinSup, executionSettings.MinConf, data.Transactions.Keys.Count, data.Elements);
    Console.WriteLine(result);
}