/// <summary>
/// Looks up the entry of <c>dualStruct</c> addressed by the first two items
/// (positions 0 and 1) of the given itemset.
/// </summary>
/// <param name="i">Itemset whose items at positions 0 and 1 form the lookup key.</param>
/// <returns>The value stored in <c>dualStruct</c> under that pair of items.</returns>
public ISimpleItemset GetItemset(Itemset i)
{
    return dualStruct[i.GetItem(0), i.GetItem(1)];
}
/// <summary>
/// Recursively extends <paramref name="head"/> with the items of <paramref name="tail"/>.
/// Every item of an itemset carries a group number (<c>itemGroup</c>); a candidate item is tried
/// first as a member of the head's last group and then as the first member of the next group.
/// </summary>
/// <param name="head">Itemset built so far; an empty head is seeded with each tail item in group 1.</param>
/// <param name="tail">
/// Candidate items. Modified in place: a candidate whose transaction count falls below
/// <paramref name="support"/> is removed from this list.
/// </param>
/// <param name="support">Minimum number of transactions (<c>CountElements()</c>) an extension must reach.</param>
/// <param name="minLength">Minimum item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">Recursion stops once the head holds this many items.</param>
/// <param name="maxGroupLength">A group is not extended once the head's <c>groupLength</c> reaches this value.</param>
/// <param name="mineResult">Collector that receives the accepted itemsets.</param>
void RecurseMining(Itemset head, List<int> tail, int support, int minLength, int maxLength, int maxGroupLength, MineResults mineResult)
{
    if (head.Count >= maxLength)
    {
        return;
    }

    for (int tailIndex = 0; tailIndex < tail.Count; tailIndex++)
    {
        int candidate = tail[tailIndex];

        if (head.Count == 0)
        {
            // Empty head: start a new itemset made of the candidate alone, in group 1.
            List<int> seedTail = new List<int>(tail);
            seedTail.RemoveAt(tailIndex);

            Itemset seed = new Itemset();
            seed.AddItem(candidate);
            seed.itemGroup[seed.Count - 1] = 1;
            seed.groupLength = 1;

            RecurseMining(seed, seedTail, support, minLength, maxLength, maxGroupLength, mineResult);
            continue;
        }

        int lastGroup = head.itemGroup[head.Count - 1];

        // Offset 0 keeps the candidate in the head's last group, offset 1 opens the next group.
        for (int groupOffset = 0; groupOffset < 2; groupOffset++)
        {
            bool joinsLastGroup = groupOffset == 0;

            // Within the same group: skip candidates smaller than the head's last item
            // (avoids adding the same item to the same group twice) and skip when the
            // group has already reached its maximum length.
            if (joinsLastGroup
                && ((candidate < head.GetItem(head.Count - 1)) || (head.groupLength >= maxGroupLength)))
            {
                continue;
            }

            int targetGroup = lastGroup + groupOffset;
            int precedingGroup = targetGroup - 1;

            // Intersect the pair transactions of the candidate with every head item
            // that belongs to the group just before the target group.
            FastSparseBitArray transactions = null;
            for (int headIndex = 0; headIndex < head.Count; headIndex++)
            {
                if (head.itemGroup[headIndex] != precedingGroup)
                {
                    continue;
                }

                FastSparseBitArray pairTransactions =
                    _dualComp.GetItemset(head.GetItem(headIndex), candidate).GetTransactions();
                transactions = (transactions == null)
                    ? pairTransactions
                    : transactions.And(pairTransactions);
            }

            if (lastGroup > 1)
            {
                transactions = transactions.And(head.GetTransactions());
            }

            // No bit array at all counts as valid; otherwise the element count must reach the threshold.
            bool hasEnoughSupport = (transactions == null) || (transactions.CountElements() >= support);
            if (!hasEnoughSupport)
            {
                // Drop the candidate from the tail and move on to the next tail item.
                tail.RemoveAt(tailIndex);
                tailIndex--;
                break;
            }

            List<int> extendedTail = new List<int>(tail);
            extendedTail.RemoveAt(tailIndex);

            Itemset extended = new Itemset(head);
            extended.AddItem(candidate);
            extended.itemGroup[extended.Count - 1] = targetGroup;
            extended.groupLength = joinsLastGroup ? head.groupLength + 1 : 1;
            extended.SetTransactions(transactions);
            if (transactions != null)
            {
                extended.support = transactions.CountElements();
            }

            RecurseMining(extended, extendedTail, support, minLength, maxLength, maxGroupLength, mineResult);

            // Record the itemset only when it is long enough and holds more items
            // than its current group length.
            if ((extended.Count >= minLength) && (extended.Count > extended.groupLength))
            {
                mineResult.Add(extended);
            }

            // Detach the transaction vector once the subtree below this itemset is done.
            extended.SetTransactions(null);
        }
    }
}
/// <summary>
/// Mine with skip-errors. For every item in <paramref name="tail"/> a <c>MistakesBitMask</c> is built
/// from the pair transactions of the candidate with the head's last item and with the head's ancestors
/// (reached through <c>GetParent()</c>). Candidates whose support is below
/// <paramref name="minSupport"/> are removed from the tail; the survivors are then used to extend
/// the head recursively.
/// </summary>
/// <param name="head">Itemset built so far.</param>
/// <param name="tail">Candidate items. Modified in place: candidates below the support threshold are removed.</param>
/// <param name="minSupport">Minimum support a candidate must reach to be kept.</param>
/// <param name="minLength">Minimum item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">Recursion stops once the head holds this many items.</param>
/// <param name="maxMistakes">Highest index used in the <c>mistakes</c> array of each bit mask.</param>
/// <param name="mineResult">Collector that receives the accepted itemsets.</param>
void RecurseMining(Itemset head, List<int> tail, int minSupport, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
{
    if (head.Count >= maxLength)
    {
        return;
    }

    // Parallel lists: entry k belongs to tail[k] once the filtering pass below has finished,
    // because every rejected candidate is removed from the tail and nothing is appended for it.
    List<FastSparseBitArray> keptSupportVectors = new List<FastSparseBitArray>();
    List<MistakesBitMask> keptMasks = new List<MistakesBitMask>();
    List<int> keptSupports = new List<int>();

    int tailIndex = 0;
    while (tailIndex < tail.Count)
    {
        int candidate = tail[tailIndex];

        MistakesBitMask candidateMask = new MistakesBitMask(maxMistakes);
        FastSparseBitArray pairWithLast = _dualComp.GetItemset(head.GetLastItem(), candidate).GetTransactions();
        candidateMask.mistakes[0] = pairWithLast;

        if (head.Count > 1)
        {
            // This lookup's result is not used anywhere below; the call itself is kept as-is.
            _dualComp.GetItemset(head.GetItem(head.Count - 2), candidate).GetTransactions();

            MistakesBitMask headMask = head.GetMistakes();
            candidateMask.mistakes[0] = candidateMask.mistakes[0].And(headMask.mistakes[0]);

            // Build the remaining entries of the mistakes array.
            for (int level = 1; level < maxMistakes + 1; level++)
            {
                if (headMask.mistakes[level] == null)
                {
                    // The head has no vector at this level: union the candidate's pair
                    // transactions with those of every ancestor, then stop filling levels.
                    FastSparseBitArray unionOverAncestors = pairWithLast;
                    Itemset ancestor = head;
                    while (ancestor.GetParent() != null)
                    {
                        ancestor = ancestor.GetParent();
                        unionOverAncestors = unionOverAncestors.Or(
                            _dualComp.GetItemset(ancestor.GetLastItem(), candidate).GetTransactions());
                    }

                    candidateMask.mistakes[level] = unionOverAncestors;
                    break;
                }

                // Start from the head's vector at this level restricted to the candidate's pair
                // transactions, then OR in, for each ancestor, its vector at a correspondingly
                // lower level restricted to the ancestor/candidate pair transactions.
                FastSparseBitArray levelVector = pairWithLast.And(headMask.mistakes[level]);
                int stepsUp = 1;
                Itemset climber = head.GetParent();
                while ((climber != null) && (level - stepsUp >= 0))
                {
                    FastSparseBitArray pairWithAncestor =
                        _dualComp.GetItemset(climber.GetLastItem(), candidate).GetTransactions();
                    levelVector = levelVector.Or(
                        climber.GetMistakes().mistakes[level - stepsUp].And(pairWithAncestor));
                    climber = climber.GetParent();
                    stepsUp++;
                }

                candidateMask.mistakes[level] = levelVector;
            }
        }

        // Calculate the support. It stays at this sentinel value unless a vector exists at
        // index maxMistakes and the upward walk below ends on a non-null itemset.
        int support = 999999;

        // The support vector is the union of the vector at index maxMistakes with vectors taken
        // from the head and its ancestors at decreasing indices (backtracking the DFS path).
        FastSparseBitArray supportVector = candidateMask.mistakes[maxMistakes];
        if (supportVector != null)
        {
            Itemset walker = head;
            for (int upward = 0; (upward < maxMistakes) && (walker != null); upward++)
            {
                supportVector = supportVector.Or(walker.GetMistakes().mistakes[maxMistakes - upward - 1]);
                walker = walker.GetParent();
            }

            if (walker != null)
            {
                support = supportVector.CountElements();
            }
        }

        if (support >= minSupport)
        {
            keptSupportVectors.Add(supportVector);
            keptMasks.Add(candidateMask);
            keptSupports.Add(support);
            tailIndex++;
        }
        else
        {
            // Remove the item: it will not be a member of any later extension.
            // The index is not advanced because the next item slides into this slot.
            tail.RemoveAt(tailIndex);
        }
    }

    // Recurse over the surviving candidates.
    if (head.Count <= 0)
    {
        return;
    }

    for (int survivorIndex = 0; survivorIndex < tail.Count; survivorIndex++)
    {
        int survivor = tail[survivorIndex];

        List<int> remainingTail = new List<int>(tail);
        remainingTail.RemoveAt(survivorIndex);

        Itemset extended = new Itemset(head);
        extended.AddItem(survivor);
        extended.support = keptSupports[survivorIndex];
        extended.SetTransactions(keptSupportVectors[survivorIndex]);
        extended.SetMistakes(keptMasks[survivorIndex]);
        extended.SetParent(head);

        RecurseMining(extended, remainingTail, minSupport, minLength, maxLength, maxMistakes, mineResult);

        if (extended.Count >= minLength)
        {
            mineResult.Add(extended);
        }

        // Detach the links and vectors once the subtree below this itemset is done.
        extended.SetParent(null);
        extended.SetMistakes(null);
        extended.SetTransactions(null);
    }
}