/// <summary>
/// Depth-first mining step that grows <paramref name="head"/> by one item of
/// <paramref name="tail"/> at a time. Every added item is attached to one existing head
/// item through the pair itemset stored in <c>_dualComp</c>, and the attachment is recorded
/// in the new head's degree arrays and in its <c>customStringDisplay</c> text.
/// </summary>
/// <param name="head">Itemset built so far. An empty head only seeds single-item heads.</param>
/// <param name="tail">Items still available. This list is not modified; every recursive call
/// receives a copy without the item that was just consumed.</param>
/// <param name="support">Minimal transaction count an extension needs in order to be explored.</param>
/// <param name="minLength">Minimal item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">The method returns immediately once <paramref name="head"/> holds this many items.</param>
/// <param name="mineResult">Collector that receives every qualifying itemset.</param>
void RecurseMining(Itemset head, List<int> tail, int support, int minLength, int maxLength, MineResults mineResult)
{
    if (head.Count >= maxLength)
    {
        return;
    }

    for (int loopTail = 0; loopTail < tail.Count; loopTail++)
    {
        int i = tail[loopTail];

        if (head.Count == 0)
        {
            // Seed step: start a fresh single-item head. No transactions are attached to it
            // and it is not added to mineResult here; only its extensions can be.
            List<int> seedTail = new List<int>(tail);
            seedTail.RemoveAt(loopTail);

            Itemset seedHead = new Itemset();
            seedHead.AddItem(i);

            RecurseMining(seedHead, seedTail, support, minLength, maxLength, mineResult);
            continue;
        }

        for (int loopHead = 0; loopHead < head.Count; loopHead++)
        {
            // The bound of 1 means only direction 0 (head item => i) is ever tried. The
            // reverse direction (i => head item) is kept in the else-branches but is
            // unreachable while the bound stays at 1.
            for (int loopReverseOrder = 0; loopReverseOrder < 1; loopReverseOrder++)
            {
                FastSparseBitArray bitArray;
                if (loopReverseOrder == 0)
                {
                    // This head item already has as many outgoing links as allowed.
                    if (head.outDegree[loopHead] >= MAX_OUT_RANK)
                    {
                        break;
                    }

                    bitArray = _dualComp.GetItemset(head.GetItem(loopHead), i).GetTransactions();
                }
                else
                {
                    // This head item already has as many incoming links as allowed.
                    if (head.inDegree[loopHead] >= MAX_IN_RANK)
                    {
                        break;
                    }

                    bitArray = _dualComp.GetItemset(i, head.GetItem(loopHead)).GetTransactions();
                }

                // A single-item head carries no transactions of its own, so the pair
                // vector is used as-is in that case.
                if (head.Count > 1)
                {
                    bitArray = bitArray.And(head.GetTransactions());
                }

                // Count once; the value serves both the support test and the new head.
                int extensionSupport = bitArray.CountElements();
                if (extensionSupport < support)
                {
                    continue;
                }

                List<int> newTail = new List<int>(tail);
                newTail.RemoveAt(loopTail);

                Itemset newHead = new Itemset(head);
                newHead.AddItem(i);
                newHead.SetTransactions(bitArray);
                newHead.support = extensionSupport;

                if (loopReverseOrder == 0)
                {
                    newHead.inDegree[newHead.Count - 1] = 1;
                    newHead.outDegree[loopHead]++;
                    newHead.customStringDisplay += "" + head.GetItem(loopHead) + "=>" + i + "; ";
                }
                else
                {
                    newHead.inDegree[loopHead]++;
                    newHead.outDegree[newHead.Count - 1] = 1;
                    newHead.customStringDisplay += "" + i + "=>" + head.GetItem(loopHead) + "; ";
                }

                RecurseMining(newHead, newTail, support, minLength, maxLength, mineResult);

                if (newHead.Count >= minLength)
                {
                    mineResult.Add(newHead);
                }

                // The transaction vector is only needed while the subtree is being explored.
                newHead.SetTransactions(null);
            }
        }
    }
}
/// <summary>
/// Layer-aware mining step. Each tail item is scored against <paramref name="head"/> using
/// pair itemsets from <c>_dualComp</c> between items of different layers (the layer of an
/// item is read from <c>_itemsLayers</c>), the surviving items are sorted, and the head is
/// extended with them one after another.
/// </summary>
/// <param name="head">Itemset built so far; its <c>layersItemsSum</c> holds the item count per layer.</param>
/// <param name="tail">Candidate items. The list is only read by this method.</param>
/// <param name="support">Minimal transaction count a candidate needs to be kept.</param>
/// <param name="minLength">Minimal item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">The method returns immediately once <paramref name="head"/> holds this many items.</param>
/// <param name="mineResult">Collector that receives every qualifying itemset.</param>
/// <param name="maxLayerDiff">How many items a layer may hold beyond the currently smallest layer.</param>
void RecurseMining(Itemset head, List<int> tail, int support, int minLength, int maxLength, MineResults mineResult, int maxLayerDiff)
{
    if (head.Count >= maxLength)
    {
        return;
    }

    // Size of the least populated layer of the current head.
    int smallestLayer = head.layersItemsSum[0];
    for (int layer = 1; layer < head.layersItemsSum.Length; layer++)
    {
        smallestLayer = Math.Min(smallestLayer, head.layersItemsSum[layer]);
    }

    List<Candidate> candidates = new List<Candidate>();

    for (int tailPos = 0; tailPos < tail.Count; tailPos++)
    {
        int i = tail[tailPos];

        // TODO : Calculation of the support should be according to the items from the
        // lower and upper layers.
        FastSparseBitArray bitArray = null;

        for (int headPos = 0; headPos < head.Count; headPos++)
        {
            int headItem = head.GetItem(headPos);

            // The pair is always looked up with the lower-layer item first.
            FastSparseBitArray pairTransactions;
            if (_itemsLayers[headItem] < _itemsLayers[i])
            {
                pairTransactions = _dualComp.GetItemset(headItem, i).GetTransactions();
            }
            else if (_itemsLayers[headItem] > _itemsLayers[i])
            {
                pairTransactions = _dualComp.GetItemset(i, headItem).GetTransactions();
            }
            else
            {
                // Same layer: this head item contributes nothing.
                continue;
            }

            bitArray = (bitArray == null) ? pairTransactions : bitArray.And(pairTransactions);
        }

        if (head.GetTransactions() != null)
        {
            bitArray = (bitArray == null)
                ? head.GetTransactions()
                : bitArray.And(head.GetTransactions());
        }

        // Without any vector the support stays at Int32.MaxValue.
        int currentSupport = (bitArray != null) ? bitArray.CountElements() : Int32.MaxValue;

        if (currentSupport < support)
        {
            continue;
        }

        // Adding i would leave its layer too far ahead of the smallest one: mark the
        // candidate with Int32.MaxValue so the extension loop below stops on it.
        bool unbalanced = head.layersItemsSum[_itemsLayers[i]] + 1 > smallestLayer + maxLayerDiff;

        Candidate cand = new Candidate();
        cand.item = i;
        cand.support = unbalanced ? Int32.MaxValue : currentSupport;
        cand.bitArray = bitArray;
        candidates.Add(cand);
    }

    // Dynamic reordering. The order is whatever Candidate's own comparison defines
    // (presumably ascending support - confirm against Candidate).
    candidates.Sort();

    // Rebuild the tail in the sorted order.
    List<int> newTail = new List<int>(candidates.Count);
    foreach (Candidate sorted in candidates)
    {
        newTail.Add(sorted.item);
    }

    foreach (Candidate next in candidates)
    {
        // Stop recursing at the first Int32.MaxValue entry (e.g. items from the same layer).
        if (next.support == Int32.MaxValue)
        {
            break;
        }

        // The front of newTail is always the item of the candidate being processed;
        // the remaining entries form the tail of the recursive call.
        int i = newTail[0];
        newTail.RemoveAt(0);

        Itemset newHead = new Itemset(head);
        newHead.AddItem(i);
        newHead.layersItemsSum[_itemsLayers[i]]++;
        newHead.SetTransactions(next.bitArray);
        newHead.support = next.support;

        RecurseMining(newHead, newTail, support, minLength, maxLength, mineResult, maxLayerDiff);

        if (newHead.Count >= minLength)
        {
            mineResult.Add(newHead);
        }

        newHead.SetTransactions(null);
    }
}
/// <summary>
/// Level-wise mining step: processes every (head, tail) pair of <paramref name="levelItems"/>,
/// first computing the transaction vector of each possible one-item extension, then
/// building the next level from the surviving extensions and recursing into it.
/// </summary>
/// <param name="levelItems">Work items of the current level. Entries that cannot produce a
/// valid pattern are removed from this list, and tails are trimmed in place.</param>
/// <param name="support">Minimal transaction count an extension needs to survive.</param>
/// <param name="minLength">Minimal item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">Heads that already hold this many items are dropped.</param>
/// <param name="mineResult">Collector that receives every qualifying itemset.</param>
void RecurseMining(List<DFSLevelItem> levelItems, int support, int minLength, int maxLength, MineResults mineResult)
{
    // Simple pattern cut-off: drop heads that are already at the maximal length and
    // heads that cannot reach the minimal length even with their whole tail.
    levelItems.RemoveAll(entry =>
        entry.Head.Count >= maxLength ||
        entry.Head.Count + entry.Tail.Count < minLength);

    if (levelItems.Count == 0)
    {
        return;
    }

    // item -> support, filled for extensions whose support equals the support of their head.
    Dictionary<int, int> lookAheadPrune = new Dictionary<int, int>();

    // Pass 1: evaluate every tail item of every head, trimming the tails in place.
    foreach (DFSLevelItem levelItem in levelItems)
    {
        IntList tail = levelItem.Tail;
        ItemsetBasic head = levelItem.Head;
        List<FastSparseBitArray> bitArrays = new List<FastSparseBitArray>(tail.Count);

        // The position only advances when the item is kept, so a removal makes the
        // next item slide into the current position.
        int tailPos = 0;
        while (tailPos < tail.Count)
        {
            int i = tail[tailPos];

            FastSparseBitArray bitArray;
            if (_dualComp != null)
            {
                ISimpleItemset dualItemset = _dualComp.GetItemset(head.GetLastItem(), i);
                if (dualItemset == null)
                {
                    tail.RemoveAt(tailPos);
                    continue;
                }

                bitArray = dualItemset.GetTransactions();
            }
            else
            {
                bitArray = FastSparseBitArrayPool.Instance.Allocate();
                _ds.BuildBitVector(head.GetLastItem(), i, bitArray);
            }

            if (head.Count > 1)
            {
                bitArray = bitArray.And(head.GetTransactions());
            }

            bitArray.frequency = bitArray.CountElements();

            if (bitArray.frequency < support)
            {
                // Don't release bit vectors from O2 matrix, or there is no
                // O2 matrix
                if ((head.Count > 1) || (_dualComp == null))
                {
                    FastSparseBitArrayPool.Instance.Release(bitArray);
                }

                tail.RemoveAt(tailPos);
                continue;
            }

            bitArrays.Add(bitArray);

            // Prune look-ahead: remember items whose extension keeps the head's support.
            if (bitArray.frequency == head.support)
            {
                int knownSupport;
                if (lookAheadPrune.TryGetValue(i, out knownSupport))
                {
                    lookAheadPrune[i] = Math.Max(knownSupport, bitArray.frequency);
                }
                else
                {
                    lookAheadPrune.Add(i, bitArray.frequency);
                }
            }

            tailPos++;
        }

        levelItem.TailBitArrays = bitArrays;
    }

    // Pass 2: build the next level for every head and recurse into it.
    foreach (DFSLevelItem levelItem in levelItems)
    {
        ItemsetBasic head = levelItem.Head;
        List<FastSparseBitArray> bitArrays = levelItem.TailBitArrays;

        // A head is skipped when its last item was recorded in pass 1 with exactly
        // this head's support.
        int recordedSupport;
        bool skipByLookAhead =
            lookAheadPrune.TryGetValue(head.GetLastItem(), out recordedSupport) &&
            (recordedSupport == head.support);

        if (!skipByLookAhead)
        {
            IntList tail = levelItem.Tail;
            List<DFSLevelItem> newLevelItems = new List<DFSLevelItem>();

            for (int tailPos = 0; tailPos < tail.Count; tailPos++)
            {
                int i = tail[tailPos];

                IntList newTail = (IntList)tail.Clone();
                newTail.RemoveAt(tailPos);

                // bitArrays is index-aligned with the trimmed tail from pass 1.
                FastSparseBitArray bitArray = bitArrays[tailPos];

                ItemsetBasic newHead = new ItemsetBasic(head);
                newHead.AddItem(i);
                newHead.SetTransactions(bitArray);
                newHead.support = bitArray.frequency;

                if (newHead.Count >= minLength)
                {
                    mineResult.Add(newHead);
                }

                newLevelItems.Add(new DFSLevelItem(newHead, newTail));
            }

            RecurseMining(newLevelItems, support, minLength, maxLength, mineResult);

            // Release IntList - only for the entries still present after the recursive
            // call, which may have removed some of them.
            foreach (DFSLevelItem child in newLevelItems)
            {
                IntListPool.Instance.Release(child.Tail);
            }
        }

        // Release FastSparseBitArray - done for skipped and explored heads alike.
        if (head.Count > 2)
        {
            foreach (FastSparseBitArray used in bitArrays)
            {
                FastSparseBitArrayPool.Instance.Release(used);
            }
        }
    }
}
/// <summary>
/// Group-aware mining step. Every item of a head belongs to a numbered group (starting
/// at 1, stored in <c>itemGroup</c>). A tail item is tried twice: appended to the group of
/// the last head item, and as the first item of the next group. Its transactions are the
/// intersection of its pair itemsets (from <c>_dualComp</c>) with all head items of the
/// group preceding the one it joins.
/// </summary>
/// <param name="head">Itemset built so far. An empty head only seeds single-item heads in group 1.</param>
/// <param name="tail">Candidate items. Items whose extension falls below
/// <paramref name="support"/> are removed from this list in place.</param>
/// <param name="support">Minimal transaction count an extension needs to be explored.</param>
/// <param name="minLength">Minimal item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">The method returns immediately once <paramref name="head"/> holds this many items.</param>
/// <param name="maxGroupLength">Maximal number of items in a single group.</param>
/// <param name="mineResult">Collector that receives every qualifying itemset.</param>
void RecurseMining(Itemset head, List<int> tail, int support, int minLength, int maxLength, int maxGroupLength, MineResults mineResult)
{
    if (head.Count >= maxLength)
    {
        return;
    }

    for (int loopTail = 0; loopTail < tail.Count; loopTail++)
    {
        int i = tail[loopTail];

        if (head.Count == 0)
        {
            // Seed step: a single item that opens group 1.
            List<int> seedTail = new List<int>(tail);
            seedTail.RemoveAt(loopTail);

            Itemset seedHead = new Itemset();
            seedHead.AddItem(i);
            seedHead.itemGroup[seedHead.Count - 1] = 1;
            seedHead.groupLength = 1;

            RecurseMining(seedHead, seedTail, support, minLength, maxLength, maxGroupLength, mineResult);
            continue;
        }

        int currentItemGroup = head.itemGroup[head.Count - 1];

        // loopGroups == 0: join the current group; loopGroups == 1: open the next group.
        for (int loopGroups = 0; loopGroups < 2; loopGroups++)
        {
            bool joinsCurrentGroup = (loopGroups == 0);

            // Don't try to add the same item to the same group twice
            if (joinsCurrentGroup && (i < head.GetItem(head.Count - 1)))
            {
                continue;
            }

            // Don't create too long group
            if (joinsCurrentGroup && (head.groupLength >= maxGroupLength))
            {
                continue;
            }

            int newItemGroup = currentItemGroup + loopGroups;

            // Perform AND with all members of the preceding group - calc support.
            // Stays null when no head item belongs to that group.
            FastSparseBitArray bitArray = null;
            for (int loopHead = 0; loopHead < head.Count; loopHead++)
            {
                if (head.itemGroup[loopHead] != newItemGroup - 1)
                {
                    continue;
                }

                FastSparseBitArray pairTransactions =
                    _dualComp.GetItemset(head.GetItem(loopHead), i).GetTransactions();
                bitArray = (bitArray == null) ? pairTransactions : bitArray.And(pairTransactions);
            }

            if (currentItemGroup > 1)
            {
                bitArray = bitArray.And(head.GetTransactions());
            }

            // Count once; the value serves both the support test and the new head.
            // An extension without any vector is always accepted.
            bool hasTransactions = (bitArray != null);
            int extensionSupport = 0;
            if (hasTransactions)
            {
                extensionSupport = bitArray.CountElements();
            }

            if (hasTransactions && (extensionSupport < support))
            {
                // Drop the item from the caller-visible tail; stepping the index back makes
                // the outer loop revisit the item that slid into this position.
                tail.RemoveAt(loopTail);
                loopTail--;
                break;
            }

            List<int> newTail = new List<int>(tail);
            newTail.RemoveAt(loopTail);

            Itemset newHead = new Itemset(head);
            newHead.AddItem(i);
            newHead.itemGroup[newHead.Count - 1] = newItemGroup;
            newHead.groupLength = joinsCurrentGroup ? head.groupLength + 1 : 1;
            newHead.SetTransactions(bitArray);
            if (hasTransactions)
            {
                newHead.support = extensionSupport;
            }

            RecurseMining(newHead, newTail, support, minLength, maxLength, maxGroupLength, mineResult);

            // Report the itemset only if it is long enough and holds more items than
            // its last group, i.e. it spans more than the first group.
            if ((newHead.Count >= minLength) && (newHead.Count > newHead.groupLength))
            {
                mineResult.Add(newHead);
            }

            newHead.SetTransactions(null);
        }
    }
}
/// <summary>
/// Mine with skip-errors. For every tail item a <c>MistakesBitMask</c> is built whose slot
/// <c>k</c> is derived from the pair itemsets (from <c>_dualComp</c>) between the item and
/// the head's ancestors, combined with the ancestors' own slots. The support is counted on
/// the union of the last slot with slots taken from the ancestor chain. Items that pass
/// are then used to extend <paramref name="head"/> recursively.
/// </summary>
/// <param name="head">Itemset built so far. Its parent chain (<c>GetParent</c>) and mistakes
/// mask (<c>GetMistakes</c>) are read.</param>
/// <param name="tail">Candidate items. Items below <paramref name="minSupport"/> are removed
/// from this list in place.</param>
/// <param name="minSupport">Minimal support a candidate needs to be kept.</param>
/// <param name="minLength">Minimal item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">The method returns immediately once <paramref name="head"/> holds this many items.</param>
/// <param name="maxMistakes">Index of the last slot of the mistakes mask; slots 0..maxMistakes are used.</param>
/// <param name="mineResult">Collector that receives every qualifying itemset.</param>
void RecurseMining(Itemset head, List<int> tail, int minSupport, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
{
    // Support assigned to a candidate when no real count is taken: either the last
    // mistakes slot is empty, or the ancestor chain ends before maxMistakes steps.
    // Such a candidate passes the minSupport test for any minSupport up to this value.
    const int UncountedSupport = 999999;

    if (head.Count >= maxLength)
    {
        return;
    }

    // The three lists are index-aligned with the items that remain in 'tail'.
    List<FastSparseBitArray> newSupportVectorBitMask = new List<FastSparseBitArray>();
    List<MistakesBitMask> newBitMask = new List<MistakesBitMask>();
    List<int> newSupport = new List<int>();

    // NOTE(review): this loop also runs for an empty head and then calls
    // head.GetLastItem() on it - confirm callers always pass a non-empty head.
    for (int loopTail = 0; loopTail < tail.Count; loopTail++)
    {
        int i = tail[loopTail];

        MistakesBitMask mistakes = new MistakesBitMask(maxMistakes);
        FastSparseBitArray bitArray = _dualComp.GetItemset(head.GetLastItem(), i).GetTransactions();
        mistakes.mistakes[0] = bitArray;

        if (head.Count > 1)
        {
            MistakesBitMask existingMistakes = head.GetMistakes();
            mistakes.mistakes[0] = mistakes.mistakes[0].And(existingMistakes.mistakes[0]);

            // ************* Build candidate **************
            // Build the mistakes array
            for (int loopMistakes = 1; loopMistakes < maxMistakes + 1; loopMistakes++)
            {
                if (existingMistakes.mistakes[loopMistakes] != null)
                {
                    // Start from the head's slot restricted to the new pair, then OR in,
                    // for each ancestor, that ancestor's lower slot restricted to the pair
                    // (ancestor's last item, i). Each step up uses a slot one lower.
                    FastSparseBitArray newError = bitArray.And(existingMistakes.mistakes[loopMistakes]);

                    int upwardCount = 1;
                    Itemset upwardTraversal = head.GetParent();
                    while ((upwardTraversal != null) && (loopMistakes - upwardCount >= 0))
                    {
                        FastSparseBitArray bitTIDs = _dualComp.GetItemset(upwardTraversal.GetLastItem(), i).GetTransactions();
                        newError = newError.Or(upwardTraversal.GetMistakes().mistakes[loopMistakes - upwardCount].And(bitTIDs));
                        upwardTraversal = upwardTraversal.GetParent();
                        upwardCount++;
                    }

                    mistakes.mistakes[loopMistakes] = newError;
                }
                else
                {
                    // The head has no vector in this slot: fill it with the union of the
                    // pair vectors between i and the last item of the head and of every
                    // ancestor, and leave the higher slots unset.
                    FastSparseBitArray newError = bitArray;
                    Itemset ancestor = head;
                    while (ancestor.GetParent() != null)
                    {
                        ancestor = ancestor.GetParent();
                        FastSparseBitArray bitTIDs = _dualComp.GetItemset(ancestor.GetLastItem(), i).GetTransactions();
                        newError = newError.Or(bitTIDs);
                    }

                    mistakes.mistakes[loopMistakes] = newError;
                    break;
                }
            }
        }

        // *********** Calculate the support *************
        int support = UncountedSupport;

        // The support is the union of the last 'maxMistakes' mistakes vectors; to get
        // them the algorithm walks backward over the last items developed (it
        // backtracks the DFS through the parent chain).
        FastSparseBitArray currentSupportVector = mistakes.mistakes[maxMistakes];
        if (currentSupportVector != null)
        {
            Itemset upwardTraversal = head;
            for (int upward = 0; upward < maxMistakes; upward++)
            {
                if (upwardTraversal == null)
                {
                    break;
                }

                currentSupportVector = currentSupportVector.Or(upwardTraversal.GetMistakes().mistakes[maxMistakes - upward - 1]);
                upwardTraversal = upwardTraversal.GetParent();
            }

            // Only count when the walk did not run past the root of the chain.
            if (upwardTraversal != null)
            {
                support = currentSupportVector.CountElements();
            }
        }

        if (support >= minSupport)
        {
            newSupportVectorBitMask.Add(currentSupportVector);
            newBitMask.Add(mistakes);
            newSupport.Add(support);
        }
        else
        {
            // Just remove the item as it will not be a member later; stepping the index
            // back makes the loop revisit the item that slid into this position.
            tail.RemoveAt(loopTail);
            loopTail--;
        }
    }

    // Do the recursive calls; an empty head is never extended here.
    if (head.Count == 0)
    {
        return;
    }

    for (int loopTail = 0; loopTail < tail.Count; loopTail++)
    {
        int i = tail[loopTail];

        List<int> newTail = new List<int>(tail);
        newTail.RemoveAt(loopTail);

        Itemset newHead = new Itemset(head);
        newHead.AddItem(i);
        newHead.support = newSupport[loopTail];
        newHead.SetTransactions(newSupportVectorBitMask[loopTail]);
        newHead.SetMistakes(newBitMask[loopTail]);
        newHead.SetParent(head);

        RecurseMining(newHead, newTail, minSupport, minLength, maxLength, maxMistakes, mineResult);

        if (newHead.Count >= minLength)
        {
            mineResult.Add(newHead);
        }

        // Detach everything that was only needed while the subtree was explored.
        newHead.SetParent(null);
        newHead.SetMistakes(null);
        newHead.SetTransactions(null);
    }
}
/// <summary>
/// Depth-first mining step that reuses a single <c>ItemsetBasic</c> instance: the head is
/// extended in place with one tail item, explored recursively, reported, and then restored
/// to its previous items, transactions and support.
/// </summary>
/// <param name="head">Itemset built so far. It is mutated during the call and restored
/// before the method returns. An empty head only seeds single-item heads.</param>
/// <param name="tail">Candidate items. For a non-empty head, items whose extension is
/// missing or below <paramref name="support"/> are removed from this list in place.</param>
/// <param name="support">Minimal transaction count an extension needs to survive.</param>
/// <param name="minLength">Minimal item count for an itemset to be added to <paramref name="mineResult"/>.</param>
/// <param name="maxLength">The method returns immediately once <paramref name="head"/> holds this many items.</param>
/// <param name="mineResult">Collector that receives every qualifying itemset.</param>
void RecurseMining(ItemsetBasic head, IntList tail, int support, int minLength, int maxLength, MineResults mineResult)
{
    // Too long already, or unable to reach the minimal length even with the whole tail.
    if ((head.Count >= maxLength) || (head.Count + tail.Count < minLength))
    {
        return;
    }

    if (head.Count == 0)
    {
        // Seed step: every tail item starts its own single-item head.
        for (int seedPos = 0; seedPos < tail.Count; seedPos++)
        {
            int i = tail[seedPos];

            IntList seedTail = (IntList)tail.Clone();
            seedTail.RemoveAt(seedPos);

            ItemsetBasic seedHead = new ItemsetBasic(tail.Count);
            seedHead.AddItem(i);

            RecurseMining(seedHead, seedTail, support, minLength, maxLength, mineResult);
            IntListPool.Instance.Release(seedTail);
        }

        return;
    }

    // Pass 1: compute the transaction vector of every one-item extension and trim the
    // tail in place. The position only advances when the item is kept, so 'bitArrays'
    // stays index-aligned with the trimmed tail.
    List<FastSparseBitArray> bitArrays = new List<FastSparseBitArray>(tail.Count);

    int scanPos = 0;
    while (scanPos < tail.Count)
    {
        int i = tail[scanPos];

        FastSparseBitArray bitArray;
        if (_dualComp != null)
        {
            ISimpleItemset dualItemset = _dualComp.GetItemset(head.GetLastItem(), i);
            if (dualItemset == null)
            {
                tail.RemoveAt(scanPos);
                continue;
            }

            bitArray = dualItemset.GetTransactions();
        }
        else
        {
            bitArray = FastSparseBitArrayPool.Instance.Allocate();
            _ds.BuildBitVector(head.GetLastItem(), i, bitArray);
        }

        if (head.Count > 1)
        {
            bitArray = bitArray.And(head.GetTransactions());
        }

        bitArray.frequency = bitArray.CountElements();

        if (bitArray.frequency < support)
        {
            // Don't release bit vectors from O2 matrix, or there is no
            // O2 matrix
            if ((head.Count > 1) || (_dualComp == null))
            {
                FastSparseBitArrayPool.Instance.Release(bitArray);
            }

            tail.RemoveAt(scanPos);
            continue;
        }

        bitArrays.Add(bitArray);
        scanPos++;
    }

    // Pass 2: extend the head in place with every surviving item.
    for (int extendPos = 0; extendPos < tail.Count; extendPos++)
    {
        int i = tail[extendPos];

        IntList newTail = (IntList)tail.Clone();
        newTail.RemoveAt(extendPos);

        // Create 'head' restore point
        FastSparseBitArray restoreBitArray = head.GetTransactions();
        int restoreSupport = head.support;

        FastSparseBitArray bitArray = bitArrays[extendPos];
        head.AddItem(i);
        head.SetTransactions(bitArray);
        head.support = bitArray.frequency;

        RecurseMining(head, newTail, support, minLength, maxLength, mineResult);
        IntListPool.Instance.Release(newTail);

        if (head.Count >= minLength)
        {
            mineResult.Add(head);
        }

        head.SetTransactions(null);

        // head.Count includes the item just added, so this matches "the head held more
        // than one item before the extension".
        // NOTE(review): unlike the release in pass 1, this test ignores _dualComp == null,
        // so a vector taken from the pool for a single-item head is not handed back here -
        // confirm whether that is intended.
        if (head.Count > 2)
        {
            FastSparseBitArrayPool.Instance.Release(bitArray);
        }

        // Restore 'head'
        head.RemoveLastItem();
        head.SetTransactions(restoreBitArray);
        head.support = restoreSupport;
    }
}