/// <summary>
/// Builds a new sequence bitmap by applying the I-Step between this bitmap
/// and the supplied item bitmap.
/// </summary>
/// <param name="itemBitmap">
/// Item bitmap whose <c>_bitmap8</c> … <c>_bitmap128</c> parts are handed to the
/// matching parts of this instance.
/// </param>
/// <returns>
/// A fresh <see cref="SeqBitmap"/>. Only the parts that are non-null on this
/// instance are assigned; the others keep whatever the constructor set.
/// </returns>
internal SeqBitmap CreateNewByIStep(SeqBitmap itemBitmap)
{
    var result = new SeqBitmap();

    // Each width is handled on its own; a width that is absent here is skipped.
    if (_bitmap8 != null)
    {
        result._bitmap8 = _bitmap8.CreateNewByIStep(itemBitmap._bitmap8);
    }

    if (_bitmap16 != null)
    {
        result._bitmap16 = _bitmap16.CreateNewByIStep(itemBitmap._bitmap16);
    }

    if (_bitmap32 != null)
    {
        result._bitmap32 = _bitmap32.CreateNewByIStep(itemBitmap._bitmap32);
    }

    if (_bitmap64 != null)
    {
        result._bitmap64 = _bitmap64.CreateNewByIStep(itemBitmap._bitmap64);
    }

    if (_bitmap128 != null)
    {
        result._bitmap128 = _bitmap128.CreateNewByIStep(itemBitmap._bitmap128);
    }

    return result;
}
/// <summary>
/// Converts a sequence database to the vertical format: one
/// <see cref="SeqBitmap"/> per frequent item.
/// </summary>
/// <param name="sequenceDatabase">Sequences to convert; enumerated once, in order.</param>
/// <param name="frequentItems">
/// Frequent items mapped to the value stored as the <c>Support</c> of the item's bitmap.
/// </param>
/// <returns>A dictionary keyed by item holding the bitmap built for that item.</returns>
/// <remarks>
/// Items found inside a sequence are looked up with the dictionary indexer, so an
/// item that is missing from <paramref name="frequentItems"/> raises
/// <see cref="KeyNotFoundException"/>.
/// </remarks>
public static IDictionary<uint, SeqBitmap> ConvertSequenceDatabaseToVerticalFormat(SequenceDatabase sequenceDatabase, IDictionary<uint, int> frequentItems)
{
    var result = new Dictionary<uint, SeqBitmap>();

    foreach (Sequence sequence in sequenceDatabase)
    {
        // Register the current sequence in the bitmap of every frequent item,
        // creating the bitmap the first time the item is encountered.
        foreach (var frequentItem in frequentItems)
        {
            SeqBitmap bitmap;
            if (!result.TryGetValue(frequentItem.Key, out bitmap))
            {
                bitmap = new SeqBitmap { Support = frequentItem.Value };
                result.Add(frequentItem.Key, bitmap);
            }

            bitmap.Addsequence(sequence.Size);
        }

        // Mark every transaction of the sequence in the bitmaps of the items it contains.
        for (int transactionId = 0; transactionId < sequence.Size; transactionId++)
        {
            foreach (uint containedItem in sequence[transactionId])
            {
                result[containedItem].SetTransaction(transactionId);
            }
        }
    }

    return result;
}
/// <summary>
/// Produces the vertical representation of this sequence database: one
/// <see cref="SeqBitmap"/> per frequent item.
/// </summary>
/// <param name="frequentItems">
/// Frequent items mapped to the value stored as the <c>Support</c> of the item's bitmap.
/// </param>
/// <returns>The populated <see cref="VerticalSequenceDatabase"/>.</returns>
/// <remarks>
/// Items found inside a sequence are fetched through the indexer of the vertical
/// database. NOTE(review): presumably this throws for an item that is not in
/// <paramref name="frequentItems"/> — confirm against <c>VerticalSequenceDatabase</c>.
/// </remarks>
public VerticalSequenceDatabase ConvertToVerticalFormat(IDictionary<uint, int> frequentItems)
{
    var vertical = new VerticalSequenceDatabase();

    foreach (Sequence sequence in this)
    {
        // Every frequent item gets an entry for the current sequence; its
        // bitmap is created on first use.
        foreach (var entry in frequentItems)
        {
            SeqBitmap bitmap;
            if (!vertical.TryGetValue(entry.Key, out bitmap))
            {
                bitmap = new SeqBitmap();
                bitmap.Support = entry.Value;
                vertical.Add(entry.Key, bitmap);
            }

            bitmap.AddSequence(sequence.Size);
        }

        // Flag each transaction in the bitmaps of the items it contains.
        for (int transactionId = 0; transactionId < sequence.Size; transactionId++)
        {
            foreach (uint containedItem in sequence[transactionId])
            {
                vertical[containedItem].SetTransaction(transactionId);
            }
        }
    }

    return vertical;
}
/// <summary>
/// Builds the vertical (item to bitmap) form of the given sequence database.
/// </summary>
/// <param name="sequenceDatabase">Source sequences; enumerated once, in order.</param>
/// <param name="frequentItems">
/// Frequent items; each value is copied into the <c>Support</c> of the bitmap created for its key.
/// </param>
/// <returns>A dictionary with one <see cref="SeqBitmap"/> per frequent item.</returns>
/// <remarks>
/// The per-transaction pass uses the dictionary indexer, which throws
/// <see cref="KeyNotFoundException"/> when a sequence contains an item that is
/// not listed in <paramref name="frequentItems"/>.
/// </remarks>
public static IDictionary<uint, SeqBitmap> ConvertSequenceDatabaseToVerticalFormat(SequenceDatabase sequenceDatabase, IDictionary<uint, int> frequentItems)
{
    var bitmapsByItem = new Dictionary<uint, SeqBitmap>();

    foreach (Sequence current in sequenceDatabase)
    {
        // Step 1: append the current sequence to the bitmap of each frequent item.
        foreach (var pair in frequentItems)
        {
            uint itemId = pair.Key;

            SeqBitmap itemBitmap;
            if (!bitmapsByItem.TryGetValue(itemId, out itemBitmap))
            {
                itemBitmap = new SeqBitmap { Support = pair.Value };
                bitmapsByItem.Add(itemId, itemBitmap);
            }

            itemBitmap.Addsequence(current.Size);
        }

        // Step 2: set the transaction bits for the items present in each transaction.
        for (int tid = 0; tid < current.Size; tid++)
        {
            foreach (uint present in current[tid])
            {
                bitmapsByItem[present].SetTransaction(tid);
            }
        }
    }

    return bitmapsByItem;
}
/// <summary>
/// LAPIN-SPAM: depth-first traversal of the lexicographic tree starting at
/// <paramref name="prefix"/>.
/// </summary>
/// <param name="prefix">Pattern represented by the current node; it is recorded and logged.</param>
/// <param name="prefixBitmap">Bitmap that belongs to <paramref name="prefix"/>.</param>
/// <param name="Sn">Candidate items for sequence extensions (S-step).</param>
/// <param name="In">Candidate items for itemset extensions (I-step).</param>
/// <remarks>
/// S-step candidates are filtered through <c>_itemIsExistTable.GetFrequentItems</c>;
/// I-step candidates are filtered by comparing the support of the extended bitmap
/// with <c>_minSupport</c>. Nothing is done once <c>Stopped</c> is set.
/// </remarks>
private void LapinSpam(Sequence prefix, SeqBitmap prefixBitmap, IList<uint> Sn, IList<uint> In)
{
    if (Stopped)
    {
        return;
    }

    _allSequentialPatterns.Add(prefix);
    LogPattern(prefix);

    // S-STEP
    List<uint> sequenceExtensions;
    if (Sn.Count > 0)
    {
        sequenceExtensions = _itemIsExistTable.GetFrequentItems(prefixBitmap, Sn);
        int sequenceExtensionCount = sequenceExtensions.Count;

        for (int index = 0; index < sequenceExtensionCount; index++)
        {
            uint item = sequenceExtensions[index];
            SeqBitmap extendedBitmap = prefixBitmap.CreateNewBySStep(_remappedSequenceDatabase[(int)item]);
            Sequence extendedPrefix = Sequence.CreateNewBySStep(prefix, _itemOrderToIdMapping[item], extendedBitmap.Support);

            // The child may S-extend with every surviving item, but I-extend
            // only with the items that follow the current one.
            LapinSpam(
                extendedPrefix,
                extendedBitmap,
                sequenceExtensions,
                sequenceExtensions.GetRange(index + 1, sequenceExtensionCount - index - 1));
        }
    }
    else
    {
        sequenceExtensions = new List<uint>();
    }

    // I-STEP
    int itemsetCandidateCount = In.Count;
    if (itemsetCandidateCount <= 0)
    {
        return;
    }

    var itemsetExtensions = new List<uint>(itemsetCandidateCount);
    var itemsetExtensionBitmaps = new List<SeqBitmap>(itemsetCandidateCount);

    // First pass: keep the candidates whose extended bitmap reaches the minimum support.
    for (int index = 0; index < itemsetCandidateCount; index++)
    {
        uint candidate = In[index];
        SeqBitmap candidateBitmap = prefixBitmap.CreateNewByIStep(_remappedSequenceDatabase[(int)candidate]);
        if (candidateBitmap.Support < _minSupport)
        {
            continue;
        }

        itemsetExtensions.Add(candidate);
        itemsetExtensionBitmaps.Add(candidateBitmap);
    }

    // Second pass: recurse into every surviving itemset extension.
    int itemsetExtensionCount = itemsetExtensions.Count;
    for (int index = 0; index < itemsetExtensionCount; index++)
    {
        SeqBitmap extendedBitmap = itemsetExtensionBitmaps[index];
        Sequence extendedPrefix = Sequence.CreateNewByIStep(prefix, _itemOrderToIdMapping[itemsetExtensions[index]], extendedBitmap.Support);

        LapinSpam(
            extendedPrefix,
            extendedBitmap,
            sequenceExtensions,
            itemsetExtensions.GetRange(index + 1, itemsetExtensionCount - index - 1));
    }
}
/// <summary>
/// SPAM entry point: clears the collected patterns and starts one depth-first
/// traversal per frequent item.
/// </summary>
/// <remarks>
/// Items are taken from the keys of <c>_sequenceDatabase</c> in ascending order.
/// Each root receives the full item list as S-step candidates and the items
/// that come after it as I-step candidates.
/// </remarks>
private void Spam()
{
    _allSequentialPatterns.Clear();

    // Traverse the lexicographic tree following the ordered frequent items.
    List<uint> orderedItems = _sequenceDatabase.Keys.OrderBy(item => item).ToList();
    int itemCount = orderedItems.Count;

    for (int position = 0; position < itemCount; position++)
    {
        uint rootItem = orderedItems[position];
        SeqBitmap rootBitmap = _sequenceDatabase[rootItem];
        var rootPrefix = new Sequence(rootItem, rootBitmap.Support);

        Spam(
            rootPrefix,
            rootBitmap,
            orderedItems,
            orderedItems.GetRange(position + 1, itemCount - position - 1));
    }
}
/// <summary>
/// SPAM: depth-first traversal of the lexicographic tree starting at
/// <paramref name="prefix"/>.
/// </summary>
/// <param name="prefix">Pattern represented by the current node; it is recorded and logged.</param>
/// <param name="prefixBitmap">Bitmap that belongs to <paramref name="prefix"/>.</param>
/// <param name="Sn">Candidate items for sequence extensions (S-step).</param>
/// <param name="In">Candidate items for itemset extensions (I-step).</param>
/// <remarks>
/// A candidate survives a step when the support of its extended bitmap is not
/// below <c>_minSupport</c>. Nothing is done once <c>Stopped</c> is set.
/// </remarks>
private void Spam(Sequence prefix, SeqBitmap prefixBitmap, IList<uint> Sn, IList<uint> In)
{
    if (Stopped)
    {
        return;
    }

    _allSequentialPatterns.Add(prefix);
    LogPattern(prefix);

    // S-STEP
    int sequenceCandidateCount = Sn.Count;
    var sequenceExtensions = new List<uint>(sequenceCandidateCount);
    if (sequenceCandidateCount > 0)
    {
        var sequenceExtensionBitmaps = new List<SeqBitmap>(sequenceCandidateCount);

        // Prune: keep the candidates whose S-extension is still frequent.
        for (int index = 0; index < sequenceCandidateCount; index++)
        {
            uint candidate = Sn[index];
            SeqBitmap candidateBitmap = prefixBitmap.CreateNewBySStep(_sequenceDatabase[candidate]);
            if (candidateBitmap.Support < _minSupport)
            {
                continue;
            }

            sequenceExtensions.Add(candidate);
            sequenceExtensionBitmaps.Add(candidateBitmap);
        }

        // Recurse: the child S-extends with all survivors and I-extends with
        // the survivors that follow the current item.
        int sequenceExtensionCount = sequenceExtensions.Count;
        for (int index = 0; index < sequenceExtensionCount; index++)
        {
            SeqBitmap extendedBitmap = sequenceExtensionBitmaps[index];
            Sequence extendedPrefix = Sequence.CreateNewBySStep(prefix, sequenceExtensions[index], extendedBitmap.Support);

            Spam(
                extendedPrefix,
                extendedBitmap,
                sequenceExtensions,
                sequenceExtensions.GetRange(index + 1, sequenceExtensionCount - index - 1));
        }
    }

    // I-STEP
    int itemsetCandidateCount = In.Count;
    if (itemsetCandidateCount <= 0)
    {
        return;
    }

    var itemsetExtensions = new List<uint>(itemsetCandidateCount);
    var itemsetExtensionBitmaps = new List<SeqBitmap>(itemsetCandidateCount);

    // Prune: keep the candidates whose I-extension is still frequent.
    for (int index = 0; index < itemsetCandidateCount; index++)
    {
        uint candidate = In[index];
        SeqBitmap candidateBitmap = prefixBitmap.CreateNewByIStep(_sequenceDatabase[candidate]);
        if (candidateBitmap.Support < _minSupport)
        {
            continue;
        }

        itemsetExtensions.Add(candidate);
        itemsetExtensionBitmaps.Add(candidateBitmap);
    }

    // Recurse into every surviving itemset extension.
    int itemsetExtensionCount = itemsetExtensions.Count;
    for (int index = 0; index < itemsetExtensionCount; index++)
    {
        SeqBitmap extendedBitmap = itemsetExtensionBitmaps[index];
        Sequence extendedPrefix = Sequence.CreateNewByIStep(prefix, itemsetExtensions[index], extendedBitmap.Support);

        Spam(
            extendedPrefix,
            extendedBitmap,
            sequenceExtensions,
            itemsetExtensions.GetRange(index + 1, itemsetExtensionCount - index - 1));
    }
}
/// <summary>
/// Recursive step of LAPIN-SPAM: records <paramref name="prefix"/> and visits
/// all of its frequent S-step and I-step extensions depth first.
/// </summary>
/// <param name="prefix">Current pattern; added to the result list and logged.</param>
/// <param name="prefixBitmap">Bitmap of the current pattern.</param>
/// <param name="Sn">Items that may extend the pattern with a new itemset (S-step).</param>
/// <param name="In">Items that may extend the last itemset of the pattern (I-step).</param>
/// <remarks>
/// Returns immediately when <c>Stopped</c> is set. The S-step relies on
/// <c>_itemIsExistTable.GetFrequentItems</c> to pick frequent items, while the
/// I-step checks the support of each extended bitmap against <c>_minSupport</c>.
/// </remarks>
private void LapinSpam(Sequence prefix, SeqBitmap prefixBitmap, IList<uint> Sn, IList<uint> In)
{
    if (Stopped)
    {
        return;
    }

    _allSequentialPatterns.Add(prefix);
    LogPattern(prefix);

    // S-STEP
    List<uint> frequentS;
    if (Sn.Count <= 0)
    {
        frequentS = new List<uint>();
    }
    else
    {
        frequentS = _itemIsExistTable.GetFrequentItems(prefixBitmap, Sn);

        for (int k = 0, total = frequentS.Count; k < total; k++)
        {
            uint orderId = frequentS[k];
            SeqBitmap childBitmap = prefixBitmap.CreateNewBySStep(_remappedSequenceDatabase[(int)orderId]);
            Sequence childPrefix = Sequence.CreateNewBySStep(prefix, _itemOrderToIdMapping[orderId], childBitmap.Support);
            List<uint> laterItems = frequentS.GetRange(k + 1, total - k - 1);

            LapinSpam(childPrefix, childBitmap, frequentS, laterItems);
        }
    }

    // I-STEP
    int candidateCount = In.Count;
    if (candidateCount <= 0)
    {
        return;
    }

    var frequentI = new List<uint>(candidateCount);
    var frequentIBitmaps = new List<SeqBitmap>(candidateCount);

    // Collect the I-step candidates that stay frequent, together with their bitmaps.
    for (int k = 0; k < candidateCount; k++)
    {
        uint orderId = In[k];
        SeqBitmap childBitmap = prefixBitmap.CreateNewByIStep(_remappedSequenceDatabase[(int)orderId]);
        if (childBitmap.Support < _minSupport)
        {
            continue;
        }

        frequentI.Add(orderId);
        frequentIBitmaps.Add(childBitmap);
    }

    // Descend into each kept candidate; only later candidates remain I-step candidates.
    for (int k = 0, total = frequentI.Count; k < total; k++)
    {
        SeqBitmap childBitmap = frequentIBitmaps[k];
        Sequence childPrefix = Sequence.CreateNewByIStep(prefix, _itemOrderToIdMapping[frequentI[k]], childBitmap.Support);
        List<uint> laterItems = frequentI.GetRange(k + 1, total - k - 1);

        LapinSpam(childPrefix, childBitmap, frequentS, laterItems);
    }
}
/// <summary>
/// Returns the candidates that are frequent for the given prefix.
/// </summary>
/// <param name="prefixBitmap">Bitmap of the prefix; read per width (8/16/32/64/128).</param>
/// <param name="candidates">Items to test; each value is used as an index into a byte vector.</param>
/// <returns>
/// The candidates, in their original order, whose summed bytes reach <c>_minSupport</c>.
/// </returns>
/// <remarks>
/// For every sequence whose prefix bitmap is non-zero, one byte vector is picked
/// from the matching <c>_bitVectorsN</c> table using the sequence index and
/// <c>DeBruijn.FirstSetBit</c> of the bitmap (presumably the position of its first
/// set bit — confirm against <c>DeBruijn</c>). A candidate is accepted as soon as the
/// running sum of its bytes over the picked vectors is no longer below <c>_minSupport</c>.
/// </remarks>
public List<uint> GetFrequentItems(SeqBitmap prefixBitmap, IList<uint> candidates)
{
    // Upper bound for the number of vectors: one per sequence of any width.
    var selectedVectors = new byte[
        prefixBitmap.Sequences8Count +
        prefixBitmap.Sequences16Count +
        prefixBitmap.Sequences32Count +
        prefixBitmap.Sequences64Count +
        prefixBitmap.Sequences128Count][];
    int selectedCount = 0;

    if (prefixBitmap.Sequences8Count > 0)
    {
        var bitmaps8 = prefixBitmap.Bitmaps8;
        for (int sid = 0; sid < prefixBitmap.Sequences8Count; sid++)
        {
            if (bitmaps8[sid] == 0)
            {
                continue;
            }

            selectedVectors[selectedCount] = _bitVectors8[sid, DeBruijn.FirstSetBit(bitmaps8[sid])];
            selectedCount++;
        }
    }

    if (prefixBitmap.Sequences16Count > 0)
    {
        var bitmaps16 = prefixBitmap.Bitmaps16;
        for (int sid = 0; sid < prefixBitmap.Sequences16Count; sid++)
        {
            if (bitmaps16[sid] == 0)
            {
                continue;
            }

            selectedVectors[selectedCount] = _bitVectors16[sid, DeBruijn.FirstSetBit(bitmaps16[sid])];
            selectedCount++;
        }
    }

    if (prefixBitmap.Sequences32Count > 0)
    {
        var bitmaps32 = prefixBitmap.Bitmaps32;
        for (int sid = 0; sid < prefixBitmap.Sequences32Count; sid++)
        {
            if (bitmaps32[sid] == 0)
            {
                continue;
            }

            selectedVectors[selectedCount] = _bitVectors32[sid, DeBruijn.FirstSetBit(bitmaps32[sid])];
            selectedCount++;
        }
    }

    if (prefixBitmap.Sequences64Count > 0)
    {
        var bitmaps64 = prefixBitmap.Bitmaps64;
        for (int sid = 0; sid < prefixBitmap.Sequences64Count; sid++)
        {
            if (bitmaps64[sid] == 0)
            {
                continue;
            }

            selectedVectors[selectedCount] = _bitVectors64[sid, DeBruijn.FirstSetBit(bitmaps64[sid])];
            selectedCount++;
        }
    }

    if (prefixBitmap.Sequences128Count > 0)
    {
        var bitmaps128 = prefixBitmap.Bitmaps128;
        for (int sid = 0; sid < prefixBitmap.Sequences128Count; sid++)
        {
            if (bitmaps128[sid] == 0)
            {
                continue;
            }

            selectedVectors[selectedCount] = _bitVectors128[sid, DeBruijn.FirstSetBit(bitmaps128[sid])];
            selectedCount++;
        }
    }

    var frequentItems = new List<uint>();
    int candidateCount = candidates.Count;

    for (int index = 0; index < candidateCount; index++)
    {
        var candidate = candidates[index];
        int support = 0;

        // Stop summing as soon as the candidate reaches the minimum support.
        for (int vector = 0; vector < selectedCount; vector++)
        {
            support += selectedVectors[vector][candidate];
            if (support < _minSupport)
            {
                continue;
            }

            frequentItems.Add(candidate);
            break;
        }
    }

    return frequentItems;
}
/// <summary>
/// Selects, from <paramref name="candidates"/>, the items that are frequent
/// for the prefix described by <paramref name="prefixBitmap"/>.
/// </summary>
/// <param name="prefixBitmap">Prefix bitmap, read separately for the 8/16/32/64/128 widths.</param>
/// <param name="candidates">Items to evaluate; each one indexes into the gathered byte vectors.</param>
/// <returns>The accepted candidates in the order in which they were supplied.</returns>
/// <remarks>
/// Phase one gathers one byte vector per sequence with a non-zero prefix bitmap,
/// addressed by sequence index and <c>DeBruijn.FirstSetBit</c> of that bitmap
/// (NOTE(review): assumed to be the first set bit position — verify). Phase two adds
/// up the byte at the candidate's index across the gathered vectors and accepts the
/// candidate once the sum is not below <c>_minSupport</c>.
/// </remarks>
public List<uint> GetFrequentItems(SeqBitmap prefixBitmap, IList<uint> candidates)
{
    int gathered = 0;
    var vectors = new byte[
        prefixBitmap.Sequences8Count +
        prefixBitmap.Sequences16Count +
        prefixBitmap.Sequences32Count +
        prefixBitmap.Sequences64Count +
        prefixBitmap.Sequences128Count][];

    // Phase one: gather the vectors, width by width.
    if (prefixBitmap.Sequences8Count > 0)
    {
        var prefix8 = prefixBitmap.Bitmaps8;
        for (int s = 0; s < prefixBitmap.Sequences8Count; s++)
        {
            if (prefix8[s] == 0)
            {
                continue;
            }

            int firstBit = DeBruijn.FirstSetBit(prefix8[s]);
            vectors[gathered++] = _bitVectors8[s, firstBit];
        }
    }

    if (prefixBitmap.Sequences16Count > 0)
    {
        var prefix16 = prefixBitmap.Bitmaps16;
        for (int s = 0; s < prefixBitmap.Sequences16Count; s++)
        {
            if (prefix16[s] == 0)
            {
                continue;
            }

            int firstBit = DeBruijn.FirstSetBit(prefix16[s]);
            vectors[gathered++] = _bitVectors16[s, firstBit];
        }
    }

    if (prefixBitmap.Sequences32Count > 0)
    {
        var prefix32 = prefixBitmap.Bitmaps32;
        for (int s = 0; s < prefixBitmap.Sequences32Count; s++)
        {
            if (prefix32[s] == 0)
            {
                continue;
            }

            int firstBit = DeBruijn.FirstSetBit(prefix32[s]);
            vectors[gathered++] = _bitVectors32[s, firstBit];
        }
    }

    if (prefixBitmap.Sequences64Count > 0)
    {
        var prefix64 = prefixBitmap.Bitmaps64;
        for (int s = 0; s < prefixBitmap.Sequences64Count; s++)
        {
            if (prefix64[s] == 0)
            {
                continue;
            }

            int firstBit = DeBruijn.FirstSetBit(prefix64[s]);
            vectors[gathered++] = _bitVectors64[s, firstBit];
        }
    }

    if (prefixBitmap.Sequences128Count > 0)
    {
        var prefix128 = prefixBitmap.Bitmaps128;
        for (int s = 0; s < prefixBitmap.Sequences128Count; s++)
        {
            if (prefix128[s] == 0)
            {
                continue;
            }

            int firstBit = DeBruijn.FirstSetBit(prefix128[s]);
            vectors[gathered++] = _bitVectors128[s, firstBit];
        }
    }

    // Phase two: accumulate per-candidate support over the gathered vectors.
    var accepted = new List<uint>();
    for (int c = 0, total = candidates.Count; c < total; c++)
    {
        var candidate = candidates[c];
        int runningSupport = 0;

        for (int v = 0; v < gathered; v++)
        {
            runningSupport += vectors[v][candidate];
            if (runningSupport < _minSupport)
            {
                continue;
            }

            // Threshold reached: no need to look at the remaining vectors.
            accepted.Add(candidate);
            break;
        }
    }

    return accepted;
}
/// <summary>
/// Recursive step of SPAM: records <paramref name="prefix"/> and visits all of
/// its frequent S-step and I-step extensions depth first.
/// </summary>
/// <param name="prefix">Current pattern; added to the result list and logged.</param>
/// <param name="prefixBitmap">Bitmap of the current pattern.</param>
/// <param name="Sn">Items that may extend the pattern with a new itemset (S-step).</param>
/// <param name="In">Items that may extend the last itemset of the pattern (I-step).</param>
/// <remarks>
/// Returns immediately when <c>Stopped</c> is set. An extension is kept when the
/// support of its bitmap is not below <c>_minSupport</c>.
/// </remarks>
private void Spam(Sequence prefix, SeqBitmap prefixBitmap, IList<uint> Sn, IList<uint> In)
{
    if (Stopped)
    {
        return;
    }

    _allSequentialPatterns.Add(prefix);
    LogPattern(prefix);

    // S-STEP
    int sCandidates = Sn.Count;
    var frequentS = new List<uint>(sCandidates);
    if (sCandidates > 0)
    {
        var frequentSBitmaps = new List<SeqBitmap>(sCandidates);

        // Keep only the S-step candidates that remain frequent.
        for (int k = 0; k < sCandidates; k++)
        {
            uint item = Sn[k];
            SeqBitmap childBitmap = prefixBitmap.CreateNewBySStep(_sequenceDatabase[item]);
            if (childBitmap.Support < _minSupport)
            {
                continue;
            }

            frequentS.Add(item);
            frequentSBitmaps.Add(childBitmap);
        }

        // Descend into each kept candidate; later candidates become its I-step candidates.
        for (int k = 0, total = frequentS.Count; k < total; k++)
        {
            SeqBitmap childBitmap = frequentSBitmaps[k];
            Sequence childPrefix = Sequence.CreateNewBySStep(prefix, frequentS[k], childBitmap.Support);
            List<uint> laterItems = frequentS.GetRange(k + 1, total - k - 1);

            Spam(childPrefix, childBitmap, frequentS, laterItems);
        }
    }

    // I-STEP
    int iCandidates = In.Count;
    if (iCandidates <= 0)
    {
        return;
    }

    var frequentI = new List<uint>(iCandidates);
    var frequentIBitmaps = new List<SeqBitmap>(iCandidates);

    // Keep only the I-step candidates that remain frequent.
    for (int k = 0; k < iCandidates; k++)
    {
        uint item = In[k];
        SeqBitmap childBitmap = prefixBitmap.CreateNewByIStep(_sequenceDatabase[item]);
        if (childBitmap.Support < _minSupport)
        {
            continue;
        }

        frequentI.Add(item);
        frequentIBitmaps.Add(childBitmap);
    }

    // Descend into each kept candidate, reusing the S-step survivors of this node.
    for (int k = 0, total = frequentI.Count; k < total; k++)
    {
        SeqBitmap childBitmap = frequentIBitmaps[k];
        Sequence childPrefix = Sequence.CreateNewByIStep(prefix, frequentI[k], childBitmap.Support);
        List<uint> laterItems = frequentI.GetRange(k + 1, total - k - 1);

        Spam(childPrefix, childBitmap, frequentS, laterItems);
    }
}