/// <summary>
/// Starts a mining run: resets the collected patterns, converts the sequence
/// database to a projected database and explores the projection of every
/// 1-length sequential pattern.
/// </summary>
private void Bide()
{
    _allSequentialPatterns.Clear();

    ProjectedDatabase wholeDatabase = _sequenceDatabase.ConvertToProjectedDatabase();

    // One search-space partition per frequent item; each partition is mined recursively.
    for (int index = 0; index < _1LengthSequentialPatterns.Count; index++)
    {
        ProjectedDatabase seedDatabase =
            ConstructProjectedDatabase(_1LengthSequentialPatterns[index], wholeDatabase);

        // A null or empty projection has nothing left to mine.
        bool hasSequences = seedDatabase != null && seedDatabase.IsNotEmpty;
        if (hasSequences)
        {
            if (ClosureChecker.BackScan(seedDatabase))
            {
                // BackScan reported the prefix as prunable, so it is only counted.
                PrunedSequencesCount++;
            }
            else
            {
                Bide(seedDatabase);
            }
        }
    }
}
/// <summary>
/// Tells whether the prefix of a projected database has a forward extension
/// or a backward extension.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is examined.</param>
/// <param name="newSequentialPatterns">Patterns grown from the prefix.</param>
/// <returns>
/// TRUE if any grown pattern has the same support as the prefix, or if
/// <c>ClosureChecker.BackwardExtensionCheck</c> returns TRUE.
/// </returns>
private static bool HasProjectedDatabaseAnyExtensions(ProjectedDatabase projectedDatabase, IEnumerable<Sequence> newSequentialPatterns)
{
    // Forward extension: a grown pattern whose support equals the prefix support.
    bool hasForwardExtension = newSequentialPatterns.Any(
        grown => grown.Support == projectedDatabase.Prefix.Support);
    if (hasForwardExtension)
    {
        return true;
    }

    // The backward check only runs when no forward extension was found.
    return ClosureChecker.BackwardExtensionCheck(projectedDatabase);
}
/// <summary>
/// Recursive mining step: grows the prefix of <paramref name="projectedDatabase"/>
/// by its local frequent items, records the prefix when it has no forward or
/// backward extension, and recurses into every grown pattern whose projection
/// is not pruned by <c>ClosureChecker.BackScan</c>.
/// </summary>
/// <param name="projectedDatabase">Projected database of the current prefix.</param>
private void Bide(ProjectedDatabase projectedDatabase)
{
    if (Stopped)
    {
        return;
    }

    // The extension maps are shared fields, so they are reset before each scan.
    _sExtensions.Clear();
    _iExtensions.Clear();
    projectedDatabase.FindLocalFrequentItems(_minSupport, _sExtensions, _iExtensions);

    // Materialise the grown patterns now: the recursion below clears the shared maps again.
    var grownPatterns = new List<Sequence>();

    // Sequence-extension items first, ordered by key.
    grownPatterns.AddRange(
        _sExtensions
            .OrderBy(e => e.Key)
            .Select(e => Sequence.CreateNewBySStep(projectedDatabase.Prefix, e.Key, e.Value)));

    // Itemset-extension items second, ordered by key.
    grownPatterns.AddRange(
        _iExtensions
            .OrderBy(e => e.Key)
            .Select(e => Sequence.CreateNewByIStep(projectedDatabase.Prefix, e.Key, e.Value)));

    // A prefix without any forward or backward extension is a closed sequential pattern.
    bool prefixHasExtension = HasProjectedDatabaseAnyExtensions(projectedDatabase, grownPatterns);
    if (prefixHasExtension)
    {
        NoClosedSequencesCount++;
    }
    else
    {
        _allSequentialPatterns.Add(projectedDatabase.Prefix);
        LogPattern(projectedDatabase.Prefix);
    }

    // Split the search space by grown pattern and mine each part recursively.
    foreach (Sequence grown in grownPatterns)
    {
        ProjectedDatabase grownDatabase = ConstructProjectedDatabase(grown, projectedDatabase);
        if (grownDatabase == null || !grownDatabase.IsNotEmpty)
        {
            continue;
        }

        if (ClosureChecker.BackScan(grownDatabase))
        {
            PrunedSequencesCount++;
        }
        else
        {
            Bide(grownDatabase);
        }
    }
}
/// <summary>
/// Builds a projected database that holds one <c>PseudoSequence</c> per
/// sequence of this database, in index order.
/// </summary>
/// <returns>The newly created projected database.</returns>
internal ProjectedDatabase ConvertToProjectedDatabase()
{
    var result = new ProjectedDatabase();

    // The count is read once, before the first element is wrapped.
    int total = this.Count;
    int sid = 0;
    while (sid < total)
    {
        result.Add(new PseudoSequence(this[sid]));
        sid++;
    }

    return result;
}
/// <summary>
/// Recursive mining step: grows the prefix of <paramref name="projectedDatabase"/>
/// by its local frequent items, records every grown pattern and recurses into
/// each non-empty projection.
/// </summary>
/// <param name="projectedDatabase">Projected database of the current prefix.</param>
private void PrefixSpan(ProjectedDatabase projectedDatabase)
{
    if (Stopped)
    {
        return;
    }

    // The extension maps are shared fields, so they are reset before each scan.
    _sExtensions.Clear();
    _iExtensions.Clear();
    projectedDatabase.FindLocalFrequentItems(_minSupport, _sExtensions, _iExtensions);

    // Grown patterns are collected up front because the recursion reuses the shared maps.
    var grownPatterns = new List<Sequence>();

    // Sequence-extension items, ordered by key.
    foreach (var sExtension in _sExtensions.OrderBy(e => e.Key))
    {
        Sequence grownBySStep =
            Sequence.CreateNewBySStep(projectedDatabase.Prefix, sExtension.Key, sExtension.Value);
        grownPatterns.Add(grownBySStep);
    }

    // Itemset-extension items, ordered by key.
    foreach (var iExtension in _iExtensions.OrderBy(e => e.Key))
    {
        Sequence grownByIStep =
            Sequence.CreateNewByIStep(projectedDatabase.Prefix, iExtension.Key, iExtension.Value);
        grownPatterns.Add(grownByIStep);
    }

    // Every grown pattern is recorded first, then its projection is mined.
    foreach (Sequence grown in grownPatterns)
    {
        _allSequentialPatterns.Add(grown);
        LogPattern(grown);

        ProjectedDatabase grownDatabase = ConstructProjectedDatabase(grown, projectedDatabase);
        if (grownDatabase != null && grownDatabase.IsNotEmpty)
        {
            PrefixSpan(grownDatabase);
        }
    }
}
/// <summary>
/// Projects every sequence of <paramref name="database"/> on
/// <paramref name="prefix"/> and collects the projections in a new database.
/// </summary>
/// <param name="prefix">Prefix the new database is built for.</param>
/// <param name="database">Database whose sequences are projected.</param>
/// <returns>
/// The new projected database, or null as soon as more sequences failed to
/// project than <c>database.Size - (int)_minSupport</c> allows.
/// </returns>
private ProjectedDatabase ConstructProjectedDatabase(Sequence prefix, ProjectedDatabase database)
{
    var lastItemOfPrefix = prefix.LastItem;
    var result = new ProjectedDatabase(prefix);

    // How many sequences may fail to project before the result can no longer
    // hold a sequential pattern.
    var missesAllowed = database.Size - (int)_minSupport;

    int sequenceCount = database.Size;
    for (int sid = 0; sid < sequenceCount; sid++)
    {
        var source = database[sid];
        bool wasProjected = false;

        // Only sequences containing the last item of the prefix can be projected.
        int itemsetIndex = source.Contains(lastItemOfPrefix);
        if (itemsetIndex >= 0)
        {
            var projection = PseudoSequence.MakeProjection(source, prefix, itemsetIndex, false);
            if (projection != null)
            {
                result.Add(projection);
                wasProjected = true;
            }
        }

        if (wasProjected)
        {
            continue;
        }

        // Stop early once too many sequences have been missed.
        missesAllowed--;
        if (missesAllowed < 0)
        {
            return null;
        }
    }

    return result;
}
/// <summary>
/// Looks for a backward-S-extension item: an item present in the i-th maximum
/// period of every sequence of the database, for some prefix position i.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if such an item exists for any prefix position, otherwise FALSE.</returns>
private static bool BackwardSExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // Per-sequence value returned by GetLastInLast for the previously handled position.
    var previousLastInLast = new int[projectedDatabase.Count];
    // Scratch set, reused for the period of each sequence.
    var periodItems = new HashSet<uint>();

    int position = projectedDatabase.Prefix.Size - 1;
    while (position >= 0)
    {
        var element = projectedDatabase.Prefix[position];
        HashSet<uint> commonItems = null;
        bool periodExhausted = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // Always refreshed, even after the period became empty, because the
            // next (lower) position starts from these values.
            int periodEnd = sequence.GetLastInLast(element, previousLastInLast[sid]);
            previousLastInLast[sid] = periodEnd;

            if (!periodExhausted)
            {
                // The period starts right after the first instance of the preceding
                // prefix part (or at 0 for the first element) and ends before periodEnd.
                int cursor = position != 0 ? sequence.GetFirstInstance(position) + 1 : 0;

                periodItems.Clear();
                while (cursor < periodEnd)
                {
                    periodItems.UnionWith(sequence.RawItems[cursor]);
                    cursor++;
                }

                if (commonItems == null)
                {
                    commonItems = new HashSet<uint>(periodItems);
                }
                else
                {
                    commonItems.IntersectWith(periodItems);
                }

                if (commonItems.Count == 0)
                {
                    // Nothing shared at the first element: no lower position is left to try.
                    if (position == 0)
                    {
                        return false;
                    }

                    periodExhausted = true;
                }
            }
        }

        // NOTE(review): if the database holds no sequences the loop above never runs
        // and commonItems is still null here - confirm callers never pass an empty database.
        if (commonItems.Count != 0)
        {
            return true;
        }

        position--;
    }

    return false;
}
/// <summary>
/// Projects every sequence of <paramref name="sourceProjectedDatabase"/> on
/// <paramref name="prefix"/> and collects the non-empty projections in a new database.
/// </summary>
/// <param name="prefix">Prefix the new database is built for.</param>
/// <param name="sourceProjectedDatabase">Database whose sequences are projected.</param>
/// <returns>
/// The new projected database, or null as soon as more sequences failed to
/// project than <c>sourceProjectedDatabase.Size - (int)_minSupport</c> allows.
/// </returns>
private ProjectedDatabase ConstructProjectedDatabase(Sequence prefix, ProjectedDatabase sourceProjectedDatabase)
{
    var lastItemOfPrefix = prefix.LastItem;
    var result = new ProjectedDatabase(prefix);

    // How many sequences may fail to project before the result can no longer
    // hold a sequential pattern.
    int missesAllowed = sourceProjectedDatabase.Size - (int)_minSupport;

    int sequenceCount = sourceProjectedDatabase.Size;
    int sid = 0;
    while (sid < sequenceCount)
    {
        var source = sourceProjectedDatabase[sid];
        bool wasProjected = false;

        // Only sequences containing the last item of the prefix can be projected.
        int itemsetIndex = source.Contains(lastItemOfPrefix);
        if (itemsetIndex >= 0)
        {
            var projection = PseudoSequence.MakeProjection(source, prefix, itemsetIndex);
            if (projection != null && projection.IsNotEmpty)
            {
                result.Add(projection);
                wasProjected = true;
            }
        }

        if (!wasProjected)
        {
            // Stop early once too many sequences have been missed.
            missesAllowed--;
            if (missesAllowed < 0)
            {
                return null;
            }
        }

        sid++;
    }

    return result;
}
/// <summary>
/// Looks for a backward-I-extension item: an item that, for some prefix
/// position i, accompanies the i-th prefix element in the inspected range of
/// every sequence of the database.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if such an item exists for any prefix position, otherwise FALSE.</returns>
private static bool BackwardIExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // Per-sequence value returned by GetLastInLast for the previously handled position.
    var previousLastInLast = new int[projectedDatabase.Count];
    // Scratch set, reused for each sequence.
    var companionItems = new HashSet<uint>();

    int position = projectedDatabase.Prefix.Size - 1;
    while (position >= 0)
    {
        var element = projectedDatabase.Prefix[position];
        HashSet<uint> commonItems = null;
        bool rangeExhausted = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // Always refreshed, even after the candidates ran out, because the
            // next (lower) position starts from these values.
            int rangeEnd = sequence.GetLastInLast(element, previousLastInLast[sid]);
            previousLastInLast[sid] = rangeEnd;

            if (rangeExhausted)
            {
                continue;
            }

            // The range starts right after the first instance of the preceding prefix
            // part (or at 0 for the first element) and includes rangeEnd itself.
            int cursor = position != 0 ? sequence.GetFirstInstance(position) + 1 : 0;

            companionItems.Clear();
            while (cursor <= rangeEnd)
            {
                var itemset = sequence.RawItems[cursor];

                // Only itemsets that contain the whole prefix element contribute.
                if (element.IsSubsetOf(itemset))
                {
                    companionItems.UnionWith(itemset);
                }

                cursor++;
            }

            // The items of the prefix element itself are not extension candidates.
            companionItems.ExceptWith(element);

            if (commonItems == null)
            {
                commonItems = new HashSet<uint>(companionItems);
            }
            else
            {
                commonItems.IntersectWith(companionItems);
            }

            if (commonItems.Count == 0)
            {
                // Nothing shared at the first element: no lower position is left to try.
                if (position == 0)
                {
                    return false;
                }

                rangeExhausted = true;
            }
        }

        // NOTE(review): if the database holds no sequences the loop above never runs
        // and commonItems is still null here - confirm callers never pass an empty database.
        if (commonItems.Count > 0)
        {
            return true;
        }

        position--;
    }

    return false;
}
/// <summary>
/// Decides whether the prefix of a projection can be safely pruned.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if <c>BackScanS</c> or, failing that, <c>BackScanI</c> returns TRUE.</returns>
public static bool BackScan(ProjectedDatabase projectedDatabase)
{
    // BackScanI is consulted only when BackScanS found nothing.
    if (BackScanS(projectedDatabase))
    {
        return true;
    }

    return BackScanI(projectedDatabase);
}
/// <summary>
/// Reports whether the prefix of a projected database can be extended, either
/// forward or backward.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is examined.</param>
/// <param name="newSequentialPatterns">Patterns grown from the prefix.</param>
/// <returns>
/// TRUE if some grown pattern has exactly the support of the prefix, or if
/// <c>ClosureChecker.BackwardExtensionCheck</c> returns TRUE.
/// </returns>
private static bool HasProjectedDatabaseAnyExtensions(ProjectedDatabase projectedDatabase, IEnumerable<Sequence> newSequentialPatterns)
{
    // A grown pattern with the same support as the prefix is a forward extension.
    if (newSequentialPatterns.Any(grown => grown.Support == projectedDatabase.Prefix.Support))
    {
        return true;
    }

    // No forward extension found, so the result is decided by the backward check.
    bool hasBackwardExtension = ClosureChecker.BackwardExtensionCheck(projectedDatabase);
    return hasBackwardExtension;
}
/// <summary>
/// Decides whether the prefix of a projection can be safely pruned.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if <c>BackScanS</c> or, failing that, <c>BackScanI</c> returns TRUE.</returns>
public static bool BackScan(ProjectedDatabase projectedDatabase)
{
    bool prunable = BackScanS(projectedDatabase);

    // The second scan runs only when the first one did not already allow pruning.
    if (!prunable)
    {
        prunable = BackScanI(projectedDatabase);
    }

    return prunable;
}
/// <summary>
/// Looks for a backward-S-extension item: an item present in the i-th maximum
/// period of every sequence of the database, for some prefix position i.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if such an item exists for any prefix position, otherwise FALSE.</returns>
private static bool BackwardSExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // Per-sequence value returned by GetLastInLast for the previously handled position.
    var previousLastInLast = new int[projectedDatabase.Count];
    // Scratch set, reused for the period of each sequence.
    var periodItems = new HashSet<uint>();

    int position = projectedDatabase.Prefix.Size - 1;
    while (position >= 0)
    {
        var element = projectedDatabase.Prefix[position];
        HashSet<uint> commonItems = null;
        bool periodExhausted = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // Always refreshed, even after the period became empty, because the
            // next (lower) position starts from these values.
            int periodEnd = sequence.GetLastInLast(element, previousLastInLast[sid]);
            previousLastInLast[sid] = periodEnd;

            if (periodExhausted)
            {
                continue;
            }

            // The period starts right after the first instance of the preceding
            // prefix part (or at 0 for the first element) and ends before periodEnd.
            int cursor = 0;
            if (position != 0)
            {
                cursor = sequence.GetFirstInstance(position) + 1;
            }

            periodItems.Clear();
            while (cursor < periodEnd)
            {
                periodItems.UnionWith(sequence.RawItems[cursor]);
                cursor++;
            }

            if (commonItems == null)
            {
                commonItems = new HashSet<uint>(periodItems);
            }
            else
            {
                commonItems.IntersectWith(periodItems);
            }

            if (commonItems.Count == 0)
            {
                // Nothing shared at the first element: no lower position is left to try.
                if (position == 0)
                {
                    return false;
                }

                periodExhausted = true;
            }
        }

        // NOTE(review): if the database holds no sequences the loop above never runs
        // and commonItems is still null here - confirm callers never pass an empty database.
        if (commonItems.Count != 0)
        {
            return true;
        }

        position--;
    }

    return false;
}
/// <summary>
/// Recursive mining step: grows the prefix of <paramref name="projectedDatabase"/>
/// by its local frequent items, records the prefix when it has no forward or
/// backward extension, and recurses into every grown pattern whose projection
/// is not pruned by <c>ClosureChecker.BackScan</c>.
/// </summary>
/// <param name="projectedDatabase">Projected database of the current prefix.</param>
private void Bide(ProjectedDatabase projectedDatabase)
{
    if (Stopped)
    {
        return;
    }

    // The extension maps are shared fields, so they are reset before each scan.
    _sExtensions.Clear();
    _iExtensions.Clear();
    projectedDatabase.FindLocalFrequentItems(_minSupport, _sExtensions, _iExtensions);

    // Grown patterns are collected up front because the recursion reuses the shared maps.
    var grownPatterns = new List<Sequence>();

    // Sequence-extension items, ordered by key.
    foreach (var sExtension in _sExtensions.OrderBy(e => e.Key))
    {
        Sequence grownBySStep =
            Sequence.CreateNewBySStep(projectedDatabase.Prefix, sExtension.Key, sExtension.Value);
        grownPatterns.Add(grownBySStep);
    }

    // Itemset-extension items, ordered by key.
    foreach (var iExtension in _iExtensions.OrderBy(e => e.Key))
    {
        Sequence grownByIStep =
            Sequence.CreateNewByIStep(projectedDatabase.Prefix, iExtension.Key, iExtension.Value);
        grownPatterns.Add(grownByIStep);
    }

    // A prefix without any forward or backward extension is a closed sequential pattern.
    if (HasProjectedDatabaseAnyExtensions(projectedDatabase, grownPatterns))
    {
        NoClosedSequencesCount++;
    }
    else
    {
        _allSequentialPatterns.Add(projectedDatabase.Prefix);
        LogPattern(projectedDatabase.Prefix);
    }

    // Split the search space by grown pattern and mine each part recursively.
    int next = 0;
    while (next < grownPatterns.Count)
    {
        ProjectedDatabase grownDatabase =
            ConstructProjectedDatabase(grownPatterns[next], projectedDatabase);
        next++;

        bool hasSequences = grownDatabase != null && grownDatabase.IsNotEmpty;
        if (!hasSequences)
        {
            continue;
        }

        if (ClosureChecker.BackScan(grownDatabase))
        {
            PrunedSequencesCount++;
        }
        else
        {
            Bide(grownDatabase);
        }
    }
}
/// <summary>
/// Scans backwards through the prefix for an item that, for some prefix
/// position i, accompanies the i-th prefix element in the inspected range
/// (bounded by <c>GetLastInFirst</c>) of every sequence of the database.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if such an item exists for any prefix position, otherwise FALSE.</returns>
private static bool BackScanI(ProjectedDatabase projectedDatabase)
{
    // Per-sequence value returned by GetLastInFirst for the previously handled position.
    var previousLastInFirst = new int[projectedDatabase.Count];
    // Scratch set, reused for each sequence.
    var companionItems = new HashSet<uint>();

    var lastPosition = projectedDatabase.Prefix.Size - 1;
    int position = lastPosition;
    while (position >= 0)
    {
        var element = projectedDatabase.Prefix[position];
        var lastItemOfElement = element.Last();
        HashSet<uint> commonItems = null;
        bool rangeExhausted = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // Always refreshed, even after the candidates ran out, because the
            // next (lower) position starts from these values.
            int rangeEnd = sequence.GetLastInFirst(position, element, previousLastInFirst[sid]);
            previousLastInFirst[sid] = rangeEnd;

            if (rangeExhausted)
            {
                continue;
            }

            // The range starts right after the first instance of the preceding prefix
            // part (or at 0 for the first element) and includes rangeEnd itself.
            int cursor = position != 0 ? sequence.GetFirstInstance(position) + 1 : 0;

            companionItems.Clear();
            while (cursor <= rangeEnd)
            {
                var itemset = sequence.RawItems[cursor];
                cursor++;

                // Only itemsets that contain the whole prefix element contribute.
                if (!element.IsSubsetOf(itemset))
                {
                    continue;
                }

                if (position == lastPosition)
                {
                    // For the last prefix element only items smaller than its last item count.
                    companionItems.UnionWith(itemset.Where(a => a < lastItemOfElement));
                }
                else
                {
                    companionItems.UnionWith(itemset);
                }
            }

            // The items of the prefix element itself are not extension candidates.
            companionItems.ExceptWith(element);

            if (commonItems == null)
            {
                commonItems = new HashSet<uint>(companionItems);
            }
            else
            {
                commonItems.IntersectWith(companionItems);
            }

            if (commonItems.Count == 0)
            {
                // Nothing shared at the first element: no lower position is left to try.
                if (position == 0)
                {
                    return false;
                }

                rangeExhausted = true;
            }
        }

        // NOTE(review): if the database holds no sequences the loop above never runs
        // and commonItems is still null here - confirm callers never pass an empty database.
        if (commonItems.Count != 0)
        {
            return true;
        }

        position--;
    }

    return false;
}
/// <summary>
/// Tells whether a projected database contains any backward-extension event.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>
/// TRUE if <c>BackwardSExtensionCheck</c> or, failing that,
/// <c>BackwardIExtensionCheck</c> returns TRUE.
/// </returns>
public static bool BackwardExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // The I-extension check is consulted only when the S-extension check found nothing.
    if (BackwardSExtensionCheck(projectedDatabase))
    {
        return true;
    }

    return BackwardIExtensionCheck(projectedDatabase);
}
/// <summary>
/// Scans backwards through the prefix for an item that, for some prefix
/// position i, accompanies the i-th prefix element in the inspected range
/// (bounded by <c>GetLastInFirst</c>) of every sequence of the database.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if such an item exists for any prefix position, otherwise FALSE.</returns>
private static bool BackScanI(ProjectedDatabase projectedDatabase)
{
    // Per-sequence value returned by GetLastInFirst for the previously handled position.
    var previousLastInFirst = new int[projectedDatabase.Count];
    // Scratch set, reused for each sequence.
    var companionItems = new HashSet<uint>();

    var lastPosition = projectedDatabase.Prefix.Size - 1;
    int position = lastPosition;
    while (position >= 0)
    {
        var element = projectedDatabase.Prefix[position];
        var lastItemOfElement = element.Last();
        HashSet<uint> commonItems = null;
        bool rangeExhausted = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // Always refreshed, even after the candidates ran out, because the
            // next (lower) position starts from these values.
            int rangeEnd = sequence.GetLastInFirst(position, element, previousLastInFirst[sid]);
            previousLastInFirst[sid] = rangeEnd;

            if (!rangeExhausted)
            {
                // The range starts right after the first instance of the preceding prefix
                // part (or at 0 for the first element) and includes rangeEnd itself.
                int cursor = 0;
                if (position != 0)
                {
                    cursor = sequence.GetFirstInstance(position) + 1;
                }

                companionItems.Clear();
                for (; cursor <= rangeEnd; cursor++)
                {
                    var itemset = sequence.RawItems[cursor];

                    // Only itemsets that contain the whole prefix element contribute.
                    if (element.IsSubsetOf(itemset))
                    {
                        if (position == lastPosition)
                        {
                            // For the last prefix element only items smaller than its last item count.
                            companionItems.UnionWith(itemset.Where(a => a < lastItemOfElement));
                        }
                        else
                        {
                            companionItems.UnionWith(itemset);
                        }
                    }
                }

                // The items of the prefix element itself are not extension candidates.
                companionItems.ExceptWith(element);

                if (commonItems == null)
                {
                    commonItems = new HashSet<uint>(companionItems);
                }
                else
                {
                    commonItems.IntersectWith(companionItems);
                }

                if (commonItems.Count == 0)
                {
                    // Nothing shared at the first element: no lower position is left to try.
                    if (position == 0)
                    {
                        return false;
                    }

                    rangeExhausted = true;
                }
            }
        }

        // NOTE(review): if the database holds no sequences the loop above never runs
        // and commonItems is still null here - confirm callers never pass an empty database.
        if (commonItems.Count != 0)
        {
            return true;
        }

        position--;
    }

    return false;
}
/// <summary>
/// Looks for a backward-I-extension item: an item that, for some prefix
/// position i, accompanies the i-th prefix element in the inspected range of
/// every sequence of the database.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>TRUE if such an item exists for any prefix position, otherwise FALSE.</returns>
private static bool BackwardIExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // Per-sequence value returned by GetLastInLast for the previously handled position.
    var previousLastInLast = new int[projectedDatabase.Count];
    // Scratch set, reused for each sequence.
    var companionItems = new HashSet<uint>();

    int position = projectedDatabase.Prefix.Size - 1;
    while (position >= 0)
    {
        var element = projectedDatabase.Prefix[position];
        HashSet<uint> commonItems = null;
        bool rangeExhausted = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // Always refreshed, even after the candidates ran out, because the
            // next (lower) position starts from these values.
            int rangeEnd = sequence.GetLastInLast(element, previousLastInLast[sid]);
            previousLastInLast[sid] = rangeEnd;

            if (!rangeExhausted)
            {
                // The range starts right after the first instance of the preceding prefix
                // part (or at 0 for the first element) and includes rangeEnd itself.
                int cursor = 0;
                if (position != 0)
                {
                    cursor = sequence.GetFirstInstance(position) + 1;
                }

                companionItems.Clear();
                for (; cursor <= rangeEnd; cursor++)
                {
                    var itemset = sequence.RawItems[cursor];

                    // Only itemsets that contain the whole prefix element contribute.
                    if (element.IsSubsetOf(itemset))
                    {
                        companionItems.UnionWith(itemset);
                    }
                }

                // The items of the prefix element itself are not extension candidates.
                companionItems.ExceptWith(element);

                if (commonItems == null)
                {
                    commonItems = new HashSet<uint>(companionItems);
                }
                else
                {
                    commonItems.IntersectWith(companionItems);
                }

                if (commonItems.Count == 0)
                {
                    // Nothing shared at the first element: no lower position is left to try.
                    if (position == 0)
                    {
                        return false;
                    }

                    rangeExhausted = true;
                }
            }
        }

        // NOTE(review): if the database holds no sequences the loop above never runs
        // and commonItems is still null here - confirm callers never pass an empty database.
        if (commonItems.Count > 0)
        {
            return true;
        }

        position--;
    }

    return false;
}
/// <summary>
/// Tells whether a projected database contains any backward-extension event.
/// </summary>
/// <param name="projectedDatabase">Projected database whose prefix is checked.</param>
/// <returns>
/// TRUE if <c>BackwardSExtensionCheck</c> or, failing that,
/// <c>BackwardIExtensionCheck</c> returns TRUE.
/// </returns>
public static bool BackwardExtensionCheck(ProjectedDatabase projectedDatabase)
{
    bool found = BackwardSExtensionCheck(projectedDatabase);

    // The I-extension check runs only when no S-extension was found.
    if (!found)
    {
        found = BackwardIExtensionCheck(projectedDatabase);
    }

    return found;
}