/// <summary>
/// Finds the last-in-last appearance of the prefix event <paramref name="ei"/> by walking
/// the sequence's raw itemsets backwards.
/// </summary>
/// <param name="me">Pseudo sequence whose <c>RawItems</c> are searched.</param>
/// <param name="ei">I-th event of the prefix.</param>
/// <param name="last">
/// Last-in-last appearance of the ei+1 event; the search begins at the itemset just before it.
/// A value of 0 makes the search begin at the final itemset of the sequence.
/// </param>
/// <returns>
/// Index of the first itemset met during the backward walk for which
/// <c>ei.IsSubsetOf(itemset)</c> holds, or 0 when no itemset qualifies.
/// </returns>
public static int GetLastInLast(this PseudoSequence me, uint[] ei, int last)
{
    var itemsets = me.RawItems;

    // 0 doubles as "no upper bound yet": start one past the end of the sequence.
    int position = last != 0 ? last : itemsets.Length;

    while (--position >= 0)
    {
        if (ei.IsSubsetOf(itemsets[position]))
        {
            return position;
        }
    }

    return 0;
}
/// <summary>
/// Finds the last-in-first appearance of the prefix event <paramref name="ei"/>.
/// </summary>
/// <param name="me">Pseudo sequence whose <c>RawItems</c> and first instances are consulted.</param>
/// <param name="i">Index of the ei event in the prefix (not read by this method).</param>
/// <param name="ei">I-th event of the prefix.</param>
/// <param name="last">
/// Last-in-first appearance of the ei+1 event; the backward search begins just before it.
/// A value of 0 short-circuits to the last recorded first instance.
/// </param>
/// <returns>
/// When <paramref name="last"/> is 0, the last element of the sequence's first-instance list.
/// Otherwise the index of the first itemset met while walking backwards from
/// <paramref name="last"/> for which <c>ei.IsSubsetOf(itemset)</c> holds, or 0 when none qualifies.
/// </returns>
public static int GetLastInFirst(this PseudoSequence me, int i, uint[] ei, int last)
{
    if (last != 0)
    {
        var itemsets = me.RawItems;
        int position = last;

        while (--position >= 0)
        {
            if (ei.IsSubsetOf(itemsets[position]))
            {
                return position;
            }
        }

        return 0;
    }

    // No bound from the following event yet: fall back to the last stored first instance.
    return me._firstInstances.Last();
}
/// <summary>
/// Registers one pseudo sequence per distinct item of <paramref name="sequence"/> in the
/// per-item projection table, anchored at the item's first occurrence.
/// </summary>
/// <param name="sequence">Sequence whose items are projected.</param>
protected void addSequenceToProjection(Sequence sequence)
{
    // Items already handled for this sequence; prevents the sequence from being
    // added to the same item's projection more than once.
    var handledItems = new HashSet<Item>(Item.EqComp);

    for (int transactionIndex = 0; transactionIndex < sequence.NumTransactions; transactionIndex++)
    {
        for (int itemIndex = 0; itemIndex < sequence[transactionIndex].NumItems; itemIndex++)
        {
            Item current = sequence[transactionIndex][itemIndex];

            // Add returns false when the item was seen earlier in this sequence.
            if (!handledItems.Add(current))
            {
                continue;
            }

            // Anchor a pseudo sequence at this occurrence, then advance it
            // to the item that follows in the sequence.
            var projection = new PseudoSequence();
            projection.Init(_nextPseudoSequenceIndex, transactionIndex, itemIndex);
            projection.PointToNextItem(sequence);

            // The item's projection list is created even when the pseudo sequence
            // turns out to be invalid and is therefore not stored.
            List<PseudoSequence> projectionsOfItem;
            if (!_projections.TryGetValue(current, out projectionsOfItem))
            {
                projectionsOfItem = new List<PseudoSequence>();
                _projections.Add(current, projectionsOfItem);
            }

            if (projection.IsValid)
            {
                projectionsOfItem.Add(projection);
            }
        }
    }

    _nextPseudoSequenceIndex++;
}
/// <summary>
/// Builds the database projected on <paramref name="prefix"/> from <paramref name="database"/>,
/// giving up early once too many sequences have failed to project.
/// </summary>
/// <param name="prefix">Prefix to project on; its last item selects the candidate sequences.</param>
/// <param name="database">Database whose sequences are projected.</param>
/// <returns>
/// The projected database, or <c>null</c> when more than
/// <c>database.Size - (int)_minSupport</c> sequences could not be projected.
/// </returns>
private ProjectedDatabase ConstructProjectedDatabase(Sequence prefix, ProjectedDatabase database)
{
    var lastItemOfPrefix = prefix.LastItem;
    var result = new ProjectedDatabase(prefix);
    var sequenceCount = database.Size;

    // Number of sequences that may still fail to project before the projected
    // database can no longer reach the minimum support.
    var missesAllowed = sequenceCount - (int)_minSupport;

    for (int sid = 0; sid < sequenceCount; sid++)
    {
        var source = database[sid];

        // Only sequences containing the prefix's last item can be projected.
        int itemsetIndex = source.Contains(lastItemOfPrefix);
        if (itemsetIndex >= 0)
        {
            var projection = PseudoSequence.MakeProjection(source, prefix, itemsetIndex, false);
            if (projection != null)
            {
                result.Add(projection);
                continue;
            }
        }

        // This sequence contributes nothing; stop as soon as the miss budget is exceeded.
        missesAllowed--;
        if (missesAllowed < 0)
        {
            return null;
        }
    }

    return result;
}
/// <summary>
/// Checks if a given database contains backward-I-extension event.
/// </summary>
/// <param name="projectedDatabase">Projected database together with the prefix it was built for.</param>
/// <returns>
/// <c>true</c> when, for some prefix event ei, at least one item other than those of ei
/// co-occurs with ei inside the scanned window of every sequence; otherwise <c>false</c>.
/// An empty database yields <c>false</c>.
/// </returns>
private static bool BackwardIExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // With no sequences the per-event candidate set below would never be created and the
    // check after the sequence loop would dereference null; nothing can be common to zero sequences.
    if (projectedDatabase.Count == 0)
    {
        return false;
    }

    var lastInLastAppearances = new int[projectedDatabase.Count];
    var seqBackwardIExtensionItemset = new HashSet<uint>();

    // Walk the prefix events from last to first; each pass narrows lastInLastAppearances.
    for (int i = projectedDatabase.Prefix.Size - 1; i >= 0; i--)
    {
        var ei = projectedDatabase.Prefix[i];
        var ithBackwardIExtensionItemset = default(HashSet<uint>);
        var isIthBackwardIExtensionItemsetEmpty = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // The i-th I-extension period of a prefix sequence is defined:
            // 1) 1 < i <= n: it is the piece of sequence between the end
            //    of the first instance of prefix e1e2..ei-1
            //    and the beginning of the first event after the LLi.
            // 2) i == 1: it is the piece of sequence located before the first event after LL1.
            int lastInLast = sequence.GetLastInLast(ei, lastInLastAppearances[sid]);
            lastInLastAppearances[sid] = lastInLast;

            // Once the intersection is empty, only keep filling lastInLastAppearances
            // so the (i-1)-th period can be checked.
            if (isIthBackwardIExtensionItemsetEmpty)
            {
                continue;
            }

            int firstInstance = 0;
            if (i != 0)
            {
                firstInstance = sequence.GetFirstInstance(i) + 1;
            }

            // Collect every item that shares an itemset with ei inside the window (inclusive of LLi).
            seqBackwardIExtensionItemset.Clear();
            for (; firstInstance <= lastInLast; firstInstance++)
            {
                if (ei.IsSubsetOf(sequence.RawItems[firstInstance]))
                {
                    seqBackwardIExtensionItemset.UnionWith(sequence.RawItems[firstInstance]);
                }
            }

            // Items of ei itself are not extensions.
            seqBackwardIExtensionItemset.ExceptWith(ei);

            if (ithBackwardIExtensionItemset == null)
            {
                ithBackwardIExtensionItemset = new HashSet<uint>(seqBackwardIExtensionItemset);
            }
            else
            {
                ithBackwardIExtensionItemset.IntersectWith(seqBackwardIExtensionItemset);
            }

            if (ithBackwardIExtensionItemset.Count != 0)
            {
                continue;
            }

            // Empty intersection: for i == 0 no backward-I-extension can exist at all,
            // otherwise try the (i-1)-th period.
            if (i == 0)
            {
                return false;
            }

            isIthBackwardIExtensionItemsetEmpty = true;
        }

        // Some item survived the intersection over every sequence: a backward-I-extension exists.
        if (ithBackwardIExtensionItemset.Count > 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Scans the prefix events backwards, looking for an item that co-occurs with a prefix event
/// inside the scanned window of every sequence in the database. The window of event i runs from
/// just after <c>GetFirstInstance(i)</c> (or from 0 when i is 0) through the last-in-first
/// appearance of the event, inclusive.
/// </summary>
/// <param name="projectedDatabase">Projected database together with the prefix it was built for.</param>
/// <returns>
/// <c>true</c> when such a common item is found for some prefix event; otherwise <c>false</c>.
/// An empty database yields <c>false</c>.
/// </returns>
private static bool BackScanI(ProjectedDatabase projectedDatabase)
{
    // With no sequences the per-event candidate set below would never be created and the
    // check after the sequence loop would dereference null; nothing can be common to zero sequences.
    if (projectedDatabase.Count == 0)
    {
        return false;
    }

    var lastInFirstAppearances = new int[projectedDatabase.Count];
    var seqSemiMaximumPeriod = new HashSet<uint>();
    var n = projectedDatabase.Prefix.Size - 1;

    // Walk the prefix events from last to first; each pass narrows lastInFirstAppearances.
    for (int i = n; i >= 0; i--)
    {
        var ei = projectedDatabase.Prefix[i];
        var eiLastItemId = ei.Last();
        var ithSemiMaximumPeriod = default(HashSet<uint>);
        var isIthSemiMaximumPeriodEmpty = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            int lastInFirst = sequence.GetLastInFirst(i, ei, lastInFirstAppearances[sid]);
            lastInFirstAppearances[sid] = lastInFirst;

            // Once the intersection is empty, only keep filling lastInFirstAppearances
            // so the (i-1)-th semi-maximum period can be checked.
            if (isIthSemiMaximumPeriodEmpty)
            {
                continue;
            }

            int firstInstance = 0;
            if (i != 0)
            {
                firstInstance = sequence.GetFirstInstance(i) + 1;
            }

            seqSemiMaximumPeriod.Clear();
            for (; firstInstance <= lastInFirst; firstInstance++)
            {
                if (!ei.IsSubsetOf(sequence.RawItems[firstInstance]))
                {
                    continue;
                }

                // For the last prefix event only items smaller than its last item are collected;
                // for the other events the whole itemset is taken.
                seqSemiMaximumPeriod.UnionWith(
                    i == n
                        ? sequence.RawItems[firstInstance].Where(a => a < eiLastItemId)
                        : sequence.RawItems[firstInstance]);
            }

            // Items of ei itself are not extensions.
            seqSemiMaximumPeriod.ExceptWith(ei);

            if (ithSemiMaximumPeriod == null)
            {
                ithSemiMaximumPeriod = new HashSet<uint>(seqSemiMaximumPeriod);
            }
            else
            {
                ithSemiMaximumPeriod.IntersectWith(seqSemiMaximumPeriod);
            }

            if (ithSemiMaximumPeriod.Count != 0)
            {
                continue;
            }

            // Empty intersection: for i == 0 nothing can be found any more,
            // otherwise try the (i-1)-th semi-maximum period.
            if (i == 0)
            {
                return false;
            }

            isIthSemiMaximumPeriodEmpty = true;
        }

        // Some item survived the intersection over every sequence.
        if (ithSemiMaximumPeriod.Count != 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks if a given database contains backward-S-extension event.
/// </summary>
/// <param name="projectedDatabase">Projected database together with the prefix it was built for.</param>
/// <returns>
/// <c>true</c> when, for some prefix event, at least one item appears in the i-th maximum
/// period of every sequence; otherwise <c>false</c>. An empty database yields <c>false</c>.
/// </returns>
private static bool BackwardSExtensionCheck(ProjectedDatabase projectedDatabase)
{
    // With no sequences the per-event candidate set below would never be created and the
    // check after the sequence loop would dereference null; nothing can be common to zero sequences.
    if (projectedDatabase.Count == 0)
    {
        return false;
    }

    var lastInLastAppearances = new int[projectedDatabase.Count];
    var seqMaximumPeriod = new HashSet<uint>();

    // Walk the prefix events from last to first; each pass narrows lastInLastAppearances.
    for (int i = projectedDatabase.Prefix.Size - 1; i >= 0; i--)
    {
        var ei = projectedDatabase.Prefix[i];
        var ithMaximumPeriod = default(HashSet<uint>);
        var isIthMaximumPeriodEmpty = false;

        for (int sid = 0; sid < projectedDatabase.Count; sid++)
        {
            PseudoSequence sequence = projectedDatabase[sid];

            // The i-th maximum period of a prefix sequence is defined:
            // 1) 1 < i <= n: it is the piece of sequence between the end
            //    of the first instance of prefix e1e2...ei-1 and LLi.
            // 2) i == 1: it is the piece of sequence located before the first
            //    last-in-last appearance (LL1).
            int lastInLast = sequence.GetLastInLast(ei, lastInLastAppearances[sid]);
            lastInLastAppearances[sid] = lastInLast;

            // Once the intersection is empty, only keep filling lastInLastAppearances
            // so the (i-1)-th maximum period can be checked.
            if (isIthMaximumPeriodEmpty)
            {
                continue;
            }

            int firstInstance = 0;
            if (i != 0)
            {
                firstInstance = sequence.GetFirstInstance(i) + 1;
            }

            // Collect every item of the period; the itemset at LLi itself is excluded.
            seqMaximumPeriod.Clear();
            for (; firstInstance < lastInLast; firstInstance++)
            {
                seqMaximumPeriod.UnionWith(sequence.RawItems[firstInstance]);
            }

            if (ithMaximumPeriod == null)
            {
                ithMaximumPeriod = new HashSet<uint>(seqMaximumPeriod);
            }
            else
            {
                ithMaximumPeriod.IntersectWith(seqMaximumPeriod);
            }

            if (ithMaximumPeriod.Count != 0)
            {
                continue;
            }

            // Empty intersection: for i == 0 no backward-S-extension can exist at all,
            // otherwise try the (i-1)-th maximum period.
            if (i == 0)
            {
                return false;
            }

            isIthMaximumPeriodEmpty = true;
        }

        // Some item survived the intersection over every sequence: a backward-S-extension exists.
        if (ithMaximumPeriod.Count != 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tests whether this sequence occurs as a prefix in <paramref name="seq"/>, starting the search
/// at the position recorded in <paramref name="pSeq"/>, and produces the suffix that follows the match.
/// </summary>
/// <param name="fDict">Support per item; used to tolerate items whose equal support makes their order arbitrary.</param>
/// <param name="seq">Sequence that is searched.</param>
/// <param name="pSeq">Pseudo sequence giving the transaction and item index where the search starts.</param>
/// <param name="suffix">
/// Always assigned a new pseudo sequence; on a match it is initialised at the matching position
/// and advanced to the next item of <paramref name="seq"/>.
/// </param>
/// <returns>
/// The validity of <paramref name="suffix"/> when the whole prefix was matched; otherwise <c>false</c>.
/// </returns>
public bool IsPrefixOf(Dictionary<Item, int> fDict, Sequence seq, PseudoSequence pSeq, out PseudoSequence suffix)
{
    suffix = new PseudoSequence();

    if (seq.IsEmpty || seq.NumTransactions - pSeq.TransactionIndex < 0)
    {
        return false;
    }

    // Cursor into the prefix (this sequence).
    var prefixTransaction = 0;
    var prefixItem = 0;

    // Where the scan of the searched sequence begins.
    var startTransaction = pSeq.TransactionIndex;
    var startItem = pSeq.ItemIndex;

    var resumesInsideTransaction = pSeq.ItemIndex != 0;
    var lastTransactionIsSingleItem = this.LastTransaction.NumItems == 1;

    // The earlier part of the prefix was established in previous rounds, so only its
    // last item has to be matched, e.g. prefix=<(abc)> suffix=<(_c)> only needs item c.
    if (resumesInsideTransaction || lastTransactionIsSingleItem)
    {
        prefixTransaction = this.NumTransactions - 1;
        prefixItem = this[prefixTransaction].NumItems - 1;

        if (resumesInsideTransaction && lastTransactionIsSingleItem)
        {
            startTransaction++;
            startItem = 0;
        }
    }

    for (int i = startTransaction; i < seq.NumTransactions; i++)
    {
        // Only the first scanned transaction is entered part-way.
        int k = 0;
        if (i == startTransaction)
        {
            k = startItem;
        }

        for (; k < seq[i].NumItems; k++)
        {
            var expected = this[prefixTransaction][prefixItem];
            var candidate = seq[i][k];

            if (!expected.Equals(candidate))
            {
                // Items in the last prefix transaction must come after all previous items of
                // that transaction. Sorting normally guarantees this, but items with equal
                // support have an arbitrary order, so such a candidate is skipped, not rejected.
                int candidateSupport;
                bool skippable = !resumesInsideTransaction
                    || (fDict.TryGetValue(candidate, out candidateSupport)
                        && candidateSupport == fDict[expected]);

                if (skippable)
                {
                    continue;
                }

                // Prefix cannot be found in this transaction.
                break;
            }

            // Matched: advance within the current prefix transaction.
            prefixItem++;
            if (prefixItem < this[prefixTransaction].NumItems)
            {
                continue;
            }

            // Current prefix transaction is exhausted: move on to the next one.
            prefixTransaction++;
            prefixItem = 0;
            if (prefixTransaction < this.NumTransactions)
            {
                continue;
            }

            // Whole prefix matched: the suffix starts right after position [i, k].
            suffix.Init(pSeq.SequenceIndex, i, k);
            suffix.PointToNextItem(seq);
            return suffix.IsValid;
        }

        // A multi-item prefix transaction has to be matched in full inside one
        // transaction, so a partial match is discarded before the next transaction.
        if (this[prefixTransaction].NumItems != 1)
        {
            prefixTransaction = 0;
            prefixItem = 0;
        }
    }

    return false;
}
/// <summary>
/// Looks up the first-instance position stored for prefix index <paramref name="i"/>
/// (documented by the original author as the first instance of the prefix's (i-1)-th event).
/// </summary>
/// <param name="me">PseudoSequence whose first-instance table is read.</param>
/// <param name="i">Index of the i-th event in the prefix.</param>
/// <returns>The value stored at index <paramref name="i"/> of the first-instance table.</returns>
public static int GetFirstInstance(this PseudoSequence me, int i)
{
    var firstInstances = me._firstInstances;
    return firstInstances[i];
}