Example #1
0
        /// <summary>
        /// Walks the raw itemsets of the sequence backwards and reports the index of the
        /// nearest itemset for which <c>ei.IsSubsetOf(itemset)</c> holds.
        /// </summary>
        /// <param name="me">Sequence whose <c>RawItems</c> are scanned.</param>
        /// <param name="ei">I-th event of the prefix.</param>
        /// <param name="last">
        /// Last in last appearance of the (i+1)-th event; the scan starts one position before it.
        /// A value of 0 makes the scan start at the final itemset.
        /// </param>
        /// <returns>Index of the matching itemset, or 0 when no itemset matches.</returns>
        public static int GetLastInLast(this PseudoSequence me, uint[] ei, int last)
        {
            var itemsets = me.RawItems;

            // Zero means "no bound known yet": begin from the end of the sequence.
            int upperBound = last;
            if (upperBound == 0)
            {
                upperBound = itemsets.Length;
            }

            int position = upperBound - 1;
            while (position >= 0)
            {
                if (ei.IsSubsetOf(itemsets[position]))
                {
                    return position;
                }

                position--;
            }

            // NOTE(review): 0 is returned both for "not found" and for a match at index 0.
            return 0;
        }
Example #2
0
        /// <summary>
        /// Returns the last in first appearance of a given ei event.
        /// </summary>
        /// <param name="me">Sequence whose <c>RawItems</c> and <c>_firstInstances</c> are read.</param>
        /// <param name="i">Index of ei event in prefix (not read by this method).</param>
        /// <param name="ei">I-th event of the prefix.</param>
        /// <param name="last">
        /// Last in first appearance of the (i+1)-th event. When it is 0 the final entry of
        /// <c>_firstInstances</c> is returned without scanning.
        /// </param>
        /// <returns>
        /// The final stored first instance when <paramref name="last"/> is 0; otherwise the index of the
        /// nearest itemset before <paramref name="last"/> for which <c>ei.IsSubsetOf(itemset)</c> holds,
        /// or 0 when there is none.
        /// </returns>
        public static int GetLastInFirst(this PseudoSequence me, int i, uint[] ei, int last)
        {
            // NOTE(review): 0 doubles as the "nothing computed yet" marker, so a previous
            // result of 0 also takes this branch - confirm this is intended by the callers.
            if (last == 0)
            {
                return me._firstInstances.Last();
            }

            var itemsets = me.RawItems;

            // Search strictly before the appearance of the following event.
            int position = last - 1;
            while (position >= 0)
            {
                if (ei.IsSubsetOf(itemsets[position]))
                {
                    return position;
                }

                position--;
            }

            return 0;
        }
Example #3
0
    /// <summary>
    /// Creates a pseudo sequence for the first occurrence of every distinct item of
    /// <paramref name="sequence"/> and files it under that item in <c>_projections</c>.
    /// </summary>
    /// <param name="sequence">Sequence whose transactions and items are walked in order.</param>
    /// <remarks>
    /// A projection list is created for every distinct item, but a pseudo sequence is only
    /// appended to it when it reports <c>IsValid</c> after <c>PointToNextItem</c>.
    /// <c>_nextPseudoSequenceIndex</c> is incremented once per call.
    /// </remarks>
    protected void addSequenceToProjection(Sequence sequence)
    {
        // Items already handled for this sequence; prevents the same sequence
        // from being added to an item's projection more than once.
        HashSet<Item> seen = new HashSet<Item>(Item.EqComp);

        for (int i = 0; i < sequence.NumTransactions; i++)
        {
            for (int k = 0; k < sequence[i].NumItems; k++)
            {
                Item item = sequence[i][k];

                // HashSet.Add returns false when the item is already present, so the
                // membership test and the insertion need only a single lookup.
                if (!seen.Add(item))
                {
                    continue;
                }

                var pSeq = new PseudoSequence();
                pSeq.Init(_nextPseudoSequenceIndex, i, k);

                // point to the next item in the sequence
                pSeq.PointToNextItem(sequence);

                // fetch (or create) the list of projections for the current item
                List<PseudoSequence> itemProjection;
                if (!_projections.TryGetValue(item, out itemProjection))
                {
                    itemProjection = new List<PseudoSequence>();
                    _projections.Add(item, itemProjection);
                }

                if (pSeq.IsValid)
                {
                    itemProjection.Add(pSeq);
                }
            }
        }

        _nextPseudoSequenceIndex++;
    }
        /// <summary>
        /// Builds the database projected by <paramref name="prefix"/> from the sequences of
        /// <paramref name="database"/>.
        /// </summary>
        /// <param name="prefix">Prefix to project by; its <c>LastItem</c> is looked up in every sequence.</param>
        /// <param name="database">Database whose sequences are projected.</param>
        /// <returns>
        /// The projected database, or <c>null</c> as soon as more than
        /// <c>database.Size - (int)_minSupport</c> sequences could not be projected.
        /// </returns>
        private ProjectedDatabase ConstructProjectedDatabase(Sequence prefix, ProjectedDatabase database)
        {
            var lastItemOfPrefix = prefix.LastItem;
            var result           = new ProjectedDatabase(prefix);

            // Number of sequences that may fail to project while the result
            // can still hold at least _minSupport sequences.
            var missesAllowed = database.Size - (int)_minSupport;

            int sequenceCount = database.Size;
            for (int sid = 0; sid < sequenceCount; sid++)
            {
                // Project the sequence only if it contains the last item of the prefix.
                int itemsetIndex = database[sid].Contains(lastItemOfPrefix);
                if (itemsetIndex >= 0)
                {
                    var projection = PseudoSequence.MakeProjection(database[sid], prefix, itemsetIndex, false);
                    if (projection != null)
                    {
                        result.Add(projection);
                        continue;
                    }
                }

                // This sequence contributes nothing; give up once the budget is exhausted.
                missesAllowed--;
                if (missesAllowed < 0)
                {
                    return null;
                }
            }

            return result;
        }
        /// <summary>
        /// Checks if a given database contains backward-I-extension event.
        /// </summary>
        /// <param name="projectedDatabase">Projected database whose prefix and pseudo sequences are examined.</param>
        /// <returns>
        /// <c>true</c> if, for some prefix event, the item sets collected from every sequence share
        /// at least one item; otherwise <c>false</c>. An empty database yields <c>false</c>.
        /// </returns>
        private static bool BackwardIExtensionCheck(ProjectedDatabase projectedDatabase)
        {
            // With no sequences the per-event intersection below is never created and would
            // be dereferenced while still null; nothing can form an extension in that case.
            if (projectedDatabase.Count == 0)
            {
                return(false);
            }

            var lastInLastAppearances        = new int[projectedDatabase.Count];
            var seqBackwardIExtensionItemset = new HashSet <uint>();

            for (int i = projectedDatabase.Prefix.Size - 1; i >= 0; i--)
            {
                var ei = projectedDatabase.Prefix[i];

                // Intersection over all sequences; created from the first sequence's item set.
                var ithBackwardIExtensionItemset = default(HashSet <uint>);

                var isIthBackwardIExtensionItemsetEmpty = false;

                for (int sid = 0; sid < projectedDatabase.Count; sid++)
                {
                    PseudoSequence sequence = projectedDatabase[sid];

                    // The i-th I-extension period of a prefix sequence is defined:
                    // 1) 1 < i <= n: it is the piece of sequence between the end
                    //               of the first instance of prefix e1e2..ei-1
                    //               and the beginning of the first event after the LLi.

                    // 2) i == 1: it is the piece of sequence located before the first event after LL1.

                    int lastInLast = sequence.GetLastInLast(ei, lastInLastAppearances[sid]);
                    lastInLastAppearances[sid] = lastInLast;

                    // If the i-th period is already known to be empty, only keep filling
                    // lastInLastAppearances so the (i-1)-th period can be checked next.
                    if (isIthBackwardIExtensionItemsetEmpty)
                    {
                        continue;
                    }

                    int firstInstance = 0;
                    if (i != 0)
                    {
                        firstInstance = sequence.GetFirstInstance(i) + 1;
                    }

                    // Collect the items of every itemset in the period that contains ei,
                    // then drop the items of ei itself.
                    seqBackwardIExtensionItemset.Clear();
                    for (; firstInstance <= lastInLast; firstInstance++)
                    {
                        if (ei.IsSubsetOf(sequence.RawItems[firstInstance]))
                        {
                            seqBackwardIExtensionItemset.UnionWith(sequence.RawItems[firstInstance]);
                        }
                    }
                    seqBackwardIExtensionItemset.ExceptWith(ei);

                    // ScanSkip:
                    if (ithBackwardIExtensionItemset == null)
                    {
                        ithBackwardIExtensionItemset = new HashSet <uint>(seqBackwardIExtensionItemset);
                    }
                    else
                    {
                        ithBackwardIExtensionItemset.IntersectWith(seqBackwardIExtensionItemset);
                    }

                    // If the i-th period is empty and i == 0 there cannot exist any backward-I-extension;
                    // otherwise, if i > 0, try to find one in the (i-1)-th period.
                    if (ithBackwardIExtensionItemset.Count != 0)
                    {
                        continue;
                    }

                    if (i == 0)
                    {
                        return(false);
                    }

                    isIthBackwardIExtensionItemsetEmpty = true;
                }

                // If the i-th periods of all sequences share some item, we have a backward-I-extension.
                if (ithBackwardIExtensionItemset.Count > 0)
                {
                    return(true);
                }
            }

            return(false);
        }
        /// <summary>
        /// Looks for an item that is shared by the i-th semi-maximum periods of all sequences
        /// of the projected database, for some prefix event i.
        /// </summary>
        /// <param name="projectedDatabase">Projected database whose prefix and pseudo sequences are examined.</param>
        /// <returns>
        /// <c>true</c> if such a shared item exists for some prefix event; otherwise <c>false</c>.
        /// An empty database yields <c>false</c>.
        /// </returns>
        private static bool BackScanI(ProjectedDatabase projectedDatabase)
        {
            // With no sequences the per-event intersection below is never created and would
            // be dereferenced while still null; there is nothing to scan in that case.
            if (projectedDatabase.Count == 0)
            {
                return(false);
            }

            var lastInFirstAppearances = new int[projectedDatabase.Count];
            var seqSemiMaximumPeriod   = new HashSet <uint>();
            var n = projectedDatabase.Prefix.Size - 1;

            for (int i = n; i >= 0; i--)
            {
                var ei                   = projectedDatabase.Prefix[i];
                var eiLastItemId         = ei.Last();

                // Intersection over all sequences; created from the first sequence's item set.
                var ithSemiMaximumPeriod = default(HashSet <uint>);

                var isIthSemiMaximumPeriodEmpty = false;

                for (int sid = 0; sid < projectedDatabase.Count; sid++)
                {
                    PseudoSequence sequence = projectedDatabase[sid];

                    int lastInFirst = sequence.GetLastInFirst(i, ei, lastInFirstAppearances[sid]);
                    lastInFirstAppearances[sid] = lastInFirst;

                    // If the i-th semi-maximum period is already known to be empty, only keep filling
                    // lastInFirstAppearances so the (i-1)-th semi-maximum period can be checked next.
                    if (isIthSemiMaximumPeriodEmpty)
                    {
                        continue;
                    }

                    int firstInstance = 0;
                    if (i != 0)
                    {
                        firstInstance = sequence.GetFirstInstance(i) + 1;
                    }

                    seqSemiMaximumPeriod.Clear();
                    for (; firstInstance <= lastInFirst; firstInstance++)
                    {
                        if (!ei.IsSubsetOf(sequence.RawItems[firstInstance]))
                        {
                            continue;
                        }

                        // For the last prefix event only items with an id below the
                        // event's last item id are taken into account.
                        seqSemiMaximumPeriod.UnionWith(
                            i == n
                                ? sequence.RawItems[firstInstance].Where(a => a < eiLastItemId)
                                : sequence.RawItems[firstInstance]);
                    }
                    seqSemiMaximumPeriod.ExceptWith(ei);

                    // ScanSkip:
                    if (ithSemiMaximumPeriod == null)
                    {
                        ithSemiMaximumPeriod = new HashSet <uint>(seqSemiMaximumPeriod);
                    }
                    else
                    {
                        ithSemiMaximumPeriod.IntersectWith(seqSemiMaximumPeriod);
                    }

                    // If the i-th semi-maximum period is empty and i == 0 nothing can be found;
                    // otherwise, if i > 0, try the (i-1)-th semi-maximum period.
                    if (ithSemiMaximumPeriod.Count != 0)
                    {
                        continue;
                    }

                    if (i == 0)
                    {
                        return(false);
                    }

                    isIthSemiMaximumPeriodEmpty = true;
                }

                // If the i-th semi-maximum periods of all sequences share some item, the scan succeeds.
                if (ithSemiMaximumPeriod.Count != 0)
                {
                    return(true);
                }
            }

            return(false);
        }
        /// <summary>
        /// Checks if a given database contains backward-S-extension event.
        /// </summary>
        /// <param name="projectedDatabase">Projected database whose prefix and pseudo sequences are examined.</param>
        /// <returns>
        /// <c>true</c> if, for some prefix event, the i-th maximum periods of all sequences share
        /// at least one item; otherwise <c>false</c>. An empty database yields <c>false</c>.
        /// </returns>
        private static bool BackwardSExtensionCheck(ProjectedDatabase projectedDatabase)
        {
            // With no sequences the per-event intersection below is never created and would
            // be dereferenced while still null; nothing can form an extension in that case.
            if (projectedDatabase.Count == 0)
            {
                return(false);
            }

            var lastInLastAppearances = new int[projectedDatabase.Count];
            var seqMaximumPeriod      = new HashSet <uint>();

            for (int i = projectedDatabase.Prefix.Size - 1; i >= 0; i--)
            {
                var ei = projectedDatabase.Prefix[i];

                // Intersection over all sequences; created from the first sequence's item set.
                var ithMaximumPeriod = default(HashSet <uint>);

                var isIthMaximumPeriodEmpty = false;

                for (int sid = 0; sid < projectedDatabase.Count; sid++)
                {
                    PseudoSequence sequence = projectedDatabase[sid];

                    // The i-th maximum period of a prefix sequence is defined:
                    // 1) 1 < i <= n: it is the piece of sequence between the end
                    //                of the first instance of prefix e1e2...ei-1 and LLi.

                    // 2) i == 1: it is the piece of sequence located before the first last-in-last appearance (LL1).

                    int lastInLast = sequence.GetLastInLast(ei, lastInLastAppearances[sid]);
                    lastInLastAppearances[sid] = lastInLast;

                    // If the i-th maximum period is already known to be empty, only keep filling
                    // lastInLastAppearances so the (i-1)-th maximum period can be checked next.
                    if (isIthMaximumPeriodEmpty)
                    {
                        continue;
                    }

                    int firstInstance = 0;
                    if (i != 0)
                    {
                        firstInstance = sequence.GetFirstInstance(i) + 1;
                    }

                    seqMaximumPeriod.Clear();

                    // Gather every item located strictly before the last-in-last appearance.
                    for (; firstInstance < lastInLast; firstInstance++)
                    {
                        seqMaximumPeriod.UnionWith(sequence.RawItems[firstInstance]);
                    }

                    // ScanSkip:
                    if (ithMaximumPeriod == null)
                    {
                        ithMaximumPeriod = new HashSet <uint>(seqMaximumPeriod);
                    }
                    else
                    {
                        ithMaximumPeriod.IntersectWith(seqMaximumPeriod);
                    }

                    // If the i-th maximum period is empty and i == 0 there cannot exist any backward-S-extension;
                    // otherwise, if i > 0, try to find one in the (i-1)-th maximum period.
                    if (ithMaximumPeriod.Count != 0)
                    {
                        continue;
                    }

                    if (i == 0)
                    {
                        return(false);
                    }

                    isIthMaximumPeriodEmpty = true;
                }

                // If the i-th maximum periods of all sequences share some item, we have a backward-S-extension.
                if (ithMaximumPeriod.Count != 0)
                {
                    return(true);
                }
            }

            return(false);
        }
Example #8
0
    /// <summary>
    /// Tries to match this sequence, used as a prefix, against <paramref name="seq"/>, starting at the
    /// position recorded in <paramref name="pSeq"/>, and on a full match produces the pseudo sequence
    /// that points behind the match.
    /// </summary>
    /// <param name="fDict">Per-item values (called "support" by the comments below) used to tolerate arbitrary ordering of items with equal support.</param>
    /// <param name="seq">Sequence that is scanned for the prefix.</param>
    /// <param name="pSeq">Pseudo sequence supplying the start transaction/item index and the sequence index.</param>
    /// <param name="suffix">
    /// Always assigned a new <c>PseudoSequence</c>; it is only initialized (at the position of the last
    /// matched item and then advanced with <c>PointToNextItem</c>) when the whole prefix was matched.
    /// </param>
    /// <returns>
    /// <c>suffix.IsValid</c> when the whole prefix was matched; <c>false</c> when <paramref name="seq"/> is empty,
    /// when the start transaction index lies beyond the transaction count, or when no match is found.
    /// </returns>
    public bool IsPrefixOf(Dictionary <Item, int> fDict, Sequence seq, PseudoSequence pSeq, out PseudoSequence suffix)
    {
        suffix = new PseudoSequence();

        if (seq.IsEmpty)
        {
            return(false);
        }

        // start position beyond the number of transactions: nothing to scan
        // (an index equal to NumTransactions passes this test, but the loop below then never runs)
        if (seq.NumTransactions - pSeq.TransactionIndex < 0)
        {
            return(false);
        }

        // current transaction index in prefix
        var prefixTransactionIndex = 0;

        // current item index in current prefix transaction
        var prefixItemIndex = 0;

        // current transaction index in suffix
        var suffixTransactionIndex = pSeq.TransactionIndex;

        // current item index in current suffix transaction
        var suffixItemIndex = pSeq.ItemIndex;

        // whether or not only last item needs to be matched to form a prefix
        // e.g. prefix=<(abc)> suffix=<(_c)> only item c needs to be checked
        // to validate new prefix
        if (pSeq.ItemIndex != 0 || this.LastTransaction.NumItems == 1)
        {
            // only check last item since prefix already established
            // from previous rounds
            prefixTransactionIndex = this.NumTransactions - 1;
            prefixItemIndex        = this[prefixTransactionIndex].NumItems - 1;

            // the suffix starts in the middle of a transaction while the last prefix
            // transaction holds a single item: start scanning at the next transaction
            if (pSeq.ItemIndex != 0 && this.LastTransaction.NumItems == 1)
            {
                suffixTransactionIndex++;
                suffixItemIndex = 0;
            }
        }

        for (int i = suffixTransactionIndex; i < seq.NumTransactions; i++)
        {
            // only the first scanned transaction starts at the suffix item index;
            // every later transaction is scanned from its first item
            int k = (i == suffixTransactionIndex) ? suffixItemIndex : 0;
            for (; k < seq[i].NumItems; k++)
            {
                // if the current prefix item does not equal suffix item at [i, k]
                if (!this[prefixTransactionIndex][prefixItemIndex].Equals(seq[i][k]))
                {
                    // to form a prefix, all items in the last transaction of the prefix
                    // must be alphabetically AFTER all previous items in the transaction.
                    // The sorting step takes care of that, however, need to check for cases
                    // where two items have the same support making their ordering arbitrary

                    // NOTE(review): the fDict indexer is only reached after TryGetValue succeeded for
                    // seq[i][k]; presumably the prefix item is always present in fDict - confirm.
                    int support;
                    if (pSeq.ItemIndex == 0 || (fDict.TryGetValue(seq[i][k], out support) && support == fDict[this[prefixTransactionIndex][prefixItemIndex]]))
                    {
                        // suffix starts at a transaction boundary, or items have same support: skip this item
                        continue;
                    }

                    // prefix not found in this transaction
                    break;
                }

                // items matched, continue to next item in prefix (if any)
                prefixItemIndex++;

                // no more items in current prefix transaction
                // continue to next transaction (if any)
                if (prefixItemIndex >= this[prefixTransactionIndex].NumItems)
                {
                    // move to next transaction in prefix
                    prefixTransactionIndex++;
                    prefixItemIndex = 0;

                    // no more transactions, DONE!
                    if (prefixTransactionIndex >= this.NumTransactions)
                    {
                        // add the suffix pseudo sequence
                        suffix.Init(pSeq.SequenceIndex, i, k);
                        suffix.PointToNextItem(seq);

                        return(suffix.IsValid);
                    }
                }
            }

            // reset the prefix transaction in the case that
            // the transaction contains more than 1 item
            // making a full transaction match necessary
            if (this[prefixTransactionIndex].NumItems != 1)
            {
                prefixTransactionIndex = 0;
                prefixItemIndex        = 0;
            }
        }

        return(false);
    }
Example #9
0
 /// <summary>
 /// Reads one entry of the first instances stored on the pseudo sequence.
 /// According to the original documentation this is the first instance of the
 /// prefix (i-1)-th event in the sequence.
 /// </summary>
 /// <param name="me">PseudoSequence whose <c>_firstInstances</c> collection is read.</param>
 /// <param name="i">Index of i-th event in the prefix.</param>
 /// <returns>The value stored in <c>_firstInstances</c> at index <paramref name="i"/>.</returns>
 public static int GetFirstInstance(this PseudoSequence me, int i)
 {
     var firstInstances = me._firstInstances;

     return firstInstances[i];
 }