/// <summary>
/// Builds the database projected by <paramref name="prefix"/> from the sequences
/// of <paramref name="sourceProjectedDatabase"/>.
/// </summary>
/// <param name="prefix">Prefix whose last item drives the projection.</param>
/// <param name="sourceProjectedDatabase">Database whose sequences are projected.</param>
/// <returns>
/// The new projected database, or <c>null</c> as soon as more source sequences have
/// failed to project than <c>Size - (int)_minSupport</c> allows.
/// </returns>
private ProjectedDatabase ConstructProjectedDatabase(Sequence prefix, ProjectedDatabase sourceProjectedDatabase)
{
    var lastItemOfPrefix = prefix.LastItem;
    var result = new ProjectedDatabase(prefix);

    // How many source sequences may fail to project before the result can no
    // longer reach _minSupport sequences.
    var sourceSize = sourceProjectedDatabase.Size;
    var allowedMisses = sourceSize - (int)_minSupport;

    for (var sid = 0; sid < sourceSize; sid++)
    {
        var candidate = sourceProjectedDatabase[sid];

        // Contains yields the index of the itemset holding the item, negative when absent.
        int itemsetIndex = candidate.Contains(lastItemOfPrefix);
        if (itemsetIndex >= 0)
        {
            var projection = PseudoSequence.MakeProjection(candidate, prefix, itemsetIndex);
            if (projection != null && projection.IsNotEmpty)
            {
                result.Add(projection);
                continue;
            }
        }

        // This sequence contributed nothing; give up once too many have been missed.
        allowedMisses--;
        if (allowedMisses < 0)
        {
            return null;
        }
    }

    return result;
}
/// <summary>
/// Records the first instance of the prefix for this pseudo sequence.
/// The list is copied from <paramref name="sequence"/>; the new entry then either
/// overwrites the last element (when the prefix's last item is an I-extension)
/// or is appended.
/// </summary>
/// <param name="sequence">Pseudo sequence whose first-instance list is copied.</param>
private void SetFirsInstanceOfPrefix(PseudoSequence sequence)
{
    _firstInstances = new List<int>(sequence._firstInstances);

    if (!_prefix.IsLastItemIExtension)
    {
        _firstInstances.Add(_isLastItemIExtension ? _offset : _offset - 1);
        return;
    }

    // The entry is the offset itself for an I-extension, otherwise the itemset before it.
    var instanceIndex = _isLastItemIExtension ? _offset : _offset - 1;
    _firstInstances[_firstInstances.Count - 1] = instanceIndex;
}
/// <summary>
/// Initializes a new instance of the PseudoSequence class that reuses the items
/// of <paramref name="sequence"/> and starts at <paramref name="offset"/>.
/// </summary>
/// <param name="prefix">Prefix this pseudo sequence is projected by.</param>
/// <param name="sequence">Pseudo sequence whose items and first instances are reused.</param>
/// <param name="offset">Index into the shared items at which this pseudo sequence starts.</param>
/// <param name="isLastItemIExtension">If the last item of the prefix was appended by I-Step.</param>
public PseudoSequence(Sequence prefix, PseudoSequence sequence, int offset, bool isLastItemIExtension)
{
    _prefix = prefix;
    _offset = offset;
    _isLastItemIExtension = isLastItemIExtension;

    // The items are shared with the source sequence, not copied.
    _items = sequence._items;

    // An offset at or past the end leaves the first itemset unset.
    if (_items.Length > offset)
    {
        _firstItemset = _items[offset];
    }

    SetFirsInstanceOfPrefix(sequence);
}
/// <summary>
/// Initializes a new instance of the PseudoSequence class with an explicitly
/// supplied first itemset.
/// </summary>
/// <param name="prefix">Prefix this pseudo sequence is projected by.</param>
/// <param name="sequence">Pseudo sequence whose items and first instances are reused.</param>
/// <param name="offset">Index into the shared items at which this pseudo sequence starts.</param>
/// <param name="firstItemset">Itemset to use as the first itemset of this pseudo sequence.</param>
/// <param name="isLastItemIExtension">If the last item of the prefix was appended by I-Step.</param>
public PseudoSequence(Sequence prefix, PseudoSequence sequence, int offset, uint[] firstItemset, bool isLastItemIExtension)
    : this(prefix, sequence, offset, isLastItemIExtension)
{
    // Replaces whatever first itemset the chained constructor picked from the shared items.
    this._firstItemset = firstItemset;
}
/// <summary>
/// Sets the first instance of the prefix in this pseudo sequence, starting from
/// a copy of the first instances held by <paramref name="sequence"/>.
/// </summary>
/// <param name="sequence">Pseudo sequence that supplies the initial first-instance list.</param>
private void SetFirsInstanceOfPrefix(PseudoSequence sequence)
{
    _firstInstances = new List<int>(sequence._firstInstances);
    var instances = _firstInstances;

    if (_prefix.IsLastItemIExtension)
    {
        // The prefix grew inside its last itemset: overwrite the most recent entry.
        var lastSlot = instances.Count - 1;
        if (_isLastItemIExtension)
        {
            instances[lastSlot] = _offset;
        }
        else
        {
            instances[lastSlot] = _offset - 1;
        }
    }
    else
    {
        // The prefix grew by a new itemset: append a new entry.
        if (_isLastItemIExtension)
        {
            instances.Add(_offset);
        }
        else
        {
            instances.Add(_offset - 1);
        }
    }
}
/// <summary>
/// Creates a pseudo-projected sequence from a given sequence; the projection
/// starts right after the first usable occurrence of the last prefix item.
/// </summary>
/// <param name="sequence">Sequence to project.</param>
/// <param name="prefix">Prefix whose last item is searched for.</param>
/// <param name="firstItemsetIndex">Index of the first itemset at which the search for the last prefix item starts.</param>
/// <param name="removeEmpty">
/// When true, a match on the very last item of the very last itemset yields <c>null</c>
/// instead of a projection.
/// </param>
/// <returns>The projected pseudo sequence, or <c>null</c> when no projection is made.</returns>
public static PseudoSequence MakeProjection(PseudoSequence sequence, Sequence prefix, int firstItemsetIndex = 0, bool removeEmpty = true)
{
    // The last prefix item comes in two forms: y and (_y).
    // Projection rules:
    //   1) y:    xyz...     => z...
    //   2) y:    (xyz)...   => (_z)...
    //   3) (_y): (vyz)...   => (_z)...
    //   4) (_y): (_xyz)...  => (_z)...
    // x and z stand for zero or more items, v for one or more items, and
    // v together with y contains every item of the prefix's last itemset.
    var soughtItem = prefix.LastItem;
    var itemsetCount = sequence.Size;
    var finalItemsetIndex = itemsetCount - 1;
    var prefixTail = prefix[prefix.Size - 1];
    var prefixEndsWithIExtension = prefix.IsLastItemIExtension;

    for (var tid = firstItemsetIndex; tid < itemsetCount; tid++)
    {
        var isPostfixItemset = tid == 0 && sequence.IsLastItemIExtension;

        // No projection is allowed for (x): (_x)... => {}
        if (isPostfixItemset && !prefixEndsWithIExtension)
        {
            continue;
        }

        var itemset = sequence[tid];
        var finalItemIndex = itemset.Length - 1;

        for (var itemId = 0; itemId <= finalItemIndex; itemId++)
        {
            if (itemset[itemId] != soughtItem)
            {
                continue;
            }

            var isFinalItem = itemId == finalItemIndex;

            // A match at the very end of the sequence leaves nothing to project.
            if (removeEmpty && isFinalItem && tid == finalItemsetIndex)
            {
                return null;
            }

            // Rules 1 and 2 need no further check, rule 4 applies to a postfix itemset,
            // rule 3 requires the prefix's last itemset to be a subset of this one.
            var canProject = !prefixEndsWithIExtension
                             || isPostfixItemset
                             || prefixTail.IsSubsetOf(itemset, itemId);
            if (!canProject)
            {
                continue;
            }

            if (isFinalItem)
            {
                // Nothing is left in this itemset: start at the next one.
                return new PseudoSequence(prefix, sequence, sequence._offset + tid + 1, false);
            }

            // Keep the remainder of this itemset as the first (partial) itemset.
            return new PseudoSequence(prefix, sequence, sequence._offset + tid, itemset.Slice(itemId + 1), true);
        }
    }

    return null;
}
/// <summary>
/// Projects <paramref name="sequence"/> by <paramref name="prefix"/>: finds the first
/// occurrence of the last prefix item that may be projected and returns the pseudo
/// sequence that follows it.
/// </summary>
/// <param name="sequence">Sequence to project.</param>
/// <param name="prefix">Prefix whose last item is searched for.</param>
/// <param name="firstItemsetIndex">Index of the itemset at which the search begins.</param>
/// <param name="removeEmpty">
/// When true, <c>null</c> is returned if the matched item is the last item of the last itemset.
/// </param>
/// <returns>The projection, or <c>null</c> when none is produced.</returns>
public static PseudoSequence MakeProjection(PseudoSequence sequence, Sequence prefix, int firstItemsetIndex = 0, bool removeEmpty = true)
{
    // Forms of the last prefix item: y and (_y). Rules for projection:
    //   1) y:    xyz...     => z...
    //   2) y:    (xyz)...   => (_z)...
    //   3) (_y): (vyz)...   => (_z)...
    //   4) (_y): (_xyz)...  => (_z)...
    // where x, z are zero or more items, v is one or more items, and v union y
    // contains all items of the last itemset of the prefix.
    var wanted = prefix.LastItem;
    var sequenceSize = sequence.Size;
    var lastIndex = sequenceSize - 1;
    var lastItemsetOfPrefix = prefix[prefix.Size - 1];
    var prefixIsIExtension = prefix.IsLastItemIExtension;

    for (var tid = firstItemsetIndex; tid < sequenceSize; tid++)
    {
        var startsWithPostfix = tid == 0 && sequence.IsLastItemIExtension;

        // Skipped case, where no projection is allowed: (x): (_x)... => {}
        if (prefixIsIExtension || !startsWithPostfix)
        {
            var current = sequence[tid];
            var tail = current.Length - 1;

            for (var pos = 0; pos < current.Length; pos++)
            {
                if (current[pos] == wanted)
                {
                    // The match is the end of the sequence: stop instead of projecting.
                    if (removeEmpty && tid == lastIndex && pos == tail)
                    {
                        return null;
                    }

                    // Rules 1/2, then rule 4, then rule 3 (subset test evaluated last).
                    if (!prefixIsIExtension || startsWithPostfix || lastItemsetOfPrefix.IsSubsetOf(current, pos))
                    {
                        if (pos == tail)
                        {
                            return new PseudoSequence(prefix, sequence, sequence._offset + tid + 1, false);
                        }
                        else
                        {
                            return new PseudoSequence(prefix, sequence, sequence._offset + tid, current.Slice(pos + 1), true);
                        }
                    }
                }
            }
        }
    }

    return null;
}