Example #1
0
        /// <summary>
        /// Builds the sequence tree. Every entry returned by <c>DataSet.GetItemSILDic()</c> is first
        /// inserted as a child of the root; the tree is then walked breadth-first and the per-node
        /// sequence extension is run on each node that is reached, including nodes created by
        /// earlier extensions.
        /// </summary>
        /// <returns>The tree that was built (the same instance assigned to <c>SequenceTree</c>).</returns>
        private SequenceTree <string> SequenceExtension()
        {
            SequenceTree = new SequenceTree <string>(DataSet.NumberOfRows);

            // Nodes waiting to be extended, in first-in-first-out order.
            var pending = new Queue <SequenceNode <string> >();

            // Seed the tree: each dictionary key is a space-separated list of items that becomes
            // a one-itemset sequence hanging directly under the root.
            foreach (var entry in DataSet.GetItemSILDic())
            {
                var items       = new Itemset <string>(entry.Key.Split(' '));
                var seed        = new Sequence <string>(items, 0);
                var startingVil = entry.Value.GetStartingVIL();

                pending.Enqueue(SequenceTree.AddChild(SequenceTree.Root, seed, startingVil, entry.Value.Support));
            }

            // Extend each queued node, then queue whatever children it has afterwards so that
            // they get extended in turn.
            while (pending.Count > 0)
            {
                var current = pending.Dequeue();
                SequenceExtension(SequenceTree, current);

                foreach (var descendant in current.GetChildren)
                {
                    pending.Enqueue(descendant);
                }
            }

            return SequenceTree;
        }
Example #2
0
        /// <summary>
        /// Extends the sequence of <paramref name="node"/> by one itemset. Each child of the node's
        /// parent (the node itself included) is tried as the source of the appended itemset: the two
        /// vertical id lists are merged, the matches are grouped by relative position, and every group
        /// whose size reaches <c>DataSet.MinSupport</c> is inserted into the tree as a new child of
        /// <paramref name="node"/> with its own vertical id list.
        /// </summary>
        /// <param name="sequenceTree">Tree that receives the newly created child nodes.</param>
        /// <param name="node">Node whose sequence is extended; it must have a parent.</param>
        private void SequenceExtension(SequenceTree <string> sequenceTree, SequenceNode <string> node)
        {
            ListNode listNode, listBrotherNode;
            // Vertical id list of the node being extended.
            var nodeVIL = node.VerticalIdList;
            // relative position -> (index into the VIL element array -> matching brother list node).
            // The inner key is the slot the entry came from; it is reused as the slot of the new VIL.
            var positionDic = new Dictionary <int, Dictionary <int, ListNode> >();
            // The node's siblings, including the node itself.
            var brothers = node.Parent.GetChildren;

            foreach (var brotherNode in brothers)
            {
                // Several siblings may end with the same itemset and differ only in relative position.
                // Extending with each of them would append the same itemset more than once, so such
                // siblings are skipped (the node itself is always processed).
                // NOTE(review): '==' is a reference comparison unless Itemset overloads it - confirm.
                if (brotherNode != node && brotherNode.Sequence.GetLastItemset() == node.Sequence.GetLastItemset())
                {
                    continue;
                }

                positionDic.Clear();
                var brotherVIL = brotherNode.VerticalIdList;

                for (int i = 0; i < nodeVIL.Elements.Length; i++)
                {
                    listNode        = nodeVIL.Elements[i];
                    listBrotherNode = brotherVIL.Elements[i];
                    // Nothing to merge when either side has no entry in this slot.
                    if ((listNode == null) || (listBrotherNode == null))
                    {
                        continue;
                    }

                    // Skip brother entries that do not lie strictly after the node's entry.
                    while ((listBrotherNode != null) && (listNode.GetSparseId >= listBrotherNode.GetSparseId))
                    {
                        listBrotherNode = listBrotherNode.GetNext;
                    }

                    // Record every remaining brother entry under its distance from the node's entry.
                    while (listBrotherNode != null)
                    {
                        var relativePosition = listBrotherNode.GetSparseId - listNode.GetSparseId;

                        Dictionary <int, ListNode> occurrences;
                        if (!positionDic.TryGetValue(relativePosition, out occurrences))
                        {
                            occurrences = new Dictionary <int, ListNode>();
                            positionDic.Add(relativePosition, occurrences);
                        }

                        // Add throws if slot i is recorded twice for the same distance; presumably
                        // sparse ids strictly increase along the list so this cannot happen - TODO confirm.
                        occurrences.Add(i, listBrotherNode);

                        listBrotherNode = listBrotherNode.GetNext;
                    }
                }

                // Every relative position that occurs often enough becomes a new child of the node.
                foreach (var keyValue in positionDic)
                {
                    var support = keyValue.Value.Count;
                    if (support >= DataSet.MinSupport)
                    {
                        // A fresh array per relative position: reusing one array would leak slots
                        // filled for an earlier position into this list and would make every new
                        // VerticalIdList share (and later overwrite) the same backing array.
                        var newPositionList = new ListNode[nodeVIL.Elements.Length];
                        foreach (var sequenceIdWithListNode in keyValue.Value)
                        {
                            newPositionList[sequenceIdWithListNode.Key] = sequenceIdWithListNode.Value;
                        }

                        var sequence = node.Sequence.Clone();
                        sequence.AddItemsetWithRelativePosition(brotherNode.Sequence.GetLastItemset(), keyValue.Key);
                        sequenceTree.AddChild(node, sequence, new VerticalIdList(newPositionList, support), support);
                    }
                }
            }
        }