Ejemplo n.º 1
0
 /// <summary>
 /// Looks up the stored entry for the pair formed by the first two items of
 /// <paramref name="i"/>.
 /// </summary>
 /// <param name="i">Itemset whose items at positions 0 and 1 are used as the lookup key.</param>
 /// <returns>The entry held in <c>dualStruct</c> for that pair of items.</returns>
 public ISimpleItemset GetItemset(Itemset i)
 {
     var firstItem  = i.GetItem(0);
     var secondItem = i.GetItem(1);

     return dualStruct[firstItem, secondItem];
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Depth-first extension of <paramref name="head"/> by the items of <paramref name="tail"/>.
        /// Every tail item is tried twice: as a further member of the head's last item group and
        /// as the first member of a new group. Extensions that pass the support check are
        /// recursed into and, when they qualify, added to <paramref name="mineResult"/>.
        /// </summary>
        /// <param name="head">Itemset built so far; when it is empty each tail item seeds a new one-item itemset in group 1.</param>
        /// <param name="tail">Candidate items. Mutated in place: an item whose extension fails the support check is removed.</param>
        /// <param name="support">Minimum number of elements the extension's transaction bit array must contain.</param>
        /// <param name="minLength">Minimum item count an extension needs to be added to the results.</param>
        /// <param name="maxLength">A head that already has this many items (or more) is not extended.</param>
        /// <param name="maxGroupLength">Once the head's current group has this many items, no further item is added to it.</param>
        /// <param name="mineResult">Collector that receives the qualifying itemsets.</param>
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, int maxGroupLength, MineResults mineResult)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            int tailIndex = 0;
            while (tailIndex < tail.Count)
            {
                int candidate = tail[tailIndex];

                if (head.Count == 0)
                {
                    // Seed case: the candidate becomes the only member of group 1.
                    List <int> seedTail = new List <int>(tail);
                    seedTail.RemoveAt(tailIndex);

                    Itemset seed = new Itemset();
                    seed.AddItem(candidate);
                    seed.itemGroup[seed.Count - 1] = 1;
                    seed.groupLength = 1;

                    RecurseMining(seed, seedTail, support, minLength, maxLength, maxGroupLength, mineResult);

                    tailIndex++;
                    continue;
                }

                int  lastGroup = head.itemGroup[head.Count - 1];
                bool pruned    = false;

                // groupOffset 0: join the head's last group; groupOffset 1: open the next group.
                for (int groupOffset = 0; groupOffset <= 1; groupOffset++)
                {
                    bool joinsLastGroup = (groupOffset == 0);

                    // Within a group, skip a candidate smaller than the head's last item
                    // (don't add the same item to the same group twice), and don't let
                    // the group grow beyond maxGroupLength.
                    if (joinsLastGroup &&
                        ((candidate < head.GetItem(head.Count - 1)) || (head.groupLength >= maxGroupLength)))
                    {
                        continue;
                    }

                    int targetGroup = lastGroup + groupOffset;

                    // Intersect the pairwise transactions of the candidate with every head
                    // item that belongs to the group just before the target group.
                    FastSparseBitArray transactions = null;
                    for (int headIndex = 0; headIndex < head.Count; headIndex++)
                    {
                        if (head.itemGroup[headIndex] != targetGroup - 1)
                        {
                            continue;
                        }

                        FastSparseBitArray pairTransactions = _dualComp.GetItemset(head.GetItem(headIndex), candidate).GetTransactions();
                        transactions = (transactions == null) ? pairTransactions : transactions.And(pairTransactions);
                    }

                    // When the head's last group is beyond the first, the head's own
                    // transactions are intersected in as well.
                    if (lastGroup > 1)
                    {
                        transactions = transactions.And(head.GetTransactions());
                    }

                    // With no bit array there is nothing to count, so the candidate is accepted.
                    bool hasSupport = (transactions == null) || (transactions.CountElements() >= support);

                    if (!hasSupport)
                    {
                        // Drop the item from this level's tail; the outer index is left
                        // untouched so the element that slides into this slot is visited next.
                        tail.RemoveAt(tailIndex);
                        pruned = true;
                        break;
                    }

                    List <int> extendedTail = new List <int>(tail);
                    extendedTail.RemoveAt(tailIndex);

                    Itemset extended = new Itemset(head);
                    extended.AddItem(candidate);
                    extended.itemGroup[extended.Count - 1] = targetGroup;
                    extended.groupLength = joinsLastGroup ? head.groupLength + 1 : 1;

                    extended.SetTransactions(transactions);
                    if (transactions != null)
                    {
                        extended.support = transactions.CountElements();
                    }

                    RecurseMining(extended, extendedTail, support, minLength, maxLength, maxGroupLength, mineResult);

                    // Report the extension only if it is long enough and holds more
                    // items than its current group alone.
                    if ((extended.Count >= minLength) && (extended.Count > extended.groupLength))
                    {
                        mineResult.Add(extended);
                    }

                    // The bit array is detached once the subtree has been explored.
                    extended.SetTransactions(null);
                }

                if (!pruned)
                {
                    tailIndex++;
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Mine with skip-errors: extends <paramref name="head"/> by every item of
        /// <paramref name="tail"/>, keeping one transaction bit array per mistake level
        /// (index 0 through <paramref name="maxMistakes"/>) for each candidate.
        /// </summary>
        /// <remarks>
        /// Works in two passes. The first pass builds the mistakes mask and the support of every
        /// tail item and removes the items whose support is below <paramref name="minSupport"/>.
        /// The second pass, run only for a non-empty head, builds the extended itemset of each
        /// surviving item, recurses into it and adds it to <paramref name="mineResult"/> when it
        /// has at least <paramref name="minLength"/> items.
        /// </remarks>
        /// <param name="head">Itemset built so far. NOTE(review): GetLastItem() is called on it unconditionally, so callers presumably never pass an empty head - confirm.</param>
        /// <param name="tail">Candidate items. Mutated in place: items with insufficient support are removed.</param>
        /// <param name="minSupport">Minimum support a candidate needs to be kept.</param>
        /// <param name="minLength">Minimum item count an extension needs to be added to the results.</param>
        /// <param name="maxLength">A head that already has this many items (or more) is not extended.</param>
        /// <param name="maxMistakes">Highest mistake level tracked; the mask holds levels 0 through this value.</param>
        /// <param name="mineResult">Collector that receives the qualifying itemsets.</param>
        void RecurseMining(Itemset head, List <int> tail, int minSupport, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            // Parallel lists: after the first pass, entry k describes tail[k]
            // (rejected items are removed from tail and never added here).
            List <FastSparseBitArray> newSupportVectorBitMask = new List <FastSparseBitArray>();
            List <MistakesBitMask>    newBitMask = new List <MistakesBitMask>();
            List <int> newSupport = new List <int>();

            for (int loopTail = 0; loopTail < tail.Count; loopTail++)
            {
                int i = tail[loopTail];

                MistakesBitMask mistakes = new MistakesBitMask(maxMistakes);

                // Level 0 starts as the transactions of the pair (last head item, candidate).
                FastSparseBitArray bitArray = _dualComp.GetItemset(head.GetLastItem(), i).GetTransactions();
                mistakes.mistakes[0] = bitArray;

                if (head.Count > 1)
                {
                    MistakesBitMask existingMistakes = head.GetMistakes();

                    mistakes.mistakes[0] = mistakes.mistakes[0].And(existingMistakes.mistakes[0]);

                    // ************* Build candidate **************
                    // Build the mistakes array
                    for (int loopMistakes = 1; loopMistakes < maxMistakes + 1; loopMistakes++)
                    {
                        if (existingMistakes.mistakes[loopMistakes] != null)
                        {
                            // Start from the head's vector at this level restricted to the pair transactions.
                            FastSparseBitArray newError = bitArray.And(existingMistakes.mistakes[loopMistakes]);

                            // Walk up the parent chain: the ancestor reached after upwardCount steps
                            // contributes its vector at level (loopMistakes - upwardCount), restricted
                            // to the transactions of the pair (ancestor's last item, candidate).
                            int     upwardCount     = 1;
                            Itemset upwardTraversal = head;
                            upwardTraversal = upwardTraversal.GetParent();
                            while ((upwardTraversal != null) && (loopMistakes - upwardCount >= 0))
                            {
                                FastSparseBitArray bitTIDs = _dualComp.GetItemset(upwardTraversal.GetLastItem(), i).GetTransactions();
                                newError = newError.Or(upwardTraversal.GetMistakes().mistakes[loopMistakes - upwardCount].And(bitTIDs));

                                upwardTraversal = upwardTraversal.GetParent();
                                upwardCount++;
                            }

                            mistakes.mistakes[loopMistakes] = newError;
                        }
                        else
                        {
                            // The head has no vector at this level: union the pair transactions of
                            // the candidate with the last item of the head and of every ancestor,
                            // store it, and stop building further levels.
                            FastSparseBitArray bitTIDs;
                            Itemset            ancestor = head;
                            FastSparseBitArray newError = bitArray;
                            while (ancestor.GetParent() != null)
                            {
                                ancestor = ancestor.GetParent();
                                bitTIDs  = _dualComp.GetItemset(ancestor.GetLastItem(), i).GetTransactions();
                                newError = newError.Or(bitTIDs);
                            }

                            mistakes.mistakes[loopMistakes] = newError;
                            break;
                        }
                    }
                }

                // *********** Calculate the support *************
                // Value kept when the support is not computed below.
                // NOTE(review): presumably chosen to exceed any realistic minSupport so the
                // candidate is retained - confirm.
                int support = 999999;

                // The support is the union of the last 'maxMistakes' mistakes
                // vectors; to get the last mistakes vectors the algorithm performs
                // a backward traversal over the last items developed (backtracks the DFS).
                FastSparseBitArray currentSupportVector = mistakes.mistakes[maxMistakes];
                if (currentSupportVector != null)
                {
                    Itemset upwardTraversal = head;
                    for (int upward = 0; upward < maxMistakes; upward++)
                    {
                        if (upwardTraversal == null)
                        {
                            break;
                        }

                        currentSupportVector = currentSupportVector.Or(upwardTraversal.GetMistakes().mistakes[maxMistakes - upward - 1]);
                        upwardTraversal      = upwardTraversal.GetParent();
                    }

                    // The support is counted only when the parent chain did not run out
                    // during (or exactly at the end of) the traversal above.
                    if (upwardTraversal != null)
                    {
                        support = currentSupportVector.CountElements();
                    }
                }

                if (support >= minSupport)
                {
                    newSupportVectorBitMask.Add(currentSupportVector);
                    newBitMask.Add(mistakes);
                    newSupport.Add(support);
                }
                else                 // Just remove the item as it will not be a member later...
                {
                    tail.RemoveAt(loopTail);
                    loopTail--;
                }
            }

            // Do Recurse call
            if (head.Count > 0)
            {
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    List <int> newTail = new List <int>(tail);
                    newTail.RemoveAt(loopTail);

                    Itemset newHead = new Itemset(head);
                    newHead.AddItem(tail[loopTail]);
                    newHead.support = newSupport[loopTail];
                    newHead.SetTransactions(newSupportVectorBitMask[loopTail]);
                    newHead.SetMistakes(newBitMask[loopTail]);
                    newHead.SetParent(head);

                    RecurseMining(newHead, newTail, minSupport, minLength, maxLength, maxMistakes, mineResult);

                    if (newHead.Count >= minLength)
                    {
                        mineResult.Add(newHead);
                    }

                    // Detach the per-node working data once the subtree has been explored
                    // (presumably to release memory - the itemset itself may already be in the results).
                    newHead.SetParent(null);
                    newHead.SetMistakes(null);
                    newHead.SetTransactions(null);
                }
            }
        }