예제 #1
0
        // Depth-first search that grows 'head' by one item taken from 'tail' per recursion level.
        //
        // With an empty head every tail item seeds its own single-item head. Otherwise each tail
        // item 'i' is tried against every position of the head: the transactions of the pair
        // (head item, i) are fetched from _dualComp, intersected with the head's transactions
        // once the head holds more than one item, and the extension is kept when the resulting
        // count reaches 'support'.
        //
        // head       - itemset built so far; extensions are made on copies (new Itemset(head)).
        // tail       - items still available; each branch receives a copy without the chosen item.
        // support    - minimal number of transactions an extension must cover.
        // minLength  - extended heads holding at least this many items are added to mineResult.
        // maxLength  - a head that already holds this many items is not extended any further.
        // mineResult - collector for every accepted itemset.
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, MineResults mineResult)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            for (int loopTail = 0; loopTail < tail.Count; loopTail++)
            {
                int i = tail[loopTail];

                if (head.Count == 0)
                {
                    // Seed: start a fresh head from this single item. No support check is
                    // done here and the seed itself is not added to mineResult.
                    List <int> seedTail = new List <int>(tail);
                    seedTail.RemoveAt(loopTail);
                    Itemset seedHead = new Itemset();
                    seedHead.AddItem(i);
                    RecurseMining(seedHead, seedTail, support, minLength, maxLength, mineResult);
                    continue;
                }

                for (int loopHead = 0; loopHead < head.Count; loopHead++)
                {
                    // loopReverseOrder == 0 : pair (head item, i), displayed as "headItem=>i"
                    // loopReverseOrder == 1 : pair (i, head item), displayed as "i=>headItem"
                    // The loop bound of 1 means only the first direction is currently explored;
                    // the second branch is kept so the bound can be raised to 2 to enable it.
                    for (int loopReverseOrder = 0; loopReverseOrder < 1; loopReverseOrder++)
                    {
                        bool forward = (loopReverseOrder == 0);

                        FastSparseBitArray bitArray;
                        if (forward)
                        {
                            // This head position already has the maximal number of outgoing links
                            if (head.outDegree[loopHead] >= MAX_OUT_RANK)
                            {
                                break;
                            }

                            bitArray = _dualComp.GetItemset(head.GetItem(loopHead), i).GetTransactions();
                        }
                        else
                        {
                            // This head position already has the maximal number of incoming links
                            if (head.inDegree[loopHead] >= MAX_IN_RANK)
                            {
                                break;
                            }

                            bitArray = _dualComp.GetItemset(i, head.GetItem(loopHead)).GetTransactions();
                        }

                        if (head.Count > 1)
                        {
                            bitArray = bitArray.And(head.GetTransactions());
                        }

                        // Count once and reuse the value for both the threshold test and
                        // the support stored on the new head.
                        int extensionSupport = bitArray.CountElements();
                        if (extensionSupport < support)
                        {
                            continue;
                        }

                        List <int> newTail = new List <int>(tail);
                        newTail.RemoveAt(loopTail);

                        Itemset newHead = new Itemset(head);
                        newHead.AddItem(i);
                        newHead.SetTransactions(bitArray);
                        newHead.support = extensionSupport;

                        if (forward)
                        {
                            newHead.inDegree[newHead.Count - 1] = 1;
                            newHead.outDegree[loopHead]++;
                            newHead.customStringDisplay += "" + head.GetItem(loopHead) + "=>" + i + "; ";
                        }
                        else
                        {
                            newHead.inDegree[loopHead]++;
                            newHead.outDegree[newHead.Count - 1] = 1;
                            newHead.customStringDisplay += "" + i + "=>" + head.GetItem(loopHead) + "; ";
                        }

                        RecurseMining(newHead, newTail, support, minLength, maxLength, mineResult);

                        if (newHead.Count >= minLength)
                        {
                            mineResult.Add(newHead);
                        }

                        // The transactions are only needed while the subtree is being explored
                        newHead.SetTransactions(null);
                    }
                }
            }
        }
        // Layer-aware depth-first mining step.
        //
        // Phase 1 evaluates every tail item: the transactions of the pairs formed with each head
        // item from a different layer (ordered lower layer first) are intersected, then
        // intersected with the head's own transactions when it has any. Items whose count
        // reaches 'support' become candidates. A candidate that would make its layer exceed the
        // smallest layer of the head by more than 'maxLayerDiff' gets Int32.MaxValue as support.
        //
        // Phase 2 sorts the candidates, rebuilds the tail in that order and recurses on each
        // candidate until the first one whose support is Int32.MaxValue.
        //
        // head         - itemset built so far (copied for every extension).
        // tail         - items still available for extension; not modified here.
        // support      - minimal number of transactions required.
        // minLength    - extended heads with at least this many items are added to mineResult.
        // maxLength    - a head that already holds this many items is not extended.
        // mineResult   - collector for accepted itemsets.
        // maxLayerDiff - allowed difference between a layer's item count and the smallest layer.
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, MineResults mineResult, int maxLayerDiff)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            // Item count of the least populated layer of the current head
            int smallestLayer = head.layersItemsSum[0];
            for (int layer = 1; layer < head.layersItemsSum.Length; layer++)
            {
                if (head.layersItemsSum[layer] < smallestLayer)
                {
                    smallestLayer = head.layersItemsSum[layer];
                }
            }

            List <Candidate> candidates = new List <Candidate>();

            foreach (int item in tail)
            {
                // TODO : Calculation of the support should be according to the items from the
                // lower and upper layers.
                FastSparseBitArray transactions = null;

                for (int pos = 0; pos < head.Count; pos++)
                {
                    int headItem = head.GetItem(pos);

                    // The pair is always looked up with the lower-layer item first;
                    // head items from the item's own layer contribute nothing.
                    FastSparseBitArray pairTransactions;
                    if (_itemsLayers[headItem] < _itemsLayers[item])
                    {
                        pairTransactions = _dualComp.GetItemset(headItem, item).GetTransactions();
                    }
                    else if (_itemsLayers[headItem] > _itemsLayers[item])
                    {
                        pairTransactions = _dualComp.GetItemset(item, headItem).GetTransactions();
                    }
                    else
                    {
                        continue;
                    }

                    transactions = (transactions == null) ? pairTransactions : transactions.And(pairTransactions);
                }

                FastSparseBitArray headTransactions = head.GetTransactions();
                if (headTransactions != null)
                {
                    transactions = (transactions == null) ? headTransactions : transactions.And(headTransactions);
                }

                // Without any bit vector the support is treated as unbounded
                int itemSupport = (transactions != null) ? transactions.CountElements() : Int32.MaxValue;
                if (itemSupport < support)
                {
                    continue;
                }

                // If the item would create an itemset with unbalanced layers, its support is
                // set to Int32.MaxValue so that it is not traversed
                bool unbalanced = head.layersItemsSum[_itemsLayers[item]] + 1 > smallestLayer + maxLayerDiff;

                Candidate cand = new Candidate();
                cand.item     = item;
                cand.support  = unbalanced ? Int32.MaxValue : itemSupport;
                cand.bitArray = transactions;
                candidates.Add(cand);
            }

            // Dynamic reordering (order defined by Candidate's comparison)
            candidates.Sort();

            // The new tail lists the surviving items in candidate order
            List <int> newTail = new List <int>(candidates.Count);
            foreach (Candidate cand in candidates)
            {
                newTail.Add(cand.item);
            }

            for (int rank = 0; rank < candidates.Count; rank++)
            {
                Candidate chosen = candidates[rank];

                // Stop at the first Int32.MaxValue candidate
                if (chosen.support == Int32.MaxValue)
                {
                    break;
                }

                // Pop the front of the tail; the remainder is what the subtree may still use
                int item = newTail[0];
                newTail.RemoveAt(0);

                Itemset newHead = new Itemset(head);
                newHead.AddItem(item);
                newHead.layersItemsSum[_itemsLayers[item]]++;
                newHead.SetTransactions(chosen.bitArray);
                newHead.support = chosen.support;

                RecurseMining(newHead, newTail, support, minLength, maxLength, mineResult, maxLayerDiff);

                if (newHead.Count >= minLength)
                {
                    mineResult.Add(newHead);
                }

                newHead.SetTransactions(null);
            }
        }
예제 #3
0
        // Level-wise mining step: processes a whole list of (Head, Tail) pairs at once.
        //
        // 1. Drops level items whose head is already at maxLength or which cannot reach minLength.
        // 2. For every remaining level item, computes one bit vector per tail item, removes
        //    tail items below 'support', and records look-ahead pruning information.
        // 3. For every level item that is not pruned, builds the next level (one new head per
        //    tail item), reports heads of sufficient length, recurses, and releases pooled
        //    objects.
        //
        // levelItems - (head, tail) pairs of the current level; this list and the Tail of each
        //              entry are modified in place.
        // support    - minimal number of transactions an extension must cover.
        // minLength  - new heads with at least this many items are added to mineResult.
        // maxLength  - heads that already hold this many items are not extended.
        // mineResult - collector for accepted itemsets.
        void RecurseMining(List <DFSLevelItem> levelItems, int support, int minLength, int maxLength, MineResults mineResult)
        {
            // Simple pattern cut-off: drop heads that are already at maximal length, and heads
            // that cannot reach minLength even if every tail item were added.
            // Iterates backwards so RemoveAt does not disturb the indices still to be visited.
            for (int i = levelItems.Count - 1; i >= 0; i--)
            {
                if (levelItems[i].Head.Count >= maxLength)
                {
                    levelItems.RemoveAt(i);
                    continue;
                }

                if (levelItems[i].Head.Count + levelItems[i].Tail.Count < minLength)
                {
                    levelItems.RemoveAt(i);
                    continue;
                }
            }

            if (levelItems.Count == 0)
            {
                return;
            }

            // Maps a tail item to the largest frequency seen among extensions whose frequency
            // equals the support of the head they extend.
            Dictionary <int, int> lookAheadPrune = new Dictionary <int, int>();

            // First pass: compute the bit vector of every (head, tail item) extension.
            for (int levelItemsLoop = 0; levelItemsLoop < levelItems.Count; levelItemsLoop++)
            {
                IntList      tail = levelItems[levelItemsLoop].Tail;
                ItemsetBasic head = levelItems[levelItemsLoop].Head;

                // Kept index-aligned with 'tail': an entry is added only for items that stay
                // in the tail; rejected items are removed from the tail below.
                List <FastSparseBitArray> bitArrays = new List <FastSparseBitArray>(tail.Count);
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    int i = tail[loopTail];

                    FastSparseBitArray bitArray = null;

                    if (_dualComp != null)
                    {
                        // Pair lookup; a missing pair removes the item from this tail
                        ISimpleItemset dualItemset = _dualComp.GetItemset(head.GetLastItem(), i);
                        if (dualItemset == null)
                        {
                            tail.RemoveAt(loopTail);
                            loopTail--;
                            continue;
                        }

                        bitArray = dualItemset.GetTransactions();
                    }
                    else
                    {
                        // No pair structure available: build the pair's bit vector from _ds
                        // into a vector taken from the pool
                        bitArray = FastSparseBitArrayPool.Instance.Allocate();
                        _ds.BuildBitVector(head.GetLastItem(), i, bitArray);
                    }

                    if (head.Count > 1)
                    {
                        bitArray = bitArray.And(head.GetTransactions());
                    }

                    // Cache the count on the vector so it can be reused in the second pass
                    bitArray.frequency = bitArray.CountElements();
                    if (bitArray.frequency >= support)
                    {
                        bitArrays.Add(bitArray);

                        // Prune look-ahead: remember items whose extension keeps the full
                        // support of the head
                        if (bitArray.frequency == head.support)
                        {
                            int lookAheadSupport;
                            if (lookAheadPrune.TryGetValue(i, out lookAheadSupport) == false)
                            {
                                lookAheadPrune.Add(i, bitArray.frequency);
                            }
                            else
                            {
                                lookAheadPrune[i] = Math.Max(lookAheadSupport, bitArray.frequency);
                            }
                        }
                    }
                    else
                    {
                        // Don't release bit vectors from O2 matrix, or there is no
                        // O2 matrix
                        // NOTE(review): presumably the vector is pool-owned only when it came
                        // from And() or from Allocate() above - confirm against the pool.
                        if ((head.Count > 1) || (_dualComp == null))
                        {
                            FastSparseBitArrayPool.Instance.Release(bitArray);
                        }

                        tail.RemoveAt(loopTail);
                        loopTail--;
                    }
                }

                levelItems[levelItemsLoop].TailBitArrays = bitArrays;
            }

            // Second pass: build and explore the next level for every head that is not pruned.
            for (int levelItemsLoop = 0; levelItemsLoop < levelItems.Count; levelItemsLoop++)
            {
                ItemsetBasic head = levelItems[levelItemsLoop].Head;
                List <FastSparseBitArray> bitArrays = levelItems[levelItemsLoop].TailBitArrays;

                // Skip this head when its last item was recorded in the first pass with a
                // frequency equal to this head's support.
                int lookAheadSupport;
                if (lookAheadPrune.TryGetValue(head.GetLastItem(), out lookAheadSupport) == true)
                {
                    if (lookAheadSupport == head.support)
                    {
                        // NOTE(review): the first pass treats vectors as releasable when
                        // head.Count > 1 (or _dualComp == null), here the threshold is
                        // head.Count > 2 - confirm this difference is intended.
                        if (head.Count > 2)
                        {
                            for (int j = 0; j < bitArrays.Count; j++)
                            {
                                FastSparseBitArrayPool.Instance.Release(bitArrays[j]);
                            }
                        }
                        continue;
                    }
                }

                IntList tail = levelItems[levelItemsLoop].Tail;

                // One new (head + item, tail - item) pair per surviving tail item
                List <DFSLevelItem> newLevelItems = new List <DFSLevelItem>();
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    int i = tail[loopTail];

                    IntList newTail = (IntList)tail.Clone();
                    newTail.RemoveAt(loopTail);

                    ItemsetBasic newHead = new ItemsetBasic(head);
                    newHead.AddItem(i);
                    // bitArrays[loopTail] belongs to tail[loopTail] (see first pass)
                    FastSparseBitArray bitArray = bitArrays[loopTail];
                    newHead.SetTransactions(bitArray);
                    newHead.support = bitArray.frequency;

                    // The new head is reported before its subtree is explored
                    if (newHead.Count >= minLength)
                    {
                        mineResult.Add(newHead);
                    }

                    newLevelItems.Add(new DFSLevelItem(newHead, newTail));
                }

                RecurseMining(newLevelItems, support, minLength, maxLength, mineResult);

                // Release IntList
                // Only the tails still present in newLevelItems are released; the recursive
                // call may have removed entries from that list during its cut-off step.
                for (int j = 0; j < newLevelItems.Count; j++)
                {
                    IntListPool.Instance.Release(newLevelItems[j].Tail);
                }

                // Release FastSparseBitArray
                if (head.Count > 2)
                {
                    for (int j = 0; j < bitArrays.Count; j++)
                    {
                        FastSparseBitArrayPool.Instance.Release(bitArrays[j]);
                    }
                }
            }
        }
예제 #4
0
        // Depth-first mining of itemsets whose items are partitioned into consecutive groups.
        //
        // Every tail item is tried twice against a non-empty head: once as a further member of
        // the head's last group (step 0) and once as the first member of the next group (step 1).
        // The support of an extension is the intersection of the pair transactions between the
        // new item and every head item of the group preceding the target group, additionally
        // intersected with the head's transactions when the head's last group number is above 1.
        //
        // head           - itemset built so far (copied for every extension).
        // tail           - items still available; an item failing the support test is removed
        //                  from this list in place.
        // support        - minimal number of transactions required.
        // minLength      - minimal item count for an itemset to be reported.
        // maxLength      - a head that already holds this many items is not extended.
        // maxGroupLength - maximal number of items in a single group.
        // mineResult     - collector for accepted itemsets.
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, int maxGroupLength, MineResults mineResult)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            for (int loopTail = 0; loopTail < tail.Count; loopTail++)
            {
                int i = tail[loopTail];

                if (head.Count == 0)
                {
                    // Seed: the first item opens group 1
                    List <int> seedTail = new List <int>(tail);
                    seedTail.RemoveAt(loopTail);

                    Itemset seedHead = new Itemset();
                    seedHead.AddItem(i);
                    seedHead.itemGroup[seedHead.Count - 1] = 1;
                    seedHead.groupLength = 1;

                    RecurseMining(seedHead, seedTail, support, minLength, maxLength, maxGroupLength, mineResult);
                    continue;
                }

                int lastGroup = head.itemGroup[head.Count - 1];

                // step 0: stay in the last group, step 1: open the following group
                for (int step = 0; step < 2; step++)
                {
                    bool sameGroup = (step == 0);

                    // Within a group an item smaller than the head's last item is not tried
                    // (avoids adding the same items to the same group twice), and a group
                    // that is already at maxGroupLength is not grown.
                    if (sameGroup && ((i < head.GetItem(head.Count - 1)) || (head.groupLength >= maxGroupLength)))
                    {
                        continue;
                    }

                    int targetGroup = lastGroup + step;

                    // AND together the pair transactions with every member of the group
                    // that precedes the target group
                    FastSparseBitArray bitArray = null;
                    for (int loopHead = 0; loopHead < head.Count; loopHead++)
                    {
                        if (head.itemGroup[loopHead] != targetGroup - 1)
                        {
                            continue;
                        }

                        FastSparseBitArray pairTransactions = _dualComp.GetItemset(head.GetItem(loopHead), i).GetTransactions();
                        bitArray = (bitArray == null) ? pairTransactions : bitArray.And(pairTransactions);
                    }

                    if (lastGroup > 1)
                    {
                        bitArray = bitArray.And(head.GetTransactions());
                    }

                    // No bit vector at all means there was nothing to check against
                    if ((bitArray != null) && (bitArray.CountElements() < support))
                    {
                        // Drop the item from the tail and move on to the next tail item
                        tail.RemoveAt(loopTail);
                        loopTail--;
                        break;
                    }

                    List <int> newTail = new List <int>(tail);
                    newTail.RemoveAt(loopTail);

                    Itemset newHead = new Itemset(head);
                    newHead.AddItem(i);
                    newHead.itemGroup[newHead.Count - 1] = targetGroup;
                    newHead.groupLength = sameGroup ? head.groupLength + 1 : 1;
                    newHead.SetTransactions(bitArray);
                    if (bitArray != null)
                    {
                        newHead.support = bitArray.CountElements();
                    }

                    RecurseMining(newHead, newTail, support, minLength, maxLength, maxGroupLength, mineResult);

                    // Report the itemset only when it is long enough and holds more items
                    // than its current group alone
                    if ((newHead.Count >= minLength) && (newHead.Count > newHead.groupLength))
                    {
                        mineResult.Add(newHead);
                    }

                    newHead.SetTransactions(null);
                }
            }
        }
예제 #5
0
        // Mine with skip-errors.
        //
        // For every tail item a MistakesBitMask with maxMistakes + 1 slots is built from the
        // pair transactions of (head's last item, item) and the mistake vectors stored on the
        // head and its ancestors. A support vector is derived from it; items whose support is
        // below minSupport are removed from 'tail'. When the head is not empty, every surviving
        // item then extends the head and the method recurses.
        //
        // head        - itemset built so far; linked to its predecessors through GetParent().
        // tail        - items still available; items below minSupport are removed in place.
        // minSupport  - minimal support an extension must reach.
        // minLength   - extended heads with at least this many items are added to mineResult.
        // maxLength   - a head that already holds this many items is not extended.
        // maxMistakes - highest index of the mistakes array that is filled and evaluated.
        // mineResult  - collector for accepted itemsets.
        //
        // NOTE(review): head.GetLastItem() is called in the first loop even when head.Count is 0,
        // and no recursion happens for an empty head - presumably callers always pass a
        // non-empty head; confirm.
        void RecurseMining(Itemset head, List <int> tail, int minSupport, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            Itemset         newHead;
            List <int>      newTail;
            MistakesBitMask mistakes;

            if (head.Count >= maxLength)
            {
                return;
            }

            // Three parallel lists, index-aligned with 'tail' once the first loop has finished:
            // an entry is appended only for items that stay in the tail.
            List <FastSparseBitArray> newSupportVectorBitMask = new List <FastSparseBitArray>();
            List <MistakesBitMask>    newBitMask = new List <MistakesBitMask>();
            List <int> newSupport = new List <int>();

            for (int loopTail = 0; loopTail < tail.Count; loopTail++)
            {
                int i = tail[loopTail];

                mistakes = new MistakesBitMask(maxMistakes);

                // Slot 0 starts as the transactions of the pair (last head item, i)
                FastSparseBitArray bitArray = _dualComp.GetItemset(head.GetLastItem(), i).GetTransactions();
                mistakes.mistakes[0] = bitArray;

                if (head.Count > 1)
                {
                    // NOTE(review): bitMaskTwoBack is never read after this line.
                    FastSparseBitArray bitMaskTwoBack = _dualComp.GetItemset(head.GetItem(head.Count - 2), i).GetTransactions();

                    MistakesBitMask existingMistakes = head.GetMistakes();

                    // Slot 0: pair transactions intersected with the head's slot 0
                    mistakes.mistakes[0] = mistakes.mistakes[0].And(existingMistakes.mistakes[0]);

                    // ************* Build candidate **************
                    // Build the mistakes array
                    for (int loopMistakes = 1; loopMistakes < maxMistakes + 1; loopMistakes++)
                    {
                        if (existingMistakes.mistakes[loopMistakes] != null)
                        {
                            // Start from the head's vector of the same slot restricted to the
                            // pair transactions
                            FastSparseBitArray noNewErrors = bitArray.And(existingMistakes.mistakes[loopMistakes]);

                            FastSparseBitArray newError = noNewErrors;                            // = head.GetParent().GetMistakes().mistakes[loopMistakes - 1].And(bitMaskTwoBack);

                            // Walk up the parent chain: the ancestor reached after
                            // 'upwardCount' steps contributes its slot
                            // (loopMistakes - upwardCount), restricted to the transactions of
                            // the pair (ancestor's last item, i). Stops when the chain ends or
                            // the slot index would become negative.
                            int     upwardCount     = 1;
                            Itemset upwardTraversal = head;
                            upwardTraversal = upwardTraversal.GetParent();
                            while ((upwardTraversal != null) && (loopMistakes - upwardCount >= 0))
                            {
                                FastSparseBitArray bitTIDs = _dualComp.GetItemset(upwardTraversal.GetLastItem(), i).GetTransactions();
                                newError = newError.Or(upwardTraversal.GetMistakes().mistakes[loopMistakes - upwardCount].And(bitTIDs));

                                upwardTraversal = upwardTraversal.GetParent();
                                upwardCount++;
                            }

                            mistakes.mistakes[loopMistakes] = newError;                            //.Or(noNewErrors);
                        }
                        else
                        {
                            // First slot the head has no vector for: union of the pair
                            // transactions of i with the last item of the head and of every
                            // ancestor. Higher slots are not filled (note the break below).
                            FastSparseBitArray bitTIDs;
                            Itemset            ancestor = head;
                            FastSparseBitArray newError = bitArray;
                            while (ancestor.GetParent() != null)
                            {
                                ancestor = ancestor.GetParent();
                                bitTIDs  = _dualComp.GetItemset(ancestor.GetLastItem(), i).GetTransactions();
                                newError = newError.Or(bitTIDs);
                            }

                            mistakes.mistakes[loopMistakes] = newError;                            //.Or(noNewErrors);
                            break;
                        }
                    }
                }

                // *********** Calculate the support *************
                // Sentinel: stays at 999999 when no support vector can be evaluated below, in
                // which case the item is kept for any minSupport up to that value.
                int support = 999999;

                // The support is the union of the last 'maxMistakes' mistakes
                // vectors; to get the last mistakes vector the algorithm performs
                // a backward traversal over the last items developed (backtracks the DFS)
                FastSparseBitArray currentSupportVector = mistakes.mistakes[maxMistakes];
                if (currentSupportVector != null)
                {
                    Itemset upwardTraversal = head;
                    for (int upward = 0; upward < maxMistakes; upward++)
                    {
                        if (upwardTraversal == null)
                        {
                            break;
                        }

                        currentSupportVector = currentSupportVector.Or(upwardTraversal.GetMistakes().mistakes[maxMistakes - upward - 1]);
                        upwardTraversal      = upwardTraversal.GetParent();
                    }

                    // The count is taken only when the parent chain did not run out during
                    // (or exactly at the end of) the traversal above
                    if (upwardTraversal != null)
                    {
                        support = currentSupportVector.CountElements();
                    }
                }

                if (support >= minSupport)
                {
                    newSupportVectorBitMask.Add(currentSupportVector);
                    newBitMask.Add(mistakes);
                    newSupport.Add(support);
                }
                else                 // Just remove the item as it will not be a member later...
                {
                    tail.RemoveAt(loopTail);
                    loopTail--;
                }
            }

            // Do Recurse call
            if (head.Count > 0)
            {
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    // NOTE(review): 'i' is not used below; tail[loopTail] is read directly.
                    int i = tail[loopTail];

                    newTail = new List <int>(tail);
                    newTail.RemoveAt(loopTail);
                    newHead = new Itemset(head);
                    newHead.AddItem(tail[loopTail]);
                    // Entries at loopTail belong to tail[loopTail] (see the first loop)
                    newHead.support = newSupport[loopTail];
                    newHead.SetTransactions((Utils.FastSparseBitArray)newSupportVectorBitMask[loopTail]);
                    newHead.SetMistakes((MistakesBitMask)newBitMask[loopTail]);
                    // The parent link is what the upward traversals above follow
                    newHead.SetParent(head);

                    RecurseMining(newHead, newTail, minSupport, minLength, maxLength, maxMistakes, mineResult);

                    if (newHead.Count >= minLength)
                    {
                        mineResult.Add(newHead);

                        /*
                         * System.IO.FileStream fs = new System.IO.FileStream("res.txt",
                         *                                                              System.IO.FileMode.Append);
                         * System.IO.StreamWriter tw = new System.IO.StreamWriter(fs);
                         * tw.WriteLine(newHead.ToString());
                         * tw.Close();
                         * fs.Close();
                         */
                    }
                    // Drop the references that were only needed while exploring the subtree
                    newHead.SetParent(null);
                    newHead.SetMistakes(null);
                    newHead.SetTransactions(null);
                }
            }
        }
예제 #6
0
        // Depth-first mining step that extends 'head' in place.
        //
        // An empty head is seeded once per tail item. For a non-empty head the method first
        // computes one bit vector per tail item (removing items that have no pair entry or
        // fall below 'support' from 'tail'), then appends each surviving item to 'head',
        // recurses, reports the head when it is long enough, and restores the head.
        //
        // head       - itemset built so far; modified during the call and restored before return.
        // tail       - items still available; rejected items are removed in place.
        // support    - minimal number of transactions an extension must cover.
        // minLength  - heads with at least this many items are added to mineResult.
        // maxLength  - a head that already holds this many items is not extended.
        // mineResult - collector for accepted itemsets.
        void RecurseMining(ItemsetBasic head, IntList tail, int support, int minLength, int maxLength, MineResults mineResult)
        {
            // Too long already, or unable to reach minLength even with the whole tail
            if ((head.Count >= maxLength) || (head.Count + tail.Count < minLength))
            {
                return;
            }

            if (head.Count == 0)
            {
                // Seeding: every tail item starts its own head
                for (int seedIndex = 0; seedIndex < tail.Count; seedIndex++)
                {
                    int seedItem = tail[seedIndex];

                    IntList seedTail = (IntList)tail.Clone();
                    seedTail.RemoveAt(seedIndex);

                    ItemsetBasic seedHead = new ItemsetBasic(tail.Count);
                    seedHead.AddItem(seedItem);
                    RecurseMining(seedHead, seedTail, support, minLength, maxLength, mineResult);

                    IntListPool.Instance.Release(seedTail);
                }

                return;
            }

            // Phase 1: one bit vector per surviving tail item, index-aligned with 'tail'
            List <FastSparseBitArray> bitArrays = new List <FastSparseBitArray>(tail.Count);

            int scan = 0;
            while (scan < tail.Count)
            {
                int candidate = tail[scan];

                FastSparseBitArray bitArray;
                if (_dualComp == null)
                {
                    // No pair structure: build the pair's vector from _ds into a pooled vector
                    bitArray = FastSparseBitArrayPool.Instance.Allocate();
                    _ds.BuildBitVector(head.GetLastItem(), candidate, bitArray);
                }
                else
                {
                    ISimpleItemset dualItemset = _dualComp.GetItemset(head.GetLastItem(), candidate);
                    if (dualItemset == null)
                    {
                        // Removing shifts the next item into 'scan', so do not advance
                        tail.RemoveAt(scan);
                        continue;
                    }

                    bitArray = dualItemset.GetTransactions();
                }

                if (head.Count > 1)
                {
                    bitArray = bitArray.And(head.GetTransactions());
                }

                bitArray.frequency = bitArray.CountElements();
                if (bitArray.frequency >= support)
                {
                    bitArrays.Add(bitArray);
                    scan++;
                    continue;
                }

                // Don't release bit vectors from O2 matrix, or there is no
                // O2 matrix
                if ((head.Count > 1) || (_dualComp == null))
                {
                    FastSparseBitArrayPool.Instance.Release(bitArray);
                }

                tail.RemoveAt(scan);
            }

            // Phase 2: extend the head with every surviving item in turn
            for (int pick = 0; pick < tail.Count; pick++)
            {
                int item = tail[pick];

                // Remember the state of 'head' so it can be restored afterwards
                FastSparseBitArray savedTransactions = head.GetTransactions();
                int savedSupport = head.support;

                IntList remaining = (IntList)tail.Clone();
                remaining.RemoveAt(pick);

                FastSparseBitArray extensionBits = bitArrays[pick];
                head.AddItem(item);
                head.SetTransactions(extensionBits);
                head.support = extensionBits.frequency;

                RecurseMining(head, remaining, support, minLength, maxLength, mineResult);

                IntListPool.Instance.Release(remaining);

                if (head.Count >= minLength)
                {
                    mineResult.Add(head);
                }

                head.SetTransactions(null);
                // head still contains the added item at this point
                if (head.Count > 2)
                {
                    FastSparseBitArrayPool.Instance.Release(extensionBits);
                }

                // Restore 'head'
                head.RemoveLastItem();
                head.SetTransactions(savedTransactions);
                head.support = savedSupport;
            }
        }