Пример #1
0
        /// <summary>
        /// Application entry point. Without command-line arguments the GUI form is shown;
        /// otherwise the arguments are parsed into a <c>MinerParams</c>, one mining run is
        /// executed through <c>DoMine</c> and the results are printed to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; an empty array selects GUI mode.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                // A form shown with ShowDialog is not disposed when it is closed,
                // so release it explicitly.
                using (Form1 form = new Form1())
                {
                    form.ShowDialog();
                }

                return;
            }

            try
            {
                MinerParams minerParams = MinerParams.Parse(args);

                // Stopwatch is monotonic; a DateTime.Now difference can be skewed by clock changes.
                System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

                MineResults mineResult = DoMine(minerParams);

                // DoMine returns null when loading or mining failed (it reports the error itself).
                if (mineResult != null)
                {
                    try
                    {
                        System.Console.WriteLine("Mining time : " + stopwatch.Elapsed);
                        System.Console.WriteLine("Total found " + mineResult.Count.ToString() + " patterns");
                        System.Console.WriteLine(mineResult.ToString());
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Exception during writing result: " + ex.ToString(), "Exception",
                                        MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                    }
                    finally
                    {
                        mineResult.Dispose();
                    }
                }
            }
            catch (ArgumentException ex)
            {
                // Bad command line: explain the problem and print the usage text.
                System.Console.WriteLine("Failed to parse command line argument: " + ex.Message);
                System.Console.WriteLine();
                System.Console.WriteLine(MinerParams.Usage());
            }
        }
Пример #2
0
        /// <summary>
        /// Depth-first extension of <paramref name="head"/> with items taken from
        /// <paramref name="tail"/>. Each extension links the new item to one existing head
        /// item; extensions whose transaction count reaches <paramref name="support"/> are
        /// recursed into and, once they hold at least <paramref name="minLength"/> items,
        /// added to <paramref name="mineResult"/>.
        /// </summary>
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, MineResults mineResult)
        {
            // Heads that already hold maxLength items are not extended any further.
            if (head.Count >= maxLength)
            {
                return;
            }

            for (int tailPos = 0; tailPos < tail.Count; tailPos++)
            {
                int candidate = tail[tailPos];

                if (head.Count == 0)
                {
                    // Empty head: start a pattern from the candidate alone (no support test here).
                    List<int> seedTail = new List<int>(tail);
                    seedTail.RemoveAt(tailPos);

                    Itemset seedHead = new Itemset();
                    seedHead.AddItem(candidate);

                    RecurseMining(seedHead, seedTail, support, minLength, maxLength, mineResult);
                    continue;
                }

                for (int headPos = 0; headPos < head.Count; headPos++)
                {
                    // Direction 0 attaches "head item => candidate". Direction 1 would attach
                    // "candidate => head item", but the loop bound of 1 never reaches it.
                    for (int direction = 0; direction < 1; direction++)
                    {
                        bool headToCandidate = (direction == 0);
                        FastSparseBitArray transactions;

                        if (headToCandidate)
                        {
                            // The head item already has its maximum number of outgoing links.
                            if (head.outDegree[headPos] >= MAX_OUT_RANK)
                            {
                                break;
                            }

                            transactions = _dualComp.GetItemset(head.GetItem(headPos), candidate).GetTransactions();
                        }
                        else
                        {
                            // The head item already has its maximum number of incoming links.
                            if (head.inDegree[headPos] >= MAX_IN_RANK)
                            {
                                break;
                            }

                            transactions = _dualComp.GetItemset(candidate, head.GetItem(headPos)).GetTransactions();
                        }

                        // Heads with more than one item carry their own transaction set.
                        if (head.Count > 1)
                        {
                            transactions = transactions.And(head.GetTransactions());
                        }

                        // Not enough supporting transactions: try the next combination.
                        if (transactions.CountElements() < support)
                        {
                            continue;
                        }

                        List<int> grownTail = new List<int>(tail);
                        grownTail.RemoveAt(tailPos);

                        Itemset grownHead = new Itemset(head);
                        grownHead.AddItem(candidate);
                        grownHead.SetTransactions(transactions);
                        grownHead.support = transactions.CountElements();

                        // Record the new link in the degree counters and in the display string.
                        if (headToCandidate)
                        {
                            grownHead.inDegree[grownHead.Count - 1] = 1;
                            grownHead.outDegree[headPos]++;
                            grownHead.customStringDisplay += "" + head.GetItem(headPos) + "=>" + candidate + "; ";
                        }
                        else
                        {
                            grownHead.inDegree[headPos]++;
                            grownHead.outDegree[grownHead.Count - 1] = 1;
                            grownHead.customStringDisplay += "" + candidate + "=>" + head.GetItem(headPos) + "; ";
                        }

                        RecurseMining(grownHead, grownTail, support, minLength, maxLength, mineResult);

                        // Report the pattern after its extensions have been explored.
                        if (grownHead.Count >= minLength)
                        {
                            mineResult.Add(grownHead);
                        }

                        // The transaction set is detached once this branch is finished.
                        grownHead.SetTransactions(null);
                    }
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Starts the recursive search from an empty pattern with every dataset column as a
        /// candidate. <paramref name="maxMistakes"/> is not used by this implementation.
        /// </summary>
        override public void Mine(int support, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            Itemset emptyHead = new Itemset();

            // Candidate list: the column indices 0 .. GetColumnCount() - 1, in order.
            List<int> allColumns = new List<int>();
            int column = 0;
            while (column < _ds.GetColumnCount())
            {
                allColumns.Add(column);
                column++;
            }

            RecurseMining(emptyHead, allColumns, support, minLength, maxLength, mineResult);
        }
        /// <summary>
        /// Layer-aware mining entry point. Every column is used once as the first item of a
        /// pattern; the columns after it form the tail that the recursion may add.
        /// </summary>
        /// <param name="support">Minimum transaction count an extension needs to be kept.</param>
        /// <param name="minLength">Patterns with at least this many items are reported.</param>
        /// <param name="maxLength">Patterns are not extended once they hold this many items.</param>
        /// <param name="maxLayerDiff">Passed through to the recursion, which uses it to limit
        /// how unbalanced the per-layer item counts of a pattern may become.</param>
        /// <param name="mineResult">Collector for the reported patterns.</param>
        override public void Mine(int support, int minLength, int maxLength, int maxLayerDiff, MineResults mineResult)
        {
            // Layer indices are zero-based, so the layer count is the highest index plus one.
            int numberOfLayers = 0;

            for (int loopLayers = 0; loopLayers < _itemsLayers.Length; loopLayers++)
            {
                numberOfLayers = Math.Max(numberOfLayers, _itemsLayers[loopLayers]);
            }
            numberOfLayers++;

            // Static setting read by Itemset; presumably sizes layersItemsSum - TODO confirm.
            Itemset.numberOfLayers = numberOfLayers;

            try
            {
                List <int> tail = new List <int>();

                for (int loop = 0; loop < _ds.GetColumnCount(); loop++)
                {
                    tail.Add(loop);
                }

                // TODO : Sort the tail in some smart way...
                while (tail.Count > 0)
                {
                    int first = tail[0];

                    Itemset head = new Itemset();
                    head.AddItem(first);
                    head.layersItemsSum[_itemsLayers[first]]++;

                    // Only the items after the current one remain as candidates.
                    tail.RemoveAt(0);

                    RecurseMining(head, tail, support, minLength, maxLength, mineResult, maxLayerDiff);
                }
            }
            finally
            {
                // Always restore the static marker, even when mining throws, so a failed
                // run does not leave stale layer state behind for the next one.
                Itemset.numberOfLayers = -1;
            }
        }
        /// <summary>
        /// Layer-aware recursive step: evaluates every tail item as an extension of
        /// <paramref name="head"/>, orders the surviving candidates with
        /// <c>List.Sort()</c> (the order is defined by <c>Candidate</c>) and recurses into
        /// each of them until the first candidate flagged with <c>Int32.MaxValue</c>.
        /// </summary>
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, MineResults mineResult, int maxLayerDiff)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            // Item count of the least populated layer in the current head.
            int smallestLayer = head.layersItemsSum[0];
            for (int layer = 1; layer < head.layersItemsSum.Length; layer++)
            {
                smallestLayer = Math.Min(smallestLayer, head.layersItemsSum[layer]);
            }

            List<Candidate> extensions = new List<Candidate>();

            foreach (int item in tail)
            {
                // TODO : Calculation of the support should be according to the items from the
                // lower and upper layers.
                FastSparseBitArray transactions = null;

                // Intersect the pair data of the item with every head item from another layer;
                // the item with the lower layer index is passed first. Same-layer items are skipped.
                for (int headPos = 0; headPos < head.Count; headPos++)
                {
                    int headItem = head.GetItem(headPos);
                    FastSparseBitArray pair;

                    if (_itemsLayers[headItem] < _itemsLayers[item])
                    {
                        pair = _dualComp.GetItemset(headItem, item).GetTransactions();
                    }
                    else if (_itemsLayers[headItem] > _itemsLayers[item])
                    {
                        pair = _dualComp.GetItemset(item, headItem).GetTransactions();
                    }
                    else
                    {
                        continue;
                    }

                    transactions = (transactions == null) ? pair : transactions.And(pair);
                }

                // Fold in the transactions already attached to the head, if any.
                if (head.GetTransactions() != null)
                {
                    transactions = (transactions == null)
                        ? head.GetTransactions()
                        : transactions.And(head.GetTransactions());
                }

                // Without any transaction data the support counts as unbounded.
                int itemSupport = (transactions != null) ? transactions.CountElements() : Int32.MaxValue;
                if (itemSupport < support)
                {
                    continue;
                }

                // An extension that would unbalance the layers is kept in the list but flagged
                // with Int32.MaxValue, which the traversal loop below treats as a stop marker.
                bool unbalanced = head.layersItemsSum[_itemsLayers[item]] + 1 > smallestLayer + maxLayerDiff;

                Candidate extension = new Candidate();
                extension.item     = item;
                extension.support  = unbalanced ? Int32.MaxValue : itemSupport;
                extension.bitArray = transactions;

                extensions.Add(extension);
            }

            // Dynamic reordering of the candidates.
            extensions.Sort();

            // The new tail lists the candidate items in their sorted order.
            List<int> remaining = new List<int>();
            foreach (Candidate extension in extensions)
            {
                remaining.Add(extension.item);
            }

            for (int index = 0; index < extensions.Count; index++)
            {
                Candidate current = extensions[index];

                // Stop at the first flagged candidate.
                if (current.support == Int32.MaxValue)
                {
                    break;
                }

                // Take the front item; the items still in the list are the tail for the recursion.
                int nextItem = remaining[0];
                remaining.RemoveAt(0);

                Itemset grownHead = new Itemset(head);
                grownHead.AddItem(nextItem);
                grownHead.layersItemsSum[_itemsLayers[nextItem]]++;
                grownHead.SetTransactions(current.bitArray);
                grownHead.support = current.support;

                RecurseMining(grownHead, remaining, support, minLength, maxLength, mineResult, maxLayerDiff);

                if (grownHead.Count >= minLength)
                {
                    mineResult.Add(grownHead);
                }

                grownHead.SetTransactions(null);
            }
        }
Пример #6
0
        // Level-wise depth-first mining step with look-ahead pruning.
        //
        // levelItems - (head, tail) pairs of the current level. Entries are removed from this
        //              list in place and their tails are trimmed in place.
        // support    - minimum transaction count an extension needs to be kept.
        // minLength  - extended heads with at least this many items are added to mineResult.
        // maxLength  - heads that already hold this many items are dropped, not extended.
        // mineResult - collector that receives the reported heads.
        //
        // The method works in two passes over levelItems: pass 1 computes the transaction
        // bit array of every (head, tail item) extension and fills the look-ahead table,
        // pass 2 builds the next level for each head and recurses into it.
        void RecurseMining(List <DFSLevelItem> levelItems, int support, int minLength, int maxLength, MineResults mineResult)
        {
            // Cut-off: drop level items whose head is already at maxLength, or that cannot
            // reach minLength even if every tail item were added. Iterating backwards keeps
            // the remaining indices valid while removing.
            for (int i = levelItems.Count - 1; i >= 0; i--)
            {
                if (levelItems[i].Head.Count >= maxLength)
                {
                    levelItems.RemoveAt(i);
                    continue;
                }

                if (levelItems[i].Head.Count + levelItems[i].Tail.Count < minLength)
                {
                    levelItems.RemoveAt(i);
                    continue;
                }
            }

            if (levelItems.Count == 0)
            {
                return;
            }

            // Look-ahead table: item -> largest frequency recorded for an extension by that
            // item whose frequency equals the support of the head it extends.
            Dictionary <int, int> lookAheadPrune = new Dictionary <int, int>();

            // Pass 1: evaluate every extension of every head on this level.
            for (int levelItemsLoop = 0; levelItemsLoop < levelItems.Count; levelItemsLoop++)
            {
                IntList      tail = levelItems[levelItemsLoop].Tail;
                ItemsetBasic head = levelItems[levelItemsLoop].Head;

                // Stays index-aligned with tail: a tail item that fails is removed from tail,
                // a tail item that passes appends exactly one bit array here.
                List <FastSparseBitArray> bitArrays = new List <FastSparseBitArray>(tail.Count);
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    int i = tail[loopTail];

                    FastSparseBitArray bitArray = null;

                    if (_dualComp != null)
                    {
                        // Pair data is available: look up (last head item, i). A missing pair
                        // removes i from this tail.
                        ISimpleItemset dualItemset = _dualComp.GetItemset(head.GetLastItem(), i);
                        if (dualItemset == null)
                        {
                            tail.RemoveAt(loopTail);
                            loopTail--;
                            continue;
                        }

                        bitArray = dualItemset.GetTransactions();
                    }
                    else
                    {
                        // No pair data: take a bit array from the pool and let the dataset fill it.
                        bitArray = FastSparseBitArrayPool.Instance.Allocate();
                        _ds.BuildBitVector(head.GetLastItem(), i, bitArray);
                    }

                    // Heads with more than one item are intersected with their own transactions.
                    // NOTE(review): the result of And() replaces the reference obtained above;
                    // whether And() returns a new (pooled) array is not visible here - confirm
                    // the array allocated in the else-branch is not leaked in that case.
                    if (head.Count > 1)
                    {
                        bitArray = bitArray.And(head.GetTransactions());
                    }

                    bitArray.frequency = bitArray.CountElements();
                    if (bitArray.frequency >= support)
                    {
                        bitArrays.Add(bitArray);

                        // Look-ahead bookkeeping: adding i keeps the full support of the head.
                        if (bitArray.frequency == head.support)
                        {
                            int lookAheadSupport;
                            if (lookAheadPrune.TryGetValue(i, out lookAheadSupport) == false)
                            {
                                lookAheadPrune.Add(i, bitArray.frequency);
                            }
                            else
                            {
                                lookAheadPrune[i] = Math.Max(lookAheadSupport, bitArray.frequency);
                            }
                        }
                    }
                    else
                    {
                        // Below minimum support. The bit array is returned to the pool only when
                        // the head has more than one item or when there is no pair matrix;
                        // otherwise it came straight from the pair (O2) matrix and is left alone.
                        if ((head.Count > 1) || (_dualComp == null))
                        {
                            FastSparseBitArrayPool.Instance.Release(bitArray);
                        }

                        tail.RemoveAt(loopTail);
                        loopTail--;
                    }
                }

                levelItems[levelItemsLoop].TailBitArrays = bitArrays;
            }

            // Pass 2: build the next level for each head and recurse.
            for (int levelItemsLoop = 0; levelItemsLoop < levelItems.Count; levelItemsLoop++)
            {
                ItemsetBasic head = levelItems[levelItemsLoop].Head;
                List <FastSparseBitArray> bitArrays = levelItems[levelItemsLoop].TailBitArrays;

                // Look-ahead prune: skip this head when the table holds an entry for its last
                // item whose recorded frequency equals this head's support.
                int lookAheadSupport;
                if (lookAheadPrune.TryGetValue(head.GetLastItem(), out lookAheadSupport) == true)
                {
                    if (lookAheadSupport == head.support)
                    {
                        // NOTE(review): arrays are released here only when head.Count > 2, while
                        // the rejection path in pass 1 uses head.Count > 1 - confirm intended.
                        if (head.Count > 2)
                        {
                            for (int j = 0; j < bitArrays.Count; j++)
                            {
                                FastSparseBitArrayPool.Instance.Release(bitArrays[j]);
                            }
                        }
                        continue;
                    }
                }

                IntList tail = levelItems[levelItemsLoop].Tail;

                List <DFSLevelItem> newLevelItems = new List <DFSLevelItem>();
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    int i = tail[loopTail];

                    // The new tail is every surviving tail item except the one being added.
                    IntList newTail = (IntList)tail.Clone();
                    newTail.RemoveAt(loopTail);

                    ItemsetBasic newHead = new ItemsetBasic(head);
                    newHead.AddItem(i);
                    FastSparseBitArray bitArray = bitArrays[loopTail];
                    newHead.SetTransactions(bitArray);
                    newHead.support = bitArray.frequency;

                    // The extended head is reported before its own extensions are explored.
                    if (newHead.Count >= minLength)
                    {
                        mineResult.Add(newHead);
                    }

                    newLevelItems.Add(new DFSLevelItem(newHead, newTail));
                }

                RecurseMining(newLevelItems, support, minLength, maxLength, mineResult);

                // Return the cloned tails of the entries still in newLevelItems to the pool
                // (entries the recursive call removed from the list are not visited here).
                for (int j = 0; j < newLevelItems.Count; j++)
                {
                    IntListPool.Instance.Release(newLevelItems[j].Tail);
                }

                // Return this head's extension bit arrays to the pool (only when head.Count > 2).
                if (head.Count > 2)
                {
                    for (int j = 0; j < bitArrays.Count; j++)
                    {
                        FastSparseBitArrayPool.Instance.Release(bitArrays[j]);
                    }
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Look-ahead mining entry point: builds one first-level (head, tail) pair per dataset
        /// column and hands the whole level to the recursive miner.
        /// </summary>
        /// <param name="support">Minimum transaction count an extension needs to be kept.</param>
        /// <param name="minLength">Patterns with at least this many items are reported.</param>
        /// <param name="maxLength">Patterns are not extended once they hold this many items.</param>
        /// <param name="maxMistakes">Not used by this implementation.</param>
        /// <param name="mineResult">Collector for the reported patterns.</param>
        override public void Mine(int support, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            IntListPool.Instance.Clear();

            int columnCount = _ds.GetColumnCount();

            // Tail holding every column index.
            IntList tail = new IntList(columnCount);
            for (int loop = 0; loop < columnCount; loop++)
            {
                tail.Add(loop);
            }

            List <DFSLevelItem> levelItems = new List <DFSLevelItem>(tail.Count);

            // First level: each column becomes a one-item head whose tail is all other columns.
            for (int loop = 0; loop < tail.Count; loop++)
            {
                IntList newTail = (IntList)tail.Clone();
                newTail.RemoveAt(loop);

                // The constructor argument is presumably a capacity - TODO confirm.
                ItemsetBasic newHead = new ItemsetBasic(tail.Count);
                newHead.AddItem(tail[loop]);

                // A single item starts with the full row count as its support.
                newHead.support = _ds.RowCount;

                levelItems.Add(new DFSLevelItem(newHead, newTail));
            }

            RecurseMining(levelItems, support, minLength, maxLength, mineResult);
        }
Пример #8
0
        /// <summary>
        /// Click handler of the "Mine" button: collects the mining parameters from the form
        /// controls, runs <c>OPSMMain.DoMine</c> and shows the timing, pattern count and
        /// result text in the results box.
        /// </summary>
        /// <exception cref="ArgumentException">No algorithm radio button is checked.</exception>
        private void buttonMine_Click(object sender, System.EventArgs e)
        {
            MinerParams minerParams = new MinerParams();

            if (textBoxDatasetPath.Text == "")
            {
                MessageBox.Show("No dataset selected", "Dataset Error",
                                MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return;
            }

            // Validate all numeric fields up front: a typo in a text box must produce a
            // message, not an unhandled FormatException/OverflowException.
            int minSupport, minLength, maxLength, maxErrors, minGroups, maxLayerDiff;
            if (!TryReadIntegerField(textBoxSupport.Text, "support", out minSupport) ||
                !TryReadIntegerField(textBoxMinLength.Text, "minimum length", out minLength) ||
                !TryReadIntegerField(textBoxMaxLength.Text, "maximum length", out maxLength) ||
                !TryReadIntegerField(textBoxMaxErrors.Text, "maximum errors", out maxErrors) ||
                !TryReadIntegerField(textBoxMinGroups.Text, "minimum groups", out minGroups) ||
                !TryReadIntegerField(textBoxMaxLayerDiff.Text, "maximum layer difference", out maxLayerDiff))
            {
                return;
            }

            minerParams.DatasetFileName            = textBoxDatasetPath.Text;
            minerParams.InputContainsColumnHeaders = this.checkBoxInputColumnHeaders.Checked;
            minerParams.InputContainsRowHeaders    = this.checkBoxInputRowHeaders.Checked;

            minerParams.WriteOutputFiles = this.checkBoxWriteOutput.Checked;
            minerParams.WriteAllResults  = this.checkBoxWriteAllResults.Checked;

            minerParams.MinSupport = minSupport;
            minerParams.MinLength  = minLength;
            minerParams.MaxLength  = maxLength;

            minerParams.MaxErrors    = maxErrors;
            minerParams.MinGroups    = minGroups;
            minerParams.MaxLayerDiff = maxLayerDiff;

            minerParams.InCoreDualCompare = this.checkBoxUseInCoreDualCompare.Checked;

            if (radioButtonOPSM.Checked == true)
            {
                minerParams.Algorithm = Algorithm.OPSM;
            }
            else if (radioButtonTreePattern.Checked == true)
            {
                minerParams.Algorithm = Algorithm.TreePattern;
            }
            else if (radioButtonWithError.Checked == true)
            {
                minerParams.Algorithm = Algorithm.WithErrors;
            }
            else if (radioButtonGroups.Checked == true)
            {
                minerParams.Algorithm = Algorithm.Groups;
            }
            else if (radioButtonLayers.Checked == true)
            {
                minerParams.Algorithm = Algorithm.Layers;
            }
            else
            {
                throw new ArgumentException("Unsupported algorithm");
            }

            // Block re-entry while mining runs; the finally below guarantees the button
            // comes back even if something throws.
            buttonMine.Enabled = false;

            try
            {
                // Stopwatch is monotonic; a DateTime.Now difference can be skewed by clock changes.
                System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

                richTextBoxResults.Clear();

                // DoMine returns null when loading or mining failed (it reports the error itself).
                MineResults mineResult = OPSMMain.DoMine(minerParams);

                if (mineResult != null)
                {
                    try
                    {
                        richTextBoxResults.AppendText("Mining time : " + stopwatch.Elapsed + "\n");
                        richTextBoxResults.AppendText("Total found " + mineResult.Count.ToString() + " patterns\n");
                        richTextBoxResults.AppendText(mineResult.ToString());
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show("Exception during writing result: " + ex.ToString(), "Exception",
                                        MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                    }
                    finally
                    {
                        mineResult.Dispose();
                    }
                }
            }
            finally
            {
                buttonMine.Enabled = true;
            }
        }

        // Parses the text of one numeric input field; on failure tells the user which
        // field is invalid and returns false.
        private static bool TryReadIntegerField(string text, string fieldName, out int value)
        {
            if (Int32.TryParse(text, out value))
            {
                return true;
            }

            MessageBox.Show("Invalid value for " + fieldName + ": \"" + text + "\" is not a valid integer",
                            "Input Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
            return false;
        }
Пример #9
0
 /// <summary>
 /// Runs the mining algorithm of the concrete miner and adds the patterns it finds
 /// to <paramref name="mineResults"/>.
 /// </summary>
 /// <param name="support">Minimum support passed by the caller (the miner's MinSupport setting in the visible caller).</param>
 /// <param name="minLength">Minimum pattern length.</param>
 /// <param name="maxLength">Maximum pattern length.</param>
 /// <param name="maxMistakes">Algorithm-specific value. NOTE(review): the visible caller passes
 /// MaxErrors, MinGroups or MaxLayerDiff here depending on the selected algorithm (0 otherwise),
 /// and overrides rename or ignore it accordingly - confirm before relying on the name.</param>
 /// <param name="mineResults">Collector that receives the mined patterns.</param>
 abstract public void Mine(int support, int minLength, int maxLength, int maxMistakes, MineResults mineResults);
Пример #10
0
        /// <summary>
        /// Recursive step of the group miner. Each tail item is tried both as a further
        /// member of the head's last group and as the first member of a new group; its
        /// support is the intersection of its pair data with every member of the
        /// preceding group.
        /// </summary>
        void RecurseMining(Itemset head, List <int> tail, int support, int minLength, int maxLength, int maxGroupLength, MineResults mineResult)
        {
            if (head.Count >= maxLength)
            {
                return;
            }

            for (int tailPos = 0; tailPos < tail.Count; tailPos++)
            {
                int candidate = tail[tailPos];

                if (head.Count == 0)
                {
                    // Empty head: the candidate opens group 1 on its own.
                    List<int> seedTail = new List<int>(tail);
                    seedTail.RemoveAt(tailPos);

                    Itemset seedHead = new Itemset();
                    seedHead.AddItem(candidate);
                    seedHead.itemGroup[seedHead.Count - 1] = 1;
                    seedHead.groupLength = 1;

                    RecurseMining(seedHead, seedTail, support, minLength, maxLength, maxGroupLength, mineResult);
                    continue;
                }

                int lastGroup = head.itemGroup[head.Count - 1];

                // Step 0 keeps the candidate in the last group, step 1 opens the next group.
                for (int groupStep = 0; groupStep < 2; groupStep++)
                {
                    bool joinsLastGroup = (groupStep == 0);

                    // Within a group, skip a candidate smaller than the last head item (so the
                    // same group is not built twice), and do not grow a group past its maximum length.
                    if (joinsLastGroup &&
                        ((candidate < head.GetItem(head.Count - 1)) || (head.groupLength >= maxGroupLength)))
                    {
                        continue;
                    }

                    int targetGroup = lastGroup + groupStep;
                    FastSparseBitArray transactions = null;

                    // Intersect the pair data of the candidate with every member of the group
                    // directly before the target group.
                    for (int headPos = 0; headPos < head.Count; headPos++)
                    {
                        if (head.itemGroup[headPos] != targetGroup - 1)
                        {
                            continue;
                        }

                        FastSparseBitArray pair = _dualComp.GetItemset(head.GetItem(headPos), candidate).GetTransactions();
                        transactions = (transactions == null) ? pair : transactions.And(pair);
                    }

                    if (lastGroup > 1)
                    {
                        transactions = transactions.And(head.GetTransactions());
                    }

                    // No transaction data at all counts as sufficient support.
                    bool supported = (transactions == null) || (transactions.CountElements() >= support);

                    if (!supported)
                    {
                        // Drop the candidate from this level's tail and move on to the next tail item.
                        tail.RemoveAt(tailPos);
                        tailPos--;
                        break;
                    }

                    List<int> grownTail = new List<int>(tail);
                    grownTail.RemoveAt(tailPos);

                    Itemset grownHead = new Itemset(head);
                    grownHead.AddItem(candidate);
                    grownHead.itemGroup[grownHead.Count - 1] = targetGroup;
                    grownHead.groupLength = joinsLastGroup ? head.groupLength + 1 : 1;

                    grownHead.SetTransactions(transactions);
                    if (transactions != null)
                    {
                        grownHead.support = transactions.CountElements();
                    }

                    RecurseMining(grownHead, grownTail, support, minLength, maxLength, maxGroupLength, mineResult);

                    // Report only patterns that are long enough and span more than their first group.
                    if ((grownHead.Count >= minLength) && (grownHead.Count > grownHead.groupLength))
                    {
                        mineResult.Add(grownHead);
                    }

                    grownHead.SetTransactions(null);
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Loads the dataset named in <paramref name="minerParams"/>, runs the selected mining
        /// algorithm and writes the results. Failures are reported to the user through a
        /// message box rather than thrown.
        /// </summary>
        /// <param name="minerParams">Dataset location, algorithm choice and thresholds.</param>
        /// <returns>
        /// The populated results, which the caller must dispose, or <c>null</c> when loading
        /// the dataset or mining failed.
        /// </returns>
        public static MineResults DoMine(MinerParams minerParams)
        {
            OPSM.Dataset dataset;

            try
            {
                dataset = new Dataset(
                    minerParams.DatasetFileName,
                    minerParams.InputContainsColumnHeaders,
                    minerParams.InputContainsRowHeaders);
            }
            catch (Exception ex)
            {
                MessageBox.Show("Failed to load dataset: " + ex.Message, "Exception",
                                MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
                return null;
            }

            MineResults mineResult = null;

            try
            {
                // Optional in-core pair comparison structure. Only the OPSM algorithm passes
                // its minimum support to it; every other algorithm passes -1.
                OPSM.DualCompare dualCompare = null;
                if (minerParams.InCoreDualCompare == true)
                {
                    int dualSupport = (minerParams.Algorithm == Algorithm.OPSM) ? minerParams.MinSupport : -1;
                    dualCompare = new DualCompare(dataset, dualSupport);
                }

                // Pick the miner together with its algorithm-specific fourth Mine() argument.
                OPSM.Miner dfsMiner;
                int        parameter1 = 0;

                if (minerParams.Algorithm == Algorithm.OPSM)
                {
                    dfsMiner = new DFSLookAheadMiner(dataset, dualCompare);
                }
                else if (minerParams.Algorithm == Algorithm.TreePattern)
                {
                    dfsMiner = new DFSPatternMiner(dataset, dualCompare);
                }
                else if (minerParams.Algorithm == Algorithm.WithErrors)
                {
                    dfsMiner   = new DFSErrorMiner(dataset, dualCompare);
                    parameter1 = minerParams.MaxErrors;
                }
                else if (minerParams.Algorithm == Algorithm.Groups)
                {
                    dfsMiner   = new DFSGroupsMiner(dataset, dualCompare);
                    parameter1 = minerParams.MinGroups;
                }
                else if (minerParams.Algorithm == Algorithm.Layers)
                {
                    dfsMiner   = new DFSLayerMiner(dataset, dualCompare, minerParams.DatasetFileName);
                    parameter1 = minerParams.MaxLayerDiff;
                }
                else
                {
                    // Report an unknown algorithm explicitly instead of failing later with a
                    // NullReferenceException on the miner.
                    throw new ArgumentException("Unsupported algorithm");
                }

                // NOTE(review): the meaning of the literal 15 is defined by the MineResults
                // constructor, which is not visible here.
                mineResult = new MineResults(
                    15,
                    minerParams.OutputDirectory,
                    minerParams.WriteAllResults && minerParams.WriteOutputFiles);

                dfsMiner.Mine(minerParams.MinSupport, minerParams.MinLength, minerParams.MaxLength, parameter1, mineResult);

                mineResult.WriteResults(dataset);
            }
            catch (Exception ex)
            {
                MessageBox.Show("Exception during mining: " + ex.ToString(), "Exception",
                                MessageBoxButtons.OK, MessageBoxIcon.Exclamation);

                // The caller only disposes a non-null result, so release a partially filled
                // one here instead of just dropping the reference.
                if (mineResult != null)
                {
                    mineResult.Dispose();
                    mineResult = null;
                }
            }

            return(mineResult);
        }
Пример #12
0
        /// <summary>
        /// Depth-first extension step of the "skip-errors" miner. Every item in
        /// <paramref name="tail"/> is evaluated as a candidate extension of
        /// <paramref name="head"/>; candidates whose computed support is below
        /// <paramref name="minSupport"/> are removed from <paramref name="tail"/> in place, and
        /// the method then recurses on each surviving candidate.
        /// </summary>
        /// <param name="head">Current pattern prefix; its last item, parent chain and mistakes masks are read.</param>
        /// <param name="tail">Candidate items. This list is modified: rejected candidates are removed.</param>
        /// <param name="minSupport">Minimum support a candidate must reach to be kept.</param>
        /// <param name="minLength">Extended patterns with at least this many items are added to <paramref name="mineResult"/>.</param>
        /// <param name="maxLength">No extension is attempted once <paramref name="head"/> has this many items.</param>
        /// <param name="maxMistakes">Highest index used in the per-candidate mistakes array.</param>
        /// <param name="mineResult">Collector that receives the qualifying patterns.</param>
        void RecurseMining(Itemset head, List <int> tail, int minSupport, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            Itemset         newHead;
            List <int>      newTail;
            MistakesBitMask mistakes;

            // Length cap reached: nothing more to extend.
            if (head.Count >= maxLength)
            {
                return;
            }

            // Three parallel lists. After the first loop, index k in each of them
            // describes the candidate tail[k], because a candidate is either appended
            // to all three lists or removed from 'tail'.
            List <FastSparseBitArray> newSupportVectorBitMask = new List <FastSparseBitArray>();
            List <MistakesBitMask>    newBitMask = new List <MistakesBitMask>();
            List <int> newSupport = new List <int>();

            for (int loopTail = 0; loopTail < tail.Count; loopTail++)
            {
                int i = tail[loopTail];

                // NOTE(review): indices 0..maxMistakes of 'mistakes.mistakes' are used below,
                // so the array presumably has maxMistakes + 1 slots - confirm in MistakesBitMask.
                mistakes = new MistakesBitMask(maxMistakes);

                // Slot 0 starts as the transaction vector of the pair (last item of head, i).
                FastSparseBitArray bitArray = _dualComp.GetItemset(head.GetLastItem(), i).GetTransactions();
                mistakes.mistakes[0] = bitArray;

                if (head.Count > 1)
                {
                    // NOTE(review): 'bitMaskTwoBack' is never read in this method; only the
                    // lookup itself is executed.
                    FastSparseBitArray bitMaskTwoBack = _dualComp.GetItemset(head.GetItem(head.Count - 2), i).GetTransactions();

                    MistakesBitMask existingMistakes = head.GetMistakes();

                    // Slot 0 is narrowed to the intersection with head's own slot 0.
                    mistakes.mistakes[0] = mistakes.mistakes[0].And(existingMistakes.mistakes[0]);

                    // ************* Build candidate **************
                    // Build the mistakes array
                    for (int loopMistakes = 1; loopMistakes < maxMistakes + 1; loopMistakes++)
                    {
                        if (existingMistakes.mistakes[loopMistakes] != null)
                        {
                            // Start from the intersection of the pair vector with head's vector
                            // at the same slot.
                            FastSparseBitArray noNewErrors = bitArray.And(existingMistakes.mistakes[loopMistakes]);

                            FastSparseBitArray newError = noNewErrors;                            // = head.GetParent().GetMistakes().mistakes[loopMistakes - 1].And(bitMaskTwoBack);

                            // Walk up the parent chain: the ancestor reached after 'upwardCount'
                            // steps contributes its slot (loopMistakes - upwardCount),
                            // intersected with the pair vector of (that ancestor's last item, i).
                            int     upwardCount     = 1;
                            Itemset upwardTraversal = head;
                            upwardTraversal = upwardTraversal.GetParent();
                            while ((upwardTraversal != null) && (loopMistakes - upwardCount >= 0))
                            {
                                FastSparseBitArray bitTIDs = _dualComp.GetItemset(upwardTraversal.GetLastItem(), i).GetTransactions();
                                newError = newError.Or(upwardTraversal.GetMistakes().mistakes[loopMistakes - upwardCount].And(bitTIDs));

                                upwardTraversal = upwardTraversal.GetParent();
                                upwardCount++;
                            }

                            mistakes.mistakes[loopMistakes] = newError;                            //.Or(noNewErrors);
                        }
                        else
                        {
                            // Head has no vector at this slot yet: use the union of the pair
                            // vectors of (ancestor's last item, i) over all of head's ancestors,
                            // together with the (head's last item, i) vector, then stop filling
                            // further slots.
                            FastSparseBitArray bitTIDs;
                            Itemset            ancestor = head;
                            FastSparseBitArray newError = bitArray;
                            while (ancestor.GetParent() != null)
                            {
                                ancestor = ancestor.GetParent();
                                bitTIDs  = _dualComp.GetItemset(ancestor.GetLastItem(), i).GetTransactions();
                                newError = newError.Or(bitTIDs);
                            }

                            mistakes.mistakes[loopMistakes] = newError;                            //.Or(noNewErrors);
                            break;
                        }
                    }
                }

                // *********** Calculate the support *************
                // Sentinel: when the support is not computed below, this value is what gets
                // compared with minSupport (presumably chosen so the candidate is not pruned -
                // confirm that minSupport can never exceed it).
                int support = 999999;

                // The support is the union of the last 'maxMistakes' mistakes
                // vectors; to get the last mistakes vector the algorithm performs
                // a backward traversal over the last items developed (backtracks the DFS).
                FastSparseBitArray currentSupportVector = mistakes.mistakes[maxMistakes];
                if (currentSupportVector != null)
                {
                    Itemset upwardTraversal = head;
                    for (int upward = 0; upward < maxMistakes; upward++)
                    {
                        if (upwardTraversal == null)
                        {
                            break;
                        }

                        currentSupportVector = currentSupportVector.Or(upwardTraversal.GetMistakes().mistakes[maxMistakes - upward - 1]);
                        upwardTraversal      = upwardTraversal.GetParent();
                    }

                    // The support is counted only if the chain did not run out during the walk.
                    if (upwardTraversal != null)
                    {
                        support = currentSupportVector.CountElements();
                    }
                }

                if (support >= minSupport)
                {
                    // Note: currentSupportVector may be null here when slot 'maxMistakes'
                    // was not filled and the sentinel support was kept.
                    newSupportVectorBitMask.Add(currentSupportVector);
                    newBitMask.Add(mistakes);
                    newSupport.Add(support);
                }
                else                 // Just remove the item as it will not be a member later...
                {
                    // Step the index back so the element shifted into this position is visited.
                    tail.RemoveAt(loopTail);
                    loopTail--;
                }
            }

            // Do Recurse call
            if (head.Count > 0)
            {
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    int i = tail[loopTail];

                    // The child gets its own copy of the surviving candidates, minus itself.
                    newTail = new List <int>(tail);
                    newTail.RemoveAt(loopTail);
                    newHead = new Itemset(head);
                    newHead.AddItem(tail[loopTail]);
                    newHead.support = newSupport[loopTail];
                    newHead.SetTransactions((Utils.FastSparseBitArray)newSupportVectorBitMask[loopTail]);
                    newHead.SetMistakes((MistakesBitMask)newBitMask[loopTail]);
                    newHead.SetParent(head);

                    RecurseMining(newHead, newTail, minSupport, minLength, maxLength, maxMistakes, mineResult);

                    // The pattern is reported after its own extensions have been explored.
                    if (newHead.Count >= minLength)
                    {
                        mineResult.Add(newHead);

                        /*
                         * System.IO.FileStream fs = new System.IO.FileStream("res.txt",
                         *                                                              System.IO.FileMode.Append);
                         * System.IO.StreamWriter tw = new System.IO.StreamWriter(fs);
                         * tw.WriteLine(newHead.ToString());
                         * tw.Close();
                         * fs.Close();
                         */
                    }
                    // Clear the references held by this node once it has been processed
                    // (note this also happens for nodes just passed to mineResult.Add).
                    newHead.SetParent(null);
                    newHead.SetMistakes(null);
                    newHead.SetTransactions(null);
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Entry point of the mining run: builds the list of all column indices of the
        /// dataset and starts one recursive search per column, using that column as a
        /// single-item head and all other columns as the candidate tail.
        /// </summary>
        /// <param name="minSupport">Minimum support, forwarded to the recursive search.</param>
        /// <param name="minLength">Minimum pattern length, forwarded to the recursive search.</param>
        /// <param name="maxLength">Maximum pattern length, forwarded to the recursive search.</param>
        /// <param name="maxMistakes">Allowed number of mistakes, forwarded to the recursive search.</param>
        /// <param name="mineResult">Collector that receives the patterns found.</param>
        override public void Mine(int minSupport, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            // Every column index of the dataset, in ascending order.
            List <int> allColumns = new List <int>();
            for (int column = 0; column < _ds.GetColumnCount(); column++)
            {
                allColumns.Add(column);
            }

            for (int index = 0; index < allColumns.Count; index++)
            {
                // Candidates for this seed: every column except the seed itself.
                List <int> remaining = new List <int>(allColumns);
                remaining.RemoveAt(index);

                // Seed pattern holding just the current column.
                Itemset seed = new Itemset();
                seed.AddItem(allColumns[index]);

                RecurseMining(seed, remaining, minSupport, minLength, maxLength, maxMistakes, mineResult);
            }
        }
Пример #14
0
        /// <summary>
        /// Depth-first extension step that reuses a single <paramref name="head"/> instance.
        /// With an empty head it starts one search per item of <paramref name="tail"/>.
        /// Otherwise it computes a transaction bit vector for each candidate, removes from
        /// <paramref name="tail"/> (in place) those whose frequency is below
        /// <paramref name="support"/>, then extends <paramref name="head"/> with each survivor,
        /// recurses, reports the pattern and restores <paramref name="head"/>.
        /// </summary>
        /// <param name="head">Current pattern prefix; modified during the call and restored before returning.</param>
        /// <param name="tail">Candidate items. This list is modified: rejected candidates are removed.</param>
        /// <param name="support">Minimum frequency a candidate's bit vector must reach.</param>
        /// <param name="minLength">Patterns with at least this many items are added to <paramref name="mineResult"/>.</param>
        /// <param name="maxLength">No extension is attempted once <paramref name="head"/> has this many items.</param>
        /// <param name="mineResult">Collector that receives the qualifying patterns.</param>
        void RecurseMining(ItemsetBasic head, IntList tail, int support, int minLength, int maxLength, MineResults mineResult)
        {
            // Length cap reached: nothing more to extend.
            if (head.Count >= maxLength)
            {
                return;
            }

            // Even using every remaining candidate the pattern could not reach minLength.
            if (head.Count + tail.Count < minLength)
            {
                return;
            }

            // Bit vectors of the surviving candidates. When head is non-empty, index k here
            // corresponds to tail[k] after the loop, because a candidate is either appended
            // here or removed from 'tail'.
            List <FastSparseBitArray> bitArrays = new List <FastSparseBitArray>(tail.Count);

            for (int loopTail = 0; loopTail < tail.Count; loopTail++)
            {
                int i = tail[loopTail];

                if (head.Count == 0)
                {
                    // Root level: start a search from the single item 'i' with every other
                    // item as candidate. Nothing is pruned or reported at this level.
                    IntList newTail = (IntList)tail.Clone(); // new IntList(tail);
                    newTail.RemoveAt(loopTail);
                    ItemsetBasic newHead = new ItemsetBasic(tail.Count);
                    newHead.AddItem(i);
                    RecurseMining(newHead, newTail, support, minLength, maxLength, mineResult);

                    IntListPool.Instance.Release(newTail);
                }
                else
                {
                    FastSparseBitArray bitArray = null;

                    if (_dualComp != null)
                    {
                        // Pair vector looked up from the precomputed pair structure.
                        ISimpleItemset dualItemset = _dualComp.GetItemset(head.GetLastItem(), i);
                        if (dualItemset == null)
                        {
                            // No entry for this pair: drop the candidate and revisit this index.
                            tail.RemoveAt(loopTail);
                            loopTail--;
                            continue;
                        }

                        bitArray = dualItemset.GetTransactions();
                    }
                    else
                    {
                        // No precomputed pairs: build the pair vector into a pooled array.
                        bitArray = FastSparseBitArrayPool.Instance.Allocate();
                        _ds.BuildBitVector(head.GetLastItem(), i, bitArray);
                    }

                    if (head.Count > 1)
                    {
                        // NOTE(review): if And() returns a new instance, the array allocated
                        // from the pool just above (when _dualComp is null) is no longer
                        // referenced and is not released back to the pool - confirm.
                        bitArray = bitArray.And(head.GetTransactions());
                    }

                    bitArray.frequency = bitArray.CountElements();
                    if (bitArray.frequency >= support)
                    {
                        bitArrays.Add(bitArray);
                    }
                    else
                    {
                        // Don't release bit vectors from O2 matrix, or there is no
                        // O2 matrix
                        if ((head.Count > 1) || (_dualComp == null))
                        {
                            FastSparseBitArrayPool.Instance.Release(bitArray);
                        }

                        // Drop the candidate and step back so the shifted element is visited.
                        tail.RemoveAt(loopTail);
                        loopTail--;
                    }
                }
            }

            if (head.Count > 0)
            {
                for (int loopTail = 0; loopTail < tail.Count; loopTail++)
                {
                    int i = tail[loopTail];

                    // The child gets its own copy of the surviving candidates, minus itself.
                    IntList newTail = (IntList)tail.Clone(); // new IntList(tail);
                    newTail.RemoveAt(loopTail);

                    // Create 'head' restore point
                    FastSparseBitArray restoreBitArray = head.GetTransactions();
                    int restoreSupport = head.support;

                    // Extend 'head' in place with the candidate and its bit vector.
                    head.AddItem(i);
                    FastSparseBitArray bitArray = bitArrays[loopTail];
                    head.SetTransactions(bitArray);
                    head.support = bitArray.frequency;
                    RecurseMining(head, newTail, support, minLength, maxLength, mineResult);

                    IntListPool.Instance.Release(newTail);

                    // Reported after the recursion, while 'head' still holds the extended pattern.
                    // NOTE(review): 'head' is modified again right below, so Add() presumably
                    // copies what it needs - verify in MineResults.
                    if (head.Count >= minLength)
                    {
                        mineResult.Add(head);
                    }

                    head.SetTransactions(null);
                    // 'head' still contains the added item here, so this tests the extended length.
                    // NOTE(review): unlike the pruning branch above, this condition does not
                    // consider '_dualComp == null'; pooled vectors built for a one-item head
                    // appear not to be released here - confirm whether that is intended.
                    if (head.Count > 2)
                    {
                        FastSparseBitArrayPool.Instance.Release(bitArray);
                    }

                    // Restore 'head'
                    head.RemoveLastItem();
                    head.SetTransactions(restoreBitArray);
                    head.support = restoreSupport;
                }
            }
        }
Пример #15
0
        /// <summary>
        /// Entry point of the mining run: clears the shared <c>IntListPool</c>, then starts the
        /// recursive search from an empty head with every column index of the dataset as
        /// a candidate.
        /// </summary>
        /// <param name="support">Minimum support, forwarded to the recursive search.</param>
        /// <param name="minLength">Minimum pattern length, forwarded to the recursive search.</param>
        /// <param name="maxLength">Maximum pattern length, forwarded to the recursive search.</param>
        /// <param name="maxMistakes">Not used by this implementation.</param>
        /// <param name="mineResult">Collector that receives the patterns found.</param>
        override public void Mine(int support, int minLength, int maxLength, int maxMistakes, MineResults mineResult)
        {
            IntListPool.Instance.Clear();

            // Empty starting pattern, sized by the number of columns.
            ItemsetBasic emptyHead = new ItemsetBasic(_ds.GetColumnCount());

            // All column indices, in ascending order, as initial candidates.
            IntList allColumns = new IntList(_ds.GetColumnCount());
            int column = 0;
            while (column < _ds.GetColumnCount())
            {
                allColumns.Add(column);
                column++;
            }

            RecurseMining(emptyHead, allColumns, support, minLength, maxLength, mineResult);
        }