예제 #1
    /// <summary>
    /// Asks <c>_algorithm</c> for the frequent sequences at the current <c>_minSupport</c>
    /// and awaits <paramref name="resultDelegate"/> with the sequence list, the second
    /// tuple item returned by the algorithm, and <c>CurrentIteration</c>.
    /// </summary>
    /// <param name="batch">Current batch; in the visible code only its <c>Count</c> is inspected.</param>
    /// <param name="resultDelegate">Callback awaited with the results at the end of the method.</param>
    /// <param name="minSupport">
    /// Relative support used only for the purge threshold below. The mining call itself
    /// reads the field <c>_minSupport</c>, not this parameter.
    /// </param>
    /// <param name="purge">
    /// When true, enables the top-k update of <c>_minSupport</c> and the reverse scan
    /// over the result list.
    /// </param>
    // NOTE(review): this excerpt has lost its brace-only lines and apparently some
    // statements; the scoping described in the comments below is inferred from
    // indentation and should be confirmed against the full file.
    private async Task processBatch(
        List <Sequence> batch,
        FrequentSequencesBatchResult resultDelegate,
        double minSupport,
        bool purge)
        // Snapshot the elapsed milliseconds of the block-processing timer.
        PrevBlockFileReadingTime = _blockProcessingTimer.ElapsedMilliseconds;


        // NOTE(review): the body of this condition is not visible in the excerpt;
        // presumably the batch is handed to _algorithm here. TODO confirm.
        if (batch.Count > 0)

        // Mines with the field _minSupport (which may have been lowered/raised by the
        // top-k logic), not with the minSupport parameter.
        Tuple <List <Sequence>, double> results = _algorithm.FrequentSequences(_minSupport);

        List <Sequence> sequences = results.Item1;

        // Top-k mode: derive a new relative _minSupport from the support of the element
        // at index _k divided by the number of transactions processed so far.
        // NOTE(review): the guard allows sequences.Count == _k, in which case index _k
        // is one past the end of the list and the indexer throws. Looks like an
        // off-by-one (either `> _k` or `[_k - 1]`); confirm the intended element, and
        // that the list is ordered by support, which is not shown here.
        if (_mineTopK && sequences.Count >= _k && purge)
            _minSupport = sequences[_k].Support / (double)NumTransactionsProcessed;

        // Walk the list from the last index down to 0 and test each sequence's Support
        // against floor(NumTransactionsProcessed * minSupport).
        // NOTE(review): the statement executed when the test succeeds is missing from
        // the excerpt; the reverse iteration suggests an in-place removal. TODO confirm.
        if (purge)
            for (var i = sequences.Count - 1; i >= 0; i--)
                if (sequences[i].Support < Math.Floor(NumTransactionsProcessed * minSupport))

        // Hand the (possibly purged) results to the caller-supplied callback.
        await resultDelegate(sequences, results.Item2, CurrentIteration);

예제 #2
    /// <summary>
    /// Reads a sequence file line by line, builds <c>Sequence</c> objects, first feeds
    /// batches into the algorithm's sample window and then, once the sample error is
    /// small enough, processes batches through <c>processBatch</c>.
    /// </summary>
    /// <param name="filepath">Path of the input file, opened with <c>File.OpenRead</c>.</param>
    /// <param name="minSup">Initial minimum support; stored in <c>_minSupport</c> and passed to <c>ProSecCo</c>.</param>
    /// <param name="k">Stored in <c>_k</c>; used for the top-k support lookup.</param>
    /// <param name="mineTopK">Stored in <c>_mineTopK</c>; enables the top-k support lookup.</param>
    /// <param name="sampleSize">Passed to <c>ProSecCo</c> and used as the modulus that decides when a batch is handled.</param>
    /// <param name="dbSize">Passed to <c>ProSecCo</c>.</param>
    /// <param name="errorTolerance">Passed to <c>ProSecCo</c>.</param>
    /// <param name="resultDelegate">Forwarded unchanged to every <c>processBatch</c> call.</param>
    /// <param name="killAfter">
    /// Time budget in milliseconds, compared against a stopwatch started on entry.
    /// In the visible code it is only checked in the post-sampling branch.
    /// </param>
    // NOTE(review): this excerpt has lost its brace-only lines and apparently some
    // statements (nothing visible adds to `tr` or `batch`, or clears `batch`); the
    // scoping described in the comments below is inferred from indentation.
    public async Task ProcessCSVFile(
        string filepath,
        double minSup,
        int k,
        bool mineTopK,
        int sampleSize,
        int dbSize,
        double errorTolerance,
        FrequentSequencesBatchResult resultDelegate,
        long killAfter)
        // Started immediately so the budget covers setup and the sampling phase too.
        Stopwatch killTimer = Stopwatch.StartNew();

        this._minSupport = minSup;
        this._k          = k;
        this._mineTopK   = mineTopK;

        _algorithm = new ProSecCo(minSup, mineTopK, errorTolerance, sampleSize, dbSize);

        // current batch to process
        List <Sequence> batch = new List <Sequence>();

        // false while batches are still being fed to the sample window
        var didProcessSampleBlocks = false;

        // open the input file
        using (var reader = new StreamReader(File.OpenRead(filepath)))

            while (!reader.EndOfStream)
                // read the current line
                string line = reader.ReadLine();

                // split the line on the " -1 " delimiter (not on commas, despite the
                // method name)
                string [] transactions = line.Split(new string[] { " -1 " }, StringSplitOptions.None);

                // The loop stops before the last split piece, so that piece is never
                // turned into a Transaction.
                // NOTE(review): presumably the last piece is an end-of-sequence marker;
                // confirm against the input format.
                List <Transaction> tr = new List <Transaction>(transactions.Length);
                for (int i = 0; i < transactions.Length - 1; i++)
                    string []   items = transactions[i].Split(' ');
                    Transaction trans = new Transaction(items);

                Sequence sequence = new Sequence(tr);

                if (!didProcessSampleBlocks)
                    // Act when the batch size is a multiple of sampleSize.
                    // NOTE(review): this test is also true for an empty batch (the
                    // else-branch below guards with Count > 0, this one does not) and
                    // throws on sampleSize == 0; confirm callers and the missing
                    // batch-population code make that unreachable.
                    if (batch.Count % sampleSize == 0)
                        NumTransactionsProcessed += batch.Count;

                        // update the sample and get back
                        // the error for the sample
                        var sampleError = _algorithm.AddSequenceBatchToSampleWindow(batch);

                        // In top-k mode, fetch the minimum support for _k once only.
                        if (!_didGetTopKSupport && _mineTopK)
                            _minSupport        = _algorithm.GetTopKMinSupport(_k);
                            _didGetTopKSupport = true;


                        Console.WriteLine("[DEBUG]: Min support=" + _minSupport + " Error=" + sampleError);

                        // Leave the sampling phase once the sample error is at most
                        // 75% of the current minimum support.
                        if (sampleError <= _minSupport * 0.75)
                            Console.WriteLine("[DEBUG]: Starting progressive algorithm.");
                            didProcessSampleBlocks = true;
                            await processBatch(batch, resultDelegate, _minSupport, false);
                // Post-sampling phase: handle every non-empty batch whose size is a
                // multiple of sampleSize.
                else if (didProcessSampleBlocks && batch.Count > 0 && batch.Count % sampleSize == 0)
                    // Stop without processing this batch once the time budget is used up.
                    if (killAfter <= killTimer.ElapsedMilliseconds)
                        return; // kill the mining
                    NumTransactionsProcessed += batch.Count;

                    await processBatch(batch, resultDelegate, _minSupport, false);

        // Count whatever is left in the batch after the read loop.
        NumTransactionsProcessed += batch.Count;
        if (!didProcessSampleBlocks)

            // handle small data set case
            didProcessSampleBlocks = true;

            // NOTE(review): two identical purge calls follow. Presumably they were the
            // two branches of an if/else whose `else` and braces were lost in this
            // excerpt; as written the final batch would be processed twice. Confirm.
            await processBatch(batch, resultDelegate, _minSupport, true);
            await processBatch(batch, resultDelegate, _minSupport, true);