예제 #1
0
        /// <summary>
        /// Mines the file named in <paramref name="request"/> with a fresh BatchMiner and writes
        /// one Batch reply to <paramref name="responseStream"/> per callback invocation, followed
        /// by a single Complete reply once ProcessCSVFile has finished.
        /// </summary>
        /// <param name="request">Supplies the file, support, k, top-k flag, sample size, DB size and error tolerance.</param>
        /// <param name="responseStream">Stream that receives the Batch replies and the final Complete reply.</param>
        /// <param name="context">Call context; not read by this method.</param>
        private async Task runBatchMiner(BenchmarkRequest request, IServerStreamWriter<BenchmarkReply> responseStream, ServerCallContext context)
        {
            Console.WriteLine($"run batch miner {request.UseTopK}");

            await Task.Run(async () =>
            {
                var batchMiner  = new BatchMiner();
                var blockTimer  = Stopwatch.StartNew();
                long runtimeSum = 0;

                // NOTE(review): a "numBlocks = ceil(DBSize / SampleSize)" computation was
                // commented out here in the past; DBSize is forwarded as-is.
                await batchMiner.ProcessCSVFile(
                    request.File,
                    request.Support,
                    request.K,
                    request.UseTopK,
                    request.SampleSize,
                    request.DBSize,
                    request.ErrorTolerance,
                    async (List<Sequence> frequentSequencePatterns, double error, int iteration) =>
                    {
                        // Pause the timer so building and sending the reply is not
                        // counted towards any block's runtime.
                        blockTimer.Stop();
                        long blockMillis = blockTimer.ElapsedMilliseconds;
                        runtimeSum += blockMillis;

                        var batchReply = new BenchmarkReply
                        {
                            NrProcessedRecords                  = batchMiner.NumTransactionsProcessed,
                            ReplyType                           = ReplyType.Batch,
                            Iteration                           = iteration,
                            Error                               = error,
                            BatchRuntimeInMillis                = blockMillis,
                            TotalRuntimeInMillis                = runtimeSum,
                            PrevBlockFileReadingTime            = batchMiner.PrevBlockFileReadingTime,
                            PrevBlockPreProcessingRuntime       = batchMiner.Algorithm.PrevBlockPreProcessingRuntime,
                            PrevBlockPrefixSpanRuntime          = batchMiner.Algorithm.PrevBlockPrefixSpanRuntime,
                            PrevBlockSubsequenceMatchingRuntime = batchMiner.Algorithm.PrevBlockSubsequenceMatchingRuntime,
                            SequencesInJson                     = JsonConvert.SerializeObject(frequentSequencePatterns, _jsonSettings)
                        };

                        await responseStream.WriteAsync(batchReply);

                        // Start timing the next block.
                        blockTimer.Restart();
                    },
                    long.MaxValue);

                var completionReply = new BenchmarkReply
                {
                    ReplyType            = ReplyType.Complete,
                    TotalRuntimeInMillis = runtimeSum
                };
                await responseStream.WriteAsync(completionReply);
            });
        }
예제 #2
0
        /// <summary>
        /// Mines <paramref name="filepath"/> with a fresh BatchMiner, hands every block result to
        /// onBatchResults, and prints a final summary to the console.
        /// </summary>
        /// <param name="filepath">File passed to ProcessCSVFile.</param>
        /// <param name="minSupport">Minimum support; printed in the summary when <paramref name="mineTopK"/> is false.</param>
        /// <param name="k">Top-k value; printed in the summary when <paramref name="mineTopK"/> is true.</param>
        /// <param name="mineTopK">Selects which of the two settings above the summary reports.</param>
        /// <param name="sampleSize">Forwarded unchanged to ProcessCSVFile.</param>
        /// <param name="dbSize">Forwarded unchanged to ProcessCSVFile.</param>
        /// <param name="killTime">Forwarded unchanged as the last ProcessCSVFile argument (presumably a time limit; confirm against BatchMiner).</param>
        /// <returns>The value of totalTimeElapsed after mining has finished.</returns>
        private static async Task<long> processFile(string filepath, double minSupport, int k, bool mineTopK, int sampleSize, int dbSize, long killTime)
        {
            BatchMiner batchPatternMiner = new BatchMiner();

            batchStopwatch.Restart();
            totalTimeElapsed = 0;

            await batchPatternMiner.ProcessCSVFile(
                filepath,
                minSupport,
                k,
                mineTopK,
                sampleSize,
                dbSize,
                errorTolerance,
                async (List<Sequence> frequentSequencePatterns, double error, int iteration) =>
                {
                    // Await the handler: dropping its Task would silently swallow any
                    // exception raised while sorting, printing or accumulating the stats.
                    await onBatchResults(batchPatternMiner, frequentSequencePatterns, error, iteration);
                },
                killTime);

            batchStopwatch.Stop();

            // print out final result stats
            Console.WriteLine("********************************************************************************");
            Console.WriteLine("ProSecCo completed.");
            Console.WriteLine("Number of frequent sequences:     " + numSequences + " sequences");
            if (mineTopK)
            {
                Console.WriteLine("Top-k:                  " + k);
            }
            else
            {
                Console.WriteLine("Minimum support:        " + minSupport);
            }
            Console.WriteLine("Total runtime:          " + totalTimeElapsed + "ms");
            Console.WriteLine("Sum of runtimes:        " + sumComponentTimes + "ms");

            Console.WriteLine("********************************************************************************");

            return totalTimeElapsed;
        }
예제 #3
0
    /// <summary>
    /// Runs one mining pass over <paramref name="filepath"/> using onBatchResultsNoOutput as the
    /// per-block callback and no kill time (long.MaxValue).
    /// </summary>
    /// <returns>The value of totalTimeElapsed after the pass has finished.</returns>
    private static async Task<long> processCSVFileProgressive(string filepath, double minSupport, int k, bool mineTopK, int sampleSize, int numBlocks)
    {
        var progressiveMiner = new BatchMiner();

        // Reset the shared timing state before the run starts.
        totalTimeElapsed = 0;
        batchStopwatch.Restart();

        var silentCallback = new FrequentSequencesBatchResult(onBatchResultsNoOutput);
        await progressiveMiner.ProcessCSVFile(
            filepath, minSupport, k, mineTopK,
            sampleSize, numBlocks, ERROR_TOLERANCE,
            silentCallback,
            long.MaxValue);

        batchStopwatch.Stop();
        return totalTimeElapsed;
    }
예제 #4
0
        /// <summary>
        /// Per-block callback: pauses the shared stopwatch, adds the block's elapsed time and the
        /// miner's component runtimes to the running totals, prints the sorted sequences and a
        /// runtime breakdown, then restarts the stopwatch for the next block.
        /// </summary>
        /// <param name="batchMiner">Miner whose PrevBlock* runtimes are reported.</param>
        /// <param name="frequentSequences">Result list; sorted in place with Sequence.SequenceSorter.</param>
        /// <param name="error">Error value printed for this block.</param>
        /// <param name="iteration">Block number printed for this block.</param>
        private static async Task onBatchResults(
            BatchMiner batchMiner,
            List<Sequence> frequentSequences,
            double error,
            int iteration)
        {
            const string rule = "-----------------------------------------------------------";

            numSequences = frequentSequences.Count;

            // Stop timing first so the console output below is not billed to the block.
            batchStopwatch.Stop();
            var blockMillis = batchStopwatch.ElapsedMilliseconds;
            totalTimeElapsed += blockMillis;

            // sort and print
            frequentSequences.Sort(Sequence.SequenceSorter);
            frequentSequences.ForEach(Console.WriteLine);

            var fileReadingMillis   = batchMiner.PrevBlockFileReadingTime;
            var preProcessingMillis = batchMiner.Algorithm.PrevBlockPreProcessingRuntime;
            var prefixSpanMillis    = batchMiner.Algorithm.PrevBlockPrefixSpanRuntime;
            var matchingMillis      = batchMiner.Algorithm.PrevBlockSubsequenceMatchingRuntime;

            sumComponentTimes += fileReadingMillis;
            sumComponentTimes += preProcessingMillis;
            sumComponentTimes += prefixSpanMillis;
            sumComponentTimes += matchingMillis;

            Console.WriteLine(rule);
            Console.WriteLine($"Block {iteration}");
            Console.WriteLine($"Number of frequent sequences:     {frequentSequences.Count} sequences");
            Console.WriteLine($"Error:  {error}");
            Console.WriteLine($"Processing time: {blockMillis}ms");
            Console.WriteLine("Runtime breakdown: ");
            Console.WriteLine($" -File reading:         {fileReadingMillis}ms");
            Console.WriteLine($" -Pre-processing:       {preProcessingMillis}ms");
            Console.WriteLine($" -PrefixSpan:           {prefixSpanMillis}ms");
            Console.WriteLine($" -Subsequence matching: {matchingMillis}ms");
            Console.WriteLine(rule + "\n");

            batchStopwatch.Restart();
        }
예제 #5
0
    // TESTS
    /// <summary>
    /// Runs NUM_TRIALS mining passes over FILE_PATH, compares the results of every block with
    /// <paramref name="expectedResults"/> via getSupportDifference, and prints a summary plus
    /// five sampled series (processing times, median/max support difference, false positives,
    /// false negatives).
    /// </summary>
    /// <param name="expectedResults">Reference results passed to getSupportDifference.</param>
    /// <param name="minSupport">Minimum support passed to the miner and used to derive minCount.</param>
    /// <param name="k">Top-k value passed to the miner.</param>
    /// <param name="mineTopK">Top-k flag passed to the miner.</param>
    /// <param name="sampleSize">Sample size passed to the miner; also used to derive the block count.</param>
    /// <param name="datasetSize">Dataset size used for the block count, minCount and getSupportDifference.</param>
    private static async Task testSupportDifference(List <Sequence> expectedResults, double minSupport, int k, bool mineTopK, int sampleSize, int datasetSize)
    {
        batchProcessingTimes = new List <double>();
        batchErrors          = new List <double>();

        Console.WriteLine("----------------------------------------------------");
        Console.WriteLine("MIN SUPPORT: " + minSupport);
        Console.WriteLine("----------------------------------------------------");

        BatchMiner      miner             = new BatchMiner();
        List <Sequence> results           = new List <Sequence>();
        List <double>   supportDiffMax    = new List <double>();
        List <double>   supportDiffMedian = new List <double>();
        List <double>   falseNegatives    = new List <double>();
        List <double>   falsePositives    = new List <double>();

        int minCount = 0;

        for (var i = 0; i < NUM_TRIALS; i++)
        {
            currentTrialNumber = i;
            currentBatchNumber = 0;

            batchStopwatch.Restart();

            // test difference between PrefixSpan and PrefixSpan supports
            miner = new BatchMiner();
            await miner.ProcessCSVFile(
                FILE_PATH,
                minSupport,
                k,
                mineTopK,
                sampleSize,
                (int)Math.Ceiling(datasetSize / (double)sampleSize),
                ERROR_TOLERANCE,
                async (List <Sequence> frequentSequences, double error, int iteration) => {
                await onBatchResultsNoOutput(frequentSequences, error, iteration);
                results = frequentSequences;

                minCount = (int)Math.Floor(datasetSize *minSupport);
                Tuple <double, double, int, int> supportDiffs = getSupportDifference(expectedResults, results, datasetSize, miner.NumTransactionsProcessed, minCount);

                // The first trial creates one slot per block; later trials add their
                // values onto the slot of the same block number.
                // NOTE(review): this indexes by currentBatchNumber, so it assumes no later
                // trial reports more blocks than the first one did - confirm.
                if (i == 0)
                {
                    batchErrors.Add(error);
                    supportDiffMedian.Add(supportDiffs.Item1);
                    supportDiffMax.Add(supportDiffs.Item2);
                    falsePositives.Add(supportDiffs.Item3);
                    falseNegatives.Add(supportDiffs.Item4);
                }
                else
                {
                    batchErrors[currentBatchNumber]       += (error);
                    supportDiffMedian[currentBatchNumber] += (supportDiffs.Item1);
                    supportDiffMax[currentBatchNumber]    += (supportDiffs.Item2);
                    falsePositives[currentBatchNumber]    += (supportDiffs.Item3);
                    falseNegatives[currentBatchNumber]    += (supportDiffs.Item4);
                }

                currentBatchNumber++;
            },
                killAfter : long.MaxValue
                );
        }

        minCount = (int)Math.Floor(miner.NumTransactionsProcessed * minSupport);


        Console.WriteLine("Min support:               " + minSupport);

        Tuple <double, double, int, int> result = getSupportDifference(expectedResults, results, datasetSize, miner.NumTransactionsProcessed, minCount);

        Console.WriteLine("Median support diff:       " + result.Item1 / (double)miner.NumTransactionsProcessed);
        Console.WriteLine("Max support diff:          " + result.Item2 / (double)miner.NumTransactionsProcessed);
        Console.WriteLine("Total Results:             " + results.Count);
        Console.WriteLine("False positives:           " + result.Item3);
        Console.WriteLine("False negatives:           " + result.Item4);
        Console.WriteLine("Sample size:     " + sampleSize);
        Console.WriteLine("----------------------------------------------------");


        // Every printed point is the sum of one group of values divided by this.
        var divBy = (double)(NUM_TRIALS * STAT_SAMPLE_INTERVAL);

        Console.Write("PROCESSING TIMES: \n[");
        writeSampledSeries(batchProcessingTimes, divBy, true);
        Console.WriteLine("]");

        Console.Write("ERRORS (median): \n[");
        writeSampledSeries(supportDiffMedian, divBy, false);
        Console.WriteLine("]\n");

        Console.Write("ERRORS (max): \n[");
        writeSampledSeries(supportDiffMax, divBy, false);
        Console.WriteLine("]\n\n");

        Console.Write("FALSE-POSITIVES: \n[");
        writeSampledSeries(falsePositives, divBy, false);
        Console.WriteLine("]\n\n");

        Console.Write("FALSE-NEGATIVES: \n[");
        writeSampledSeries(falseNegatives, divBy, false);
        Console.WriteLine("]\n\n");
    }

    /// <summary>
    /// Writes one "(index, value)" pair to the console for every complete group of
    /// STAT_SAMPLE_INTERVAL consecutive entries of <paramref name="values"/>, where value is the
    /// group's sum divided by <paramref name="divBy"/>. A trailing incomplete group is not written.
    /// </summary>
    /// <param name="values">Series to sample.</param>
    /// <param name="divBy">Divisor applied to each group's sum.</param>
    /// <param name="roundValues">When true, the quotient is passed through Math.Round before printing.</param>
    private static void writeSampledSeries(IEnumerable <double> values, double divBy, bool roundValues)
    {
        int    index    = 0;
        int    inGroup  = 0;
        double groupSum = 0.0;

        foreach (var value in values)
        {
            groupSum += value;

            // Count the value before testing, so that every group - including the
            // first - holds exactly STAT_SAMPLE_INTERVAL entries.
            inGroup++;

            if (inGroup == STAT_SAMPLE_INTERVAL)
            {
                double scaled = groupSum / divBy;
                Console.Write("(" + index + ", " + (roundValues ? Math.Round(scaled) : scaled) + ")");
                index++;
                inGroup  = 0;
                groupSum = 0.0;
            }
        }
    }