/// <summary>
/// Runs a <c>BatchMiner</c> over the file named in <paramref name="request"/> and streams
/// one <c>ReplyType.Batch</c> reply per reported batch, followed by a single
/// <c>ReplyType.Complete</c> reply carrying the accumulated runtime.
/// </summary>
/// <param name="request">Mining parameters (file, support, k, top-k flag, sample size, DB size, error tolerance).</param>
/// <param name="responseStream">Stream the replies are written to.</param>
/// <param name="context">Call context; not read by this method.</param>
private async Task runBatchMiner(BenchmarkRequest request, IServerStreamWriter<BenchmarkReply> responseStream, ServerCallContext context)
{
    Console.WriteLine($"run batch miner {request.UseTopK}");

    await Task.Run(async () =>
    {
        var batchMiner = new BatchMiner();
        long accumulatedMillis = 0;

        // The timer is stopped while a reply is serialised and written, so the
        // reported runtimes exclude the time spent reporting.
        var batchTimer = Stopwatch.StartNew();

        await batchMiner.ProcessCSVFile(
            request.File,
            request.Support,
            request.K,
            request.UseTopK,
            request.SampleSize,
            request.DBSize,
            request.ErrorTolerance,
            async (List<Sequence> frequentSequencePatterns, double error, int iteration) =>
            {
                batchTimer.Stop();
                long batchMillis = batchTimer.ElapsedMilliseconds;
                accumulatedMillis += batchMillis;

                var batchReply = new BenchmarkReply
                {
                    NrProcessedRecords = batchMiner.NumTransactionsProcessed,
                    ReplyType = ReplyType.Batch,
                    Iteration = iteration,
                    Error = error,
                    BatchRuntimeInMillis = batchMillis,
                    TotalRuntimeInMillis = accumulatedMillis,
                    PrevBlockFileReadingTime = batchMiner.PrevBlockFileReadingTime,
                    PrevBlockPreProcessingRuntime = batchMiner.Algorithm.PrevBlockPreProcessingRuntime,
                    PrevBlockPrefixSpanRuntime = batchMiner.Algorithm.PrevBlockPrefixSpanRuntime,
                    PrevBlockSubsequenceMatchingRuntime = batchMiner.Algorithm.PrevBlockSubsequenceMatchingRuntime
                };
                batchReply.SequencesInJson = JsonConvert.SerializeObject(frequentSequencePatterns, _jsonSettings);

                await responseStream.WriteAsync(batchReply);

                // Resume timing only after the reply has been handed to the stream.
                batchTimer.Restart();
            },
            long.MaxValue
        );

        var completionReply = new BenchmarkReply
        {
            ReplyType = ReplyType.Complete,
            TotalRuntimeInMillis = accumulatedMillis
        };
        await responseStream.WriteAsync(completionReply);
    });
}
/// <summary>
/// Mines <paramref name="filepath"/> with a fresh <c>BatchMiner</c>, hands every batch
/// result to <c>onBatchResults</c>, and prints a summary banner once mining has finished.
/// </summary>
/// <param name="filepath">Path of the CSV file to mine.</param>
/// <param name="minSupport">Minimum support threshold; printed in the summary when <paramref name="mineTopK"/> is false.</param>
/// <param name="k">Top-k value; printed in the summary when <paramref name="mineTopK"/> is true.</param>
/// <param name="mineTopK">Forwarded to the miner and selects which threshold line is printed.</param>
/// <param name="sampleSize">Forwarded to the miner unchanged.</param>
/// <param name="dbSize">Forwarded to the miner unchanged.</param>
/// <param name="killTime">Forwarded to the miner as its last argument.</param>
/// <returns>The value of <c>totalTimeElapsed</c> when the run ends.</returns>
private static async Task<long> processFile(string filepath, double minSupport, int k, bool mineTopK, int sampleSize, int dbSize, long killTime)
{
    const string banner = "********************************************************************************";

    var patternMiner = new BatchMiner();
    batchStopwatch.Restart();
    totalTimeElapsed = 0;

    await patternMiner.ProcessCSVFile(
        filepath,
        minSupport,
        k,
        mineTopK,
        sampleSize,
        dbSize,
        errorTolerance,
        async (List<Sequence> patterns, double error, int iteration) =>
        {
            // NOTE(review): the result of onBatchResults is not awaited here; if it
            // returns a Task, its completion and failures go unobserved - confirm.
            onBatchResults(patternMiner, patterns, error, iteration);
        },
        killTime);

    batchStopwatch.Stop();

    // Final result statistics.
    Console.WriteLine(banner);
    Console.WriteLine("ProSecCo completed.");
    Console.WriteLine("Number of frequent sequences: " + numSequences + " sequences");

    string thresholdLine = mineTopK
        ? "Top-k: " + k
        : "Minimum support: " + minSupport;
    Console.WriteLine(thresholdLine);

    Console.WriteLine("Total runtime: " + totalTimeElapsed + "ms");
    Console.WriteLine("Sum of runtimes: " + sumComponentTimes + "ms");
    Console.WriteLine(banner);

    return totalTimeElapsed;
}
/// <summary>
/// Mines <paramref name="filepath"/> with a fresh <c>BatchMiner</c>, routing batch results
/// to <c>onBatchResultsNoOutput</c>, and returns the elapsed-time counter.
/// </summary>
/// <param name="filepath">Path of the CSV file to mine.</param>
/// <param name="minSupport">Forwarded to the miner unchanged.</param>
/// <param name="k">Forwarded to the miner unchanged.</param>
/// <param name="mineTopK">Forwarded to the miner unchanged.</param>
/// <param name="sampleSize">Forwarded to the miner unchanged.</param>
/// <param name="numBlocks">
/// Forwarded as the miner's sixth argument.
/// NOTE(review): other callers in this file pass a database size in that position - confirm
/// which quantity the miner expects.
/// </param>
/// <returns>The value of <c>totalTimeElapsed</c> when the run ends.</returns>
private static async Task<long> processCSVFileProgressive(string filepath, double minSupport, int k, bool mineTopK, int sampleSize, int numBlocks)
{
    var progressiveMiner = new BatchMiner();
    var silentHandler = new FrequentSequencesBatchResult(onBatchResultsNoOutput);

    batchStopwatch.Restart();
    totalTimeElapsed = 0;

    // long.MaxValue for the kill time: the run is never cut short by the timeout.
    await progressiveMiner.ProcessCSVFile(
        filepath,
        minSupport,
        k,
        mineTopK,
        sampleSize,
        numBlocks,
        ERROR_TOLERANCE,
        silentHandler,
        killAfter: long.MaxValue);

    batchStopwatch.Stop();
    return totalTimeElapsed;
}
// TESTS

/// <summary>
/// Mines <c>FILE_PATH</c> <c>NUM_TRIALS</c> times and, for every reported batch, compares
/// the mined sequences against <paramref name="expectedResults"/> via
/// <c>getSupportDifference</c>. Per-batch statistics are summed across trials and then
/// printed as averages over groups of <c>STAT_SAMPLE_INTERVAL</c> batches.
/// </summary>
/// <param name="expectedResults">Reference result set passed to <c>getSupportDifference</c>.</param>
/// <param name="minSupport">Minimum support threshold forwarded to the miner.</param>
/// <param name="k">Top-k value forwarded to the miner.</param>
/// <param name="mineTopK">Forwarded to the miner unchanged.</param>
/// <param name="sampleSize">Sample size forwarded to the miner; also used to derive the block count.</param>
/// <param name="datasetSize">Number of records in the data set.</param>
private static async Task testSupportDifference(List<Sequence> expectedResults, double minSupport, int k, bool mineTopK, int sampleSize, int datasetSize)
{
    batchProcessingTimes = new List<double>();
    batchErrors = new List<double>();

    Console.WriteLine("----------------------------------------------------");
    Console.WriteLine("MIN SUPPORT: " + minSupport);
    Console.WriteLine("----------------------------------------------------");

    BatchMiner miner = new BatchMiner();
    List<Sequence> results = new List<Sequence>();
    List<double> supportDiffMax = new List<double>();
    List<double> supportDiffMedian = new List<double>();
    List<double> falseNegatives = new List<double>();
    List<double> falsePositives = new List<double>();
    int minCount = 0;

    for (var i = 0; i < NUM_TRIALS; i++)
    {
        currentTrialNumber = i;
        currentBatchNumber = 0;
        batchStopwatch.Restart();

        // Every trial starts from a fresh miner.
        miner = new BatchMiner();
        await miner.ProcessCSVFile(
            FILE_PATH,
            minSupport,
            k,
            mineTopK,
            sampleSize,
            // NOTE(review): a block count is passed here, while other callers in this
            // file pass a database size in this position - confirm which is expected.
            (int)Math.Ceiling(datasetSize / (double)sampleSize),
            ERROR_TOLERANCE,
            async (List<Sequence> frequentSequences, double error, int iteration) =>
            {
                await onBatchResultsNoOutput(frequentSequences, error, iteration);
                results = frequentSequences;
                minCount = (int)Math.Floor(datasetSize * minSupport);
                Tuple<double, double, int, int> supportDiffs = getSupportDifference(expectedResults, results, datasetSize, miner.NumTransactionsProcessed, minCount);

                if (i == 0)
                {
                    // First trial: create one slot per batch.
                    batchErrors.Add(error);
                    supportDiffMedian.Add(supportDiffs.Item1);
                    supportDiffMax.Add(supportDiffs.Item2);
                    falsePositives.Add(supportDiffs.Item3);
                    falseNegatives.Add(supportDiffs.Item4);
                }
                else
                {
                    // Later trials: accumulate into the slot of the same batch number.
                    batchErrors[currentBatchNumber] += error;
                    supportDiffMedian[currentBatchNumber] += supportDiffs.Item1;
                    supportDiffMax[currentBatchNumber] += supportDiffs.Item2;
                    falsePositives[currentBatchNumber] += supportDiffs.Item3;
                    falseNegatives[currentBatchNumber] += supportDiffs.Item4;
                }

                currentBatchNumber++;
            },
            killAfter: long.MaxValue
        );
    }

    // Summary for the final state of the last trial.
    minCount = (int)Math.Floor(miner.NumTransactionsProcessed * minSupport);
    Console.WriteLine("Min support: " + minSupport);
    Tuple<double, double, int, int> result = getSupportDifference(expectedResults, results, datasetSize, miner.NumTransactionsProcessed, minCount);
    Console.WriteLine("Median support diff: " + result.Item1 / (double)miner.NumTransactionsProcessed);
    Console.WriteLine("Max support diff: " + result.Item2 / (double)miner.NumTransactionsProcessed);
    Console.WriteLine("Total Results: " + results.Count);
    Console.WriteLine("False positives: " + result.Item3);
    Console.WriteLine("False negatives: " + result.Item4);
    Console.WriteLine("Sample size: " + sampleSize);
    Console.WriteLine("----------------------------------------------------");

    // Each printed value is a sum over STAT_SAMPLE_INTERVAL batches of per-batch
    // totals accumulated over NUM_TRIALS trials, hence the combined divisor.
    var divBy = (double)(NUM_TRIALS * STAT_SAMPLE_INTERVAL);

    printSampledAverages("PROCESSING TIMES: \n[", "]", batchProcessingTimes, divBy, true);
    printSampledAverages("ERRORS (median): \n[", "]\n", supportDiffMedian, divBy, false);
    printSampledAverages("ERRORS (max): \n[", "]\n\n", supportDiffMax, divBy, false);
    printSampledAverages("FALSE-POSITIVES: \n[", "]\n\n", falsePositives, divBy, false);
    printSampledAverages("FALSE-NEGATIVES: \n[", "]\n\n", falseNegatives, divBy, false);
}

/// <summary>
/// Writes <paramref name="values"/> to the console as "(index, average)" pairs, one pair
/// per group of <c>STAT_SAMPLE_INTERVAL</c> consecutive entries. Entries left over in an
/// incomplete trailing group are not printed.
/// </summary>
/// <param name="header">Text written before the first pair.</param>
/// <param name="footer">Text written, followed by a newline, after the last pair.</param>
/// <param name="values">Per-batch totals to aggregate.</param>
/// <param name="divBy">Divisor applied to each group sum.</param>
/// <param name="roundAverages">When true, each average is passed through <c>Math.Round</c> before printing.</param>
private static void printSampledAverages(string header, string footer, List<double> values, double divBy, bool roundAverages)
{
    Console.Write(header);

    int groupIndex = 0;
    int entriesInGroup = 0;
    double groupSum = 0.0;

    foreach (var value in values)
    {
        groupSum += value;

        // Count the entry before testing, so every group - including the first -
        // holds exactly STAT_SAMPLE_INTERVAL entries.
        entriesInGroup++;
        if (entriesInGroup == STAT_SAMPLE_INTERVAL)
        {
            double average = groupSum / divBy;
            Console.Write("(" + groupIndex + ", " + (roundAverages ? Math.Round(average) : average) + ")");
            groupIndex++;
            entriesInGroup = 0;
            groupSum = 0.0;
        }
    }

    Console.WriteLine(footer);
}