/// <summary>
/// Runs the sampling skyline once per produced subset combination for a single preference set,
/// collects timing/size statistics (and, when <c>ExcessiveTests</c> is set, set-coverage,
/// representation-error, dominated-objects and cluster-analysis figures) and returns the
/// formatted report line produced by <c>FormatLineString</c>.
/// </summary>
/// <remarks>
/// The statement order of the query and random-sampling calls is kept exactly as it was; several
/// of them read state from <paramref name="parser"/> or draw random samples, so do not reorder.
/// NOTE(review): <paramref name="iTrial"/>, <paramref name="iPreferenceIndex"/> and
/// <paramref name="listPreferences"/> are not read in this method, and nothing is appended to
/// <paramref name="reportNumberOfMoves"/> / <paramref name="reportNumberOfComparisons"/> here -
/// confirm against the caller whether that is intended.
/// </remarks>
/// <param name="iTrial">Not read in this method.</param>
/// <param name="iPreferenceIndex">Not read in this method.</param>
/// <param name="listPreferences">Not read in this method.</param>
/// <param name="preferences">Preferences passed to <c>ProduceSubsets</c>; its count is reported as the dimension.</param>
/// <param name="parser">Parser used to execute the preference SQL and to obtain the skyline strategy.</param>
/// <param name="sw">Stopwatch restarted and stopped around each sample skyline computation.</param>
/// <param name="reportDimensions">Receives <c>preferences.Count</c>.</param>
/// <param name="reportSkylineSize">Receives the rounded average sample skyline size.</param>
/// <param name="reportTimeTotal">Receives the rounded average elapsed stopwatch time in milliseconds.</param>
/// <param name="reportTimeAlgorithm">Receives the rounded average <c>TimeMilliseconds</c> of the sampling runs.</param>
/// <param name="reportMinCorrelation">Receives <paramref name="minCorrelation"/>.</param>
/// <param name="reportMaxCorrelation">Receives <paramref name="maxCorrelation"/>.</param>
/// <param name="minCorrelation">Value forwarded to the report list and the report line.</param>
/// <param name="maxCorrelation">Value forwarded to the report list and the report line.</param>
/// <param name="reportCardinality">Receives <paramref name="cardinality"/>.</param>
/// <param name="cardinality">Value forwarded to the report list and the report line.</param>
/// <param name="strSQL">Preference SQL without the sampling clause; the clause is appended internally.</param>
/// <param name="strPreferenceSet">Label printed as progress output and forwarded to the report line.</param>
/// <param name="strTrial">Label forwarded to the report line.</param>
/// <param name="reportNumberOfMoves">Not written in this method.</param>
/// <param name="numberOfMoves">Value forwarded to the report line.</param>
/// <param name="reportNumberOfComparisons">Not written in this method.</param>
/// <param name="numberOfComparisons">Value forwarded to the report line.</param>
/// <returns>The report line built by <c>FormatLineString</c>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by the LINQ aggregates (<c>Average</c>, <c>Min</c>, <c>Max</c>, <c>First</c>) when the
/// sequence they are applied to is empty, e.g. when no subsets were produced.
/// </exception>
internal string MeasurePerformance(int iTrial, int iPreferenceIndex, ArrayList listPreferences,
    ArrayList preferences, SQLCommon parser, Stopwatch sw, List<long> reportDimensions,
    List<long> reportSkylineSize, List<long> reportTimeTotal, List<long> reportTimeAlgorithm,
    List<double> reportMinCorrelation, List<double> reportMaxCorrelation, double minCorrelation,
    double maxCorrelation, List<double> reportCardinality, double cardinality, string strSQL,
    string strPreferenceSet, string strTrial, List<long> reportNumberOfMoves, long numberOfMoves,
    List<long> reportNumberOfComparisons, long numberOfComparisons)
{
    Dictionary<ClusterAnalysis, List<List<double>>> clusterAnalysis;
    Dictionary<ClusterAnalysis, List<List<double>>> clusterAnalysisMedian;
    Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisTopBuckets;
    Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisMedianTopBuckets;

    List<IEnumerable<CLRSafeHashSet<int>>> producedSubsets = ProduceSubsets(preferences);

    InitClusterAnalysisDataStructures(out clusterAnalysis);
    InitClusterAnalysisDataStructures(out clusterAnalysisMedian);
    InitClusterAnalysisTopBucketsDataStructures(out clusterAnalysisTopBuckets);
    InitClusterAnalysisTopBucketsDataStructures(out clusterAnalysisMedianTopBuckets);

    // Built once; used for the reference query (non-excessive mode) and for the sampling model below.
    string sampleSql = strSQL + " SAMPLE BY RANDOM_SUBSETS COUNT " + SubsetsCount + " DIMENSION " +
                       SubsetDimension;

    // Excessive tests compare against the skyline of the unsampled query; otherwise the sampled query is used.
    DataTable entireSkylineDataTable = parser.ParseAndExecutePrefSQL(Helper.ConnectionString,
        Helper.ProviderName, ExcessiveTests ? strSQL : sampleSql);

    // Must be read right after the query above and before the parser is used again.
    List<long[]> entireDataTableSkylineValues = parser.SkylineType.Strategy.SkylineValues;

    int[] skylineAttributeColumns =
        SkylineSamplingHelper.GetSkylineAttributeColumns(entireSkylineDataTable);

    // Two separate dictionaries over the same table: one stays raw, the other is normalized in place.
    IReadOnlyDictionary<long, object[]> entireSkylineDatabase =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(entireSkylineDataTable, 0);
    IReadOnlyDictionary<long, object[]> entireSkylineNormalized =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(entireSkylineDataTable, 0);
    SkylineSamplingHelper.NormalizeColumns(entireSkylineNormalized, skylineAttributeColumns);

    DataTable entireDataTable;
    IReadOnlyDictionary<long, object[]> entireDatabaseNormalized =
        GetEntireDatabaseNormalized(parser, strSQL, skylineAttributeColumns, out entireDataTable);
    IReadOnlyDictionary<long, object[]> entireDatabase =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(entireDataTable, 0);

    // ---- bucket statistics of entire database / entire skyline ----
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireDatabaseBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireDatabaseNormalized, skylineAttributeColumns);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedEntireDatabaseBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireDatabaseBuckets);

    RecordEntireDbTopBuckets(clusterAnalysisTopBuckets[ClusterAnalysis.EntireDb], entireDatabaseBuckets,
        entireDatabaseNormalized.Count, producedSubsets.Count);

    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireSkylineBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineNormalized, skylineAttributeColumns);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedEntireSkylineBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireSkylineBuckets);

    FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.EntireSkyline, entireSkylineBuckets,
        entireSkylineNormalized.Count, entireDatabaseNormalized.Count, entireSkylineNormalized.Count);
    PadTopBucketsWithFirstValue(clusterAnalysisTopBuckets[ClusterAnalysis.EntireSkyline],
        producedSubsets.Count);

    // ---- same statistics with the three-argument GetBuckets overload (the "median" variant) ----
    var clusterAnalysisForMedian =
        new prefSQL.Evaluation.ClusterAnalysis(entireDatabaseNormalized, skylineAttributeColumns);

    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireDatabaseMedianBuckets =
        clusterAnalysisForMedian.GetBuckets(entireDatabaseNormalized, skylineAttributeColumns, true);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedEntireDatabaseMedianBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireDatabaseMedianBuckets);

    RecordEntireDbTopBuckets(clusterAnalysisMedianTopBuckets[ClusterAnalysis.EntireDb],
        entireDatabaseMedianBuckets, entireDatabaseNormalized.Count, producedSubsets.Count);

    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireSkylineMedianBuckets =
        clusterAnalysisForMedian.GetBuckets(entireSkylineNormalized, skylineAttributeColumns, true);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedEntireSkylineMedianBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireSkylineMedianBuckets);

    FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.EntireSkyline,
        entireSkylineMedianBuckets, entireSkylineNormalized.Count, entireDatabaseNormalized.Count,
        entireSkylineNormalized.Count);
    PadTopBucketsWithFirstValue(clusterAnalysisMedianTopBuckets[ClusterAnalysis.EntireSkyline],
        producedSubsets.Count);

    // ---- prepare the sampling query ----
    string strQuery;
    string operators;
    int numberOfRecords;
    string[] parameter;

    PrefSQLModel prefSqlModel = parser.GetPrefSqlModelFromPreferenceSql(sampleSql);
    string ansiSql = parser.GetAnsiSqlFromPrefSqlModel(prefSqlModel);
    prefSQL.SQLParser.Helper.DetermineParameters(ansiSql, out parameter, out strQuery, out operators,
        out numberOfRecords);

    var subsetObjects = new List<long>();
    var subsetTime = new List<long>();
    var subsetTimeElapsed = new List<long>();

    var setCoverageSecondRandom = new List<double>();
    var setCoverageSample = new List<double>();
    var setCoverageBestRank = new List<double>();
    var setCoverageSumRank = new List<double>();

    var representationErrorSecondRandom = new List<double>();
    var representationErrorSample = new List<double>();
    var representationErrorBestRank = new List<double>();
    var representationErrorSumRank = new List<double>();

    var representationErrorSumSecondRandom = new List<double>();
    var representationErrorSumSample = new List<double>();
    var representationErrorSumBestRank = new List<double>();
    var representationErrorSumSumRank = new List<double>();

    var dominatedObjectsCountSecondRandom = new List<double>();
    var dominatedObjectsCountSample = new List<double>();
    var dominatedObjectsCountBestRank = new List<double>();
    var dominatedObjectsCountSumRank = new List<double>();

    var dominatedObjectsOfBestObjectSecondRandom = new List<double>();
    var dominatedObjectsOfBestObjectSample = new List<double>();
    var dominatedObjectsOfBestObjectBestRank = new List<double>();
    var dominatedObjectsOfBestObjectSumRank = new List<double>();

    var subsetCount = 1;
    foreach (IEnumerable<CLRSafeHashSet<int>> subset in producedSubsets)
    {
        Console.WriteLine(strPreferenceSet + " (" + subsetCount + " / " + producedSubsets.Count + ")");

        // Only the sample skyline computation itself is timed.
        sw.Restart();
        var subsetsProducer = new FixedSkylineSamplingSubsetsProducer(subset);
        var utility = new SkylineSamplingUtility(subsetsProducer);
        var skylineSample = new SkylineSampling(utility)
        {
            SubsetCount = prefSqlModel.SkylineSampleCount,
            SubsetDimension = prefSqlModel.SkylineSampleDimension,
            SelectedStrategy = parser.SkylineType
        };

        DataTable sampleSkylineDataTable = skylineSample.GetSkylineTable(strQuery, operators);
        sw.Stop();

        subsetObjects.Add(sampleSkylineDataTable.Rows.Count);
        subsetTime.Add(skylineSample.TimeMilliseconds);
        subsetTimeElapsed.Add(sw.ElapsedMilliseconds);

        IReadOnlyDictionary<long, object[]> sampleSkylineDatabase =
            prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(sampleSkylineDataTable, 0);
        IReadOnlyDictionary<long, object[]> sampleSkylineNormalized =
            prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(sampleSkylineDataTable, 0);
        SkylineSamplingHelper.NormalizeColumns(sampleSkylineNormalized, skylineAttributeColumns);

        if (ExcessiveTests)
        {
            int sampleSize = sampleSkylineDataTable.Rows.Count;

            // ---- comparison sets: random sample, best-rank and sum-rank selections ----
            IReadOnlyDictionary<long, object[]> secondRandomSampleDatabase =
                SkylineSamplingHelper.GetRandomSample(entireSkylineDatabase, sampleSize);

            // Copy the value arrays so that normalizing does not modify the raw random sample.
            var secondRandomSampleNormalizedToBeCreated = new Dictionary<long, object[]>();
            foreach (KeyValuePair<long, object[]> k in secondRandomSampleDatabase)
            {
                var newValue = new object[k.Value.Length];
                k.Value.CopyTo(newValue, 0);
                secondRandomSampleNormalizedToBeCreated.Add(k.Key, newValue);
            }

            IReadOnlyDictionary<long, object[]> secondRandomSampleNormalized =
                new ReadOnlyDictionary<long, object[]>(secondRandomSampleNormalizedToBeCreated);
            SkylineSamplingHelper.NormalizeColumns(secondRandomSampleNormalized, skylineAttributeColumns);

            IReadOnlyDictionary<long, object[]> entireSkylineDataTableBestRankDatabase;
            IReadOnlyDictionary<long, object[]> entireSkylineDataTableBestRankNormalized =
                GetEntireSkylineDataTableRankNormalized(entireSkylineDataTable.Copy(),
                    entireDataTableSkylineValues, skylineAttributeColumns, sampleSize, 1,
                    out entireSkylineDataTableBestRankDatabase);

            IReadOnlyDictionary<long, object[]> entireSkylineDataTableSumRankDatabase;
            IReadOnlyDictionary<long, object[]> entireSkylineDataTableSumRankNormalized =
                GetEntireSkylineDataTableRankNormalized(entireSkylineDataTable.Copy(),
                    entireDataTableSkylineValues, skylineAttributeColumns, sampleSize, 2,
                    out entireSkylineDataTableSumRankDatabase);

            // ---- set coverage (in percent) against a further random base sample ----
            IReadOnlyDictionary<long, object[]> baseRandomSampleNormalized =
                SkylineSamplingHelper.GetRandomSample(entireSkylineNormalized, sampleSize);

            double setCoverageCoveredBySecondRandomSample =
                SetCoverage.GetCoverage(baseRandomSampleNormalized, secondRandomSampleNormalized,
                    skylineAttributeColumns) * 100.0;
            double setCoverageCoveredBySkylineSample =
                SetCoverage.GetCoverage(baseRandomSampleNormalized, sampleSkylineNormalized,
                    skylineAttributeColumns) * 100.0;
            double setCoverageCoveredByEntireBestRank =
                SetCoverage.GetCoverage(baseRandomSampleNormalized,
                    entireSkylineDataTableBestRankNormalized, skylineAttributeColumns) * 100.0;
            double setCoverageCoveredByEntireSumRank =
                SetCoverage.GetCoverage(baseRandomSampleNormalized,
                    entireSkylineDataTableSumRankNormalized, skylineAttributeColumns) * 100.0;

            setCoverageSecondRandom.Add(setCoverageCoveredBySecondRandomSample);
            setCoverageSample.Add(setCoverageCoveredBySkylineSample);
            setCoverageBestRank.Add(setCoverageCoveredByEntireBestRank);
            setCoverageSumRank.Add(setCoverageCoveredByEntireSumRank);

            // ---- representation error (maximum and sum, in percent) ----
            Dictionary<long, double>.ValueCollection baseRepresentationErrorSecondRandomSample =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, secondRandomSampleNormalized),
                    secondRandomSampleNormalized, skylineAttributeColumns);
            Dictionary<long, double>.ValueCollection baseRepresentationErrorSkylineSample =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, sampleSkylineNormalized),
                    sampleSkylineNormalized, skylineAttributeColumns);
            Dictionary<long, double>.ValueCollection baseRepresentationErrorEntireBestRank =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, entireSkylineDataTableBestRankNormalized),
                    entireSkylineDataTableBestRankNormalized, skylineAttributeColumns);
            Dictionary<long, double>.ValueCollection baseRepresentationErrorEntireSumRank =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, entireSkylineDataTableSumRankNormalized),
                    entireSkylineDataTableSumRankNormalized, skylineAttributeColumns);

            representationErrorSecondRandom.Add(baseRepresentationErrorSecondRandomSample.Max() * 100.0);
            representationErrorSample.Add(baseRepresentationErrorSkylineSample.Max() * 100.0);
            representationErrorBestRank.Add(baseRepresentationErrorEntireBestRank.Max() * 100.0);
            representationErrorSumRank.Add(baseRepresentationErrorEntireSumRank.Max() * 100.0);

            representationErrorSumSecondRandom.Add(baseRepresentationErrorSecondRandomSample.Sum() * 100.0);
            representationErrorSumSample.Add(baseRepresentationErrorSkylineSample.Sum() * 100.0);
            representationErrorSumBestRank.Add(baseRepresentationErrorEntireBestRank.Sum() * 100.0);
            representationErrorSumSumRank.Add(baseRepresentationErrorEntireSumRank.Sum() * 100.0);

            // ---- dominated objects (computed on the raw, non-normalized data) ----
            var dominatedObjectsCountRandomSample = new DominatedObjects(entireDatabase,
                secondRandomSampleDatabase, skylineAttributeColumns);
            var dominatedObjectsCountSampleSkyline = new DominatedObjects(entireDatabase,
                sampleSkylineDatabase, skylineAttributeColumns);
            var dominatedObjectsCountEntireSkylineBestRank = new DominatedObjects(entireDatabase,
                entireSkylineDataTableBestRankDatabase, skylineAttributeColumns);
            var dominatedObjectsCountEntireSkylineSumRank = new DominatedObjects(entireDatabase,
                entireSkylineDataTableSumRankDatabase, skylineAttributeColumns);

            dominatedObjectsCountSecondRandom.Add(
                dominatedObjectsCountRandomSample.NumberOfDistinctDominatedObjects);
            dominatedObjectsCountSample.Add(
                dominatedObjectsCountSampleSkyline.NumberOfDistinctDominatedObjects);
            dominatedObjectsCountBestRank.Add(
                dominatedObjectsCountEntireSkylineBestRank.NumberOfDistinctDominatedObjects);
            dominatedObjectsCountSumRank.Add(
                dominatedObjectsCountEntireSkylineSumRank.NumberOfDistinctDominatedObjects);

            dominatedObjectsOfBestObjectSecondRandom.Add(
                dominatedObjectsCountRandomSample
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
            dominatedObjectsOfBestObjectSample.Add(
                dominatedObjectsCountSampleSkyline
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
            dominatedObjectsOfBestObjectBestRank.Add(
                dominatedObjectsCountEntireSkylineBestRank
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
            dominatedObjectsOfBestObjectSumRank.Add(
                dominatedObjectsCountEntireSkylineSumRank
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);

            // ---- cluster analysis: buckets per comparison set ----
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sampleBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(sampleSkylineNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedSampleBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sampleBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> randomBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(secondRandomSampleNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedRandomBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(randomBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> bestRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineDataTableBestRankNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedBestRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(bestRankBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sumRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineDataTableSumRankNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedSumRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sumRankBuckets);

            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.SampleSkyline, sampleBuckets,
                sampleSkylineNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.RandomSkyline, randomBuckets,
                secondRandomSampleNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.BestRank, bestRankBuckets,
                entireSkylineDataTableBestRankNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.SumRank, sumRankBuckets,
                entireSkylineDataTableSumRankNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);

            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sampleMedianBuckets =
                clusterAnalysisForMedian.GetBuckets(sampleSkylineNormalized, skylineAttributeColumns, true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedSampleMedianBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sampleMedianBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> randomMedianBuckets =
                clusterAnalysisForMedian.GetBuckets(secondRandomSampleNormalized, skylineAttributeColumns,
                    true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedRandomMedianBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(randomMedianBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> bestRankMedianBuckets =
                clusterAnalysisForMedian.GetBuckets(entireSkylineDataTableBestRankNormalized,
                    skylineAttributeColumns, true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedBestRankMedianBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(bestRankMedianBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sumRankMedianBuckets =
                clusterAnalysisForMedian.GetBuckets(entireSkylineDataTableSumRankNormalized,
                    skylineAttributeColumns, true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedSumRankMedianBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sumRankMedianBuckets);

            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.SampleSkyline,
                sampleMedianBuckets, sampleSkylineNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.RandomSkyline,
                randomMedianBuckets, secondRandomSampleNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.BestRank,
                bestRankMedianBuckets, entireSkylineDataTableBestRankNormalized.Count,
                entireDatabaseNormalized.Count, entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.SumRank,
                sumRankMedianBuckets, entireSkylineDataTableSumRankNormalized.Count,
                entireDatabaseNormalized.Count, entireSkylineNormalized.Count);

            // ---- share of objects per aggregated bucket, one entry per skyline attribute column ----
            var caEntireDbNew = new List<double>();
            var caEntireSkylineNew = new List<double>();
            var caSampleSkylineNew = new List<double>();
            var caRandomSkylineNew = new List<double>();
            var caBestRankNew = new List<double>();
            var caSumRankNew = new List<double>();

            var caMedianEntireDbNew = new List<double>();
            var caMedianEntireSkylineNew = new List<double>();
            var caMedianSampleSkylineNew = new List<double>();
            var caMedianRandomSkylineNew = new List<double>();
            var caMedianBestRankNew = new List<double>();
            var caMedianSumRankNew = new List<double>();

            for (var ii = 0; ii < skylineAttributeColumns.Length; ii++)
            {
                caEntireDbNew.Add(AggregatedBucketFraction(aggregatedEntireDatabaseBuckets, ii,
                    entireDatabaseNormalized.Count));
                caEntireSkylineNew.Add(AggregatedBucketFraction(aggregatedEntireSkylineBuckets, ii,
                    entireSkylineNormalized.Count));
                caSampleSkylineNew.Add(AggregatedBucketFraction(aggregatedSampleBuckets, ii,
                    sampleSkylineNormalized.Count));
                caRandomSkylineNew.Add(AggregatedBucketFraction(aggregatedRandomBuckets, ii,
                    secondRandomSampleNormalized.Count));
                caBestRankNew.Add(AggregatedBucketFraction(aggregatedBestRankBuckets, ii,
                    entireSkylineDataTableBestRankNormalized.Count));
                caSumRankNew.Add(AggregatedBucketFraction(aggregatedSumRankBuckets, ii,
                    entireSkylineDataTableSumRankNormalized.Count));

                caMedianEntireDbNew.Add(AggregatedBucketFraction(aggregatedEntireDatabaseMedianBuckets, ii,
                    entireDatabaseNormalized.Count));
                caMedianEntireSkylineNew.Add(AggregatedBucketFraction(aggregatedEntireSkylineMedianBuckets,
                    ii, entireSkylineNormalized.Count));
                caMedianSampleSkylineNew.Add(AggregatedBucketFraction(aggregatedSampleMedianBuckets, ii,
                    sampleSkylineNormalized.Count));
                caMedianRandomSkylineNew.Add(AggregatedBucketFraction(aggregatedRandomMedianBuckets, ii,
                    secondRandomSampleNormalized.Count));
                caMedianBestRankNew.Add(AggregatedBucketFraction(aggregatedBestRankMedianBuckets, ii,
                    entireSkylineDataTableBestRankNormalized.Count));
                caMedianSumRankNew.Add(AggregatedBucketFraction(aggregatedSumRankMedianBuckets, ii,
                    entireSkylineDataTableSumRankNormalized.Count));
            }

            clusterAnalysis[ClusterAnalysis.EntireDb].Add(caEntireDbNew);
            clusterAnalysis[ClusterAnalysis.EntireSkyline].Add(caEntireSkylineNew);
            clusterAnalysis[ClusterAnalysis.SampleSkyline].Add(caSampleSkylineNew);
            clusterAnalysis[ClusterAnalysis.RandomSkyline].Add(caRandomSkylineNew);
            clusterAnalysis[ClusterAnalysis.BestRank].Add(caBestRankNew);
            clusterAnalysis[ClusterAnalysis.SumRank].Add(caSumRankNew);

            clusterAnalysisMedian[ClusterAnalysis.EntireDb].Add(caMedianEntireDbNew);
            clusterAnalysisMedian[ClusterAnalysis.EntireSkyline].Add(caMedianEntireSkylineNew);
            clusterAnalysisMedian[ClusterAnalysis.SampleSkyline].Add(caMedianSampleSkylineNew);
            clusterAnalysisMedian[ClusterAnalysis.RandomSkyline].Add(caMedianRandomSkylineNew);
            clusterAnalysisMedian[ClusterAnalysis.BestRank].Add(caMedianBestRankNew);
            clusterAnalysisMedian[ClusterAnalysis.SumRank].Add(caMedianSumRankNew);
        }

        subsetCount++;
    }

    Dictionary<ClusterAnalysis, string> clusterAnalysisStrings =
        GetClusterAnalysisStrings(skylineAttributeColumns, clusterAnalysis);
    Dictionary<ClusterAnalysis, string> clusterAnalysisMedianStrings =
        GetClusterAnalysisStrings(skylineAttributeColumns, clusterAnalysisMedian);
    Dictionary<ClusterAnalysis, string> clusterAnalysisTopBucketsStrings =
        GetClusterAnalysisTopBucketsStrings(clusterAnalysisTopBuckets, ExcessiveTests);
    Dictionary<ClusterAnalysis, string> clusterAnalysisMedianTopBucketsStrings =
        GetClusterAnalysisTopBucketsStrings(clusterAnalysisMedianTopBuckets, ExcessiveTests);

    // Adding .5 before the truncating cast rounds the (non-negative) averages to the nearest integer.
    var time = (long) (subsetTime.Average() + .5);
    var objects = (long) (subsetObjects.Average() + .5);
    var elapsed = (long) (subsetTimeElapsed.Average() + .5);

    Console.WriteLine("subsetTime");
    foreach (var i in subsetTime)
    {
        Console.WriteLine(i);
    }
    Console.WriteLine("");

    Console.WriteLine("subsetObjects");
    foreach (var i in subsetObjects)
    {
        Console.WriteLine(i);
    }
    Console.WriteLine("");

    reportDimensions.Add(preferences.Count);
    reportSkylineSize.Add(objects);
    reportTimeTotal.Add(elapsed);
    reportTimeAlgorithm.Add(time);
    reportMinCorrelation.Add(minCorrelation);
    reportMaxCorrelation.Add(maxCorrelation);
    reportCardinality.Add(cardinality);

    var setCoverageSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, setCoverageSecondRandom},
        {SkylineTypesSingle.Sample, setCoverageSample},
        {SkylineTypesSingle.BestRank, setCoverageBestRank},
        {SkylineTypesSingle.SumRank, setCoverageSumRank}
    };

    var representationErrorSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, representationErrorSecondRandom},
        {SkylineTypesSingle.Sample, representationErrorSample},
        {SkylineTypesSingle.BestRank, representationErrorBestRank},
        {SkylineTypesSingle.SumRank, representationErrorSumRank}
    };

    var representationErrorSumSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, representationErrorSumSecondRandom},
        {SkylineTypesSingle.Sample, representationErrorSumSample},
        {SkylineTypesSingle.BestRank, representationErrorSumBestRank},
        {SkylineTypesSingle.SumRank, representationErrorSumSumRank}
    };

    var dominatedObjectsCountSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, dominatedObjectsCountSecondRandom},
        {SkylineTypesSingle.Sample, dominatedObjectsCountSample},
        {SkylineTypesSingle.BestRank, dominatedObjectsCountBestRank},
        {SkylineTypesSingle.SumRank, dominatedObjectsCountSumRank}
    };

    var dominatedObjectsOfBestObjectSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, dominatedObjectsOfBestObjectSecondRandom},
        {SkylineTypesSingle.Sample, dominatedObjectsOfBestObjectSample},
        {SkylineTypesSingle.BestRank, dominatedObjectsOfBestObjectBestRank},
        {SkylineTypesSingle.SumRank, dominatedObjectsOfBestObjectSumRank}
    };

    AddToReports(_reportsLong, subsetObjects, subsetTime, _reportsDouble);
    if (ExcessiveTests)
    {
        AddToSetCoverage(_setCoverage, setCoverageSingle);
        AddToSetCoverage(_representationError, representationErrorSingle);
        AddToSetCoverage(_representationErrorSum, representationErrorSumSingle);
        AddToSetCoverage(_dominatedObjectsCount, dominatedObjectsCountSingle);
        AddToSetCoverage(_dominatedObjectsOfBestObject, dominatedObjectsOfBestObjectSingle);
    }

    return FormatLineString(strPreferenceSet, strTrial, preferences.Count, objects, elapsed, time,
        subsetTime.Min(), subsetTime.Max(),
        MyMathematic.GetSampleVariance(subsetTime), MyMathematic.GetSampleStdDeviation(subsetTime),
        Mathematic.Median(subsetTime), Mathematic.LowerQuartile(subsetTime),
        Mathematic.UpperQuartile(subsetTime),
        subsetObjects.Min(), subsetObjects.Max(),
        MyMathematic.GetSampleVariance(subsetObjects), MyMathematic.GetSampleStdDeviation(subsetObjects),
        Mathematic.Median(subsetObjects), Mathematic.LowerQuartile(subsetObjects),
        Mathematic.UpperQuartile(subsetObjects),
        setCoverageSingle, representationErrorSingle, representationErrorSumSingle,
        dominatedObjectsCountSingle, dominatedObjectsOfBestObjectSingle,
        clusterAnalysisStrings, clusterAnalysisMedianStrings,
        clusterAnalysisTopBucketsStrings, clusterAnalysisMedianTopBucketsStrings,
        minCorrelation, maxCorrelation, cardinality, numberOfMoves, numberOfComparisons);
}

/// <summary>
/// Returns the number of objects in aggregated bucket <paramref name="index"/> (0 if the bucket
/// does not exist) divided by <paramref name="total"/>.
/// </summary>
private static double AggregatedBucketFraction(
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedBuckets, int index,
    int total)
{
    List<IReadOnlyDictionary<long, object[]>> bucket;
    int count = aggregatedBuckets.TryGetValue(index, out bucket) ? bucket.Count : 0;

    // Floating-point division on purpose: total == 0 yields NaN/Infinity instead of throwing.
    return (double) count / total;
}

/// <summary>
/// Registers the five largest buckets (ties broken by ascending key) in <paramref name="target"/>,
/// each with its share of <paramref name="entireDbCount"/> repeated <paramref name="repetitions"/> times.
/// </summary>
private static void RecordEntireDbTopBuckets(Dictionary<BigInteger, List<double>> target,
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> buckets,
    int entireDbCount, int repetitions)
{
    foreach (KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>> bucket in
        buckets.OrderByDescending(l => l.Value.Count).ThenBy(l => l.Key).Take(5))
    {
        double percent = (double) bucket.Value.Count / entireDbCount;

        // One identical entry per produced subset, to enable generalized average calculation.
        target.Add(bucket.Key, Enumerable.Repeat(percent, repetitions).ToList());
    }
}

/// <summary>
/// Appends the first value of every series until it has been recorded <paramref name="repetitions"/>
/// times in total (assuming each series currently holds one value).
/// </summary>
private static void PadTopBucketsWithFirstValue(Dictionary<BigInteger, List<double>> target,
    int repetitions)
{
    foreach (List<double> series in target.Values)
    {
        double percent = series[0];

        // Starts at 1 because the first value is already present; enables generalized average calculation.
        for (var i = 1; i < repetitions; i++)
        {
            series.Add(percent);
        }
    }
}
/// <summary>
/// Appends, for the skyline type <paramref name="sampleSkylineType"/>, exactly one share value per
/// tracked bucket: its own five largest buckets, plus every bucket already tracked for
/// <c>ClusterAnalysis.EntireDb</c> and <c>ClusterAnalysis.EntireSkyline</c>.
/// </summary>
/// <remarks>
/// Previously a bucket tracked for both the entire database and the entire skyline received two
/// values in a single call, because the second reference pass did not know about the first. The
/// value recorded first (own top five, then entire database, then entire skyline) is now the only
/// one kept.
/// NOTE(review): reference buckets are divided by <paramref name="entireDbCount"/> /
/// <paramref name="entireSkylineCount"/> rather than by <paramref name="skylineCount"/>; this is
/// unchanged from the previous behavior - confirm that it is intended.
/// </remarks>
/// <param name="clusterAnalysisTopBuckets">Per skyline type, the series of share values per bucket key; modified in place.</param>
/// <param name="sampleSkylineType">Skyline type whose series are extended.</param>
/// <param name="skylineTypeBuckets">Buckets of the skyline type, keyed by bucket id.</param>
/// <param name="skylineCount">Divisor for the type's own five largest buckets.</param>
/// <param name="entireDbCount">Divisor for buckets tracked because of the entire database.</param>
/// <param name="entireSkylineCount">Divisor for buckets tracked because of the entire skyline.</param>
private static void FillTopBuckets(
    Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisTopBuckets,
    ClusterAnalysis sampleSkylineType,
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> skylineTypeBuckets,
    int skylineCount, int entireDbCount, int entireSkylineCount)
{
    Dictionary<BigInteger, List<double>> target = clusterAnalysisTopBuckets[sampleSkylineType];

    // Keys that already received their value during this call.
    var recordedKeys = new HashSet<BigInteger>();

    // track top 5 buckets
    foreach (KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>> skylineTypeBucket in
        skylineTypeBuckets.OrderByDescending(l => l.Value.Count).ThenBy(l => l.Key).Take(5))
    {
        List<double> series;
        if (!target.TryGetValue(skylineTypeBucket.Key, out series))
        {
            series = new List<double>();
            target.Add(skylineTypeBucket.Key, series);
        }

        series.Add((double) skylineTypeBucket.Value.Count / skylineCount);
        recordedKeys.Add(skylineTypeBucket.Key);
    }

    // additionally track top 5 buckets of entire db
    AppendReferenceBucketShares(target,
        clusterAnalysisTopBuckets[ClusterAnalysis.EntireDb].Keys.ToList(),
        skylineTypeBuckets, entireDbCount, recordedKeys);

    // additionally track top 5 buckets of entire skyline; the key snapshot is taken only now so
    // that it includes keys added above when sampleSkylineType is EntireSkyline itself
    AppendReferenceBucketShares(target,
        clusterAnalysisTopBuckets[ClusterAnalysis.EntireSkyline].Keys.ToList(),
        skylineTypeBuckets, entireSkylineCount, recordedKeys);
}

/// <summary>
/// Ensures every reference key has a series in <paramref name="target"/> and appends one share
/// value (0 if the bucket is absent) for each key not yet recorded in this call.
/// </summary>
private static void AppendReferenceBucketShares(Dictionary<BigInteger, List<double>> target,
    List<BigInteger> referenceKeys,
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> skylineTypeBuckets,
    int referenceCount, HashSet<BigInteger> recordedKeys)
{
    foreach (BigInteger key in referenceKeys)
    {
        List<double> series;
        if (!target.TryGetValue(key, out series))
        {
            series = new List<double>();
            target.Add(key, series);
        }

        if (!recordedKeys.Add(key))
        {
            continue; // already added earlier in this call => no need to add again
        }

        List<IReadOnlyDictionary<long, object[]>> bucket;
        series.Add(skylineTypeBuckets.TryGetValue(key, out bucket)
            ? (double) bucket.Count / referenceCount
            : 0); // not contained => percentage = 0
    }
}