示例#1
0
        internal string MeasurePerformance(int iTrial, int iPreferenceIndex, ArrayList listPreferences,
            ArrayList preferences,
            SQLCommon parser, Stopwatch sw, List<long> reportDimensions, List<long> reportSkylineSize,
            List<long> reportTimeTotal,
            List<long> reportTimeAlgorithm, List<double> reportMinCorrelation, List<double> reportMaxCorrelation, double minCorrelation, double maxCorrelation,
            List<double> reportCardinality, double cardinality,
            string strSQL, string strPreferenceSet, string strTrial, List<long> reportNumberOfMoves, long numberOfMoves, List<long> reportNumberOfComparisons, long numberOfComparisons)
        {
            Dictionary<ClusterAnalysis, List<List<double>>> clusterAnalysis;
            Dictionary<ClusterAnalysis, List<List<double>>> clusterAnalysisMedian;
            Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisTopBuckets;
            Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisMedianTopBuckets;

            List<IEnumerable<CLRSafeHashSet<int>>> producedSubsets =
                ProduceSubsets(preferences);

            InitClusterAnalysisDataStructures(out clusterAnalysis);
            InitClusterAnalysisDataStructures(out clusterAnalysisMedian);
            InitClusterAnalysisTopBucketsDataStructures(
                out clusterAnalysisTopBuckets);
            InitClusterAnalysisTopBucketsDataStructures(
                out clusterAnalysisMedianTopBuckets);

            var entireSkylineDataTable = new DataTable();
            if (ExcessiveTests)
            {
                entireSkylineDataTable =
                parser.ParseAndExecutePrefSQL(Helper.ConnectionString, Helper.ProviderName,
                    strSQL);
            }
            else
            {
                entireSkylineDataTable =
                parser.ParseAndExecutePrefSQL(Helper.ConnectionString, Helper.ProviderName,
                    strSQL+ " SAMPLE BY RANDOM_SUBSETS COUNT " + SubsetsCount +
                      " DIMENSION " + SubsetDimension);
            }

            List<long[]> entireDataTableSkylineValues =
                parser.SkylineType.Strategy.SkylineValues;

            int[] skylineAttributeColumns =
                SkylineSamplingHelper.GetSkylineAttributeColumns(entireSkylineDataTable);

            IReadOnlyDictionary<long, object[]> entireSkylineDatabase =
                prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(
                    entireSkylineDataTable, 0);
            IReadOnlyDictionary<long, object[]> entireSkylineNormalized =
                prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(
                    entireSkylineDataTable, 0);
            SkylineSamplingHelper.NormalizeColumns(entireSkylineNormalized,
                skylineAttributeColumns);

            DataTable entireDataTable;
            IReadOnlyDictionary<long, object[]> entireDatabaseNormalized =
                GetEntireDatabaseNormalized(parser, strSQL, skylineAttributeColumns,
                    out entireDataTable);
            IReadOnlyDictionary<long, object[]> entireDatabase =
                prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(
                    entireDataTable, 0);

            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                entireDatabaseBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireDatabaseNormalized,
                        skylineAttributeColumns);

            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedEntireDatabaseBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireDatabaseBuckets);

            foreach (
                KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>> s in
                    entireDatabaseBuckets.OrderByDescending(l => l.Value.Count)
                        .ThenBy(l => l.Key).Take(5))
            {
                double percent = (double) s.Value.Count / entireDatabaseNormalized.Count;
                clusterAnalysisTopBuckets[ClusterAnalysis.EntireDb].Add(s.Key,
                    new List<double>());

                for (var i = 0; i < producedSubsets.Count; i++)
                    // to enable generalized average calculation
                {
                    clusterAnalysisTopBuckets[ClusterAnalysis.EntireDb][s.Key]
                        .Add(percent);
                }
            }

            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                entireSkylineBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineNormalized,
                        skylineAttributeColumns);

            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedEntireSkylineBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireSkylineBuckets);

            FillTopBuckets(clusterAnalysisTopBuckets,
                ClusterAnalysis.EntireSkyline, entireSkylineBuckets,
                entireSkylineNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            foreach (
                KeyValuePair<BigInteger, List<double>> bucket in
                    clusterAnalysisTopBuckets[ClusterAnalysis.EntireSkyline])
            {
                double percent =
                    clusterAnalysisTopBuckets[ClusterAnalysis.EntireSkyline][
                        bucket.Key][0];

                for (var i = 1; i < producedSubsets.Count; i++)
                    // to enable generalized average calculation
                {
                    clusterAnalysisTopBuckets[ClusterAnalysis.EntireSkyline][
                        bucket.Key].Add(percent);
                }
            }

            var clusterAnalysisForMedian = new prefSQL.Evaluation.ClusterAnalysis(entireDatabaseNormalized,
                skylineAttributeColumns);

            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                entireDatabaseMedianBuckets =
                    clusterAnalysisForMedian.GetBuckets(entireDatabaseNormalized,
                        skylineAttributeColumns, true);

            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedEntireDatabaseMedianBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireDatabaseMedianBuckets);

            foreach (
                KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>> s in
                    entireDatabaseMedianBuckets.OrderByDescending(l => l.Value.Count)
                        .ThenBy(l => l.Key).Take(5))
            {
                double percent = (double) s.Value.Count / entireDatabaseNormalized.Count;
                clusterAnalysisMedianTopBuckets[ClusterAnalysis.EntireDb].Add(
                    s.Key,
                    new List<double>());

                for (var i = 0; i < producedSubsets.Count; i++)
                    // to enable generalized average calculation
                {
                    clusterAnalysisMedianTopBuckets[ClusterAnalysis.EntireDb][
                        s.Key]
                        .Add(percent);
                }
            }

            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                entireSkylineMedianBuckets =
                    clusterAnalysisForMedian.GetBuckets(entireSkylineNormalized,
                        skylineAttributeColumns, true);

            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedEntireSkylineMedianBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireSkylineMedianBuckets);

            FillTopBuckets(clusterAnalysisMedianTopBuckets,
                ClusterAnalysis.EntireSkyline, entireSkylineMedianBuckets,
                entireSkylineNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);

            foreach (
                KeyValuePair<BigInteger, List<double>> bucket in
                    clusterAnalysisMedianTopBuckets[
                        ClusterAnalysis.EntireSkyline])
            {
                double percent =
                    clusterAnalysisMedianTopBuckets[
                        ClusterAnalysis.EntireSkyline][bucket.Key][0];

                for (var i = 1; i < producedSubsets.Count; i++)
                    // to enable generalized average calculation
                {
                    clusterAnalysisMedianTopBuckets[
                        ClusterAnalysis.EntireSkyline][bucket.Key].Add(percent);
                }
            }
            strSQL += " SAMPLE BY RANDOM_SUBSETS COUNT " + SubsetsCount +
                      " DIMENSION " + SubsetDimension;

            string strQuery;
            string operators;
            int numberOfRecords;
            string[] parameter;

            PrefSQLModel prefSqlModel = parser.GetPrefSqlModelFromPreferenceSql(strSQL);
            string ansiSql = parser.GetAnsiSqlFromPrefSqlModel(prefSqlModel);
            prefSQL.SQLParser.Helper.DetermineParameters(ansiSql, out parameter,
                out strQuery, out operators,
                out numberOfRecords);

            var subsetObjects = new List<long>();
            var subsetTime = new List<long>();
            var subsetTimeElapsed = new List<long>();
            var setCoverageSecondRandom = new List<double>();
            var setCoverageSample = new List<double>();
            var setCoverageBestRank = new List<double>();
            var setCoverageSumRank = new List<double>();

            var representationErrorSecondRandom = new List<double>();
            var representationErrorSample = new List<double>();
            var representationErrorBestRank = new List<double>();
            var representationErrorSumRank = new List<double>();

            var representationErrorSumSecondRandom = new List<double>();
            var representationErrorSumSample = new List<double>();
            var representationErrorSumBestRank = new List<double>();
            var representationErrorSumSumRank = new List<double>();

            var dominatedObjectsCountSecondRandom = new List<double>();
            var dominatedObjectsCountSample = new List<double>();
            var dominatedObjectsCountBestRank = new List<double>();
            var dominatedObjectsCountSumRank = new List<double>();

            var dominatedObjectsOfBestObjectSecondRandom = new List<double>();
            var dominatedObjectsOfBestObjectSample = new List<double>();
            var dominatedObjectsOfBestObjectBestRank = new List<double>();
            var dominatedObjectsOfBestObjectSumRank = new List<double>();

            var subsetCount = 1;
            foreach (IEnumerable<CLRSafeHashSet<int>> subset in producedSubsets)
            {
                Console.WriteLine(strPreferenceSet + " (" + subsetCount + " / " +
                                  producedSubsets.Count + ")");

                sw.Restart();
                var subsetsProducer = new FixedSkylineSamplingSubsetsProducer(subset);
                var utility = new SkylineSamplingUtility(subsetsProducer);
                var skylineSample = new SkylineSampling(utility)
                {
                    SubsetCount = prefSqlModel.SkylineSampleCount,
                    SubsetDimension = prefSqlModel.SkylineSampleDimension,
                    SelectedStrategy = parser.SkylineType
                };

                DataTable sampleSkylineDataTable = skylineSample.GetSkylineTable(strQuery,
                    operators);

                sw.Stop();

                subsetObjects.Add(sampleSkylineDataTable.Rows.Count);
                subsetTime.Add(skylineSample.TimeMilliseconds);
                subsetTimeElapsed.Add(sw.ElapsedMilliseconds);

                IReadOnlyDictionary<long, object[]> sampleSkylineDatabase =
                    prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(
                        sampleSkylineDataTable, 0);
                IReadOnlyDictionary<long, object[]> sampleSkylineNormalized =
                    prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(
                        sampleSkylineDataTable, 0);
                SkylineSamplingHelper.NormalizeColumns(sampleSkylineNormalized,
                    skylineAttributeColumns);

                IReadOnlyDictionary<long, object[]> secondRandomSampleDatabase = new Dictionary<long, object[]>();
                IReadOnlyDictionary<long, object[]> secondRandomSampleNormalized = new Dictionary<long, object[]>();
                      IReadOnlyDictionary<long, object[]> entireSkylineDataTableBestRankDatabase = new Dictionary<long, object[]>();
                IReadOnlyDictionary<long, object[]> entireSkylineDataTableSumRankDatabase = new Dictionary<long, object[]>();
                IReadOnlyDictionary<long, object[]> entireSkylineDataTableBestRankNormalized = new Dictionary<long, object[]>();
                IReadOnlyDictionary<long, object[]> entireSkylineDataTableSumRankNormalized = new Dictionary<long, object[]>();

               if (ExcessiveTests)
                {
               secondRandomSampleDatabase =
                    SkylineSamplingHelper.GetRandomSample(entireSkylineDatabase,
                        sampleSkylineDataTable.Rows.Count);
                var secondRandomSampleNormalizedToBeCreated = new Dictionary<long, object[]>();
                foreach (KeyValuePair<long, object[]> k in secondRandomSampleDatabase)
                {
                    var newValue = new object[k.Value.Length];
                    k.Value.CopyTo(newValue, 0);
                    secondRandomSampleNormalizedToBeCreated.Add(k.Key, newValue);
                }
                secondRandomSampleNormalized =
                    new ReadOnlyDictionary<long, object[]>(
                        secondRandomSampleNormalizedToBeCreated);
                SkylineSamplingHelper.NormalizeColumns(secondRandomSampleNormalized,
                    skylineAttributeColumns);

                    entireSkylineDataTableBestRankNormalized =
                        GetEntireSkylineDataTableRankNormalized(entireSkylineDataTable.Copy(),
                            entireDataTableSkylineValues, skylineAttributeColumns,
                            sampleSkylineDataTable.Rows.Count, 1,
                            out entireSkylineDataTableBestRankDatabase);

                    entireSkylineDataTableSumRankNormalized =
                        GetEntireSkylineDataTableRankNormalized(entireSkylineDataTable.Copy(),
                            entireDataTableSkylineValues, skylineAttributeColumns,
                            sampleSkylineDataTable.Rows.Count, 2,
                            out entireSkylineDataTableSumRankDatabase);

                        IReadOnlyDictionary<long, object[]> baseRandomSampleNormalized =
                            SkylineSamplingHelper.GetRandomSample(entireSkylineNormalized,
                                sampleSkylineDataTable.Rows.Count);

                        double setCoverageCoveredBySecondRandomSample = SetCoverage.GetCoverage(
                                    baseRandomSampleNormalized,
                                    secondRandomSampleNormalized, skylineAttributeColumns) * 100.0;
                        double setCoverageCoveredBySkylineSample = SetCoverage.GetCoverage(
                                baseRandomSampleNormalized,
                                sampleSkylineNormalized, skylineAttributeColumns) * 100.0;
                        double setCoverageCoveredByEntireBestRank = SetCoverage.GetCoverage(
                                baseRandomSampleNormalized,
                                entireSkylineDataTableBestRankNormalized, skylineAttributeColumns) * 100.0;
                        double setCoverageCoveredByEntireSumRank = SetCoverage.GetCoverage(baseRandomSampleNormalized,
                                entireSkylineDataTableSumRankNormalized, skylineAttributeColumns) *
                                            100.0;

                    setCoverageSecondRandom.Add(setCoverageCoveredBySecondRandomSample);
                    setCoverageSample.Add(setCoverageCoveredBySkylineSample);
                    setCoverageBestRank.Add(setCoverageCoveredByEntireBestRank);
                    setCoverageSumRank.Add(setCoverageCoveredByEntireSumRank);

                    Dictionary<long, double>.ValueCollection baseRepresentationErrorSecondRandomSample = SetCoverage
                    .GetRepresentationError(
                        GetReducedSkyline(entireSkylineNormalized, secondRandomSampleNormalized),
                        secondRandomSampleNormalized, skylineAttributeColumns);
                    Dictionary<long, double>.ValueCollection baseRepresentationErrorSkylineSample = SetCoverage.GetRepresentationError(
                        GetReducedSkyline(entireSkylineNormalized, sampleSkylineNormalized),
                        sampleSkylineNormalized, skylineAttributeColumns);
                    Dictionary<long, double>.ValueCollection baseRepresentationErrorEntireBestRank =
                        SetCoverage.GetRepresentationError(
                            GetReducedSkyline(entireSkylineNormalized,
                                entireSkylineDataTableBestRankNormalized),
                            entireSkylineDataTableBestRankNormalized, skylineAttributeColumns);
                    Dictionary<long, double>.ValueCollection baseRepresentationErrorEntireSumRank =
                        SetCoverage.GetRepresentationError(
                            GetReducedSkyline(entireSkylineNormalized,
                                entireSkylineDataTableSumRankNormalized),
                            entireSkylineDataTableSumRankNormalized, skylineAttributeColumns);

                    representationErrorSecondRandom.Add(baseRepresentationErrorSecondRandomSample.Max()*100.0);
                    representationErrorSample.Add(baseRepresentationErrorSkylineSample.Max() * 100.0);
                    representationErrorBestRank.Add(baseRepresentationErrorEntireBestRank.Max() * 100.0);
                    representationErrorSumRank.Add(baseRepresentationErrorEntireSumRank.Max() * 100.0);

                    representationErrorSumSecondRandom.Add(baseRepresentationErrorSecondRandomSample.Sum() * 100.0);
                    representationErrorSumSample.Add(baseRepresentationErrorSkylineSample.Sum() * 100.0);
                    representationErrorSumBestRank.Add(baseRepresentationErrorEntireBestRank.Sum() * 100.0);
                    representationErrorSumSumRank.Add(baseRepresentationErrorEntireSumRank.Sum() * 100.0);

                    var dominatedObjectsCountRandomSample =
                        new DominatedObjects(entireDatabase,
                            secondRandomSampleDatabase,
                            skylineAttributeColumns);
                    var dominatedObjectsCountSampleSkyline =
                        new DominatedObjects(entireDatabase, sampleSkylineDatabase,
                            skylineAttributeColumns);
                    var dominatedObjectsCountEntireSkylineBestRank =
                        new DominatedObjects(entireDatabase,
                            entireSkylineDataTableBestRankDatabase, skylineAttributeColumns);
                    var dominatedObjectsCountEntireSkylineSumRank =
                        new DominatedObjects(entireDatabase,
                            entireSkylineDataTableSumRankDatabase, skylineAttributeColumns);

                    dominatedObjectsCountSecondRandom.Add(
                        dominatedObjectsCountRandomSample.NumberOfDistinctDominatedObjects);
                    dominatedObjectsCountSample.Add(
                        dominatedObjectsCountSampleSkyline.NumberOfDistinctDominatedObjects);
                    dominatedObjectsCountBestRank.Add(
                        dominatedObjectsCountEntireSkylineBestRank.NumberOfDistinctDominatedObjects);
                    dominatedObjectsCountSumRank.Add(
                        dominatedObjectsCountEntireSkylineSumRank.NumberOfDistinctDominatedObjects);

                    dominatedObjectsOfBestObjectSecondRandom.Add(
                        dominatedObjectsCountRandomSample
                            .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
                    dominatedObjectsOfBestObjectSample.Add(
                        dominatedObjectsCountSampleSkyline
                            .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
                    dominatedObjectsOfBestObjectBestRank.Add(
                        dominatedObjectsCountEntireSkylineBestRank
                            .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
                    dominatedObjectsOfBestObjectSumRank.Add(
                        dominatedObjectsCountEntireSkylineSumRank
                            .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);

                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        sampleBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetBuckets(sampleSkylineNormalized,
                                skylineAttributeColumns);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedSampleBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sampleBuckets);
                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        randomBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetBuckets(secondRandomSampleNormalized,
                                skylineAttributeColumns);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedRandomBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(randomBuckets);
                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        bestRankBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetBuckets(
                                entireSkylineDataTableBestRankNormalized,
                                skylineAttributeColumns);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedBestRankBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(
                                bestRankBuckets);
                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        sumRankBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetBuckets(
                                entireSkylineDataTableSumRankNormalized,
                                skylineAttributeColumns);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedSumRankBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(
                                sumRankBuckets);

                    FillTopBuckets(clusterAnalysisTopBuckets,
                        ClusterAnalysis.SampleSkyline, sampleBuckets,
                        sampleSkylineNormalized.Count, entireDatabaseNormalized.Count,
                        entireSkylineNormalized.Count);
                    FillTopBuckets(clusterAnalysisTopBuckets,
                        ClusterAnalysis.RandomSkyline, randomBuckets,
                        secondRandomSampleNormalized.Count, entireDatabaseNormalized.Count,
                        entireSkylineNormalized.Count);
                    FillTopBuckets(clusterAnalysisTopBuckets,
                        ClusterAnalysis.BestRank, bestRankBuckets,
                        entireSkylineDataTableBestRankNormalized.Count,
                        entireDatabaseNormalized.Count, entireSkylineNormalized.Count);
                    FillTopBuckets(clusterAnalysisTopBuckets,
                        ClusterAnalysis.SumRank, sumRankBuckets,
                        entireSkylineDataTableSumRankNormalized.Count,
                        entireDatabaseNormalized.Count, entireSkylineNormalized.Count);

                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        sampleMedianBuckets =
                            clusterAnalysisForMedian.GetBuckets(sampleSkylineNormalized,
                                skylineAttributeColumns, true);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedSampleMedianBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sampleMedianBuckets);
                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        randomMedianBuckets =
                            clusterAnalysisForMedian.GetBuckets(secondRandomSampleNormalized,
                                skylineAttributeColumns, true);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedRandomMedianBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(randomMedianBuckets);
                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        bestRankMedianBuckets =
                            clusterAnalysisForMedian.GetBuckets(
                                entireSkylineDataTableBestRankNormalized,
                                skylineAttributeColumns, true);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedBestRankMedianBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(
                                bestRankMedianBuckets);
                    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                        sumRankMedianBuckets =
                            clusterAnalysisForMedian.GetBuckets(
                                entireSkylineDataTableSumRankNormalized,
                                skylineAttributeColumns, true);
                    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                        aggregatedSumRankMedianBuckets =
                            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(
                                sumRankMedianBuckets);

                    FillTopBuckets(clusterAnalysisMedianTopBuckets,
                        ClusterAnalysis.SampleSkyline, sampleMedianBuckets,
                        sampleSkylineNormalized.Count, entireDatabaseNormalized.Count,
                        entireSkylineNormalized.Count);
                    FillTopBuckets(clusterAnalysisMedianTopBuckets,
                        ClusterAnalysis.RandomSkyline, randomMedianBuckets,
                        secondRandomSampleNormalized.Count, entireDatabaseNormalized.Count,
                        entireSkylineNormalized.Count);
                    FillTopBuckets(clusterAnalysisMedianTopBuckets,
                        ClusterAnalysis.BestRank, bestRankMedianBuckets,
                        entireSkylineDataTableBestRankNormalized.Count,
                        entireDatabaseNormalized.Count, entireSkylineNormalized.Count);
                    FillTopBuckets(clusterAnalysisMedianTopBuckets,
                        ClusterAnalysis.SumRank, sumRankMedianBuckets,
                        entireSkylineDataTableSumRankNormalized.Count,
                        entireDatabaseNormalized.Count, entireSkylineNormalized.Count);

                    var caEntireDbNew = new List<double>();
                    var caEntireSkylineNew = new List<double>();
                    var caSampleSkylineNew = new List<double>();
                    var caRandomSkylineNew = new List<double>();
                    var caBestRankNew = new List<double>();
                    var caSumRankNew = new List<double>();

                    for (var ii = 0; ii < skylineAttributeColumns.Length; ii++)
                    {
                        int entireSkyline = aggregatedEntireSkylineBuckets.ContainsKey(ii)
                            ? aggregatedEntireSkylineBuckets[ii].Count
                            : 0;
                        int sampleSkyline = aggregatedSampleBuckets.ContainsKey(ii)
                            ? aggregatedSampleBuckets[ii].Count
                            : 0;
                        int randomSkyline = aggregatedRandomBuckets.ContainsKey(ii)
                            ? aggregatedRandomBuckets[ii].Count
                            : 0;
                        double entireSkylinePercent = (double)entireSkyline /
                                                      entireSkylineNormalized.Count;
                        double sampleSkylinePercent = (double) sampleSkyline /
                                                      sampleSkylineNormalized.Count;
                        double randomSkylinePercent = (double)randomSkyline /
                                                      secondRandomSampleNormalized.Count;
                        int entireDb = aggregatedEntireDatabaseBuckets.ContainsKey(ii)
                            ? aggregatedEntireDatabaseBuckets[ii].Count
                            : 0;
                        double entireDbPercent = (double) entireDb /
                                                 entireDatabaseNormalized.Count;

                        int bestRank = aggregatedBestRankBuckets.ContainsKey(ii)
                            ? aggregatedBestRankBuckets[ii].Count
                            : 0;
                        int sumRank = aggregatedSumRankBuckets.ContainsKey(ii)
                            ? aggregatedSumRankBuckets[ii].Count
                            : 0;

                        double bestRankPercent = (double) bestRank /
                                                 entireSkylineDataTableBestRankNormalized.Count;
                        double sumRankPercent = (double) sumRank /
                                                entireSkylineDataTableSumRankNormalized.Count;
                        caEntireDbNew.Add(entireDbPercent);
                        caEntireSkylineNew.Add(entireSkylinePercent);
                        caSampleSkylineNew.Add(sampleSkylinePercent);
                        caRandomSkylineNew.Add(randomSkylinePercent);
                        caBestRankNew.Add(bestRankPercent);
                        caSumRankNew.Add(sumRankPercent);
                    }

                    var caMedianEntireDbNew = new List<double>();
                    var caMedianEntireSkylineNew = new List<double>();
                    var caMedianSampleSkylineNew = new List<double>();
                    var caMedianRandomSkylineNew = new List<double>();
                    var caMedianBestRankNew = new List<double>();
                    var caMedianSumRankNew = new List<double>();

                    for (var ii = 0; ii < skylineAttributeColumns.Length; ii++)
                    {
                        int entireSkyline = aggregatedEntireSkylineMedianBuckets.ContainsKey(ii)
                            ? aggregatedEntireSkylineMedianBuckets[ii].Count
                            : 0;
                        int sampleSkyline = aggregatedSampleMedianBuckets.ContainsKey(ii)
                            ? aggregatedSampleMedianBuckets[ii].Count
                            : 0;
                        int randomSkyline = aggregatedRandomMedianBuckets.ContainsKey(ii)
                            ? aggregatedRandomMedianBuckets[ii].Count
                            : 0;
                        double entireSkylinePercent = (double)entireSkyline /
                                                      entireSkylineNormalized.Count;
                        double sampleSkylinePercent = (double) sampleSkyline /
                                                      sampleSkylineNormalized.Count;
                        double randomSkylinePercent = (double)randomSkyline /
                                                      secondRandomSampleNormalized.Count;
                        int entireDb = aggregatedEntireDatabaseMedianBuckets.ContainsKey(ii)
                            ? aggregatedEntireDatabaseMedianBuckets[ii].Count
                            : 0;
                        double entireDbPercent = (double) entireDb /
                                                 entireDatabaseNormalized.Count;

                        int bestRank = aggregatedBestRankMedianBuckets.ContainsKey(ii)
                            ? aggregatedBestRankMedianBuckets[ii].Count
                            : 0;
                        int sumRank = aggregatedSumRankMedianBuckets.ContainsKey(ii)
                            ? aggregatedSumRankMedianBuckets[ii].Count
                            : 0;

                        double bestRankPercent = (double) bestRank /
                                                 entireSkylineDataTableBestRankNormalized.Count;
                        double sumRankPercent = (double) sumRank /
                                                entireSkylineDataTableSumRankNormalized.Count;
                        caMedianEntireDbNew.Add(entireDbPercent);
                        caMedianEntireSkylineNew.Add(entireSkylinePercent);
                        caMedianSampleSkylineNew.Add(sampleSkylinePercent);
                        caMedianRandomSkylineNew.Add(randomSkylinePercent);
                        caMedianBestRankNew.Add(bestRankPercent);
                        caMedianSumRankNew.Add(sumRankPercent);
                    }

                clusterAnalysis[ClusterAnalysis.EntireDb].Add(caEntireDbNew);
                clusterAnalysis[ClusterAnalysis.EntireSkyline].Add(
                    caEntireSkylineNew);
                clusterAnalysis[ClusterAnalysis.SampleSkyline].Add(
                    caSampleSkylineNew);
                clusterAnalysis[ClusterAnalysis.RandomSkyline].Add(
                    caRandomSkylineNew);
                clusterAnalysis[ClusterAnalysis.BestRank].Add(
                    caBestRankNew);
                clusterAnalysis[ClusterAnalysis.SumRank].Add(
                    caSumRankNew);

                clusterAnalysisMedian[ClusterAnalysis.EntireDb].Add(
                    caMedianEntireDbNew);
                clusterAnalysisMedian[ClusterAnalysis.EntireSkyline].Add(
                    caMedianEntireSkylineNew);
                clusterAnalysisMedian[ClusterAnalysis.SampleSkyline].Add(
                    caMedianSampleSkylineNew);
                clusterAnalysisMedian[ClusterAnalysis.RandomSkyline].Add(
                    caMedianRandomSkylineNew);
                clusterAnalysisMedian[ClusterAnalysis.BestRank].Add(
                    caMedianBestRankNew);
                clusterAnalysisMedian[ClusterAnalysis.SumRank].Add(
                    caMedianSumRankNew);
                }

                subsetCount++;
            }

            Dictionary<ClusterAnalysis, string> clusterAnalysisStrings =
                GetClusterAnalysisStrings(skylineAttributeColumns, clusterAnalysis);
            Dictionary<ClusterAnalysis, string> clusterAnalysisMedianStrings =
                GetClusterAnalysisStrings(skylineAttributeColumns, clusterAnalysisMedian);
            Dictionary<ClusterAnalysis, string> clusterAnalysisTopBucketsStrings =
                GetClusterAnalysisTopBucketsStrings(clusterAnalysisTopBuckets, ExcessiveTests);
            Dictionary<ClusterAnalysis, string> clusterAnalysisMedianTopBucketsStrings =
                GetClusterAnalysisTopBucketsStrings(clusterAnalysisMedianTopBuckets, ExcessiveTests);

            var time = (long) (subsetTime.Average() + .5);
            var objects = (long) (subsetObjects.Average() + .5);
            var elapsed = (long) (subsetTimeElapsed.Average() + .5);

            Console.WriteLine("subsetTime");
            foreach (var i in subsetTime)
            {
                Console.WriteLine(i);
            }
            Console.WriteLine("");

            Console.WriteLine("subsetObjects");
            foreach (var i in subsetObjects)
            {
                Console.WriteLine(i);
            }
            Console.WriteLine("");

            reportDimensions.Add(preferences.Count);
            reportSkylineSize.Add(objects);
            reportTimeTotal.Add(elapsed);
            reportTimeAlgorithm.Add(time);
            reportMinCorrelation.Add(minCorrelation);
            reportMaxCorrelation.Add(maxCorrelation);
            reportCardinality.Add(cardinality);

            var setCoverageSingle =
                new Dictionary<SkylineTypesSingle, List<double>>
                {
                    {SkylineTypesSingle.Random, setCoverageSecondRandom},
                    {SkylineTypesSingle.Sample, setCoverageSample},
                    {SkylineTypesSingle.BestRank, setCoverageBestRank},
                    {SkylineTypesSingle.SumRank, setCoverageSumRank}
                };

            var representationErrorSingle =
                new Dictionary<SkylineTypesSingle, List<double>>
                {
                    {SkylineTypesSingle.Random, representationErrorSecondRandom},
                    {SkylineTypesSingle.Sample, representationErrorSample},
                    {SkylineTypesSingle.BestRank, representationErrorBestRank},
                    {SkylineTypesSingle.SumRank, representationErrorSumRank}
                };

            var representationErrorSumSingle =
                new Dictionary<SkylineTypesSingle, List<double>>
                {
                    {SkylineTypesSingle.Random, representationErrorSumSecondRandom},
                    {SkylineTypesSingle.Sample, representationErrorSumSample},
                    {SkylineTypesSingle.BestRank, representationErrorSumBestRank},
                    {SkylineTypesSingle.SumRank, representationErrorSumSumRank}
                };

            var dominatedObjectsCountSingle =
                new Dictionary<SkylineTypesSingle, List<double>>()
                {
                    {
                        SkylineTypesSingle.Random,
                        dominatedObjectsCountSecondRandom
                    },
                    {
                        SkylineTypesSingle.Sample,
                        dominatedObjectsCountSample
                    },
                    {
                        SkylineTypesSingle.BestRank,
                        dominatedObjectsCountBestRank
                    },
                    {
                        SkylineTypesSingle.SumRank,
                        dominatedObjectsCountSumRank
                    }
                };

            var dominatedObjectsOfBestObjectSingle =
                new Dictionary<SkylineTypesSingle, List<double>>
                {
                    {
                        SkylineTypesSingle.Random,
                        dominatedObjectsOfBestObjectSecondRandom
                    },
                    {
                        SkylineTypesSingle.Sample,
                        dominatedObjectsOfBestObjectSample
                    },
                    {
                        SkylineTypesSingle.BestRank,
                        dominatedObjectsOfBestObjectBestRank
                    },
                    {
                        SkylineTypesSingle.SumRank,
                        dominatedObjectsOfBestObjectSumRank
                    }
                };

            AddToReports(_reportsLong, subsetObjects, subsetTime,
                _reportsDouble);
            if (ExcessiveTests)
            {
            AddToSetCoverage(_setCoverage, setCoverageSingle);
            AddToSetCoverage(_representationError,
                representationErrorSingle);
            AddToSetCoverage(_representationErrorSum,
                representationErrorSumSingle);
            AddToSetCoverage(_dominatedObjectsCount,
                dominatedObjectsCountSingle);
            AddToSetCoverage(_dominatedObjectsOfBestObject,
                dominatedObjectsOfBestObjectSingle);
            }

            string strLine = FormatLineString(strPreferenceSet, strTrial,
                preferences.Count, objects,
                elapsed, time, subsetTime.Min(), subsetTime.Max(),
                MyMathematic.GetSampleVariance(subsetTime),
                MyMathematic.GetSampleStdDeviation(subsetTime),
                Mathematic.Median(subsetTime), Mathematic.LowerQuartile(subsetTime),
                Mathematic.UpperQuartile(subsetTime), subsetObjects.Min(),
                subsetObjects.Max(), MyMathematic.GetSampleVariance(subsetObjects),
                MyMathematic.GetSampleStdDeviation(subsetObjects),
                Mathematic.Median(subsetObjects), Mathematic.LowerQuartile(subsetObjects),
                Mathematic.UpperQuartile(subsetObjects),
                setCoverageSingle, representationErrorSingle,
                representationErrorSumSingle, dominatedObjectsCountSingle,
                dominatedObjectsOfBestObjectSingle,
                clusterAnalysisStrings, clusterAnalysisMedianStrings,
                clusterAnalysisTopBucketsStrings, clusterAnalysisMedianTopBucketsStrings,
                minCorrelation, maxCorrelation,
                cardinality, numberOfMoves, numberOfComparisons);
            return strLine;
        }
示例#2
0
        /// <summary>
        /// Appends one measurement per tracked bucket to the series of <paramref name="sampleSkylineType"/>:
        /// first for the five largest buckets of <paramref name="skylineTypeBuckets"/>, then for every bucket key
        /// currently tracked under <c>ClusterAnalysis.EntireDb</c> and <c>ClusterAnalysis.EntireSkyline</c>.
        /// </summary>
        /// <param name="clusterAnalysisTopBuckets">
        /// Per-type map from bucket key to the list of recorded shares. The entry for
        /// <paramref name="sampleSkylineType"/> is extended; the entries for <c>EntireDb</c> and
        /// <c>EntireSkyline</c> are enumerated as reference key sets.
        /// </param>
        /// <param name="sampleSkylineType">The type whose series are extended.</param>
        /// <param name="skylineTypeBuckets">Buckets of the skyline being analysed, keyed by bucket id.</param>
        /// <param name="skylineCount">Divisor applied to the sizes of the five largest buckets.</param>
        /// <param name="entireDbCount">Divisor applied to buckets picked up via the <c>EntireDb</c> key set.</param>
        /// <param name="entireSkylineCount">Divisor applied to buckets picked up via the <c>EntireSkyline</c> key set.</param>
        private static void FillTopBuckets(
            Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisTopBuckets,
            ClusterAnalysis sampleSkylineType,
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> skylineTypeBuckets,
            int skylineCount, int entireDbCount, int entireSkylineCount)
        {
            // Five largest buckets; ties are broken by ascending bucket key so the selection is deterministic.
            List<KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>>> sortedTop5 =
                skylineTypeBuckets.OrderByDescending(l => l.Value.Count).ThenBy(l => l.Key).Take(5).ToList();

            foreach (KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>> topBucket in sortedTop5)
            {
                double percent = (double) topBucket.Value.Count / skylineCount;
                GetOrAddTopBucketSeries(clusterAnalysisTopBuckets[sampleSkylineType], topBucket.Key).Add(percent);
            }

            // NOTE(review): buckets reached through the reference key sets are divided by the reference
            // counts, not by skylineCount as above. This is kept exactly as in the original implementation;
            // confirm that the differing denominators are intended.
            FillReferenceTopBuckets(clusterAnalysisTopBuckets, sampleSkylineType, ClusterAnalysis.EntireDb,
                skylineTypeBuckets, sortedTop5, entireDbCount);
            FillReferenceTopBuckets(clusterAnalysisTopBuckets, sampleSkylineType, ClusterAnalysis.EntireSkyline,
                skylineTypeBuckets, sortedTop5, entireSkylineCount);
        }

        /// <summary>
        /// For every bucket key tracked under <paramref name="referenceType"/>, makes sure the series of
        /// <paramref name="sampleSkylineType"/> has an entry for that key and appends 0 when the bucket does
        /// not occur in <paramref name="skylineTypeBuckets"/>, or its size divided by
        /// <paramref name="referenceCount"/> when it occurs but is not one of <paramref name="sortedTop5"/>.
        /// </summary>
        private static void FillReferenceTopBuckets(
            Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisTopBuckets,
            ClusterAnalysis sampleSkylineType,
            ClusterAnalysis referenceType,
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> skylineTypeBuckets,
            List<KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>>> sortedTop5,
            int referenceCount)
        {
            foreach (KeyValuePair<BigInteger, List<double>> referenceBucket in
                clusterAnalysisTopBuckets[referenceType])
            {
                BigInteger bucketKey = referenceBucket.Key;

                // When referenceType equals sampleSkylineType the key always exists already, so the
                // dictionary being enumerated is never structurally modified here.
                List<double> series =
                    GetOrAddTopBucketSeries(clusterAnalysisTopBuckets[sampleSkylineType], bucketKey);

                List<IReadOnlyDictionary<long, object[]>> bucketContent;
                if (!skylineTypeBuckets.TryGetValue(bucketKey, out bucketContent))
                {
                    // not contained => percentage = 0
                    series.Add(0);
                }
                else if (sortedTop5.All(item => item.Key != bucketKey))
                {
                    // Buckets that are part of the top five were already recorded by the caller.
                    series.Add((double) bucketContent.Count / referenceCount);
                }
            }
        }

        /// <summary>
        /// Returns the series stored under <paramref name="bucketKey"/>, creating and registering an empty
        /// one first when the key is not present yet.
        /// </summary>
        private static List<double> GetOrAddTopBucketSeries(
            Dictionary<BigInteger, List<double>> typeBuckets, BigInteger bucketKey)
        {
            List<double> series;
            if (!typeBuckets.TryGetValue(bucketKey, out series))
            {
                series = new List<double>();
                typeBuckets.Add(bucketKey, series);
            }

            return series;
        }