/// <summary>
/// Checks that the skyline computed from the sampling statement contains no object that is missing from the
/// skyline computed from the entire-skyline statement.
/// </summary>
/// <remarks>
/// The two statements and a comment are read from the current <c>TestContext.DataRow</c> (columns
/// "skylineSampleSQL", "entireSkylineSQL" and "comment"). Both statements are evaluated through
/// <c>SQLParser.Helper.GetResults</c> with a BNL skyline, timings are written to the debug output, and the
/// id sets returned by <c>GetHashSetOfIdsFromDataTable</c> are compared with <c>IsSubsetOf</c>.
/// </remarks>
public void TestSamplingOnlyNonDominatedObjectsWithinSampleSkylineViaGetSkyline()
{
    string skylineSampleSQL = TestContext.DataRow["skylineSampleSQL"].ToString();
    string entireSkylineSQL = TestContext.DataRow["entireSkylineSQL"].ToString();
    string testComment = TestContext.DataRow["comment"].ToString();
    Debug.WriteLine(testComment);
    Debug.WriteLine(skylineSampleSQL);

    var common = new SQLCommon
    {
        SkylineType =
            new SkylineBNL() {Provider = Helper.ProviderName, ConnectionString = Helper.ConnectionString}
    };

    var prefSqlModelSkylineSample = common.GetPrefSqlModelFromPreferenceSql(skylineSampleSQL);
    var prefSqlModelEntireSkyline = common.GetPrefSqlModelFromPreferenceSql(entireSkylineSQL);

    var subjectUnderTest = new SQLParser.Helper
    {
        ConnectionString = Helper.ConnectionString,
        DriverString = Helper.ProviderName
    };

    // The stopwatch covers the ANSI SQL generation as well as the execution; the time reported by the
    // helper itself (TimeInMilliseconds) is logged separately.
    var sw = Stopwatch.StartNew();
    var entireSkyline =
        subjectUnderTest.GetResults(
            common.GetAnsiSqlFromPrefSqlModel(prefSqlModelEntireSkyline), common.SkylineType,
            prefSqlModelEntireSkyline, false);
    sw.Stop();
    Debug.WriteLine("ORIG ElapsedMilliseconds={0}", sw.ElapsedMilliseconds);
    Debug.WriteLine("ORIG Algorithm ElapsedMilliseconds={0}", subjectUnderTest.TimeInMilliseconds);

    sw.Restart();
    var sampleSkyline =
        subjectUnderTest.GetResults(
            common.GetAnsiSqlFromPrefSqlModel(prefSqlModelSkylineSample), common.SkylineType,
            prefSqlModelSkylineSample, false);
    sw.Stop();
    Debug.WriteLine("SMPL ElapsedMilliseconds={0}", sw.ElapsedMilliseconds);
    Debug.WriteLine("SMPL Algorithm ElapsedMilliseconds={0}", subjectUnderTest.TimeInMilliseconds);

    var entireSkylineObjectsIds = GetHashSetOfIdsFromDataTable(entireSkyline);
    var sampleSkylineObjectsIds = GetHashSetOfIdsFromDataTable(sampleSkyline);

    Debug.WriteLine("ORIG Count={0}", entireSkylineObjectsIds.Count);
    Debug.WriteLine("SMPL Count={0}", sampleSkylineObjectsIds.Count);

    // The message is kept as one literal: a regular C# string literal must not contain a raw line break.
    Assert.IsTrue(sampleSkylineObjectsIds.IsSubsetOf(entireSkylineObjectsIds),
        "Dominated objects contained in Sample Skyline (i.e., objects which are not contained in the entire Skyline).");
}
/// <summary>
/// Checks that the entire skyline of the current data row has the expected number of objects.
/// </summary>
/// <remarks>
/// Reads "entireSkylineSQL", "comment" and "entireCount" from the current <c>TestContext.DataRow</c>,
/// evaluates the statement through <c>SQLParser.Helper.GetResults</c> with a BNL skyline and compares the
/// row count, rendered with the invariant culture, with the expected value as text.
/// </remarks>
public void TestSamplingObjectsWithinEntireSkylineCount()
{
    string statement = TestContext.DataRow["entireSkylineSQL"].ToString();
    string comment = TestContext.DataRow["comment"].ToString();
    Debug.WriteLine(comment);

    var sqlCommon = new SQLCommon();
    sqlCommon.SkylineType = new SkylineBNL();
    var model = sqlCommon.GetPrefSqlModelFromPreferenceSql(statement);

    var helper = new SQLParser.Helper();
    helper.ConnectionString = Helper.ConnectionString;
    helper.DriverString = Helper.ProviderName;

    var ansiSql = sqlCommon.GetAnsiSqlFromPrefSqlModel(model);
    var skyline = helper.GetResults(ansiSql, sqlCommon.SkylineType, model, false);

    // Both sides are compared as strings, exactly as they appear in the data row.
    string expectedCount = TestContext.DataRow["entireCount"].ToString();
    string actualCount = skyline.Rows.Count.ToString(CultureInfo.InvariantCulture);

    Assert.AreEqual(expectedCount, actualCount, "Entire Skyline contains unexpected number of objects.");
}
/// <summary>
/// Executes a plain SQL statement that selects the skyline expressions of the given preferences, either
/// as they are or wrapped in COUNT(DISTINCT ...).
/// </summary>
/// <param name="preferences">
/// Preference clauses; they are joined with commas into the SKYLINE OF clause of a preference statement
/// that is parsed only to obtain the skyline expressions.
/// </param>
/// <param name="cardinality">
/// When true every expression is selected as COUNT(DISTINCT expression); when false it is selected as is.
/// </param>
/// <returns>The result of <c>Helper.ExecuteStatement</c> for the generated statement.</returns>
private DataTable GetSQLFromPreferences(ArrayList preferences, bool cardinality)
{
    // Table used in the preference statement; stays empty when TableSize is none of the three sizes.
    string sizedTable = "";
    if (TableSize == Size.Small)
    {
        sizedTable = "cars_small";
    }
    else if (TableSize == Size.Medium)
    {
        sizedTable = "cars_medium";
    }
    else if (TableSize == Size.Large)
    {
        sizedTable = "cars_large";
    }

    string preferenceSql = "SELECT cars.id FROM " + sizedTable + " cars " + "SKYLINE OF ";
    foreach (object preference in preferences)
    {
        preferenceSql += preference + ",";
    }
    preferenceSql = preferenceSql.TrimEnd(',');

    PrefSQLModel model = new SQLCommon().GetPrefSqlModelFromPreferenceSql(preferenceSql);

    string selectList = "SELECT ";
    for (int index = 0; index < model.Skyline.Count; index++)
    {
        var expression = model.Skyline[index].Expression;
        selectList += cardinality ? "COUNT(DISTINCT " + expression + ")," : expression + ",";
    }

    // NOTE(review): the executed statement always reads FROM cars, whereas the parsed preference statement
    // uses the size-specific table aliased as cars - confirm that this is intended.
    string statement = selectList.TrimEnd(',') + " FROM cars ";
    statement += GetJoinsForPreferences(statement);

    return Helper.ExecuteStatement(statement);
}
/// <summary>
/// Checks that the statements generated for the BNL, BNLSort, Hexagon and DQ skyline types return the same
/// number of tuples as the native SQL skyline when executed directly on a SQL Server connection.
/// </summary>
/// <remarks>
/// The preference statement is read from the "skylineSQL" column of the current <c>TestContext.DataRow</c>.
/// The Hexagon comparison is skipped when the model contains an open preference; the DQ statement is
/// neither executed nor compared when the model works with incomparable tuples. Any exception raised while
/// the connection is in use fails the test with a "Connection failed:" message.
/// </remarks>
public void TestSkylineAmountOfTupelsMSSQLCLR()
{
    string skylineSampleSql = TestContext.DataRow["skylineSQL"].ToString();

    SQLCommon common = new SQLCommon();
    common.SkylineType = new SkylineSQL();
    PrefSQLModel model = common.GetPrefSqlModelFromPreferenceSql(skylineSampleSql);
    string sqlNative = common.GetAnsiSqlFromPrefSqlModel(model);
    common.SkylineType = new SkylineBNL();
    string sqlBNL = common.ParsePreferenceSQL(skylineSampleSql);
    common.SkylineType = new SkylineBNLSort();
    string sqlBNLSort = common.ParsePreferenceSQL(skylineSampleSql);
    common.SkylineType = new SkylineHexagon();
    string sqlHexagon = common.ParsePreferenceSQL(skylineSampleSql);
    //D&Q does not run with CLR
    common.SkylineType = new SkylineDQ();
    string sqlDQ = common.ParsePreferenceSQL(skylineSampleSql);

    //D&Q does not work with incomparable tuples
    bool runDQ = model.WithIncomparable == false;

    int amountOfTupelsBNL = 0;
    int amountOfTupelsBNLSort = 0;
    int amountOfTupelsSQL = 0;
    int amountOfTupelsHexagon = 0;
    int amountOfTupelsDQ = 0;

    // The using blocks release connection, command and readers even when one of the queries throws.
    using (SqlConnection cnnSQL = new SqlConnection(Helper.ConnectionString))
    {
        cnnSQL.InfoMessage += cnnSQL_InfoMessage;
        try
        {
            cnnSQL.Open();

            using (DbCommand command = cnnSQL.CreateCommand())
            {
                command.CommandTimeout = 0; //infinite timeout

                amountOfTupelsSQL = CountResultTuples(command, sqlNative);
                amountOfTupelsBNL = CountResultTuples(command, sqlBNL);
                amountOfTupelsBNLSort = CountResultTuples(command, sqlBNLSort);
                // Hexagon is always executed; only its comparison below is conditional.
                amountOfTupelsHexagon = CountResultTuples(command, sqlHexagon);

                if (runDQ)
                {
                    amountOfTupelsDQ = CountResultTuples(command, sqlDQ);
                }
            }
        }
        catch (Exception ex)
        {
            Assert.Fail("Connection failed:" + ex.Message);
        }
    }

    int currentDataRowIndex = TestContext.DataRow.Table.Rows.IndexOf(TestContext.DataRow);

    //Check tuples (every algorithm should deliver the same amount of tuples)
    // The counts are compared as integers instead of going through the float/delta overload.
    Assert.AreEqual(amountOfTupelsSQL, amountOfTupelsBNLSort,
        "BNLSort Amount of tupels in query " + currentDataRowIndex + " do not match");
    Assert.AreEqual(amountOfTupelsSQL, amountOfTupelsBNL,
        "BNL Amount of tupels in query " + currentDataRowIndex + " do not match");

    //Hexagon cannot handle Categorical preference that have no explicit OTHERS
    if (model.ContainsOpenPreference == false)
    {
        Assert.AreEqual(amountOfTupelsSQL, amountOfTupelsHexagon,
            "Hexagon Amount of tupels in query " + currentDataRowIndex + " do not match");
    }

    //D&Q does not work with incomparable tuples
    if (runDQ)
    {
        Assert.AreEqual(amountOfTupelsSQL, amountOfTupelsDQ,
            "D&Q Amount of tupels in query " + currentDataRowIndex + " do not match");
    }
}

/// <summary>
/// Executes the given statement on the given command and returns the number of rows the reader yields.
/// </summary>
/// <param name="command">Command bound to an open connection; its CommandText is overwritten.</param>
/// <param name="sql">Statement to execute.</param>
/// <returns>Number of rows read; 0 when the result is empty.</returns>
private static int CountResultTuples(DbCommand command, string sql)
{
    command.CommandText = sql;

    using (DbDataReader reader = command.ExecuteReader())
    {
        int count = 0;
        while (reader.Read())
        {
            count++;
        }

        return count;
    }
}
/// <summary>
/// Checks that the BNL, BNLSort, Hexagon and DQ skyline types return as many rows as the native SQL skyline
/// when the preference statement is parsed and executed into a DataTable.
/// </summary>
/// <remarks>
/// The preference statement is read from the "skylineSQL" column of the current <c>TestContext.DataRow</c>.
/// Hexagon is only executed and compared when the model contains no open preference; DQ only when the model
/// does not work with incomparable tuples.
/// </remarks>
public void TestSkylineAmountOfTupelsDataTable()
{
    string skylineSampleSql = TestContext.DataRow["skylineSQL"].ToString();

    SQLCommon common = new SQLCommon();
    common.SkylineType = new SkylineSQL();
    PrefSQLModel model = common.GetPrefSqlModelFromPreferenceSql(skylineSampleSql);
    DataTable dtNative = common.ExecuteFromPrefSqlModel(Helper.ConnectionString, Helper.ProviderName, model);

    common.SkylineType = new SkylineBNL();
    DataTable dtBNL = common.ParseAndExecutePrefSQL(Helper.ConnectionString, Helper.ProviderName,
        skylineSampleSql);

    common.SkylineType = new SkylineBNLSort();
    DataTable dtBNLSort = common.ParseAndExecutePrefSQL(Helper.ConnectionString, Helper.ProviderName,
        skylineSampleSql);

    //Hexagon cannot handle Categorical preference that have no explicit OTHERS
    bool runHexagon = model.ContainsOpenPreference == false;
    DataTable dtHexagon = null;
    if (runHexagon)
    {
        common.SkylineType = new SkylineHexagon();
        dtHexagon = common.ParseAndExecutePrefSQL(Helper.ConnectionString, Helper.ProviderName,
            skylineSampleSql);
    }

    //D&Q does not work with incomparable tuples
    bool runDQ = model.WithIncomparable == false;
    DataTable dtDQ = null;
    if (runDQ)
    {
        common.SkylineType = new SkylineDQ();
        dtDQ = common.ParseAndExecutePrefSQL(Helper.ConnectionString, Helper.ProviderName, skylineSampleSql);
    }

    int currentDataRowIndex = TestContext.DataRow.Table.Rows.IndexOf(TestContext.DataRow);

    //Check tuples (every algorithm should deliver the same amount of tuples)
    // The row counts are compared as integers instead of going through the float/delta overload.
    Assert.AreEqual(dtNative.Rows.Count, dtBNL.Rows.Count,
        "BNL Amount of tupels in query " + currentDataRowIndex + " do not match");
    Assert.AreEqual(dtNative.Rows.Count, dtBNLSort.Rows.Count,
        "BNLSort Amount of tupels in query " + currentDataRowIndex + " do not match");

    if (runHexagon)
    {
        Assert.AreEqual(dtNative.Rows.Count, dtHexagon.Rows.Count,
            "Hexagon Amount of tupels in query " + currentDataRowIndex + " do not match");
    }

    if (runDQ)
    {
        Assert.AreEqual(dtNative.Rows.Count, dtDQ.Rows.Count,
            "D&Q Amount of tupels in query " + currentDataRowIndex + " do not match");
    }
}
/// <summary>
/// Runs the sampling skyline once for every subset combination returned by <c>ProduceSubsets</c>, collects
/// size and timing figures (and, when <c>ExcessiveTests</c> is set, quality measures) and returns the
/// report line produced by <c>FormatLineString</c>.
/// </summary>
/// <remarks>
/// Steps, in execution order:
/// 1. Execute the reference skyline query. With <c>ExcessiveTests</c> the plain statement is used,
///    otherwise the statement with the SAMPLE BY RANDOM_SUBSETS clause appended.
/// 2. Build bucket statistics (regular and median based) for the entire database and the reference skyline.
/// 3. Append the sampling clause to the statement, parse it and split the ANSI SQL into query and operators.
/// 4. For every produced subset combination: compute the sample skyline, record row count and times and,
///    with <c>ExcessiveTests</c>, compare it against a second random sample and the best-rank / sum-rank
///    selections (set coverage, representation error, dominated objects, cluster analysis).
/// 5. Add the averages to the report lists, push the per-subset values into the instance-level report
///    structures and format the result line.
/// An empty result of <c>ProduceSubsets</c> makes the Average/Min/Max calls in step 5 throw.
/// NOTE(review): reportNumberOfMoves and reportNumberOfComparisons are never written here, although the
/// other report lists are - confirm that the caller fills them.
/// </remarks>
/// <param name="iTrial">Not used by this method.</param>
/// <param name="iPreferenceIndex">Not used by this method.</param>
/// <param name="listPreferences">Not used by this method.</param>
/// <param name="preferences">Preferences passed to <c>ProduceSubsets</c>; their count is reported as dimension.</param>
/// <param name="parser">Parser used to execute and parse the statements; its SkylineType is the strategy of every sample run.</param>
/// <param name="sw">Stopwatch that is restarted and stopped around every sample skyline computation.</param>
/// <param name="reportDimensions">Receives <c>preferences.Count</c>.</param>
/// <param name="reportSkylineSize">Receives the rounded average sample skyline row count.</param>
/// <param name="reportTimeTotal">Receives the rounded average stopwatch time per sample.</param>
/// <param name="reportTimeAlgorithm">Receives the rounded average of <c>SkylineSampling.TimeMilliseconds</c>.</param>
/// <param name="reportMinCorrelation">Receives <paramref name="minCorrelation"/>.</param>
/// <param name="reportMaxCorrelation">Receives <paramref name="maxCorrelation"/>.</param>
/// <param name="minCorrelation">Passed through to the report list and the result line.</param>
/// <param name="maxCorrelation">Passed through to the report list and the result line.</param>
/// <param name="reportCardinality">Receives <paramref name="cardinality"/>.</param>
/// <param name="cardinality">Passed through to the report list and the result line.</param>
/// <param name="strSQL">Preference statement without sampling clause.</param>
/// <param name="strPreferenceSet">Label written to the console progress output and the result line.</param>
/// <param name="strTrial">Label passed to the result line.</param>
/// <param name="reportNumberOfMoves">Not used by this method.</param>
/// <param name="numberOfMoves">Passed through to the result line.</param>
/// <param name="reportNumberOfComparisons">Not used by this method.</param>
/// <param name="numberOfComparisons">Passed through to the result line.</param>
/// <returns>The line returned by <c>FormatLineString</c>.</returns>
internal string MeasurePerformance(int iTrial, int iPreferenceIndex, ArrayList listPreferences,
    ArrayList preferences, SQLCommon parser, Stopwatch sw, List<long> reportDimensions,
    List<long> reportSkylineSize, List<long> reportTimeTotal, List<long> reportTimeAlgorithm,
    List<double> reportMinCorrelation, List<double> reportMaxCorrelation, double minCorrelation,
    double maxCorrelation, List<double> reportCardinality, double cardinality, string strSQL,
    string strPreferenceSet, string strTrial, List<long> reportNumberOfMoves, long numberOfMoves,
    List<long> reportNumberOfComparisons, long numberOfComparisons)
{
    Dictionary<ClusterAnalysis, List<List<double>>> clusterAnalysis;
    Dictionary<ClusterAnalysis, List<List<double>>> clusterAnalysisMedian;
    Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisTopBuckets;
    Dictionary<ClusterAnalysis, Dictionary<BigInteger, List<double>>> clusterAnalysisMedianTopBuckets;

    List<IEnumerable<CLRSafeHashSet<int>>> producedSubsets = ProduceSubsets(preferences);

    InitClusterAnalysisDataStructures(out clusterAnalysis);
    InitClusterAnalysisDataStructures(out clusterAnalysisMedian);
    InitClusterAnalysisTopBucketsDataStructures(out clusterAnalysisTopBuckets);
    InitClusterAnalysisTopBucketsDataStructures(out clusterAnalysisMedianTopBuckets);

    // Reference skyline: the real skyline for excessive tests, otherwise a sampled one.
    DataTable entireSkylineDataTable = parser.ParseAndExecutePrefSQL(Helper.ConnectionString,
        Helper.ProviderName,
        ExcessiveTests
            ? strSQL
            : strSQL + " SAMPLE BY RANDOM_SUBSETS COUNT " + SubsetsCount + " DIMENSION " + SubsetDimension);

    // Must be read directly after the reference query, before the parser executes anything else.
    List<long[]> entireDataTableSkylineValues = parser.SkylineType.Strategy.SkylineValues;

    int[] skylineAttributeColumns = SkylineSamplingHelper.GetSkylineAttributeColumns(entireSkylineDataTable);

    // Two separate lookups over the same table: one is kept as is, the other one is normalized.
    IReadOnlyDictionary<long, object[]> entireSkylineDatabase =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(entireSkylineDataTable, 0);
    IReadOnlyDictionary<long, object[]> entireSkylineNormalized =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(entireSkylineDataTable, 0);
    SkylineSamplingHelper.NormalizeColumns(entireSkylineNormalized, skylineAttributeColumns);

    DataTable entireDataTable;
    IReadOnlyDictionary<long, object[]> entireDatabaseNormalized = GetEntireDatabaseNormalized(parser,
        strSQL, skylineAttributeColumns, out entireDataTable);
    IReadOnlyDictionary<long, object[]> entireDatabase =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(entireDataTable, 0);

    // Bucket statistics of the entire database.
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireDatabaseBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireDatabaseNormalized, skylineAttributeColumns);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedEntireDatabaseBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireDatabaseBuckets);

    AddLargestBucketShares(clusterAnalysisTopBuckets[ClusterAnalysis.EntireDb], entireDatabaseBuckets,
        entireDatabaseNormalized.Count, producedSubsets.Count);

    // Bucket statistics of the reference skyline.
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireSkylineBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineNormalized, skylineAttributeColumns);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedEntireSkylineBuckets =
        prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireSkylineBuckets);

    FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.EntireSkyline, entireSkylineBuckets,
        entireSkylineNormalized.Count, entireDatabaseNormalized.Count, entireSkylineNormalized.Count);
    RepeatFirstShareForEachSubset(clusterAnalysisTopBuckets[ClusterAnalysis.EntireSkyline],
        producedSubsets.Count);

    // The same statistics again, with the median based bucket assignment.
    var clusterAnalysisForMedian = new prefSQL.Evaluation.ClusterAnalysis(entireDatabaseNormalized,
        skylineAttributeColumns);

    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireDatabaseMedianBuckets =
        clusterAnalysisForMedian.GetBuckets(entireDatabaseNormalized, skylineAttributeColumns, true);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
        aggregatedEntireDatabaseMedianBuckets =
            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireDatabaseMedianBuckets);

    AddLargestBucketShares(clusterAnalysisMedianTopBuckets[ClusterAnalysis.EntireDb],
        entireDatabaseMedianBuckets, entireDatabaseNormalized.Count, producedSubsets.Count);

    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> entireSkylineMedianBuckets =
        clusterAnalysisForMedian.GetBuckets(entireSkylineNormalized, skylineAttributeColumns, true);
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
        aggregatedEntireSkylineMedianBuckets =
            prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(entireSkylineMedianBuckets);

    FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.EntireSkyline,
        entireSkylineMedianBuckets, entireSkylineNormalized.Count, entireDatabaseNormalized.Count,
        entireSkylineNormalized.Count);
    RepeatFirstShareForEachSubset(clusterAnalysisMedianTopBuckets[ClusterAnalysis.EntireSkyline],
        producedSubsets.Count);

    // From here on the statement always carries the sampling clause.
    strSQL += " SAMPLE BY RANDOM_SUBSETS COUNT " + SubsetsCount + " DIMENSION " + SubsetDimension;

    string strQuery;
    string operators;
    int numberOfRecords;
    string[] parameter;

    PrefSQLModel prefSqlModel = parser.GetPrefSqlModelFromPreferenceSql(strSQL);
    string ansiSql = parser.GetAnsiSqlFromPrefSqlModel(prefSqlModel);
    prefSQL.SQLParser.Helper.DetermineParameters(ansiSql, out parameter, out strQuery, out operators,
        out numberOfRecords);

    // One entry per produced subset combination.
    var subsetObjects = new List<long>();
    var subsetTime = new List<long>();
    var subsetTimeElapsed = new List<long>();
    var setCoverageSecondRandom = new List<double>();
    var setCoverageSample = new List<double>();
    var setCoverageBestRank = new List<double>();
    var setCoverageSumRank = new List<double>();

    var representationErrorSecondRandom = new List<double>();
    var representationErrorSample = new List<double>();
    var representationErrorBestRank = new List<double>();
    var representationErrorSumRank = new List<double>();

    var representationErrorSumSecondRandom = new List<double>();
    var representationErrorSumSample = new List<double>();
    var representationErrorSumBestRank = new List<double>();
    var representationErrorSumSumRank = new List<double>();

    var dominatedObjectsCountSecondRandom = new List<double>();
    var dominatedObjectsCountSample = new List<double>();
    var dominatedObjectsCountBestRank = new List<double>();
    var dominatedObjectsCountSumRank = new List<double>();

    var dominatedObjectsOfBestObjectSecondRandom = new List<double>();
    var dominatedObjectsOfBestObjectSample = new List<double>();
    var dominatedObjectsOfBestObjectBestRank = new List<double>();
    var dominatedObjectsOfBestObjectSumRank = new List<double>();

    var subsetCount = 1;
    foreach (IEnumerable<CLRSafeHashSet<int>> subset in producedSubsets)
    {
        Console.WriteLine(strPreferenceSet + " (" + subsetCount + " / " + producedSubsets.Count + ")");

        // Timed section: set-up and computation of the sample skyline for the fixed subsets.
        sw.Restart();
        var subsetsProducer = new FixedSkylineSamplingSubsetsProducer(subset);
        var utility = new SkylineSamplingUtility(subsetsProducer);
        var skylineSample = new SkylineSampling(utility)
        {
            SubsetCount = prefSqlModel.SkylineSampleCount,
            SubsetDimension = prefSqlModel.SkylineSampleDimension,
            SelectedStrategy = parser.SkylineType
        };

        DataTable sampleSkylineDataTable = skylineSample.GetSkylineTable(strQuery, operators);
        sw.Stop();

        subsetObjects.Add(sampleSkylineDataTable.Rows.Count);
        subsetTime.Add(skylineSample.TimeMilliseconds);
        subsetTimeElapsed.Add(sw.ElapsedMilliseconds);

        IReadOnlyDictionary<long, object[]> sampleSkylineDatabase =
            prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(sampleSkylineDataTable, 0);
        IReadOnlyDictionary<long, object[]> sampleSkylineNormalized =
            prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(sampleSkylineDataTable, 0);
        SkylineSamplingHelper.NormalizeColumns(sampleSkylineNormalized, skylineAttributeColumns);

        if (ExcessiveTests)
        {
            // Second random sample of the reference skyline with as many objects as the sample skyline.
            IReadOnlyDictionary<long, object[]> secondRandomSampleDatabase =
                SkylineSamplingHelper.GetRandomSample(entireSkylineDatabase,
                    sampleSkylineDataTable.Rows.Count);

            // Copy every value array so that normalizing does not touch the arrays of the sample above.
            var secondRandomSampleNormalizedToBeCreated = new Dictionary<long, object[]>();
            foreach (KeyValuePair<long, object[]> k in secondRandomSampleDatabase)
            {
                var newValue = new object[k.Value.Length];
                k.Value.CopyTo(newValue, 0);
                secondRandomSampleNormalizedToBeCreated.Add(k.Key, newValue);
            }
            IReadOnlyDictionary<long, object[]> secondRandomSampleNormalized =
                new ReadOnlyDictionary<long, object[]>(secondRandomSampleNormalizedToBeCreated);
            SkylineSamplingHelper.NormalizeColumns(secondRandomSampleNormalized, skylineAttributeColumns);

            // Rank based selections from the reference skyline (mode 1 feeds the best-rank figures,
            // mode 2 the sum-rank figures).
            IReadOnlyDictionary<long, object[]> entireSkylineDataTableBestRankDatabase;
            IReadOnlyDictionary<long, object[]> entireSkylineDataTableBestRankNormalized =
                GetEntireSkylineDataTableRankNormalized(entireSkylineDataTable.Copy(),
                    entireDataTableSkylineValues, skylineAttributeColumns,
                    sampleSkylineDataTable.Rows.Count, 1, out entireSkylineDataTableBestRankDatabase);

            IReadOnlyDictionary<long, object[]> entireSkylineDataTableSumRankDatabase;
            IReadOnlyDictionary<long, object[]> entireSkylineDataTableSumRankNormalized =
                GetEntireSkylineDataTableRankNormalized(entireSkylineDataTable.Copy(),
                    entireDataTableSkylineValues, skylineAttributeColumns,
                    sampleSkylineDataTable.Rows.Count, 2, out entireSkylineDataTableSumRankDatabase);

            // Set coverage of each candidate, measured against another random sample, in percent.
            IReadOnlyDictionary<long, object[]> baseRandomSampleNormalized =
                SkylineSamplingHelper.GetRandomSample(entireSkylineNormalized,
                    sampleSkylineDataTable.Rows.Count);

            double setCoverageCoveredBySecondRandomSample = SetCoverage.GetCoverage(
                baseRandomSampleNormalized, secondRandomSampleNormalized, skylineAttributeColumns) * 100.0;
            double setCoverageCoveredBySkylineSample = SetCoverage.GetCoverage(
                baseRandomSampleNormalized, sampleSkylineNormalized, skylineAttributeColumns) * 100.0;
            double setCoverageCoveredByEntireBestRank = SetCoverage.GetCoverage(
                baseRandomSampleNormalized, entireSkylineDataTableBestRankNormalized,
                skylineAttributeColumns) * 100.0;
            double setCoverageCoveredByEntireSumRank = SetCoverage.GetCoverage(
                baseRandomSampleNormalized, entireSkylineDataTableSumRankNormalized,
                skylineAttributeColumns) * 100.0;

            setCoverageSecondRandom.Add(setCoverageCoveredBySecondRandomSample);
            setCoverageSample.Add(setCoverageCoveredBySkylineSample);
            setCoverageBestRank.Add(setCoverageCoveredByEntireBestRank);
            setCoverageSumRank.Add(setCoverageCoveredByEntireSumRank);

            // Representation error of each candidate against the reduced reference skyline.
            Dictionary<long, double>.ValueCollection baseRepresentationErrorSecondRandomSample =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, secondRandomSampleNormalized),
                    secondRandomSampleNormalized, skylineAttributeColumns);
            Dictionary<long, double>.ValueCollection baseRepresentationErrorSkylineSample =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, sampleSkylineNormalized),
                    sampleSkylineNormalized, skylineAttributeColumns);
            Dictionary<long, double>.ValueCollection baseRepresentationErrorEntireBestRank =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, entireSkylineDataTableBestRankNormalized),
                    entireSkylineDataTableBestRankNormalized, skylineAttributeColumns);
            Dictionary<long, double>.ValueCollection baseRepresentationErrorEntireSumRank =
                SetCoverage.GetRepresentationError(
                    GetReducedSkyline(entireSkylineNormalized, entireSkylineDataTableSumRankNormalized),
                    entireSkylineDataTableSumRankNormalized, skylineAttributeColumns);

            representationErrorSecondRandom.Add(baseRepresentationErrorSecondRandomSample.Max() * 100.0);
            representationErrorSample.Add(baseRepresentationErrorSkylineSample.Max() * 100.0);
            representationErrorBestRank.Add(baseRepresentationErrorEntireBestRank.Max() * 100.0);
            representationErrorSumRank.Add(baseRepresentationErrorEntireSumRank.Max() * 100.0);

            representationErrorSumSecondRandom.Add(baseRepresentationErrorSecondRandomSample.Sum() * 100.0);
            representationErrorSumSample.Add(baseRepresentationErrorSkylineSample.Sum() * 100.0);
            representationErrorSumBestRank.Add(baseRepresentationErrorEntireBestRank.Sum() * 100.0);
            representationErrorSumSumRank.Add(baseRepresentationErrorEntireSumRank.Sum() * 100.0);

            // Dominated objects of the entire database per candidate.
            var dominatedObjectsCountRandomSample = new DominatedObjects(entireDatabase,
                secondRandomSampleDatabase, skylineAttributeColumns);
            var dominatedObjectsCountSampleSkyline = new DominatedObjects(entireDatabase,
                sampleSkylineDatabase, skylineAttributeColumns);
            var dominatedObjectsCountEntireSkylineBestRank = new DominatedObjects(entireDatabase,
                entireSkylineDataTableBestRankDatabase, skylineAttributeColumns);
            var dominatedObjectsCountEntireSkylineSumRank = new DominatedObjects(entireDatabase,
                entireSkylineDataTableSumRankDatabase, skylineAttributeColumns);

            dominatedObjectsCountSecondRandom.Add(
                dominatedObjectsCountRandomSample.NumberOfDistinctDominatedObjects);
            dominatedObjectsCountSample.Add(
                dominatedObjectsCountSampleSkyline.NumberOfDistinctDominatedObjects);
            dominatedObjectsCountBestRank.Add(
                dominatedObjectsCountEntireSkylineBestRank.NumberOfDistinctDominatedObjects);
            dominatedObjectsCountSumRank.Add(
                dominatedObjectsCountEntireSkylineSumRank.NumberOfDistinctDominatedObjects);

            // First() throws when a candidate yields no entry at all.
            dominatedObjectsOfBestObjectSecondRandom.Add(
                dominatedObjectsCountRandomSample
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
            dominatedObjectsOfBestObjectSample.Add(
                dominatedObjectsCountSampleSkyline
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
            dominatedObjectsOfBestObjectBestRank.Add(
                dominatedObjectsCountEntireSkylineBestRank
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);
            dominatedObjectsOfBestObjectSumRank.Add(
                dominatedObjectsCountEntireSkylineSumRank
                    .NumberOfObjectsDominatedByEachObjectOrderedByDescCount.First().Value);

            // Cluster analysis with the regular bucket assignment.
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sampleBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(sampleSkylineNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedSampleBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sampleBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> randomBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(secondRandomSampleNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedRandomBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(randomBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> bestRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineDataTableBestRankNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedBestRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(bestRankBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sumRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetBuckets(entireSkylineDataTableSumRankNormalized,
                    skylineAttributeColumns);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedSumRankBuckets =
                prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sumRankBuckets);

            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.SampleSkyline, sampleBuckets,
                sampleSkylineNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.RandomSkyline, randomBuckets,
                secondRandomSampleNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.BestRank, bestRankBuckets,
                entireSkylineDataTableBestRankNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisTopBuckets, ClusterAnalysis.SumRank, sumRankBuckets,
                entireSkylineDataTableSumRankNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);

            // Cluster analysis with the median based bucket assignment.
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> sampleMedianBuckets =
                clusterAnalysisForMedian.GetBuckets(sampleSkylineNormalized, skylineAttributeColumns, true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedSampleMedianBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sampleMedianBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> randomMedianBuckets =
                clusterAnalysisForMedian.GetBuckets(secondRandomSampleNormalized, skylineAttributeColumns,
                    true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedRandomMedianBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(randomMedianBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                bestRankMedianBuckets =
                    clusterAnalysisForMedian.GetBuckets(entireSkylineDataTableBestRankNormalized,
                        skylineAttributeColumns, true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedBestRankMedianBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(bestRankMedianBuckets);
            IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>>
                sumRankMedianBuckets =
                    clusterAnalysisForMedian.GetBuckets(entireSkylineDataTableSumRankNormalized,
                        skylineAttributeColumns, true);
            IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>>
                aggregatedSumRankMedianBuckets =
                    prefSQL.Evaluation.ClusterAnalysis.GetAggregatedBuckets(sumRankMedianBuckets);

            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.SampleSkyline,
                sampleMedianBuckets, sampleSkylineNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.RandomSkyline,
                randomMedianBuckets, secondRandomSampleNormalized.Count, entireDatabaseNormalized.Count,
                entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.BestRank,
                bestRankMedianBuckets, entireSkylineDataTableBestRankNormalized.Count,
                entireDatabaseNormalized.Count, entireSkylineNormalized.Count);
            FillTopBuckets(clusterAnalysisMedianTopBuckets, ClusterAnalysis.SumRank,
                sumRankMedianBuckets, entireSkylineDataTableSumRankNormalized.Count,
                entireDatabaseNormalized.Count, entireSkylineNormalized.Count);

            // Share of objects per aggregation level; one level per skyline attribute column.
            int levelCount = skylineAttributeColumns.Length;

            clusterAnalysis[ClusterAnalysis.EntireDb].Add(GetAggregatedBucketShares(
                aggregatedEntireDatabaseBuckets, entireDatabaseNormalized.Count, levelCount));
            clusterAnalysis[ClusterAnalysis.EntireSkyline].Add(GetAggregatedBucketShares(
                aggregatedEntireSkylineBuckets, entireSkylineNormalized.Count, levelCount));
            clusterAnalysis[ClusterAnalysis.SampleSkyline].Add(GetAggregatedBucketShares(
                aggregatedSampleBuckets, sampleSkylineNormalized.Count, levelCount));
            clusterAnalysis[ClusterAnalysis.RandomSkyline].Add(GetAggregatedBucketShares(
                aggregatedRandomBuckets, secondRandomSampleNormalized.Count, levelCount));
            clusterAnalysis[ClusterAnalysis.BestRank].Add(GetAggregatedBucketShares(
                aggregatedBestRankBuckets, entireSkylineDataTableBestRankNormalized.Count, levelCount));
            clusterAnalysis[ClusterAnalysis.SumRank].Add(GetAggregatedBucketShares(
                aggregatedSumRankBuckets, entireSkylineDataTableSumRankNormalized.Count, levelCount));

            clusterAnalysisMedian[ClusterAnalysis.EntireDb].Add(GetAggregatedBucketShares(
                aggregatedEntireDatabaseMedianBuckets, entireDatabaseNormalized.Count, levelCount));
            clusterAnalysisMedian[ClusterAnalysis.EntireSkyline].Add(GetAggregatedBucketShares(
                aggregatedEntireSkylineMedianBuckets, entireSkylineNormalized.Count, levelCount));
            clusterAnalysisMedian[ClusterAnalysis.SampleSkyline].Add(GetAggregatedBucketShares(
                aggregatedSampleMedianBuckets, sampleSkylineNormalized.Count, levelCount));
            clusterAnalysisMedian[ClusterAnalysis.RandomSkyline].Add(GetAggregatedBucketShares(
                aggregatedRandomMedianBuckets, secondRandomSampleNormalized.Count, levelCount));
            clusterAnalysisMedian[ClusterAnalysis.BestRank].Add(GetAggregatedBucketShares(
                aggregatedBestRankMedianBuckets, entireSkylineDataTableBestRankNormalized.Count,
                levelCount));
            clusterAnalysisMedian[ClusterAnalysis.SumRank].Add(GetAggregatedBucketShares(
                aggregatedSumRankMedianBuckets, entireSkylineDataTableSumRankNormalized.Count,
                levelCount));
        }

        subsetCount++;
    }

    Dictionary<ClusterAnalysis, string> clusterAnalysisStrings =
        GetClusterAnalysisStrings(skylineAttributeColumns, clusterAnalysis);
    Dictionary<ClusterAnalysis, string> clusterAnalysisMedianStrings =
        GetClusterAnalysisStrings(skylineAttributeColumns, clusterAnalysisMedian);
    Dictionary<ClusterAnalysis, string> clusterAnalysisTopBucketsStrings =
        GetClusterAnalysisTopBucketsStrings(clusterAnalysisTopBuckets, ExcessiveTests);
    Dictionary<ClusterAnalysis, string> clusterAnalysisMedianTopBucketsStrings =
        GetClusterAnalysisTopBucketsStrings(clusterAnalysisMedianTopBuckets, ExcessiveTests);

    // Averages rounded half up to whole numbers.
    var time = (long) (subsetTime.Average() + .5);
    var objects = (long) (subsetObjects.Average() + .5);
    var elapsed = (long) (subsetTimeElapsed.Average() + .5);

    Console.WriteLine("subsetTime");
    foreach (var i in subsetTime)
    {
        Console.WriteLine(i);
    }
    Console.WriteLine("");

    Console.WriteLine("subsetObjects");
    foreach (var i in subsetObjects)
    {
        Console.WriteLine(i);
    }
    Console.WriteLine("");

    reportDimensions.Add(preferences.Count);
    reportSkylineSize.Add(objects);
    reportTimeTotal.Add(elapsed);
    reportTimeAlgorithm.Add(time);
    reportMinCorrelation.Add(minCorrelation);
    reportMaxCorrelation.Add(maxCorrelation);
    reportCardinality.Add(cardinality);

    var setCoverageSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, setCoverageSecondRandom},
        {SkylineTypesSingle.Sample, setCoverageSample},
        {SkylineTypesSingle.BestRank, setCoverageBestRank},
        {SkylineTypesSingle.SumRank, setCoverageSumRank}
    };

    var representationErrorSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, representationErrorSecondRandom},
        {SkylineTypesSingle.Sample, representationErrorSample},
        {SkylineTypesSingle.BestRank, representationErrorBestRank},
        {SkylineTypesSingle.SumRank, representationErrorSumRank}
    };

    var representationErrorSumSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, representationErrorSumSecondRandom},
        {SkylineTypesSingle.Sample, representationErrorSumSample},
        {SkylineTypesSingle.BestRank, representationErrorSumBestRank},
        {SkylineTypesSingle.SumRank, representationErrorSumSumRank}
    };

    var dominatedObjectsCountSingle = new Dictionary<SkylineTypesSingle, List<double>>()
    {
        {SkylineTypesSingle.Random, dominatedObjectsCountSecondRandom},
        {SkylineTypesSingle.Sample, dominatedObjectsCountSample},
        {SkylineTypesSingle.BestRank, dominatedObjectsCountBestRank},
        {SkylineTypesSingle.SumRank, dominatedObjectsCountSumRank}
    };

    var dominatedObjectsOfBestObjectSingle = new Dictionary<SkylineTypesSingle, List<double>>
    {
        {SkylineTypesSingle.Random, dominatedObjectsOfBestObjectSecondRandom},
        {SkylineTypesSingle.Sample, dominatedObjectsOfBestObjectSample},
        {SkylineTypesSingle.BestRank, dominatedObjectsOfBestObjectBestRank},
        {SkylineTypesSingle.SumRank, dominatedObjectsOfBestObjectSumRank}
    };

    AddToReports(_reportsLong, subsetObjects, subsetTime, _reportsDouble);
    if (ExcessiveTests)
    {
        AddToSetCoverage(_setCoverage, setCoverageSingle);
        AddToSetCoverage(_representationError, representationErrorSingle);
        AddToSetCoverage(_representationErrorSum, representationErrorSumSingle);
        AddToSetCoverage(_dominatedObjectsCount, dominatedObjectsCountSingle);
        AddToSetCoverage(_dominatedObjectsOfBestObject, dominatedObjectsOfBestObjectSingle);
    }

    string strLine = FormatLineString(strPreferenceSet, strTrial, preferences.Count, objects, elapsed,
        time, subsetTime.Min(), subsetTime.Max(), MyMathematic.GetSampleVariance(subsetTime),
        MyMathematic.GetSampleStdDeviation(subsetTime), Mathematic.Median(subsetTime),
        Mathematic.LowerQuartile(subsetTime), Mathematic.UpperQuartile(subsetTime), subsetObjects.Min(),
        subsetObjects.Max(), MyMathematic.GetSampleVariance(subsetObjects),
        MyMathematic.GetSampleStdDeviation(subsetObjects), Mathematic.Median(subsetObjects),
        Mathematic.LowerQuartile(subsetObjects), Mathematic.UpperQuartile(subsetObjects),
        setCoverageSingle, representationErrorSingle, representationErrorSumSingle,
        dominatedObjectsCountSingle, dominatedObjectsOfBestObjectSingle, clusterAnalysisStrings,
        clusterAnalysisMedianStrings, clusterAnalysisTopBucketsStrings,
        clusterAnalysisMedianTopBucketsStrings, minCorrelation, maxCorrelation, cardinality,
        numberOfMoves, numberOfComparisons);
    return strLine;
}

/// <summary>
/// Registers the five buckets with the most objects (ties ordered by ascending key) in
/// <paramref name="target"/>. Each bucket's share of <paramref name="totalCount"/> is stored
/// <paramref name="repetitions"/> times, to enable generalized average calculation.
/// </summary>
private static void AddLargestBucketShares(Dictionary<BigInteger, List<double>> target,
    IReadOnlyDictionary<BigInteger, List<IReadOnlyDictionary<long, object[]>>> buckets, int totalCount,
    int repetitions)
{
    foreach (KeyValuePair<BigInteger, List<IReadOnlyDictionary<long, object[]>>> bucket in
        buckets.OrderByDescending(l => l.Value.Count).ThenBy(l => l.Key).Take(5))
    {
        double percent = (double) bucket.Value.Count / totalCount;

        var shares = new List<double>();
        target.Add(bucket.Key, shares);
        for (var i = 0; i < repetitions; i++)
        {
            shares.Add(percent);
        }
    }
}

/// <summary>
/// Appends the first share of every bucket to that bucket's list until the list built from one initial
/// entry holds <paramref name="repetitions"/> entries, to enable generalized average calculation.
/// </summary>
private static void RepeatFirstShareForEachSubset(Dictionary<BigInteger, List<double>> topBuckets,
    int repetitions)
{
    foreach (KeyValuePair<BigInteger, List<double>> bucket in topBuckets)
    {
        double percent = bucket.Value[0];
        for (var i = 1; i < repetitions; i++)
        {
            bucket.Value.Add(percent);
        }
    }
}

/// <summary>
/// Returns, for every level from 0 to <paramref name="levelCount"/> - 1, the number of entries in that
/// level's aggregated bucket divided by <paramref name="totalCount"/>; a missing level counts as 0.
/// </summary>
private static List<double> GetAggregatedBucketShares(
    IReadOnlyDictionary<int, List<IReadOnlyDictionary<long, object[]>>> aggregatedBuckets, int totalCount,
    int levelCount)
{
    var shares = new List<double>();
    for (var level = 0; level < levelCount; level++)
    {
        List<IReadOnlyDictionary<long, object[]>> bucket;
        int objectsInLevel = aggregatedBuckets.TryGetValue(level, out bucket) ? bucket.Count : 0;

        // Floating point division: a totalCount of 0 yields NaN or Infinity instead of throwing.
        shares.Add((double) objectsInLevel / totalCount);
    }

    return shares;
}
/// <summary>
/// Executes the plain (ANSI) query derived from the preference SQL statement <paramref name="strSQL"/>,
/// loads the result into <paramref name="dtEntire"/>, and returns the rows as a dictionary whose
/// skyline attribute columns have been normalized.
/// </summary>
/// <param name="parser">Used to translate the preference SQL into a model and then into ANSI SQL.</param>
/// <param name="strSQL">Preference SQL statement describing the query.</param>
/// <param name="skylineAttributeColumns">
/// Columns passed to <c>SkylineSamplingHelper.NormalizeColumns</c>; its length also determines how many
/// leading columns are removed from the fetched table.
/// </param>
/// <param name="dtEntire">Receives the fetched table after the leading columns have been removed.</param>
/// <returns>
/// The dictionary produced by <c>GetDatabaseAccessibleByUniqueId(dtEntire, 0)</c> after
/// <c>NormalizeColumns</c> has been applied to it.
/// </returns>
private static IReadOnlyDictionary<long, object[]> GetEntireDatabaseNormalized(SQLCommon parser,
    string strSQL, int[] skylineAttributeColumns, out DataTable dtEntire)
{
    // Use the factory object to create data access objects.
    DbProviderFactory factory = DbProviderFactories.GetFactory(Helper.ProviderName);

    // Single allocation; the table is populated by Fill below.
    dtEntire = new DataTable();

    // Connection, command and adapter are all IDisposable; the using statements guarantee the
    // connection is closed even when the query or the parsing throws.
    using (DbConnection connection = factory.CreateConnection()) // provider-specific connection (i.e. SqlConnection ...)
    {
        connection.ConnectionString = Helper.ConnectionString;
        connection.Open();

        using (DbDataAdapter dap = factory.CreateDataAdapter())
        using (DbCommand selectCommand = connection.CreateCommand())
        {
            selectCommand.CommandTimeout = 0; // infinite timeout

            string strQueryEntire;
            string operatorsEntire;
            int numberOfRecordsEntire;
            string[] parameterEntire;

            string ansiSqlEntire =
                parser.GetAnsiSqlFromPrefSqlModel(parser.GetPrefSqlModelFromPreferenceSql(strSQL));

            // Only the extracted query text is needed here; the remaining out values are ignored.
            prefSQL.SQLParser.Helper.DetermineParameters(ansiSqlEntire, out parameterEntire,
                out strQueryEntire, out operatorsEntire, out numberOfRecordsEntire);

            selectCommand.CommandText = strQueryEntire;
            dap.SelectCommand = selectCommand;
            dap.Fill(dtEntire);
        }
    }

    // Drop one leading column per skyline attribute.
    // NOTE(review): presumably these are the helper columns the parser prepends for the skyline
    // computation - confirm against the generated ANSI SQL.
    for (var ii = 0; ii < skylineAttributeColumns.Length; ii++)
    {
        dtEntire.Columns.RemoveAt(0);
    }

    IReadOnlyDictionary<long, object[]> entireDatabaseNormalized =
        prefSQL.SQLSkyline.Helper.GetDatabaseAccessibleByUniqueId(dtEntire, 0);
    SkylineSamplingHelper.NormalizeColumns(entireDatabaseNormalized, skylineAttributeColumns);

    return entireDatabaseNormalized;
}