// Tallies how many times a non-numeric attribute value is used in the workload queries.
//   v     - raw attribute value; it is passed through trimString before use
//   k     - attribute (column) name the value belongs to
//   times - amount to add to the running count for the (k, v) pair
// Values that parse as a double are ignored. For every counted pair, maxDict[k]
// is kept equal to the highest count seen so far for any value of attribute k.
static void freqCounter(string v, string k, int times)
{
    string cleaned = trimString(v);

    // Numeric values are not frequency-counted.
    if (double.TryParse(cleaned, out _))
    {
        return;
    }

    tuple<string, string> pair = new tuple<string, string>(k, cleaned);

    // A missing entry yields the default (zero), so a first occurrence ends up at `times`.
    freqDict.TryGetValue(pair, out var previous);
    var total = previous + times;
    freqDict[pair] = total;

    // Raise the per-attribute maximum when it is absent or has been exceeded.
    if (!maxDict.TryGetValue(k, out var best) || best < total)
    {
        maxDict[k] = total;
    }
}
// Scores every row of the autompg table against a query and returns the k best rows.
//   columns - attribute names used in the query; parallel to values
//   values  - requested value for each attribute in columns
//   k       - number of result slots to fill
// Returns an array of length k holding (row id, similarity score) tuples ordered by
// descending score. Slots that could not be filled keep the placeholder (-1, 0).
// When two rows have exactly the same score, the row with the higher "missing
// attribute" score (sum of log10 of the idfqf weights of its categorical values)
// is placed first.
//
// NOTE(review): columns[i], values[i] and row values are concatenated straight into
// the SQL text, and values[i] is inserted without surrounding quotes. Presumably the
// caller passes categorical values already quoted as SQL literals - confirm, and
// consider parameterized commands if the query text can come from untrusted input.
static tuple<int, double>[] calculateTopK(string[] columns, string[] values, int k)
{
    tuple<int, double>[] topKTuples = new tuple<int, double>[k];
    for (int i = 0; i < k; i++)
    {
        topKTuples[i] = new tuple<int, double>(-1, 0);
    }

    // Everything in this section depends only on the query, not on the row being
    // scored, so it is computed once up front instead of once per row.
    int attributeCount = values.Length;
    bool[] isNumeric = new bool[attributeCount];
    double[] queryNumber = new double[attributeCount];
    double[] bandwidth = new double[attributeCount];
    bool[] hasWeight = new bool[attributeCount];
    double[] weight = new double[attributeCount];

    for (int i = 0; i < attributeCount; i++)
    {
        isNumeric[i] = double.TryParse(values[i], out queryNumber[i]);

        // Stored idfqf weight for this (column, value) pair, if there is one.
        string sqlMeta = "select idfqf from idfqf where column = '" + columns[i] + "' and value = " + values[i];
        using (SQLiteCommand metaCom = new SQLiteCommand(sqlMeta, meta_dbConnection))
        using (SQLiteDataReader metaReader = metaCom.ExecuteReader())
        {
            if (metaReader.Read())
            {
                hasWeight[i] = true;
                weight[i] = (double)metaReader.GetValue(0);
            }
        }

        if (isNumeric[i])
        {
            bandwidth[i] = hColumnDict[columns[i]];

            // A numeric value without a stored weight gets an IDF computed from the data.
            if (!hasWeight[i])
            {
                weight[i] = estimateNumericIdf(columns[i], queryNumber[i], bandwidth[i]);
                hasWeight[i] = true;
            }
        }
    }

    Dictionary<int, double> MissingAttributesValues = new Dictionary<int, double>();

    string sql = "select * from autompg";
    using (SQLiteCommand com = new SQLiteCommand(sql, m_dbConnection))
    using (SQLiteDataReader reader = com.ExecuteReader())
    {
        while (reader.Read())
        {
            double simScore = 0;
            for (int i = 0; i < attributeCount; i++)
            {
                if (isNumeric[i])
                {
                    // numerical similarity: the row's value (t) against the query's value (q)
                    double innerproduct = ((double.Parse(reader[columns[i]].ToString()) - queryNumber[i]) / bandwidth[i]);
                    simScore += Math.Exp(-0.5 * innerproduct * innerproduct) * weight[i];
                }
                else if (hasWeight[i])
                {
                    // categorical similarity: coefficient from the jacquard table times the idfqf weight
                    string sqlJaq = "select jacq from jacquard where value_1 = " + values[i] + " and value_2 = '" + reader[columns[i]] + "'";
                    using (SQLiteCommand jaqCom = new SQLiteCommand(sqlJaq, meta_dbConnection))
                    using (SQLiteDataReader jaqReader = jaqCom.ExecuteReader())
                    {
                        if (jaqReader.Read())
                        {
                            simScore += (double)jaqReader.GetValue(0) * weight[i];
                        }
                    }
                }
                // A categorical value with no stored weight contributes nothing.
            }

            // Missing-attribute score, used only to break ties between equal similarity scores.
            double missingscore = 0;
            for (int i = 0; i < categoricalNames.Length; i++)
            {
                string sqlMissing = "select idfqf from idfqf where column = '" + categoricalNames[i] + "' and value = '" + reader[categoricalNames[i]] + "'";
                using (SQLiteCommand missingCom = new SQLiteCommand(sqlMissing, meta_dbConnection))
                using (SQLiteDataReader missingReader = missingCom.ExecuteReader())
                {
                    if (missingReader.Read())
                    {
                        missingscore += Math.Log10((double)missingReader.GetValue(0));
                    }
                }
            }

            int rowId = int.Parse(reader["id"].ToString());
            MissingAttributesValues.Add(rowId, missingscore);

            insertIntoTopK(topKTuples, MissingAttributesValues, rowId, simScore);
        }
    }

    return topKTuples;
}

// Computes log10(N / sum of kernel values) for a numeric query value q by scanning
// the given column of autompg, using the supplied bandwidth as the kernel width.
// N is read from outside this method (presumably the table's row count - confirm).
private static double estimateNumericIdf(string column, double q, double bandwidth)
{
    double denom = 0;
    string sql = "select " + column + " from autompg";
    using (SQLiteCommand idfcom = new SQLiteCommand(sql, m_dbConnection))
    using (SQLiteDataReader r = idfcom.ExecuteReader())
    {
        while (r.Read())
        {
            double ti = double.Parse(r[column].ToString());
            double diff = ti - q;
            denom += Math.Exp(-0.5 * (diff / bandwidth) * (diff / bandwidth));
        }
    }
    return Math.Log10(N / denom);
}

// Inserts (rowId, simScore) into the descending top-k array, shifting lower-ranked
// entries down by one and dropping the last. The row is placed in the first empty
// slot, or ahead of the first entry that has a lower score, or ahead of the first
// entry that has an equal score and a lower missing-attribute score.
private static void insertIntoTopK(tuple<int, double>[] topKTuples, Dictionary<int, double> missingScores, int rowId, double simScore)
{
    int slots = topKTuples.Length;
    for (int i = 0; i < slots; i++)
    {
        tuple<int, double> current = topKTuples[i];

        // Placeholder slot: nothing ranked here yet.
        if (current.first == -1)
        {
            topKTuples[i] = new tuple<int, double>(rowId, simScore);
            return;
        }

        bool winsTie = current.second == simScore && missingScores[current.first] < missingScores[rowId];
        if (winsTie || current.second < simScore)
        {
            for (int j = slots - 1; j > i; j--)
            {
                topKTuples[j] = topKTuples[j - 1];
            }
            topKTuples[i] = new tuple<int, double>(rowId, simScore);
            return;
        }
    }
}