/// <summary>
/// Connects to a meta database chosen from the content of textBox5 and runs the
/// matching MetaDbFiller steps.
/// </summary>
/// <remarks>
/// Three cases:
/// - textBox5 is empty: the user picks a file in a dialog; on OK the file is connected
///   via connect_To_SqlMeta, then CreateMetaTables and FillMetaDb are called.
/// - textBox5 names an existing file: it is connected and LoadMetaDB is called.
/// - otherwise: DatabaseConnection.CreateEmptyDb is called with the text and the result is
///   assigned to MetaDbFiller.dbConnection, then CreateMetaTables and FillMetaDb are called.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button6_Click(object sender, EventArgs e)
{
    // Read the text box once so all three branches decide on the same value.
    string metaDbPath = textBox5.Text;

    if (string.IsNullOrEmpty(metaDbPath))
    {
        // OpenFileDialog is IDisposable; the using block releases it on every path,
        // including when one of the calls below throws.
        using (OpenFileDialog openFileDialog3 = new OpenFileDialog())
        {
            openFileDialog3.InitialDirectory = "c:\\";
            openFileDialog3.Filter = "db files (*.db)|*.db|All files (*.*)|*.*";
            openFileDialog3.FilterIndex = 2; // preselect the "All files" filter entry
            openFileDialog3.RestoreDirectory = true;

            if (openFileDialog3.ShowDialog() == DialogResult.OK)
            {
                connect_To_SqlMeta(openFileDialog3.FileName);
                MessageBox.Show("Connection succeeded");
                MetaDbFiller.CreateMetaTables();
                MetaDbFiller.FillMetaDb();
                MessageBox.Show("MetaDB filling Process succeeded");
            }
        }
    }
    else if (File.Exists(metaDbPath))
    {
        connect_To_SqlMeta(metaDbPath);
        MessageBox.Show("Connection succeeded");
        MetaDbFiller.LoadMetaDB();
        // NOTE(review): this branch loads rather than fills, yet reports "filling" - confirm the wording.
        MessageBox.Show("MetaDB filling Process succeeded");
    }
    else
    {
        MetaDbFiller.dbConnection = DatabaseConnection.CreateEmptyDb(metaDbPath);
        MessageBox.Show("New metaDB file made");
        MetaDbFiller.CreateMetaTables();
        MetaDbFiller.FillMetaDb();
        MessageBox.Show("MetaDB filling Process succeeded");
    }
}
/// <summary>
/// Computes the query-frequency (QF) value of every non-numerical value of a column
/// from the workload and passes the result to MetaDbFiller.AddQfMetaTable.
/// </summary>
/// <remarks>
/// The raw frequency of a value is the sum of <c>times</c> over all workload queries whose
/// <c>requiredValues[columname]</c> contains it. The QF is that raw frequency divided by the
/// largest raw frequency found for the column. When no query mentions the column, every key
/// of <c>MetaDbFiller.idfs[columname]</c> receives a QF of 1.0.
/// </remarks>
/// <param name="Workload">The workload queries to scan.</param>
/// <param name="columname">Name of the column whose values are scored.</param>
/// <exception cref="InvalidOperationException">
/// A computed QF is NaN, or the column was never queried and its entry in
/// MetaDbFiller.idfs is not a string-keyed dictionary.
/// </exception>
public static void GetNonNumericalQfs(SQLQuery[] Workload, string columname)
{
    // Maps each distinct value to its slot in timeDictionary.
    Dictionary<object, int> pairing = new Dictionary<object, int>();
    // Raw number of occurrences, one slot per distinct value.
    List<int> timeDictionary = new List<int>();

    foreach (SQLQuery query in Workload)
    {
        if (!query.requiredValues.ContainsKey(columname))
        {
            continue;
        }

        object[] values = query.requiredValues[columname];
        int times = query.times;

        foreach (var value in values)
        {
            int index;
            if (pairing.TryGetValue(value, out index))
            {
                timeDictionary[index] += times;
            }
            else
            {
                // A new value takes the next free slot, which is the current list length.
                pairing.Add(value, timeDictionary.Count);
                timeDictionary.Add(times);
            }
        }
    }

    Dictionary<string, double> qfs = new Dictionary<string, double>();

    if (timeDictionary.Count == 0)
    {
        Dictionary<string, double> columnIdfs = MetaDbFiller.idfs[columname] as Dictionary<string, double>;
        if (columnIdfs == null)
        {
            // Previously this surfaced as a bare NullReferenceException inside the foreach.
            throw new InvalidOperationException("no string-keyed idf table available for column: " + columname);
        }

        foreach (var entry in columnIdfs)
        {
            qfs.Add(entry.Key, 1.0); // QF of 1 for values that were never searched for
        }
    }
    else
    {
        double rqfmax = (double)timeDictionary.Max();

        foreach (var pair in pairing)
        {
            double qf = timeDictionary[pair.Value] / rqfmax;
            if (double.IsNaN(qf))
            {
                // Happens for 0 / 0, i.e. when every recorded frequency is zero.
                throw new InvalidOperationException("value is NaN in column: " + columname);
            }

            // NOTE(review): two distinct keys with the same ToString() would collide here - confirm this cannot occur.
            qfs.Add(pair.Key.ToString(), qf);
        }
    }

    MetaDbFiller.AddQfMetaTable(columname, qfs);
}
/// <summary>
/// Computes the query-frequency (QF) value for every interval point of a numerical column
/// and passes the result to MetaDbFiller.AddQfMetaTable.
/// </summary>
/// <remarks>
/// Points run from the column's <c>min</c> to its <c>max</c> in steps of
/// TableProccessor.GetIntervalSize(columname). Each point's raw value comes from
/// getNumericalQFFromU, evaluated against the queries that mention the column. Raw values
/// are divided by the largest raw value; if that maximum is 0 every point gets a QF of 1.
/// </remarks>
/// <param name="Workload">The workload queries to scan.</param>
/// <param name="columname">Name of the numerical column.</param>
/// <exception cref="InvalidOperationException">
/// The interval size is not positive while min is below max, or a computed QF is NaN.
/// </exception>
public static void GetNumericalQf(SQLQuery[] Workload, string columname)
{
    double size = TableProccessor.GetIntervalSize(columname);
    ColumnProperties properties = TableProccessor.ColumnProperties[columname];
    double lowerBound = properties.min;
    double upperBound = properties.max;

    // Only queries that mention the column take part; total is the sum of their times.
    List<SQLQuery> relevantQueries = new List<SQLQuery>();
    int total = 0;
    foreach (var query in Workload)
    {
        if (query.requiredValues.ContainsKey(columname))
        {
            relevantQueries.Add(query);
            total += query.times;
        }
    }

    // With a non-positive step the loop below cannot advance: it used to fail on a duplicate
    // dictionary key (step 0) or never terminate (negative step).
    bool stepStalls = size <= 0;
    if (stepStalls && lowerBound < upperBound)
    {
        throw new InvalidOperationException("interval size is not positive in column: " + columname);
    }

    Dictionary<double, double> RQfs = new Dictionary<double, double>();
    double RQfMax = 0;

    // NOTE(review): d accumulates floating-point error, so the last point may fall just short
    // of max. Kept as is because other code may rely on these exact key values - confirm.
    for (double d = lowerBound; d <= upperBound; d += size)
    {
        double qf = getNumericalQFFromU(d, relevantQueries, columname, total);
        RQfs.Add(d, qf);

        // RQfMax starts at 0 and only ever grows, so negative raw values never become the maximum.
        if (qf > RQfMax)
        {
            RQfMax = qf;
        }

        if (stepStalls)
        {
            break; // min == max with a zero step: a single point is all there is
        }
    }

    Dictionary<double, double> Qfs = new Dictionary<double, double>();
    foreach (var Qf in RQfs)
    {
        // A maximum of 0 means no point received a positive raw value; use 1 for every point.
        double normalized = RQfMax == 0 ? 1 : Qf.Value / RQfMax;

        if (double.IsNaN(normalized))
        {
            throw new InvalidOperationException("value is NaN in column: " + columname);
        }

        Qfs.Add(Qf.Key, normalized);
    }

    MetaDbFiller.AddQfMetaTable(columname, Qfs);
}
/// <summary>
/// Lets the user pick a meta database file, connects to it via connect_To_SqlMeta
/// and then calls MetaDbFiller.LoadMetaDB.
/// </summary>
/// <remarks>Nothing happens when the dialog is closed without confirming a file.</remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // OpenFileDialog is IDisposable; the using block releases it on every path,
    // including when connecting or loading throws.
    using (OpenFileDialog openFileDialog3 = new OpenFileDialog())
    {
        openFileDialog3.InitialDirectory = "c:\\";
        openFileDialog3.Filter = "db files (*.db)|*.db|All files (*.*)|*.*";
        openFileDialog3.FilterIndex = 2; // preselect the "All files" filter entry
        openFileDialog3.RestoreDirectory = true;

        if (openFileDialog3.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        connect_To_SqlMeta(openFileDialog3.FileName);
        MessageBox.Show("Connection succeeded");
        MetaDbFiller.LoadMetaDB();
    }
}
/// <summary>
/// Calculates the IDF values of every column in ColumnProperties and hands them to
/// MetaDbFiller.IdfMetaTable.
/// </summary>
/// <remarks>
/// A column named "id" (any casing) is skipped. For every other column the distinct values
/// are stored on its properties first. Columns whose <c>numerical</c> flag is explicitly
/// false are treated as text: the IDF of a term is |log10(numberofRows / df)|. All remaining
/// columns (flag true or unset) get their IDFs from getIdfsForNumerical.
/// </remarks>
public static void Process()
{
    // For every column, check whether it is numerical.
    foreach (var column in ColumnProperties)
    {
        // Ordinal comparison: a culture-sensitive ToLower() turns "ID" into a dotless-i
        // string under Turkish cultures, which would not match "id".
        if (string.Equals(column.Key, "id", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        column.Value.distinctValues = GetDistinct(column.Key);

        bool isText = column.Value.numerical.HasValue && !column.Value.numerical.Value;
        if (isText)
        {
            // GetDfsForText reports the maximum through its ref parameter.
            // NOTE(review): a scaling factor |log10(1 / max)| used to be computed from it here
            // but was never applied to the IDFs, so the maximum is currently unused.
            int max = 0;
            Dictionary<string, int> Dfs = GetDfsForText(column.Key, ref max);

            Dictionary<string, double> IDFS = new Dictionary<string, double>();
            foreach (var DF in Dfs)
            {
                // TODO: this calculation still yields values above 1.
                double idf = Math.Abs(Math.Log10((double)numberofRows / (double)DF.Value));
                IDFS.Add(DF.Key, idf);
            }

            // Write to the table in the meta DB.
            MetaDbFiller.IdfMetaTable(column.Key, IDFS);
        }
        else
        {
            Dictionary<double, double> Idfs = getIdfsForNumerical(column.Key);

            // Write to the table in the meta DB.
            MetaDbFiller.IdfMetaTable(column.Key, Idfs);
        }
    }
}
/// <summary>
/// Builds the table of Jaccard coefficients between every pair of values of a column
/// and passes it to MetaDbFiller.AddJaquardsMetaTable.
/// </summary>
/// <remarks>
/// Nothing is computed or stored unless at least one workload query requires more than one
/// value for the column. The value pairs are taken from the keys of
/// <c>MetaDbFiller.idfs[columname]</c>; each coefficient comes from Jaquard, evaluated
/// against the queries that mention the column.
/// </remarks>
/// <param name="Workload">The workload queries to scan.</param>
/// <param name="columname">Name of the column whose value pairs are scored.</param>
public static void GetJaquards(SQLQuery[] Workload, string columname)
{
    // First pass: is there any query that asks for several values of this column at once?
    bool hasMultiValueQuery = false;
    for (int i = 0; i < Workload.Length; i++)
    {
        SQLQuery candidate = Workload[i];
        if (candidate.requiredValues.ContainsKey(columname)
            && candidate.requiredValues[columname].Length > 1)
        {
            hasMultiValueQuery = true;
            break;
        }
    }

    if (!hasMultiValueQuery)
    {
        return; // no co-occurrences to measure
    }

    // Second pass: keep every query that mentions the column at all.
    List<SQLQuery> relevantQueries = new List<SQLQuery>();
    for (int i = 0; i < Workload.Length; i++)
    {
        SQLQuery candidate = Workload[i];
        if (!candidate.requiredValues.ContainsKey(columname))
        {
            continue;
        }

        relevantQueries.Add(candidate);
    }

    // Outer key: first value; inner dictionary: second value -> coefficient.
    Dictionary<string, object> jaquards = new Dictionary<string, object>();
    foreach (var rowEntry in MetaDbFiller.idfs[columname] as Dictionary<string, double>)
    {
        string rowKey = rowEntry.Key;
        Dictionary<string, double> row = new Dictionary<string, double>();

        foreach (var columnEntry in MetaDbFiller.idfs[columname] as Dictionary<string, double>)
        {
            string columnKey = columnEntry.Key;
            double coefficient = Jaquard(relevantQueries, rowKey, columnKey, columname);
            row.Add(columnKey, coefficient);
        }

        jaquards.Add(rowKey, row);
    }

    MetaDbFiller.AddJaquardsMetaTable(columname, jaquards);
}