Пример #1
0
 /// <summary>
 /// Connects to, loads or creates the meta database depending on the content of <c>textBox5</c>.
 /// </summary>
 /// <remarks>
 /// <list type="bullet">
 /// <item>Empty text box: the user picks a file in a dialog; it is passed to
 /// <c>connect_To_SqlMeta</c>, after which the meta tables are created and filled.</item>
 /// <item>Path of an existing file: it is passed to <c>connect_To_SqlMeta</c> and
 /// <c>MetaDbFiller.LoadMetaDB</c> is called.</item>
 /// <item>Any other path: a database is created through <c>DatabaseConnection.CreateEmptyDb</c>,
 /// after which the meta tables are created and filled.</item>
 /// </list>
 /// </remarks>
 /// <param name="sender">Control that raised the event (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void button6_Click(object sender, EventArgs e)
 {
     string path = textBox5.Text;

     if (string.IsNullOrEmpty(path))
     {
         string selectedFile = null;

         // OpenFileDialog is IDisposable; release it as soon as the choice is known.
         using (OpenFileDialog openFileDialog3 = new OpenFileDialog())
         {
             openFileDialog3.InitialDirectory = "c:\\";
             openFileDialog3.Filter           = "db files (*.db)|*.db|All files (*.*)|*.*";
             openFileDialog3.FilterIndex      = 2;
             openFileDialog3.RestoreDirectory = true;

             if (openFileDialog3.ShowDialog() == DialogResult.OK)
             {
                 selectedFile = openFileDialog3.FileName;
             }
         }

         if (selectedFile == null)
         {
             return; // dialog was cancelled, nothing to do
         }

         connect_To_SqlMeta(selectedFile);
         MessageBox.Show("Connection succeeded");
         MetaDbFiller.CreateMetaTables();
         MetaDbFiller.FillMetaDb();
         MessageBox.Show("MetaDB filling Process succeeded");
     }
     else if (File.Exists(path))
     {
         connect_To_SqlMeta(path);
         MessageBox.Show("Connection succeeded");
         MetaDbFiller.LoadMetaDB();
         MessageBox.Show("MetaDB filling Process succeeded");
     }
     else
     {
         MetaDbFiller.dbConnection = DatabaseConnection.CreateEmptyDb(path);
         MessageBox.Show("New metaDB file made");
         MetaDbFiller.CreateMetaTables();
         MetaDbFiller.FillMetaDb();
         MessageBox.Show("MetaDB filling Process succeeded");
     }
 }
Пример #2
0
    /// <summary>
    /// Computes the query frequency (QF) of every non-numerical value of a column from the
    /// workload and passes the result to <c>MetaDbFiller.AddQfMetaTable</c>.
    /// </summary>
    /// <remarks>
    /// The raw frequency of a value is the sum of <c>times</c> over all queries that require it.
    /// Each QF is the raw frequency divided by the largest raw frequency. When no query in the
    /// workload mentions the column, every key of the column's entry in <c>MetaDbFiller.idfs</c>
    /// receives a QF of 1.0 instead.
    /// </remarks>
    /// <param name="Workload">Queries to analyse.</param>
    /// <param name="columname">Name of the column whose values are scored.</param>
    /// <exception cref="InvalidOperationException">
    /// A computed QF is NaN, or the column's entry in <c>MetaDbFiller.idfs</c> is not a
    /// <c>Dictionary&lt;string, double&gt;</c>.
    /// </exception>
    public static void GetNonNumericalQfs(SQLQuery[] Workload, string columname)
    {
        // Summed number of times each value was requested, keyed by the value itself.
        Dictionary <object, int> occurrences = new Dictionary <object, int>();

        foreach (SQLQuery query in Workload)
        {
            if (!query.requiredValues.ContainsKey(columname))
            {
                continue;
            }

            object[] values = query.requiredValues[columname];
            int      times  = query.times;
            foreach (var value in values)
            {
                int seen;
                occurrences.TryGetValue(value, out seen); // seen stays 0 for a value not met before
                occurrences[value] = seen + times;
            }
        }

        Dictionary <string, double> qfs = new Dictionary <string, double>();

        if (occurrences.Count == 0)
        {
            Dictionary <string, double> knownValues = MetaDbFiller.idfs[columname] as Dictionary <string, double>;
            if (knownValues == null)
            {
                // Fail with a meaningful message instead of a NullReferenceException in the loop.
                throw new InvalidOperationException("no textual idf values available for column: " + columname);
            }

            foreach (var entry in knownValues)
            {
                qfs.Add(entry.Key, 1.0); // QF of 1 for values that were never searched for
            }
        }
        else
        {
            double rqfmax = occurrences.Values.Max();
            foreach (var pair in occurrences)
            {
                double qf = pair.Value / rqfmax;

                // 0 / 0 yields NaN, i.e. when the largest summed frequency is 0.
                if (double.IsNaN(qf))
                {
                    throw new InvalidOperationException("value is NaN in column: " + columname);
                }

                qfs.Add(pair.Key.ToString(), qf);
            }
        }

        MetaDbFiller.AddQfMetaTable(columname, qfs);
    }
Пример #3
0
    /// <summary>
    /// Computes the query frequency (QF) of a numerical column at regular steps over its value
    /// range and passes the result to <c>MetaDbFiller.AddQfMetaTable</c>.
    /// </summary>
    /// <remarks>
    /// The column is sampled from <c>properties.min</c> up to <c>properties.max</c> in steps of
    /// <c>TableProccessor.GetIntervalSize(columname)</c>. Every raw value returned by
    /// <c>getNumericalQFFromU</c> is divided by the largest raw value; if that largest value is 0,
    /// every sample gets a QF of 1.
    /// </remarks>
    /// <param name="Workload">Queries to analyse.</param>
    /// <param name="columname">Name of the numerical column.</param>
    /// <exception cref="InvalidOperationException">A computed QF is NaN.</exception>
    public static void GetNumericalQf(SQLQuery[] Workload, string columname)
    {
        double           size            = TableProccessor.GetIntervalSize(columname);
        ColumnProperties properties      = TableProccessor.ColumnProperties[columname];
        List <SQLQuery>  relevantQueries = new List <SQLQuery>();
        int total = 0;

        // Only queries that mention the column contribute.
        foreach (var query in Workload)
        {
            if (query.requiredValues.ContainsKey(columname))
            {
                relevantQueries.Add(query);
                total += query.times;
            }
        }

        Dictionary <double, double> RQfs = new Dictionary <double, double>();
        double RQfMax = 0;

        // NOTE(review): a non-positive interval size would not advance d towards max -
        // confirm that GetIntervalSize always returns a value greater than 0.
        for (double d = properties.min; d <= properties.max; d += size)
        {
            double qf = getNumericalQFFromU(d, relevantQueries, columname, total);
            RQfs.Add(d, qf);

            // RQfMax starts at 0 and only grows, so a negative raw value never becomes the maximum.
            if (qf > RQfMax)
            {
                RQfMax = qf;
            }
        }

        Dictionary <double, double> Qfs = new Dictionary <double, double>();

        foreach (var Qf in RQfs)
        {
            // A maximum of 0 means the column was never queried: treat every sample equally.
            double scaled = RQfMax == 0 ? 1 : Qf.Value / RQfMax;

            if (double.IsNaN(scaled))
            {
                throw new InvalidOperationException("value is NaN in column: " + columname);
            }

            Qfs.Add(Qf.Key, scaled);
        }

        // No manual GC.Collect(): the locals become unreachable when the method returns.
        MetaDbFiller.AddQfMetaTable(columname, Qfs);
    }
Пример #4
0
        /// <summary>
        /// Lets the user pick a database file, passes it to <c>connect_To_SqlMeta</c> and then
        /// calls <c>MetaDbFiller.LoadMetaDB</c>. Does nothing when the dialog is cancelled.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            string selectedFile = null;

            // OpenFileDialog is IDisposable; release it as soon as the choice is known.
            using (OpenFileDialog openFileDialog3 = new OpenFileDialog())
            {
                openFileDialog3.InitialDirectory = "c:\\";
                openFileDialog3.Filter           = "db files (*.db)|*.db|All files (*.*)|*.*";
                openFileDialog3.FilterIndex      = 2;
                openFileDialog3.RestoreDirectory = true;

                if (openFileDialog3.ShowDialog() == DialogResult.OK)
                {
                    selectedFile = openFileDialog3.FileName;
                }
            }

            if (selectedFile == null)
            {
                return; // dialog was cancelled
            }

            connect_To_SqlMeta(selectedFile);
            MessageBox.Show("Connection succeeded");
            MetaDbFiller.LoadMetaDB();
        }
Пример #5
0
    /// <summary>
    /// Calculates the idf values of every column except the id column and passes them to
    /// <c>MetaDbFiller.IdfMetaTable</c>. Also stores the distinct values of each processed
    /// column in its <c>distinctValues</c> property.
    /// </summary>
    public static void Process()
    {
        foreach (var column in ColumnProperties)
        {
            // Ordinal, case-insensitive match: a culture-sensitive ToLower() does not turn
            // "ID" into "id" under Turkish casing rules, so the id column would not be skipped.
            if (string.Equals(column.Key, "id", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            column.Value.distinctValues = GetDistinct(column.Key);

            // Only an explicit "false" selects the textual path; null is handled as numerical.
            if (column.Value.numerical == false)
            {
                // Required by the signature of GetDfsForText; the value it reports is not used here.
                int max = 0;
                Dictionary <string, int> Dfs = GetDfsForText(column.Key, ref max);

                Dictionary <string, double> IDFS = new Dictionary <string, double>();
                foreach (var DF in Dfs)
                {
                    // TODO: this calculation still yields values above 1.
                    // (Could be a good candidate for OpenCL.)
                    double idf = Math.Abs(Math.Log10((double)numberofRows / (double)DF.Value));

                    IDFS.Add(DF.Key, idf);
                }

                // write to table in metadb
                MetaDbFiller.IdfMetaTable(column.Key, IDFS);
            }
            else
            {
                Dictionary <double, double> Idfs = getIdfsForNumerical(column.Key);
                // write to table in metadb
                MetaDbFiller.IdfMetaTable(column.Key, Idfs);
            }
        }
    }
Пример #6
0
    /// <summary>
    /// Computes the Jaccard coefficient for every pair of values of a column and passes the
    /// result to <c>MetaDbFiller.AddJaquardsMetaTable</c>.
    /// </summary>
    /// <remarks>
    /// Nothing is calculated or stored unless at least one query in the workload requires more
    /// than one value for the column. The values that are paired up are the keys of the column's
    /// entry in <c>MetaDbFiller.idfs</c>.
    /// </remarks>
    /// <param name="Workload">Queries to analyse.</param>
    /// <param name="columname">Name of the column whose values are compared.</param>
    /// <exception cref="InvalidOperationException">
    /// The column's entry in <c>MetaDbFiller.idfs</c> is not a <c>Dictionary&lt;string, double&gt;</c>.
    /// </exception>
    public static void GetJaquards(SQLQuery[] Workload, string columname)
    {
        bool            useful          = false;
        List <SQLQuery> relevantQueries = new List <SQLQuery>();

        // One pass: collect the queries that mention the column and note whether any of them
        // asks for several values at once (otherwise there is nothing to calculate).
        foreach (var query in Workload)
        {
            if (!query.requiredValues.ContainsKey(columname))
            {
                continue;
            }

            relevantQueries.Add(query);

            if (!useful && query.requiredValues[columname].Length > 1)
            {
                useful = true;
            }
        }

        if (!useful)
        {
            return;
        }

        // Look the value table up once instead of on every iteration of the loops below.
        Dictionary <string, double> knownValues = MetaDbFiller.idfs[columname] as Dictionary <string, double>;
        if (knownValues == null)
        {
            // Fail with a meaningful message instead of a NullReferenceException in the loop.
            throw new InvalidOperationException("no textual idf values available for column: " + columname);
        }

        Dictionary <string, object> jaquards = new Dictionary <string, object>();

        foreach (var idf in knownValues)
        {
            Dictionary <string, double> jaquard = new Dictionary <string, double>();
            foreach (var idf2 in knownValues)
            {
                jaquard.Add(idf2.Key, Jaquard(relevantQueries, idf.Key, idf2.Key, columname));
            }
            jaquards.Add(idf.Key, jaquard);
        }

        MetaDbFiller.AddJaquardsMetaTable(columname, jaquards);
    }