Example #1
0
        /// <summary>
        /// Loads four columns of the UHS_PATIENT_STORY table into an ArffInstance, calls
        /// removeDatasetsPerAttributeValue("STATUS", "Y") on it and runs Apriori on the result,
        /// passing "apriori_result_alive" as the output file name. Timings are reported
        /// through Helpers.Utils.Debug.
        /// </summary>
        public static void TestAprioriOnAliveSetNoFilter()
        {
            Helpers.Utils.Debug("Testing apriori on alive dataset..");
            Stopwatch sw = new Stopwatch(); sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToInstance("uhs_patient_story".ToUpper(),
                                                         new string[] {
                    "INITIAL_PRESENTATION",
                    "INT_1_PRES",
                    "INT_2_PRES",
                    "STATUS"
                }, -1, -1, false, true);

                long t = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug("Gathered MySQL ArffInstance in " + t + "ms.");

                uhsAi.removeDatasetsPerAttributeValue("STATUS", "Y");
                uhsAi.Relation = "uhs_arff_alive";

                Helpers.Utils.Debug("Cleaned ArffInstance in " + (sw.ElapsedMilliseconds - t) + "ms.");

                // the constructor is invoked for its side effect; the instance itself is not kept
                new Ceka.Algorithms.Associaters.Apriori(uhsAi, 0.2f, 0.1f, true, true, "apriori_result_alive", true);

                Helpers.Utils.Debug("Apriori test done, took " + sw.ElapsedMilliseconds + "ms."); sw.Stop();
            }
            finally
            {
                // release the database connection even when loading or mining fails
                sql.close();
            }
        }
Example #2
0
        /// <summary>
        /// Loads an ARFF file, wraps the loaded instance in a SimpleArffInstance and logs the
        /// GetMemorySize() value of both representations together with the elapsed times.
        /// </summary>
        /// <param name="file">value handed to the ArffLoader constructor; defaults to "test"</param>
        public static void CompareSizeOfLoadedInstance(string file = "test")
        {
            Helpers.Utils.Debug("Running Memory Size Comparing-Test..");
            Stopwatch timer = Stopwatch.StartNew();

            Loader.ArffLoader loader = new Loader.ArffLoader(file);
            loader.loadArff();
            ArffInstance complexInstance = loader.getInstance();

            long parsedAt = timer.ElapsedMilliseconds;
            Helpers.Utils.Debug(string.Format("Read & Parsed ARFF file in {0} ms.", parsedAt));

            SimpleArffInstance simpleInstance = new SimpleArffInstance(complexInstance);

            // time spent on the conversion alone
            long convertedAt = timer.ElapsedMilliseconds;
            long conversionTime = convertedAt - parsedAt;
            Helpers.Utils.Debug(string.Format("Converting complex instance in {0} ms.", conversionTime));

            Helpers.Utils.Debug(string.Format("Simple Instance Size: {0} Kb.", simpleInstance.GetMemorySize()));
            Helpers.Utils.Debug(string.Format("Complex Instance Size: {0} Kb.", complexInstance.GetMemorySize()));

            long total = timer.ElapsedMilliseconds;
            Helpers.Utils.Debug(string.Format("Finished, took {0} ms.", total));
        }
Example #3
0
        /// <summary>
        /// Builds the "evol4" classifier data set: reads thirteen columns of UHS_EXTENDED_STORIES_2
        /// through tableToValidatedInstance (MNTHS_TO_1 and MNTHS_TO_2 are passed as the numeric
        /// attributes), applies a pattern deletion and the AGE range rebuild/refine steps, and saves
        /// the instance under the name "uhs_ext_story_classifier_evol4".
        /// </summary>
        public static void Evol4_BuildClassifierInstance()
        {
            Helpers.Utils.Debug("Evol4 Classifier..");
            Stopwatch sw = new Stopwatch();

            sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToValidatedInstance("UHS_EXTENDED_STORIES_2",
                                                                  new string[] {
                    "MNTHS_TO_1",
                    "MNTHS_TO_2",
                    "AGE",
                    "STATUS",
                    "T_NONE",
                    "T_HORMONE",
                    "T_SURGERY",
                    "T_PRI_CHEMO",
                    "T_ADJ_CHEMO",
                    "T_ADJ_RT",
                    "T_OOPH",
                    "T_PLASTIC",
                    "T_HER"
                }, new string[] {
                    "MNTHS_TO_1",
                    "MNTHS_TO_2"
                }, -1, -1, false, false);

                long em = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Gathered MySQL ArffInstance + preparing.. in {0} ms.", em));

                uhsAi.deletePatternMatchingDatasets(new List<string>()
                {
                    "*", "0"
                });

                // AGE is addressed by index in the range helpers, so resolve it once
                int ageIndex = uhsAi.getIndexOfAttribute("AGE");

                uhsAi.rebuildAttributeValueByRange(ageIndex, 29);

                uhsAi.refineBackRangedAttribute(ageIndex, 3, 5);

                uhsAi.Relation = "uhs_ext_story_classifier_evol4";

                long em2 = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Prepared classifier set in {0} ms.", (em2 - em)));

                new ArffSaver(uhsAi).saveInstance("uhs_ext_story_classifier_evol4");

                Helpers.Utils.Debug(string.Format("Evol done, tooks {0} ms.", sw.ElapsedMilliseconds));
            }
            finally
            {
                // release the database connection even when a step above fails
                sql.close();
            }
        }
Example #4
0
        /// <summary>
        /// Builds the "evol3" apriori data set: reads thirteen columns of UHS_EXTENDED_STORIES_2,
        /// runs the integrity check, rebuilds the first three attributes by range, applies two
        /// pattern deletions, removes unused attribute values and saves the instance under the
        /// name "uhs_ext_story_apriori_evol3".
        /// </summary>
        public static void Evol3_BuildAprioriInstance()
        {
            Helpers.Utils.Debug("Evol3 Apriori..");
            Stopwatch sw = new Stopwatch();

            sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToInstance("UHS_EXTENDED_STORIES_2",
                                                         new string[] {
                    "MNTHS_TO_1",
                    "MNTHS_TO_2",
                    "AGE",
                    "STATUS",
                    "T_NONE",
                    "T_HORMONE",
                    "T_SURGERY",
                    "T_PRI_CHEMO",
                    "T_ADJ_CHEMO",
                    "T_ADJ_RT",
                    "T_OOPH",
                    "T_PLASTIC",
                    "T_HER"
                }, -1, -1, false, false);

                long em = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Gathered MySQL ArffInstance {0} ms.", em));

                uhsAi.integrityCheck();

                // indices 0, 1 and 2 are MNTHS_TO_1, MNTHS_TO_2 and AGE in the column list above
                uhsAi.rebuildAttributeValueByRange(0, 120);
                uhsAi.rebuildAttributeValueByRange(1, 120);
                uhsAi.rebuildAttributeValueByRange(2, 16);

                uhsAi.deletePatternMatchingDatasets(new List<string> {
                    "[-19<->101]"
                });
                uhsAi.deletePatternMatchingDatasets(new List<string> {
                    "*", "[-67<->53]"
                });

                uhsAi.removeUnusedAttributeValues();
                uhsAi.Relation = "uhs_ext_story_apriori_evol3";

                long em2 = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Prepared apriori set in {0} ms.", (em2 - em)));

                new ArffSaver(uhsAi).saveInstance("uhs_ext_story_apriori_evol3");

                Helpers.Utils.Debug(string.Format("Evol done, tooks {0} ms.", sw.ElapsedMilliseconds));
            }
            finally
            {
                // release the database connection even when a step above fails
                sql.close();
            }
        }
Example #5
0
        /// <summary>
        /// Loads four columns of UHS_PATIENT_STORY, calls removeDatasetsPerAttributeValue("STATUS", "N"),
        /// applies four pattern deletions plus removeEmptyValueDatasets(), and saves the instance
        /// under the name "uhs_clean_alive". Each phase is timed and logged separately.
        /// </summary>
        public static void GetDefaultDumpOfAliveDeepClean()
        {
            Helpers.Utils.Debug("Running Apriori on deep cleaned UHS alive dataset from DB..");
            Stopwatch sw = new Stopwatch(); sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToInstance("uhs_patient_story".ToUpper(),
                                                         new string[] {
                    "INITIAL_PRESENTATION",
                    "INT_1_PRES",
                    "INT_2_PRES",
                    "STATUS"
                }, -1, -1, false, true);

                long em = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Gathered MySQL ArffInstance in {0} ms.", em));

                uhsAi.removeDatasetsPerAttributeValue("STATUS", "N");

                List<string> pattern1 = new List<string>()
                {
                    "Primary|breast|cancer|(and/or|DCIS)", "Primary|breast|cancer|(and/or|DCIS)", "*"
                };
                List<string> pattern2 = new List<string>()
                {
                    "Primary|breast|cancer|(and/or|DCIS)", "UNDEFINED", "*"
                };
                List<string> pattern3 = new List<string>()
                {
                    "Primary|breast|cancer|(and/or|DCIS)", "*", "UNDEFINED"
                };
                List<string> pattern4 = new List<string>()
                {
                    "*", "UNDEFINED", "UNDEFINED"
                };

                uhsAi.deletePatternMatchingDatasets(pattern1);
                uhsAi.deletePatternMatchingDatasets(pattern2);
                uhsAi.deletePatternMatchingDatasets(pattern3);
                uhsAi.deletePatternMatchingDatasets(pattern4);
                uhsAi.Datasets.removeEmptyValueDatasets();

                long t = sw.ElapsedMilliseconds;

                // report the cleansing phase alone; the load time was already logged above
                Helpers.Utils.Debug(string.Format("Cleansed dataset in {0} ms.", (t - em)));

                new ArffSaver(uhsAi).saveInstance("uhs_clean_alive");

                Helpers.Utils.Debug(string.Format("Finished, took {0} ms.", sw.ElapsedMilliseconds));
                sw.Stop();
            }
            finally
            {
                // release the database connection even when a step above fails
                sql.close();
            }
        }
Example #6
0
        /// <summary>
        /// Loads four columns of UHS_PATIENT_STORY and writes three ARFF dumps: the complete
        /// instance ("uhs_arff_total"), and two copies reduced via
        /// removeDatasetsPerAttributeValue on STATUS ("uhs_arff_alive" after removing "Y",
        /// "uhs_arff_dead" after removing "N"). Each phase is timed and logged separately.
        /// </summary>
        public static void GetDefaultDumpOfAliveAndDead()
        {
            Helpers.Utils.Debug("Dumping default alive and dead ARFF files..");
            Stopwatch sw = new Stopwatch(); sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToInstance("uhs_patient_story".ToUpper(),
                                                         new string[] {
                    "INITIAL_PRESENTATION",
                    "INT_1_PRES",
                    "INT_2_PRES",
                    "STATUS"
                }, -1, -1, false, true);

                // store the total file before taking the timestamp so that the logged time
                // covers what the message says, and the copy time below covers the copy only
                new ArffSaver(uhsAi).saveInstance("uhs_arff_total");

                long em = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Gathered MySQL ArffInstance and stored total file in {0} ms.", em));

                ArffInstance uhsAi2 = uhsAi.toCopy();

                long em2 = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Copied ArffInstance in {0} ms.", (em2 - em)));

                uhsAi.removeDatasetsPerAttributeValue("STATUS", "Y");
                uhsAi.Relation = "uhs_arff_alive";

                long em3 = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Separated Alive Set in {0} ms.", (em3 - em2)));

                uhsAi2.removeDatasetsPerAttributeValue("STATUS", "N");
                uhsAi2.Relation = "uhs_arff_dead";

                long em4 = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Separated Dead Set in {0} ms.", (em4 - em3)));

                new ArffSaver(uhsAi).saveInstance("uhs_arff_alive");
                new ArffSaver(uhsAi2).saveInstance("uhs_arff_dead");

                long em5 = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Stored both sets in files, took {0} ms.", (em5 - em4)));
            }
            finally
            {
                // close the connection on every path, not only when all steps succeed
                sql.close();
            }

            Helpers.Utils.Debug(string.Format("Dumping done, took {0} ms.", sw.ElapsedMilliseconds));
        }
Example #7
0
        /// <summary>
        /// Loads four columns of UHS_PATIENT_STORY and runs Apriori three times: on the full
        /// instance on the calling thread ("apriori_result_total") and, on two worker threads, on
        /// copies reduced via removeDatasetsPerAttributeValue on STATUS ("apriori_result_alive"
        /// after removing "Y", "apriori_result_dead" after removing "N"). Returns after both
        /// workers have been joined.
        /// </summary>
        public static void GetDefaultMultithreadedAprioriFlexJsonResultOfAliveAndDead()
        {
            Helpers.Utils.Debug("Getting threaded Apriori Flex Results of Alive and Dead datasets..");
            Stopwatch sw = new Stopwatch(); sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToInstance("uhs_patient_story".ToUpper(),
                                                         new string[] {
                    "INITIAL_PRESENTATION",
                    "INT_1_PRES",
                    "INT_2_PRES",
                    "STATUS"
                }, -1, -1, false, true);

                Helpers.Utils.Debug("Gathered MySQL ArffInstance in " + sw.ElapsedMilliseconds + "ms.");
                uhsAi.Relation = "uhs_arff_total";

                // NOTE(review): both workers call uhsAi.toCopy() while the calling thread runs
                // Apriori on the same uhsAi instance - confirm that this concurrent access is safe.
                Thread t1 = new Thread(new ThreadStart(delegate()
                {
                    ArffInstance uhsAi2 = uhsAi.toCopy();
                    uhsAi2.removeDatasetsPerAttributeValue("STATUS", "Y");
                    uhsAi2.Relation = "uhs_arff_alive";
                    new Ceka.Algorithms.Associaters.Apriori(uhsAi2, 0.2f, 0.1f, "apriori_result_alive");
                }));

                Thread t2 = new Thread(new ThreadStart(delegate()
                {
                    ArffInstance uhsAi3 = uhsAi.toCopy();
                    uhsAi3.removeDatasetsPerAttributeValue("STATUS", "N");
                    uhsAi3.Relation = "uhs_arff_dead";
                    new Ceka.Algorithms.Associaters.Apriori(uhsAi3, 0.2f, 0.1f, "apriori_result_dead");
                }));

                t1.Start();
                t2.Start();

                // the full data set is processed on the calling thread while the workers run
                new Ceka.Algorithms.Associaters.Apriori(uhsAi, 0.2f, 0.1f, "apriori_result_total");

                t1.Join();
                t2.Join();

                Helpers.Utils.Debug("Json Apriori Flex Result Dump done, took " + sw.ElapsedMilliseconds + "ms."); sw.Stop();
            }
            finally
            {
                // release the database connection even when a step above fails
                sql.close();
            }
        }
Example #8
0
        /// <summary>
        /// Builds an instance through tableToInstance() and post-processes it: every attribute named
        /// in <paramref name="numeric"/> is passed to turnAttributeIntoNumeric(), then the headers are
        /// cleaned up, unused attribute values are removed and the integrity check is run.
        /// </summary>
        /// <param name="table">forwarded to tableToInstance()</param>
        /// <param name="table_col">forwarded to tableToInstance()</param>
        /// <param name="numeric">names of the attributes to turn into numeric ones; may be null</param>
        /// <param name="startIndex">forwarded to tableToInstance()</param>
        /// <param name="endIndex">forwarded to tableToInstance()</param>
        /// <param name="firstUndefined">forwarded to tableToInstance()</param>
        /// <param name="secondUndefined">forwarded to tableToInstance()</param>
        /// <returns>the post-processed instance</returns>
        public ArffInstance tableToValidatedInstance(string table, string[] table_col, string[] numeric, int startIndex = -1, int endIndex = -1, bool firstUndefined = false, bool secondUndefined = false)
        {
            ArffInstance validated = tableToInstance(table, table_col, startIndex, endIndex, firstUndefined, secondUndefined);

            // a missing list simply means there is nothing to convert
            string[] numericAttributes = numeric ?? new string[0];
            for (int i = 0; i < numericAttributes.Length; i++)
            {
                validated.turnAttributeIntoNumeric(numericAttributes[i], "numeric");
            }

            validated.Headers.CleanUp();
            validated.removeUnusedAttributeValues();
            validated.integrityCheck();

            return validated;
        }
Example #9
0
        /// <summary>
        /// Loads an ARFF file, calls removeEmptyValueDatasets() on its data and runs Apriori on it
        /// with the WEKA save type. Elapsed times are reported through Helpers.Utils.Debug.
        /// </summary>
        /// <param name="file">value handed to the ArffLoader constructor; defaults to "test"</param>
        public static void LoadArffFileAndRunAprioriWithWekaOutput(string file = "test")
        {
            Helpers.Utils.Debug(string.Format("Running apriori on ARFF file, with WEKA output, {0}.arff..", file));
            Stopwatch timer = Stopwatch.StartNew();

            Loader.ArffLoader loader = new Loader.ArffLoader(file);
            loader.loadArff();

            ArffInstance instance = loader.getInstance();
            instance.Datasets.removeEmptyValueDatasets();

            long parsedAt = timer.ElapsedMilliseconds;
            Helpers.Utils.Debug(string.Format("Read & Parsed ARFF file in {0} ms.", parsedAt));

            // the constructor is invoked for its side effect; the instance itself is not kept
            new Ceka.Algorithms.Associaters.Apriori(instance, 0.1f, 0.5f, true, true, AprioriSaveTypes.WEKA);

            long total = timer.ElapsedMilliseconds;
            Helpers.Utils.Debug(string.Format("Finished, took {0} ms.", total));
            timer.Stop();
        }
Example #10
0
        /// <summary>
        /// Loads eleven columns of UHS_EXTENDED_STORIES_2 (both "undefined" flags set to true)
        /// and saves the instance under the name "uhs_ext_stories_evol1".
        /// </summary>
        public static void GetDefaultDumpOfComplexUHSSets()
        {
            Helpers.Utils.Debug("Dumping default (complex) Arff UHS files..");
            Stopwatch sw = new Stopwatch();

            sw.Start();

            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            string mysqlConStr = "SERVER=localhost;" +
                                 "DATABASE=uhs;" +
                                 "UID=root;" +
                                 "PASSWORD=pascal;";

            CekaMySQL sql = new CekaMySQL(mysqlConStr);

            try
            {
                ArffInstance uhsAi = sql.tableToInstance("UHS_EXTENDED_STORIES_2",
                                                         new string[] {
                    "AGE",
                    "STATUS",
                    "T_NONE",
                    "T_HORMONE",
                    "T_SURGERY",
                    "T_PRI_CHEMO",
                    "T_ADJ_CHEMO",
                    "T_ADJ_RT",
                    "T_OOPH",
                    "T_PLASTIC",
                    "T_HER"
                }, -1, -1, true, true);

                // store first so that the logged time covers what the message says
                new ArffSaver(uhsAi).saveInstance("uhs_ext_stories_evol1");

                long em = sw.ElapsedMilliseconds;

                Helpers.Utils.Debug(string.Format("Gathered MySQL ArffInstance and stored total file in {0} ms.", em));

                Helpers.Utils.Debug(string.Format("Dumping done, tooks {0} ms.", sw.ElapsedMilliseconds));
            }
            finally
            {
                // release the database connection even when loading or saving fails
                sql.close();
            }
        }
Example #11
0
        /// <summary>
        /// base constructor: assigns the algorithm id, stores the source instance and initialises
        /// the dataset counters, the hash function and the result list. It does not run the
        /// algorithm itself.
        /// </summary>
        /// <param name="ai">the arff instance to work on; must not be null and needs at least one dataset</param>
        /// <exception cref="System.ArgumentNullException">thrown when <paramref name="ai"/> is null</exception>
        public Apriori(ArffInstance ai)
            : base()
        {
            // fail early and clearly, before the shared instance counter is touched
            if (ai == null)
            {
                throw new System.ArgumentNullException("ai");
            }

            // NOTE(review): read-then-increment is not atomic; instances constructed on several
            // threads at once may receive the same id - confirm whether ids have to be unique.
            this.algorithm_id = Apriori.APRIORI_COUNT;
            Apriori.APRIORI_COUNT++;

            source = ai;

            dataset_count = this.source.Datasets.Data.Count;
            // the attribute count is read from the first dataset, so at least one must exist
            dataset_attribute_count = this.source.Datasets.Data[0].Length();

            hash = new Common.MurmurHash2Unsafe();

            this.rep_n_list = new List<uint[][]>();
        }
Example #12
0
 /// <summary>
 /// constructor that takes the two classic apriori thresholds and stores them
 /// after delegating to the single-argument constructor
 /// </summary>
 /// <param name="ai">forwarded to the single-argument constructor</param>
 /// <param name="support">stored as the minimum support threshold</param>
 /// <param name="confidence">stored as the confidence threshold</param>
 public Apriori(ArffInstance ai, float support, float confidence)
     : this(ai)
 {
     threshold_min_support = support;
     threshold_confidence = confidence;
 }
Example #13
0
 /// <summary>
 /// threshold constructor that immediately runs the default process and
 /// saves the result through a SimpleJsonSaver to "<paramref name="file"/>.json"
 /// </summary>
 /// <param name="ai">forwarded to the threshold constructor</param>
 /// <param name="support">forwarded to the threshold constructor</param>
 /// <param name="confidence">forwarded to the threshold constructor</param>
 /// <param name="file">target file name without the ".json" extension</param>
 public Apriori(ArffInstance ai, float support, float confidence, string file)
     : this(ai, support, confidence)
 {
     run_default_process();

     Ceka.Saver.SimpleJsonSaver saver = new Ceka.Saver.SimpleJsonSaver(get_aobj_result(), true);
     saver.SaveToFile(file + ".json");
 }
Example #14
0
        /// <summary>
        /// threshold constructor that stores the two filter switches, runs the default process
        /// with them and saves the result through a SimpleJsonSaver to "<paramref name="file"/>.json"
        /// </summary>
        /// <param name="ai">forwarded to the threshold constructor</param>
        /// <param name="support">forwarded to the threshold constructor</param>
        /// <param name="confidence">forwarded to the threshold constructor</param>
        /// <param name="filterSupport">stored and passed on to the default process</param>
        /// <param name="filterConfidence">stored and passed on to the default process</param>
        /// <param name="file">target file name without the ".json" extension</param>
        /// <param name="prettyJson">second argument handed to the SimpleJsonSaver</param>
        public Apriori(ArffInstance ai, float support, float confidence, bool filterSupport, bool filterConfidence, string file, bool prettyJson)
            : this(ai, support, confidence)
        {
            filterForSupport = filterSupport;
            filterForConfidence = filterConfidence;
            run_default_process(filterSupport, filterConfidence);

            Ceka.Saver.SimpleJsonSaver saver = new Ceka.Saver.SimpleJsonSaver(get_aobj_result(), prettyJson);
            saver.SaveToFile(file + ".json");
        }
Example #15
0
        /// <summary>
        /// threshold constructor that stores the two filter switches, runs the default process with
        /// them and then hands the result to the saver selected by <paramref name="savt"/>, either
        /// writing a file or calling the saver's CLI() method
        /// </summary>
        /// <param name="ai">forwarded to the threshold constructor</param>
        /// <param name="support">forwarded to the threshold constructor</param>
        /// <param name="confidence">forwarded to the threshold constructor</param>
        /// <param name="filterSupport">stored and passed on to the default process</param>
        /// <param name="filterConfidence">stored and passed on to the default process</param>
        /// <param name="savt">selects the saver; NONE and unhandled values only write a log line</param>
        /// <param name="cli">when true the saver's CLI() method is called instead of SaveToFile()</param>
        /// <param name="outputFile">file name without extension; when null the relation name of the source plus "_result" is used</param>
        public Apriori(ArffInstance ai, float support, float confidence, bool filterSupport, bool filterConfidence, Saver.AprioriSaveTypes savt, bool cli = false, string outputFile = null)
            : this(ai, support, confidence)
        {
            filterForSupport = filterSupport;
            filterForConfidence = filterConfidence;
            run_default_process(filterSupport, filterConfidence);

            string defaultName = this.source.Relation + "_result";
            string target = (outputFile == null) ? defaultName : outputFile;

            switch (savt)
            {
                // both JSON flavours share one saver type and differ only in its second argument
                case Saver.AprioriSaveTypes.JSON:
                case Saver.AprioriSaveTypes.JSON_PRETTY:
                {
                    bool pretty = (savt == Saver.AprioriSaveTypes.JSON_PRETTY);
                    Saver.SimpleJsonSaver jsonSaver = new Saver.SimpleJsonSaver(this.get_aobj_result(), pretty);

                    if (cli)
                    {
                        jsonSaver.CLI();
                    }
                    else
                    {
                        jsonSaver.SaveToFile(target + ".json");
                    }
                    break;
                }

                case Saver.AprioriSaveTypes.WEKA:
                {
                    Saver.WekaAssociationRulesSaver wekaSaver = new Saver.WekaAssociationRulesSaver(this.get_aobj_result());

                    if (cli)
                    {
                        wekaSaver.CLI();
                    }
                    else
                    {
                        wekaSaver.SaveToFile(target + ".ceka");
                    }
                    break;
                }

                case Saver.AprioriSaveTypes.NONE:
                    log("Apriori finished, doing nothing as SaveType is NONE.");
                    break;

                default:
                    log("Apriori finished, but SaveType is DEFAULT: " + savt.ToString());
                    break;
            }
        }
Example #16
0
        /// <summary>
        /// turns table structure into attributes and table rows (content) into data rows
        /// </summary>
        /// <param name="table">the mysql database table</param>
        /// <param name="table_col">a list of columns names that are to be read into the arff instance (at least two)</param>
        /// <param name="startIndex">table row start index; use -1 for no limit</param>
        /// <param name="endIndex">table row end index (inclusive); use -1 for no limit</param>
        /// <param name="firstUndefined">can the first column of the passed column array be NULL</param>
        /// <param name="secondUndefined">can the second column of the passed column array be NULL</param>
        /// <returns>returns the generated arff instance</returns>
        /// <exception cref="CekaException">thrown when fewer than two columns are passed</exception>
        public ArffInstance tableToInstance(string table, string[] table_col, int startIndex = -1, int endIndex = -1, bool firstUndefined = false, bool secondUndefined = false)
        {
            if (table_col == null || table_col.Length < 2)
                throw new CekaException("can not create an ArffInstance from a Table with less then 2 columns.");

            // NOTE(review): table and column names are concatenated into the statement unescaped;
            // identifiers cannot be bound as parameters, so only trusted names must be passed in.
            StringBuilder sb = new StringBuilder();
            sb.Append("SELECT ");
            sb.Append(string.Join(", ", table_col));
            sb.Append(" FROM ");
            sb.Append(table);

            // each of the first two columns gets its own NOT NULL filter, so that
            // secondUndefined = false is honoured even when firstUndefined = true
            if (!firstUndefined)
            {
                sb.Append(" WHERE ");
                sb.Append(table_col[0]);
                sb.Append(" IS NOT NULL");
            }
            if (!secondUndefined)
            {
                sb.Append(firstUndefined ? " WHERE " : " AND ");
                sb.Append(table_col[1]);
                sb.Append(" IS NOT NULL");
            }

            MySqlDataReader mr = query(sb.ToString());

            try
            {
                ArffInstance ai = new ArffInstance("ceka_" + table);

                foreach (string s in table_col) //setup attributes from table columns
                    ai.addAttribute(s, this.getDistinctOccurencesInColumn(table, s));

                //gather data from select resultset into a library dataset
                int c = 0;
                while (mr.Read())
                {
                    //rows behind the end index can never be selected, stop reading
                    if (endIndex != -1 && c > endIndex)
                        break;

                    if (startIndex == -1 || c >= startIndex) //make sure to run in limes
                    {
                        string[] sa = new string[table_col.Length];

                        for (int i = 0; i < mr.FieldCount; i++)
                        {
                            sa[i] = mr.GetValue(i).ToString();

                            if (string.IsNullOrWhiteSpace(sa[i]))
                                sa[i] = ArffFile.ATT_UNDEFINED; //make sure to give these a value at all times
                            else if (sa[i].Contains(ArffFile.ARFF_SPACE)) //also make sure there a no whitespaces at all times
                            {
                                sa[i] = sa[i].Replace(ArffFile.ARFF_SPACE, ArffFile.ATT_SPACE_EXCHANGE);

                                if (string.IsNullOrWhiteSpace(sa[i]))
                                    sa[i] = ArffFile.ATT_UNDEFINED;
                            }
                        }

                        ai.addDataset(sa);
                    }
                    c++;
                }

                return ai;
            }
            finally
            {
                //close the reader on every path, including exceptions
                mr.Close();
            }
        }
Example #17
0
        /// <summary>
        /// turns table structure into attributes and table rows (content) into data rows
        /// </summary>
        /// <param name="table">the mysql database table</param>
        /// <param name="table_col">a list of columns names that are to be read into the arff instance (at least two)</param>
        /// <param name="startIndex">table row start index; use -1 for no limit</param>
        /// <param name="endIndex">table row end index (inclusive); use -1 for no limit</param>
        /// <param name="firstUndefined">can the first column of the passed column array be NULL</param>
        /// <param name="secondUndefined">can the second column of the passed column array be NULL</param>
        /// <returns>returns the generated arff instance</returns>
        /// <exception cref="CekaException">thrown when fewer than two columns are passed</exception>
        public ArffInstance tableToInstance(string table, string[] table_col, int startIndex = -1, int endIndex = -1, bool firstUndefined = false, bool secondUndefined = false)
        {
            if (table_col == null || table_col.Length < 2)
            {
                throw new CekaException("can not create an ArffInstance from a Table with less then 2 columns.");
            }

            // NOTE(review): table and column names are concatenated into the statement unescaped;
            // identifiers cannot be bound as parameters, so only trusted names must be passed in.
            StringBuilder sb = new StringBuilder();

            sb.Append("SELECT ");
            sb.Append(string.Join(", ", table_col));
            sb.Append(" FROM ");
            sb.Append(table);

            // each of the first two columns gets its own NOT NULL filter, so that
            // secondUndefined = false is honoured even when firstUndefined = true
            if (!firstUndefined)
            {
                sb.Append(" WHERE ");
                sb.Append(table_col[0]);
                sb.Append(" IS NOT NULL");
            }

            if (!secondUndefined)
            {
                sb.Append(firstUndefined ? " WHERE " : " AND ");
                sb.Append(table_col[1]);
                sb.Append(" IS NOT NULL");
            }

            MySqlDataReader mr = query(sb.ToString());

            try
            {
                ArffInstance ai = new ArffInstance("ceka_" + table);

                foreach (string s in table_col) //setup attributes from table columns
                {
                    ai.addAttribute(s, this.getDistinctOccurencesInColumn(table, s));
                }

                //gather data from select resultset into a library dataset
                int c = 0;

                while (mr.Read())
                {
                    //rows behind the end index can never be selected, stop reading
                    if (endIndex != -1 && c > endIndex)
                    {
                        break;
                    }

                    if (startIndex == -1 || c >= startIndex) //make sure to run in limes
                    {
                        string[] sa = new string[table_col.Length];

                        for (int i = 0; i < mr.FieldCount; i++)
                        {
                            sa[i] = mr.GetValue(i).ToString();

                            if (string.IsNullOrWhiteSpace(sa[i]))
                            {
                                sa[i] = ArffFile.ATT_UNDEFINED;           //make sure to give these a value at all times
                            }
                            else if (sa[i].Contains(ArffFile.ARFF_SPACE)) //also make sure there a no whitespaces at all times
                            {
                                sa[i] = sa[i].Replace(ArffFile.ARFF_SPACE, ArffFile.ATT_SPACE_EXCHANGE);

                                if (string.IsNullOrWhiteSpace(sa[i]))
                                {
                                    sa[i] = ArffFile.ATT_UNDEFINED;
                                }
                            }
                        }

                        ai.addDataset(sa);
                    }
                    c++;
                }

                return(ai);
            }
            finally
            {
                //close the reader on every path, including exceptions
                mr.Close();
            }
        }
Example #18
0
 /// <summary>
 /// creates a saver for the given instance
 /// </summary>
 /// <param name="instance">the arff instance that is stored in Instance for later writing</param>
 public ArffSaver(ArffInstance instance)
 {
     Instance = instance;
 }
Example #19
0
 /// <summary>
 /// creates a saver for the given instance
 /// </summary>
 /// <param name="instance">the arff instance that is stored in Instance for later writing</param>
 public ArffSaver(ArffInstance instance)
 {
     Instance = instance;
 }