Example #1
0
        /// <summary>
        /// Estimates how much time the apriori algorithm is likely to save on a specific dataset by comparing
        /// uint hashes instead of strings. The number of comparisons is derived from the dataset shape, then the
        /// same number of string comparisons and uint comparisons is executed and timed.
        /// </summary>
        /// <param name="attributes">number of attributes in the dataset header; @attribute a1 {..}</param>
        /// <param name="attribute_values">median of attribute value count in the attributes; @attribute .. { val1, val2, val3, .. }</param>
        /// <param name="dataset_columns">@data row (instances) count of the dataset</param>
        /// <param name="percentage">percentage (5%-85%) of item-pair checks that might be avoided by the algorithm (speeds it up); the value is not range-checked here</param>
        /// <returns>[0] = calculated comparisons, [1] = uint comparison time, [2] = string comparison time (times are milliseconds)</returns>
        public static long[] run_int_vs_string_comparison(int attributes = 10, int attribute_values = 5, int dataset_columns = 50000, int percentage = 30)
        {
            // Calculate the number of comparisons from the scenario.
            // The product is widened to long BEFORE multiplying: in 32-bit int arithmetic it silently wraps
            // around for larger datasets and yields a wrong (possibly negative) comparison count.
            // The integer division by 2 is kept so the result matches the int formula wherever that did not overflow.
            long item_count = (long)attributes * attribute_values;
            long pair_checks = item_count * item_count / 2 * dataset_columns * attributes;
            long compare_num = (long)((double)pair_checks * ((double)percentage / 100));

            // NOTE(review): the sample strings have different lengths, so string equality may be decided
            // very early - confirm this is representative of the real item names.
            string _s1 = "association rules mining";
            string _s2 = "apriori data mining algorithm";
            string _s3 = "algorithm mining data apriori";
            MurmurHash2Simple hash = new MurmurHash2Simple();
            uint _i1 = get_representative_int_hash(hash, _s1);
            uint _i2 = get_representative_int_hash(hash, _s2);
            uint _i3 = get_representative_int_hash(hash, _s3);

            // Counters give the comparison branches something to do.
            long do_something_1 = 0;
            long do_something_2 = 0;

            // Start timing right before the first measured loop so setup work is not part of the string timing.
            Stopwatch sw = Stopwatch.StartNew();

            //comparing strings
            for (long i = 0; i < compare_num; i++)
            {
                if (i % 2 == 0)
                {
                    if (_s1 == _s2) //doing the same thing will be optimized by the compiler..
                    {
                        do_something_1++;
                    }
                }
                else
                {
                    if (_s1 == _s3) //therefore another one here
                    {
                        do_something_2++;
                    }
                }
            }

            long strings = sw.ElapsedMilliseconds;
            do_something_1 = 0;
            do_something_2 = 0;
            sw.Restart();

            //comparing uint hashes
            for (long i = 0; i < compare_num; i++)
            {
                if (i % 2 == 0)
                {
                    if (_i1 == _i2) //doing the same thing will be optimized by the compiler..
                    {
                        do_something_1++;
                    }
                }
                else
                {
                    if (_i1 == _i3) //therefore another one here
                    {
                        do_something_2++;
                    }
                }
            }

            long uints = sw.ElapsedMilliseconds;

            Helpers.Utils.Debug(string.Format("Compared {0} times; uints took {1} ms, strings took {2} ms.", compare_num, uints, strings));

            return new long[] { compare_num, uints, strings };
        }
Example #2
0
 /// <summary>
 /// Converts a string into its representative (murmur2) uint hash by hashing the string's UTF-8 bytes.
 /// </summary>
 /// <param name="hash">hasher instance whose Hash method is applied to the encoded bytes</param>
 /// <param name="dataset_column">text to encode as UTF-8 and hash</param>
 /// <returns>the value produced by <paramref name="hash"/> for the UTF-8 bytes of <paramref name="dataset_column"/></returns>
 public static uint get_representative_int_hash(MurmurHash2Simple hash, string dataset_column)
 {
     // Encode first, then hand the raw bytes to the hasher.
     byte[] utf8_bytes = Encoding.UTF8.GetBytes(dataset_column);
     uint representative = hash.Hash(utf8_bytes);
     return representative;
 }