/// <summary>
        /// Selects the partition for a key: the key is cast to <see cref="string"/>, encoded as ASCII,
        /// hashed with <c>MurmurHash2Simple</c>, and the hash is folded into the range of
        /// <c>this.partitions</c>.
        /// </summary>
        /// <param name="partitionKey">Partition key; it is cast to <see cref="string"/>, so any other runtime type fails the cast.</param>
        /// <returns>The absolute value of the signed hash modulo <c>this.partitions.Count</c>.</returns>
        private int GetPartitionIndex(object partitionKey)
        {
            var murmur   = new MurmurHash2Simple();
            var ascii    = new ASCIIEncoding();
            var keyBytes = ascii.GetBytes((string)partitionKey);

            // The hash is cast to a signed int, so the remainder below can be negative.
            int signedHash = (int)murmur.Hash(keyBytes);
            int remainder  = signedHash % this.partitions.Count;

            // The remainder's magnitude is always smaller than Count, so Math.Abs cannot overflow here.
            return Math.Abs(remainder);
        }
示例#2
0
        /// <summary>
        /// Hashes the UTF-8 bytes of <paramref name="value"/> with <c>MurmurHash2Simple</c> and returns
        /// the result as a signed 32-bit integer.
        /// </summary>
        /// <param name="value">Key to hash; must be non-empty and no longer than <c>_maxSize</c> characters.</param>
        /// <returns>
        /// The hash reinterpreted bit-for-bit as an <see cref="int"/>. Hashes above
        /// <see cref="int.MaxValue"/> therefore come back as negative numbers.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null or empty.</exception>
        /// <exception cref="ArgumentException"><paramref name="value"/> is longer than <c>_maxSize</c> characters.</exception>
        private int GetHash(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                throw new ArgumentNullException(nameof(value));
            }

            if (value.Length > _maxSize)
            {
                throw new ArgumentException($"Максимальная длинна ключа составляет {_maxSize} символов.", nameof(value));
            }

            // The hasher is only created once the argument is known to be valid.
            MurmurHash2Simple hasher = new MurmurHash2Simple();
            var hash = hasher.Hash(System.Text.Encoding.UTF8.GetBytes(value));

            // Convert.ToInt32 range-checks an unsigned value and throws OverflowException for anything
            // above int.MaxValue, which would reject roughly half of all hashes. An unchecked cast keeps
            // all 32 bits and yields the same result for every value that fits in an int.
            return unchecked((int)hash);
        }
示例#3
0
        /// <summary>
        /// Estimates how much time the apriori algorithm is likely to save on a specific dataset by
        /// comparing uint hashes instead of strings. It derives a comparison count from the dataset
        /// dimensions, then times that many string comparisons and that many uint comparisons.
        /// </summary>
        /// <param name="attributes">number of attributes in the dataset header; @attribute a1 {..}</param>
        /// <param name="attribute_values">median of attribute value count in the attributes; @attribute .. { val1, val2, val3, .. }</param>
        /// <param name="dataset_columns">@data row (instances) count of the dataset</param>
        /// <param name="percentage">percentage (5%-85%) of item-pair checks that might be avoided by the algorithm (speeds it up)</param>
        /// <returns>[0] = calculated comparisons, [1] = uint comparison time, [2] = string comparison time (times are milliseconds) </returns>
        public static long[] run_int_vs_string_comparison(int attributes = 10, int attribute_values = 5, int dataset_columns = 50000, int percentage = 30)
        {
            // Calculate the number of comparisons from the scenario. The product is built in 64-bit
            // arithmetic: with 32-bit ints it silently wraps around for inputs only slightly larger
            // than the defaults (e.g. attributes = 20), producing a meaningless count.
            long item_count  = (long)attributes * attribute_values;
            long item_pairs  = item_count * item_count / 2;
            long compare_num = (long)((double)(item_pairs * dataset_columns * attributes) * ((double)percentage / 100));

            string            _s1  = "association rules mining";
            string            _s2  = "apriori data mining algorithm";
            string            _s3  = "algorithm mining data apriori";
            MurmurHash2Simple hash = new MurmurHash2Simple();
            uint _i1 = get_representative_int_hash(hash, _s1);
            uint _i2 = get_representative_int_hash(hash, _s2);
            uint _i3 = get_representative_int_hash(hash, _s3);

            // Counters that give the comparisons an observable effect inside the loops.
            long do_something_1 = 0;
            long do_something_2 = 0;

            Stopwatch sw = Stopwatch.StartNew();

            //comparing strings
            for (long i = 0; i < compare_num; i++)
            {
                if (i % 2 == 0)
                {
                    if (_s1 == _s2) //doing the same thing will be optimized by the compiler..
                    {
                        do_something_1++;
                    }
                }
                else
                {
                    if (_s1 == _s3) //therefore another one here
                    {
                        do_something_2++;
                    }
                }
            }

            long strings = sw.ElapsedMilliseconds;
            do_something_1 = 0;
            do_something_2 = 0;
            sw.Restart();

            //comparing uint hashes
            for (long i = 0; i < compare_num; i++)
            {
                if (i % 2 == 0)
                {
                    if (_i1 == _i2) //doing the same thing will be optimized by the compiler..
                    {
                        do_something_1++;
                    }
                }
                else
                {
                    if (_i1 == _i3) //therefore another one here
                    {
                        do_something_2++;
                    }
                }
            }

            long uints = sw.ElapsedMilliseconds;

            Helpers.Utils.Debug(string.Format("Compared {0} times; uints took {1} ms, strings took {2} ms.", compare_num, uints, strings));

            return(new long[] { compare_num, uints, strings });
        }
示例#4
0
 /// <summary>
 /// produces the uint hash that stands in for a dataset string
 /// </summary>
 /// <param name="hash">hasher instance whose Hash method is applied to the encoded string</param>
 /// <param name="dataset_column">string to hash; it is encoded as UTF-8 before hashing</param>
 /// <returns>the value hash.Hash returns for the UTF-8 bytes of dataset_column</returns>
 public static uint get_representative_int_hash(MurmurHash2Simple hash, string dataset_column)
 {
     byte[] utf8_bytes     = Encoding.UTF8.GetBytes(dataset_column);
     uint   representative = hash.Hash(utf8_bytes);

     return representative;
 }
        /// <summary>
        /// Maps a partition key onto an index into <c>this.partitions</c> by hashing the key's ASCII
        /// bytes with <c>MurmurHash2Simple</c>.
        /// </summary>
        /// <param name="partitionKey">Partition key; it is cast to <see cref="string"/> before encoding.</param>
        /// <returns>The absolute value of the signed hash modulo <c>this.partitions.Count</c>.</returns>
        private int GetPartitionIndex(object partitionKey)
        {
            var asciiEncoding = new ASCIIEncoding();
            var murmurHasher  = new MurmurHash2Simple();

            byte[] rawKey = asciiEncoding.GetBytes((string)partitionKey);

            // Casting the hash to int can produce a negative value, so the remainder is passed
            // through Math.Abs to obtain a usable index.
            int signedHash = (int)murmurHasher.Hash(rawKey);
            return Math.Abs(signedHash % this.partitions.Count);
        }