예제 #1
0
        /// <summary>
        /// Validates the key/value lists, resets the builder state and inserts all keys
        /// starting from a root node that spans the whole key list.
        /// </summary>
        /// <param name="keyList">Keys to insert; must not be null.</param>
        /// <param name="valList">Values, one per key; must not be null, must have the same count as
        /// <paramref name="keyList"/>, and every value must be greater than -1.</param>
        /// <param name="max_slot_usage_rate_threshold">Stored both as the upper bound and as the
        /// starting value of the slot usage rate threshold.</param>
        /// <returns><c>false</c> (after writing a message to the console) when validation fails;
        /// otherwise <c>true</c>.</returns>
        public bool build(IList<string> keyList, IList<int> valList, double max_slot_usage_rate_threshold = 0.95)
        {
            // Structural validation: the first failing rule (in this order) is reported.
            string inputError = null;
            if (keyList == null)
            {
                inputError = "Key list is empty";
            }
            else if (valList == null)
            {
                inputError = "Value list is empty";
            }
            else if (keyList.Count != valList.Count)
            {
                inputError = "The size of key list and value list is not equal";
            }

            if (inputError != null)
            {
                Console.WriteLine(inputError);
                return false;
            }

            // Every value must be non-negative; report the first offender.
            for (int index = 0; index < valList.Count; ++index)
            {
                int candidate = valList[index];
                if (candidate < 0)
                {
                    Console.WriteLine("Invalidated value {0} at index {1}", candidate, index);
                    return false;
                }
            }

            // Reset builder state for a fresh run.
            key_ = keyList;
            val_ = valList;
            progress_ = 0;
            MAX_SLOT_USAGE_RATE_THRESHOLD = max_slot_usage_rate_threshold;
            slot_usage_rate_threshold_ = max_slot_usage_rate_threshold;

            startDT = DateTime.Now;

            // Both slot arrays are created with five entries per key.
            int initialSlots = key_.Count * 5;
            array = new VarBigArray<unit_t>(initialSlots);
            used = new VarBigArray<int>(initialSlots);

            // Slot 0 is taken up front: it gets a unit with base1 = 1 and is marked as used.
            array[0] = new unit_t();
            array[0].base1 = 1;
            used[0] = 1;
            next_chk_pos_ = 0;

            // The root node covers the key range [0, key_.Count) at depth 0.
            Node rootNode = new Node
            {
                left = 0,
                right = key_.Count,
                depth = 0
            };

            var rootChildren = new List<Node>();
            fetch(rootNode, rootChildren);
            insert(rootChildren);

            return true;
        }
예제 #2
0
        /// <summary>
        /// Finds a base position at which every node in <paramref name="siblings"/> can be placed
        /// (slot index = base + node.code), claims those slots in <c>used</c>, writes one
        /// <c>unit_t</c> per sibling into <c>array</c>, and recurses into each sibling's children.
        /// </summary>
        /// <param name="siblings">Nodes to place together. While slots are being claimed, a node with
        /// code 0 is temporarily appended when no sibling already has code 0; it is removed again
        /// before the siblings are written.</param>
        /// <returns>The base position (<c>begin</c>) chosen for this group of siblings.</returns>
        int insert(List<Node> siblings)
        {
            // Random source for the retry step below, seeded from the current millisecond
            // plus the managed thread id.
            Random rnd = new Random(DateTime.Now.Millisecond + Thread.CurrentThread.ManagedThreadId);
            int begin = 0;
            bool cont = true;
            // Number of scanned positions whose own slot was already non-zero in 'used'.
            int nonzeronum = 0;

            // Advance the shared search start past slots that are already marked as used.
            while (used[next_chk_pos_] == 1)
            {
                Interlocked.Increment(ref next_chk_pos_);
            }

            int pos = next_chk_pos_;
            int startpos = pos;

            // Search for the begin position (first check, read-only).
            // pos is decremented once so that the pre-increment inside the loop starts at startpos.
            pos--;
            while (cont == true)
            {
                pos++;
                if (used[pos] == 0)
                {
                    // Check whether every sibling slot relative to pos is available;
                    // if any is taken, keep searching.
                    cont = false;
                    foreach (Node n in siblings)
                    {
                        if (used[pos + n.code] == 1 || array[pos + n.code] != null)
                        {
                            cont = true;
                            break;
                        }
                    }
                }
                else
                {
                    nonzeronum++;
                }
            }
            begin = pos;

            // Check the slot usage rate over the scanned range [startpos, pos]. If the rate is no less
            // than the current threshold, move next_chk_pos_ forward to pos so that later searches
            // start in a range with fewer conflicts.
            // Note: the higher the threshold, the higher the slot space usage rate, but the longer
            // the build takes.
            if ((double)nonzeronum / (double)(pos - startpos + 1) >= slot_usage_rate_threshold_ &&
                pos > next_chk_pos_)
            {
                System.Threading.Interlocked.Exchange(ref next_chk_pos_, pos);
            }

            // Double check whether the slots are available.
            // The reasons for checking twice are:
            // 1. The conflict rate differs across the slot space: the tail of the array has a much
            //    lower conflict rate than its head and body.
            // 2. Rolling back is expensive. So in a high-conflict range we only test for conflicts and
            //    take no other action (first check); once an available range without conflicts is
            //    found, we try to claim that range and check for conflicts again (second check).
            bool bAllNull;
            bool bZeroCode = false;
            foreach (Node n in siblings)
            {
                if (n.code == 0)
                {
                    bZeroCode = true;
                    break;
                }
            }

            // When no sibling has code 0, temporarily append a code-0 node so that the slot at
            // 'begin' itself is claimed together with the sibling slots.
            if (bZeroCode == false)
            {
                Node sNode = new Node();
                sNode.code = 0;
                siblings.Add(sNode);
            }

            do
            {
                bAllNull = true;
                // Test for conflicts with other threads while claiming the slots.
                // cnt counts the siblings whose slot has been claimed in this attempt.
                int cnt = 0;
                foreach (Node n in siblings)
                {
                    // Block index and in-block offset of slot (begin + n.code), computed from
                    // VarBigArray<int>.moveBit and VarBigArray<int>.sizePerBlock, so the element in
                    // used.arrList can be passed by reference to CompareExchange.
                    int nBlock = (begin + n.code) >> VarBigArray<int>.moveBit;
                    long offset = (begin + n.code) & (VarBigArray<int>.sizePerBlock - 1);

                    // The slot is claimed only if CompareExchange switches it from 0 to 1.
                    if (used[begin + n.code] == 1 ||
                        System.Threading.Interlocked.CompareExchange(ref used.arrList[nBlock][offset], 1, 0) != 0)
                    {
                        bAllNull = false;
                        // Roll back the slots claimed so far in this attempt.
                        foreach (Node revertNode in siblings.GetRange(0, cnt))
                        {
                            used[begin + revertNode.code] = 0;
                        }
                        // Retry from a later base position, moved forward by a random step of at least 1.
                        begin += rnd.Next(thread_num_) + 1;
                        break;
                    }
                    cnt++;
                }
            } while (bAllNull == false);

            // Remove the temporary code-0 node appended above.
            if (bZeroCode == false)
            {
                siblings.RemoveAt(siblings.Count - 1);
            }

            // Write one unit per sibling and descend into its children. A plain sequential loop is
            // compiled when NO_SUPPORT_PARALLEL_LIB is defined; otherwise Parallel.For is used.
#if NO_SUPPORT_PARALLEL_LIB
            for (int i = 0;i < siblings.Count;i++)
#else
            Parallel.For(0, siblings.Count, parallelOption, i =>
#endif
            {
                List<Node> new_siblings = new List<Node>();
                Node sibling = siblings[i];
                int offset = begin + sibling.code;

                // The unit's check field records the base position of its sibling group.
                array[offset] = new unit_t();
                array[offset].check = begin;
                if (fetch(sibling, new_siblings) == 0)
                {
                    // fetch returned 0: store the value found at sibling.left, encoded as the
                    // negative number -(value) - 1, in base1.
                    array[offset].base1 = -val_[sibling.left] - 1;
                    if (Interlocked.Increment(ref progress_) % 10000 == 0)
                    {
                        // Try to adjust the slot usage rate threshold in order to keep performance high.
                        // NOTE(review): lastQPS, lastQPSDelta and slot_usage_rate_threshold_ are read
                        // and written here with no synchronization visible in this block - confirm
                        // this is acceptable when running under Parallel.For.
                        TimeSpan ts = DateTime.Now - startDT;
                        double currQPS = progress_ / (ts.TotalSeconds + 1);
                        double currQPSDelta = currQPS - lastQPS;

                        if (currQPS < lastQPS && currQPSDelta < lastQPSDelta)
                        {
                            // Average QPS is slowing down: reduce the slot usage rate threshold,
                            // but not below MIN_SLOT_USAGE_RATE_THRESHOLD.
                            slot_usage_rate_threshold_ -= 0.1;
                            if (slot_usage_rate_threshold_ < MIN_SLOT_USAGE_RATE_THRESHOLD)
                            {
                                slot_usage_rate_threshold_ = MIN_SLOT_USAGE_RATE_THRESHOLD;
                            }
                        }
                        else
                        {
                            // Average QPS is not slowing down: raise the slot usage rate threshold,
                            // but not above MAX_SLOT_USAGE_RATE_THRESHOLD.
                            slot_usage_rate_threshold_ += 0.1;
                            if (slot_usage_rate_threshold_ > MAX_SLOT_USAGE_RATE_THRESHOLD)
                            {
                                slot_usage_rate_threshold_ = MAX_SLOT_USAGE_RATE_THRESHOLD;
                            }
                        }

                        lastQPSDelta = currQPSDelta;
                        lastQPS = currQPS;

                        // NOTE(review): progress_ is re-read here rather than using the value
                        // returned by Interlocked.Increment above - confirm this is intended.
                        if (progress_ % 100000 == 0)
                        {
                            // Show the current progress on the console.
                            Console.Write("{0}...", progress_);
                        }
                    }
                }
                else
                {
                    // The sibling has children: place them recursively and store their base
                    // position in this unit's base1.
                    int b = insert(new_siblings);
                    array[offset].base1 = b;
                }
            }
#if NO_SUPPORT_PARALLEL_LIB
#else
);
#endif

            return begin;
        }