/// <summary>
/// Validates the key/value lists, resets the builder state and populates
/// <c>array</c> / <c>used</c> by expanding the root node through
/// <c>fetch</c> and <c>insert</c>.
/// </summary>
/// <param name="keyList">Keys to index; must be non-null and the same length as <paramref name="valList"/>.</param>
/// <param name="valList">Value for each key; every value must be non-negative.</param>
/// <param name="max_slot_usage_rate_threshold">
/// Stored as both the upper bound (<c>MAX_SLOT_USAGE_RATE_THRESHOLD</c>) and the
/// initial value of the slot usage rate threshold that <c>insert</c> reads.
/// </param>
/// <returns>
/// <c>false</c> (after writing a message to the console) when the input is rejected;
/// <c>true</c> once <c>insert</c> has returned for the root's children.
/// </returns>
public bool build(IList<string> keyList, IList<int> valList, double max_slot_usage_rate_threshold = 0.95)
{
    // Input validation: bail out before any builder state is touched.
    if (keyList == null)
    {
        Console.WriteLine("Key list is empty");
        return false;
    }

    if (valList == null)
    {
        Console.WriteLine("Value list is empty");
        return false;
    }

    if (keyList.Count != valList.Count)
    {
        Console.WriteLine("The size of key list and value list is not equal");
        return false;
    }

    // Negative values are rejected; insert stores values as (-value - 1) in base1.
    for (int idx = 0; idx < valList.Count; idx++)
    {
        int value = valList[idx];
        if (value < 0)
        {
            Console.WriteLine("Invalidated value {0} at index {1}", value, idx);
            return false;
        }
    }

    // Threshold bookkeeping used by insert's adaptive tuning.
    MAX_SLOT_USAGE_RATE_THRESHOLD = max_slot_usage_rate_threshold;
    slot_usage_rate_threshold_ = max_slot_usage_rate_threshold;

    // Reset per-build state.
    progress_ = 0;
    key_ = keyList;
    val_ = valList;
    startDT = DateTime.Now;

    // Both arrays start out with five slots per key.
    int initialSlots = key_.Count * 5;
    array = new VarBigArray<unit_t>(initialSlots);
    used = new VarBigArray<int>(initialSlots);

    // Slot 0 is pre-occupied with base1 = 1 and marked as used.
    array[0] = new unit_t();
    array[0].base1 = 1;
    used[0] = 1;
    next_chk_pos_ = 0;

    // The root node spans the whole key range [0, key_.Count) at depth 0.
    var rootNode = new Node
    {
        left = 0,
        right = key_.Count,
        depth = 0
    };

    var rootChildren = new List<Node>();
    fetch(rootNode, rootChildren);
    insert(rootChildren);

    return true;
}
/// <summary>
/// Finds a base position <c>begin</c> such that slot <c>begin + code</c> is free for
/// every node in <paramref name="siblings"/> (and slot <c>begin</c> itself), reserves
/// those slots in <c>used</c>, writes a <c>unit_t</c> for each sibling into
/// <c>array</c>, and recurses into each sibling's children.
/// </summary>
/// <param name="siblings">
/// Nodes that must share one base position. The list is temporarily extended with a
/// code-0 placeholder while slots are being reserved and restored before returning.
/// </param>
/// <returns>The base position chosen for <paramref name="siblings"/>.</returns>
/// <remarks>
/// Unless NO_SUPPORT_PARALLEL_LIB is defined, the per-sibling work runs inside
/// <c>Parallel.For</c>, so this method is re-entered concurrently. The QPS and
/// threshold fields updated below are plain shared fields without synchronization;
/// NOTE(review): this looks like an intentional best-effort heuristic — confirm.
/// </remarks>
int insert(List<Node> siblings)
{
    Random rnd = new Random(DateTime.Now.Millisecond + Thread.CurrentThread.ManagedThreadId);
    int begin = 0;
    bool cont = true;
    int nonzeronum = 0;

    // Advance the shared search start past slots that are already taken.
    while (used[next_chk_pos_] == 1)
    {
        Interlocked.Increment(ref next_chk_pos_);
    }

    int pos = next_chk_pos_;
    int startpos = pos;

    // First check (read-only): search for a begin position with no conflict.
    pos--;
    while (cont == true)
    {
        pos++;
        if (used[pos] == 0)
        {
            // Candidate base: every sibling slot must be unused and empty,
            // otherwise keep searching.
            cont = false;
            foreach (Node n in siblings)
            {
                if (used[pos + n.code] == 1 || array[pos + n.code] != null)
                {
                    cont = true;
                    break;
                }
            }
        }
        else
        {
            nonzeronum++;
        }
    }
    begin = pos;

    // Check the average slot usage rate over the scanned range. If it is no less
    // than the threshold, move next_chk_pos_ forward to pos, where there is much
    // less conflict. A higher threshold gives better slot space usage but makes
    // the build slower.
    if ((double)nonzeronum / (double)(pos - startpos + 1) >= slot_usage_rate_threshold_ && pos > next_chk_pos_)
    {
        System.Threading.Interlocked.Exchange(ref next_chk_pos_, pos);
    }

    // Double check whether the slots are available. Reasons for the second pass:
    // 1. The conflict rate differs across the slot space; the tail of the array
    //    has a much lower conflict rate than its head and body.
    // 2. Rolling back is expensive. So in the high-conflict range we only check
    //    for conflicts (first check); once a conflict-free range is found we
    //    try to reserve it and check for conflicts again.
    bool bAllNull;
    bool bZeroCode = false;
    foreach (Node n in siblings)
    {
        if (n.code == 0)
        {
            bZeroCode = true;
            break;
        }
    }

    // Make sure slot `begin` itself gets reserved by adding a code-0 placeholder.
    if (bZeroCode == false)
    {
        Node sNode = new Node();
        sNode.code = 0;
        siblings.Add(sNode);
    }

    do
    {
        bAllNull = true;

        // Test for conflicts with other threads while claiming the slots.
        int cnt = 0;
        foreach (Node n in siblings)
        {
            int nBlock = (begin + n.code) >> VarBigArray<int>.moveBit;
            long offset = (begin + n.code) & (VarBigArray<int>.sizePerBlock - 1);

            // The indexer read is deliberately evaluated before the raw block access.
            if (used[begin + n.code] == 1 || System.Threading.Interlocked.CompareExchange(ref used.arrList[nBlock][offset], 1, 0) != 0)
            {
                bAllNull = false;

                // Roll back the slots claimed so far in this attempt.
                foreach (Node revertNode in siblings.GetRange(0, cnt))
                {
                    used[begin + revertNode.code] = 0;
                }

                // Retry from a position 1..thread_num_ slots further on.
                begin += rnd.Next(thread_num_) + 1;
                break;
            }
            cnt++;
        }
    } while (bAllNull == false);

    // Remove the placeholder so callers see their original list.
    if (bZeroCode == false)
    {
        siblings.RemoveAt(siblings.Count - 1);
    }

#if NO_SUPPORT_PARALLEL_LIB
    for (int i = 0; i < siblings.Count; i++)
#else
    Parallel.For(0, siblings.Count, parallelOption, i =>
#endif
    {
        List<Node> new_siblings = new List<Node>();
        Node sibling = siblings[i];
        int offset = begin + sibling.code;

        array[offset] = new unit_t();
        array[offset].check = begin;

        if (fetch(sibling, new_siblings) == 0)
        {
            // fetch returned 0 (presumably: no children), so store the value
            // encoded as a negative number.
            array[offset].base1 = -val_[sibling.left] - 1;

            // Use the value returned by the atomic increment for everything below.
            // Re-reading progress_ could observe increments made by other workers,
            // which would skip or distort the progress report.
            var completed = Interlocked.Increment(ref progress_);
            if (completed % 10000 == 0)
            {
                // Try to adjust the slot usage rate in order to keep performance high.
                TimeSpan ts = DateTime.Now - startDT;
                double currQPS = completed / (ts.TotalSeconds + 1);
                double currQPSDelta = currQPS - lastQPS;

                if (currQPS < lastQPS && currQPSDelta < lastQPSDelta)
                {
                    // Average QPS is slowing down: reduce the slot usage rate.
                    slot_usage_rate_threshold_ -= 0.1;
                    if (slot_usage_rate_threshold_ < MIN_SLOT_USAGE_RATE_THRESHOLD)
                    {
                        slot_usage_rate_threshold_ = MIN_SLOT_USAGE_RATE_THRESHOLD;
                    }
                }
                else
                {
                    // Average QPS is speeding up: raise the slot usage rate.
                    slot_usage_rate_threshold_ += 0.1;
                    if (slot_usage_rate_threshold_ > MAX_SLOT_USAGE_RATE_THRESHOLD)
                    {
                        slot_usage_rate_threshold_ = MAX_SLOT_USAGE_RATE_THRESHOLD;
                    }
                }

                lastQPSDelta = currQPSDelta;
                lastQPS = currQPS;

                if (completed % 100000 == 0)
                {
                    // Show current progress on the console.
                    Console.Write("{0}...", completed);
                }
            }
        }
        else
        {
            // The sibling has children: place them and link to their base.
            int b = insert(new_siblings);
            array[offset].base1 = b;
        }
    }
#if NO_SUPPORT_PARALLEL_LIB
#else
    );
#endif

    return begin;
}