// Computes one sparsity value per mask using Erdos-Renyi scaling.
//
// Parameters:
//   allMasks          - one mask per layer; Data.Length is used as the layer's parameter
//                       count and Shape[0], Shape[1] as its two dimensions
//                       (assumes every mask has at least two dimensions - TODO confirm).
//   Names             - Names[i] is the key used to look up allMasks[i] in customSparsityMap.
//   defaultSparsity   - target overall sparsity for the layers that are not in customSparsityMap.
//   customSparsityMap - layers listed here get exactly the mapped sparsity and are ignored
//                       by the Erdos-Renyi calculation.
//
// Returns an array aligned with allMasks: the custom sparsity for custom layers, 0 for
// layers that had to be made dense, and 1 - eps * rawProbability for all other layers.
//
// How eps is found:
// We start with all non-custom layers and try to find the right epsilon. If the scaled
// probability of any layer exceeds 1, that layer is made dense and epsilon is recomputed
// over the remaining layers. Dense layers no longer scale with epsilon, but they still
// have to be counted so that the target parameter count is achieved. Say we have four
// layers with N_1, ..., N_4 parameters and layers 3 and 4 became dense:
//   eps * (p_1 * N_1 + p_2 * N_2) + (N_3 + N_4) = (1 - default_sparsity) * (N_1 + N_2 + N_3 + N_4)
//   eps * (p_1 * N_1 + p_2 * N_2) = (1 - default_sparsity) * (N_1 + N_2) - default_sparsity * (N_3 + N_4)
//   eps = rhs / (\sum_i p_i * N_i) = rhs / divisor
public static Real[] GetSparsitiesErdosRenyi(NdArray<Real>[] allMasks, string[] Names, Real defaultSparsity, Dictionary<string, Real> customSparsityMap)
{
    int layerCount = allMasks.Length;

    // Membership in the custom map does not change between iterations, so resolve it once.
    bool[] isCustom = new bool[layerCount];
    for (int i = 0; i < layerCount; i++)
    {
        isCustom[i] = customSparsityMap.ContainsKey(Names[i]);
    }

    // Layers whose scaled probability exceeded 1 and were therefore made fully dense.
    bool[] isDense = new bool[layerCount];

    Real[] rawProbabilities = new Real[layerCount];
    Real eps = 0;
    bool isEpsValid = false;

    // Every iteration that does not accept eps marks at least one more layer as dense,
    // so the set of scalable layers strictly shrinks and the loop terminates.
    while (!isEpsValid)
    {
        Real divisor = 0;
        int rhs = 0;
        Real maxProb = 0;
        int scalableCount = 0;

        for (int i = 0; i < layerCount; i++)
        {
            if (isCustom[i])
            {
                // We ignore custom sparsities in the Erdos-Renyi calculation.
                continue;
            }

            int nParam = allMasks[i].Data.Length;
            int nZeros = (int)Math.Ceiling((double)(nParam * defaultSparsity));

            if (isDense[i])
            {
                // See the `- default_sparsity * (N_3 + N_4)` part of the equation above.
                // Dense layers must not contribute to the divisor or to maxProb.
                rhs -= nZeros;
                continue;
            }

            // Corresponds to the `(1 - default_sparsity) * (N_1 + N_2)` part of the equation above.
            rhs += nParam - nZeros;

            Real prob = (allMasks[i].Shape[0] + allMasks[i].Shape[1]) / (Real)nParam;
            rawProbabilities[i] = prob;

            // prob * nParam is this layer's term of the divisor.
            divisor += prob * nParam;

            if (scalableCount == 0 || prob > maxProb)
            {
                maxProb = prob;
            }
            scalableCount++;
        }

        if (scalableCount == 0)
        {
            // Every layer is either custom or dense (or there are no layers at all):
            // nothing is left to scale, and eps is not used by any output entry.
            eps = 0;
            break;
        }

        // Multiplying the individual probabilities by epsilon should give the correct
        // number of parameters per layer.
        eps = rhs / divisor;

        if (maxProb * eps > 1)
        {
            // The most probable layer(s) would need more than 100% density: make them
            // dense (sparsity 0) and solve for eps again over the remaining layers.
            for (int i = 0; i < layerCount; i++)
            {
                if (!isCustom[i] && !isDense[i] && rawProbabilities[i] == maxProb)
                {
                    isDense[i] = true;
                }
            }
        }
        else
        {
            isEpsValid = true;
        }
    }

    Real[] sparsities = new Real[layerCount];
    for (int i = 0; i < layerCount; i++)
    {
        if (isCustom[i])
        {
            sparsities[i] = customSparsityMap[Names[i]];
        }
        else if (isDense[i])
        {
            sparsities[i] = 0.0f;
        }
        else
        {
            sparsities[i] = 1.0f - eps * rawProbabilities[i];
        }
    }

    return sparsities;
}