public static void InitializeFromProbsArray(ref CompressedPolicyVector policyRef, int numMoves, int numMovesToSave, Span<ProbEntry> probs)
{
    // Builds a compressed policy from raw (pre-exponentiation) move scores:
    //   1. sorts the first numMoves entries of probs,
    //   2. exponentiates the leading numMovesToSave scores relative to their maximum,
    //   3. passes the resulting indices/values to Initialize (which normalizes them).
    // NOTE(review): the sort is presumably descending by P, because the result is
    // handed to Initialize with its default alreadySorted = true -- confirm in QuickSort.
    QuickSort(probs, 0, numMoves - 1);

    // Find the largest score among the moves to be saved so that it can be
    // subtracted before exponentiation (avoids overflow in MathF.Exp).
    // Start from negative infinity rather than zero: with a zero seed, a position
    // whose scores are all negative would never have its true maximum subtracted,
    // and the exponentiated values could all be tiny instead of relative to the top move.
    float max = float.NegativeInfinity;
    for (int j = 0; j < numMovesToSave; j++)
    {
        if (probs[j].P > max)
        {
            max = probs[j].P;
        }
    }

    Span<float> probsA = stackalloc float[numMoves];
    Span<int> indicesA = stackalloc int[numMoves];

    for (int j = 0; j < numMovesToSave; j++)
    {
        // The largest score maps to exp(0) = 1; every other score maps to a value in (0, 1].
        probsA[j] = MathF.Exp(probs[j].P - max);
        indicesA[j] = probs[j].Index;
    }

    CompressedPolicyVector.Initialize(ref policyRef, indicesA.Slice(0, numMovesToSave), probsA.Slice(0, numMovesToSave));
}
/// <summary>
/// Builds a CompressedPolicyVector as the weighted sum of several policy vectors:
/// every move probability reported by policies[i] is scaled by weights[i] and the
/// scaled values are accumulated per move before being compressed again.
/// </summary>
/// <param name="policies">Policies to combine.</param>
/// <param name="weights">Weights to apply; weights[i] is paired with policies[i].</param>
/// <returns>The policy initialized from the accumulated per-move values.</returns>
/// <exception cref="NotImplementedException">
/// A policy contains one of the special "random" marker values.
/// </exception>
public static CompressedPolicyVector LinearlyCombined(CompressedPolicyVector[] policies, float[] weights)
{
    // One accumulator per possible move, indexed by the move's neural-net index.
    Span<float> accumulated = stackalloc float[EncodedPolicyVector.POLICY_VECTOR_LENGTH];

    int policyIndex = 0;
    while (policyIndex < policies.Length)
    {
        CompressedPolicyVector source = policies[policyIndex];

        foreach ((EncodedMove move, float probability) entry in source.ProbabilitySummary())
        {
            // Policies flagged for random initialization carry no real probabilities.
            bool isRandomMarker = entry.move.RawValue == SPECIAL_VALUE_RANDOM_NARROW
                               || entry.move.RawValue == SPECIAL_VALUE_RANDOM_WIDE;
            if (isRandomMarker)
            {
                throw new NotImplementedException("Method LinearlyCombined probably not yet supported with random evaluations.");
            }

            float scaled = weights[policyIndex] * entry.probability;
            accumulated[entry.move.IndexNeuralNet] += scaled;
        }

        policyIndex++;
    }

    // Compress the accumulated vector; it is not sorted, so ask Initialize to sort it.
    CompressedPolicyVector combined = default;
    Initialize(ref combined, accumulated, false);
    return combined;
}
/// <summary>
/// Initializes values (bypassing readonly) from a set of move indices and
/// already-encoded probabilities.
///
/// Entries are copied in order until the first terminator sentinel in
/// <paramref name="indices"/>, the end of the spans, or NUM_MOVE_SLOTS entries,
/// whichever comes first. If fewer than NUM_MOVE_SLOTS entries were copied,
/// a terminator is written after the last one.
/// </summary>
/// <param name="policy">Policy whose move slots are overwritten.</param>
/// <param name="indices">Move indices; may contain a terminator sentinel marking the end.</param>
/// <param name="probsEncoded">Encoded probabilities, parallel to <paramref name="indices"/>.</param>
/// <exception cref="ArgumentException">The two spans differ in length.</exception>
public static void Initialize(ref CompressedPolicyVector policy, Span<ushort> indices, Span<ushort> probsEncoded)
{
    if (indices.Length != probsEncoded.Length)
    {
        throw new ArgumentException("Length of indicies and probabilities must be same");
    }

    int numCopied = 0;
    fixed (ushort* moveIndices = &policy.MoveIndex_0)
    {
        fixed (ushort* moveProbabilitiesEncoded = &policy.MoveProbEncoded_0)
        {
            for (int i = 0; i < indices.Length && i < NUM_MOVE_SLOTS; i++)
            {
                if (indices[i] == SPECIAL_VALUE_SENTINEL_TERMINATOR)
                {
                    break;
                }

                // Debug-only check that probabilities arrive in non-increasing order.
                // Both sides are decoded so the comparison is in the same units
                // (previously a decoded value was compared against a raw encoded one).
                Debug.Assert(i == 0 || DecodedProbability(probsEncoded[i]) <= DecodedProbability(probsEncoded[i - 1]));

                moveIndices[i] = indices[i];
                moveProbabilitiesEncoded[i] = probsEncoded[i];
                numCopied++;
            }

            // Not full: mark the end explicitly, as the other Initialize overloads do,
            // so slots beyond the copied entries are never interpreted as moves.
            if (numCopied < NUM_MOVE_SLOTS)
            {
                moveIndices[numCopied] = SPECIAL_VALUE_SENTINEL_TERMINATOR;
            }
        }
    }
}
/// <summary>
/// Marks a policy as "to be initialized randomly" (for testing purposes)
/// by storing a special marker value in its first move index.
///
/// The real probabilities cannot be produced at this point because the move list
/// is not yet known, so we put a marker in the array to indicate that the policy
/// should be expanded in subsequent processing.
/// </summary>
/// <param name="policy">Policy to be marked.</param>
/// <param name="wide">Selects the "wide" marker when true, the "narrow" marker otherwise.</param>
public static void InitializeAsRandom(ref CompressedPolicyVector policy, bool wide)
{
    fixed (ushort* firstMoveIndex = &policy.MoveIndex_0)
    {
        // The marker lives in the first move-index slot only; nothing else is touched.
        if (wide)
        {
            *firstMoveIndex = SPECIAL_VALUE_RANDOM_WIDE;
        }
        else
        {
            *firstMoveIndex = SPECIAL_VALUE_RANDOM_NARROW;
        }
    }
}
/// <summary>
/// Compares this policy with another CompressedPolicyVector and returns the
/// magnitude of the largest absolute difference in probability across all moves.
/// </summary>
/// <param name="other">Policy to compare against.</param>
/// <returns>The largest per-move absolute difference (0 if the decoded policies are identical).</returns>
public float MaxProbDifferenceWith(CompressedPolicyVector other)
{
    // Expand both policies once up front; the loop then walks the full policy vector.
    float[] mine = DecodedNoValidate;
    float[] theirs = other.DecodedNoValidate;

    float largest = 0;
    int index = 0;
    while (index < EncodedPolicyVector.POLICY_VECTOR_LENGTH)
    {
        float gap = MathF.Abs(mine[index] - theirs[index]);
        largest = gap > largest ? gap : largest;
        index++;
    }

    return largest;
}
/// <summary>
/// Static method to initialize a CompressedPolicyVector from
/// a specified array of expanded policy probabilities.
///
/// Probabilities not greater than HALF_INCREMENT are skipped. The rest are encoded
/// and stored in the move slots; once all NUM_MOVE_SLOTS slots are in use, a new
/// entry replaces the slot holding the smallest encoded probability if it is larger,
/// and is dropped otherwise. A terminator is written if the slots are not full.
/// </summary>
/// <param name="policy">Policy whose move slots are overwritten.</param>
/// <param name="probabilities">
/// Pointer to EncodedPolicyVector.POLICY_VECTOR_LENGTH probabilities, indexed by move index.
/// </param>
/// <param name="alreadySorted">If false, policy.Sort is called on the used slots before returning.</param>
/// <exception cref="Exception">The probabilities sum to less than 0.995 or more than 1.005.</exception>
public static void Initialize(ref CompressedPolicyVector policy, float* probabilities, bool alreadySorted)
{
    float probabilityAcc = 0.0f;
    int numSlotsUsed = 0;

    fixed (ushort* moveIndices = &policy.MoveIndex_0)
    {
        fixed (ushort* moveProbabilitiesEncoded = &policy.MoveProbEncoded_0)
        {
            // Move all the probabilities into our array
            for (int i = 0; i < EncodedPolicyVector.POLICY_VECTOR_LENGTH; i++)
            {
                float thisProb = probabilities[i];
                probabilityAcc += thisProb;

                // Written as a negated ">" so that NaN is skipped, exactly as before.
                if (!(thisProb > HALF_INCREMENT))
                {
                    continue;
                }

                ushort encodedProb = EncodedProbability(thisProb);

                if (numSlotsUsed < NUM_MOVE_SLOTS)
                {
                    moveIndices[numSlotsUsed] = (ushort)i;
                    moveProbabilitiesEncoded[numSlotsUsed] = encodedProb;
                    numSlotsUsed++;
                    continue;
                }

                // All slots are in use: find the slot with the smallest encoded value.
                // Seeding from slot 0 keeps the index valid even if every slot holds
                // ushort.MaxValue (a -1 seed would index out of bounds in that case).
                // Ties resolve to the first such slot.
                int smallestIndex = 0;
                ushort smallestValue = moveProbabilitiesEncoded[0];
                for (int si = 1; si < NUM_MOVE_SLOTS; si++)
                {
                    if (moveProbabilitiesEncoded[si] < smallestValue)
                    {
                        smallestIndex = si;
                        smallestValue = moveProbabilitiesEncoded[si];
                    }
                }

                // Replace the weakest entry if the new one is stronger; otherwise
                // the new entry is simply dropped (its probability mass is lost).
                if (smallestValue < encodedProb)
                {
                    moveIndices[smallestIndex] = (ushort)i;
                    moveProbabilitiesEncoded[smallestIndex] = encodedProb;
                }
            }

            // Add terminator if not full (done once, after all slots are assigned).
            if (numSlotsUsed < NUM_MOVE_SLOTS)
            {
                moveIndices[numSlotsUsed] = SPECIAL_VALUE_SENTINEL_TERMINATOR;
            }

            if (probabilityAcc < 0.995 || probabilityAcc > 1.005)
            {
                throw new Exception($"Internal error: NN probabilities sum to { probabilityAcc}");
            }
        }
    }

    if (!alreadySorted)
    {
        policy.Sort(numSlotsUsed);
    }
}
/// <summary>
/// Initializes a CompressedPolicyVector from a span of expanded policy probabilities
/// by forwarding to the pointer-based Initialize overload.
/// </summary>
/// <param name="policy">Policy whose move slots are overwritten.</param>
/// <param name="probabilities">
/// Expanded probabilities indexed by move index; must contain at least
/// EncodedPolicyVector.POLICY_VECTOR_LENGTH elements.
/// </param>
/// <param name="alreadySorted">Passed through unchanged to the pointer-based overload.</param>
/// <exception cref="ArgumentException">
/// <paramref name="probabilities"/> has fewer than EncodedPolicyVector.POLICY_VECTOR_LENGTH elements.
/// </exception>
public static void Initialize(ref CompressedPolicyVector policy, Span<float> probabilities, bool alreadySorted)
{
    // The pointer-based overload reads POLICY_VECTOR_LENGTH floats unconditionally,
    // so a shorter span would be read past its end.
    if (probabilities.Length < EncodedPolicyVector.POLICY_VECTOR_LENGTH)
    {
        throw new ArgumentException("Span of probabilities is shorter than the policy vector length", nameof(probabilities));
    }

    // Pin the span for the whole duration of the call so the pointer stays valid
    // even when the span is backed by a managed array.
    fixed (float* probabilitiesPtr = probabilities)
    {
        Initialize(ref policy, probabilitiesPtr, alreadySorted);
    }
}
/// <summary>
/// Initializes values (bypassing readonly) with specified set of move indices and probabilities.
///
/// At most NUM_MOVE_SLOTS entries are considered. Entries whose probability encodes
/// to zero are not stored. The stored probabilities are rescaled so that they sum
/// to 1.0 before being encoded, and a terminator is written if the slots are not full.
/// </summary>
/// <param name="policy">Policy whose move slots are overwritten.</param>
/// <param name="indices">Move indices (each is stored as a ushort).</param>
/// <param name="probs">Probabilities, parallel to <paramref name="indices"/>.</param>
/// <param name="alreadySorted">
/// True if <paramref name="probs"/> is in non-increasing order (verified in debug builds);
/// in that case processing stops at the first probability that encodes to zero.
/// If false, such entries are skipped individually and policy.Sort is called at the end.
/// </param>
/// <exception cref="ArgumentException">The two spans differ in length.</exception>
internal static void Initialize(ref CompressedPolicyVector policy, Span<int> indices, Span<float> probs, bool alreadySorted = true)
{
    // TODO: the Span<int> can actually be shortened to Span<short>
    if (indices.Length != probs.Length)
    {
        throw new ArgumentException("Length of indicies and probabilities must be same");
    }

    float probabilityAcc = 0.0f;
    float priorProb = float.MaxValue; // only used in debug mode for verifying in order
    int numMovesUsed = 0;

    fixed (ushort* moveIndices = &policy.MoveIndex_0)
    {
        fixed (ushort* moveProbabilitiesEncoded = &policy.MoveProbEncoded_0)
        {
            // Pass 1: store the indices of all moves that will be kept and sum their probabilities.
            for (int i = 0; i < indices.Length && i < NUM_MOVE_SLOTS; i++)
            {
                // Get this probability and make sure it is in the expected sorted order
                float thisProb = probs[i];
                Debug.Assert(!alreadySorted || thisProb <= priorProb);
                priorProb = thisProb;

                if (EncodedProbability(thisProb) == 0)
                {
                    if (alreadySorted)
                    {
                        // Moves are sorted, so no later move can encode to a nonzero value.
                        break;
                    }

                    // Unsorted input: a later move may still be large enough to keep,
                    // so skip only this one instead of abandoning the rest.
                    continue;
                }

                moveIndices[numMovesUsed] = (ushort)indices[i];
                probabilityAcc += thisProb;
                numMovesUsed++;
            }

            if (numMovesUsed < NUM_MOVE_SLOTS)
            {
                // Not full. Add terminator.
                moveIndices[numMovesUsed] = SPECIAL_VALUE_SENTINEL_TERMINATOR;
            }

            // Pass 2: normalize the kept probabilities to sum to 1.0 and encode them.
            // The same "encodes to zero" test as in pass 1 selects the same entries in the
            // same order, so slot k here corresponds to the k-th index stored above.
            // (When nothing was kept the loop body never runs, so the infinite adj is harmless.)
            float adj = 1.0f / probabilityAcc;
            int slot = 0;
            for (int i = 0; slot < numMovesUsed && i < indices.Length; i++)
            {
                float thisProb = probs[i];
                if (EncodedProbability(thisProb) == 0)
                {
                    continue;
                }

                moveProbabilitiesEncoded[slot] = EncodedProbability(thisProb * adj);
                slot++;
            }
        }
    }

    if (!alreadySorted)
    {
        policy.Sort(numMovesUsed);
    }
}