示例#1
0
        /// <summary>
        /// Initializes a CompressedPolicyVector from an array of (move index, raw value) entries.
        /// The first <paramref name="numMoves"/> entries are sorted in place, the leading
        /// <paramref name="numMovesToSave"/> raw values are exponentiated relative to their maximum,
        /// and the resulting indices/values are forwarded to CompressedPolicyVector.Initialize.
        /// </summary>
        /// <param name="policyRef">Policy to be initialized.</param>
        /// <param name="numMoves">Number of entries in <paramref name="probs"/> to sort (and size of the scratch buffers).</param>
        /// <param name="numMovesToSave">Number of leading entries (after sorting) that are passed on.</param>
        /// <param name="probs">Entries holding a move index (Index) and a raw value (P); sorted in place.</param>
        public static void InitializeFromProbsArray(ref CompressedPolicyVector policyRef, int numMoves, int numMovesToSave, Span<ProbEntry> probs)
        {
            // NOTE(review): QuickSort presumably orders entries by descending P
            // (the callee asserts non-increasing order in debug builds) - confirm.
            QuickSort(probs, 0, numMoves - 1);

            // Compute the maximum raw value so that it can be subtracted before exponentiation.
            // Seeding with negative infinity (rather than 0) guarantees the true maximum is found
            // even when every value is negative; otherwise nothing would be subtracted and all
            // exponentials could underflow toward zero.
            float max = float.NegativeInfinity;

            for (int j = 0; j < numMovesToSave; j++)
            {
                if (probs[j].P > max)
                {
                    max = probs[j].P;
                }
            }

            Span<float> probsA = stackalloc float[numMoves];
            Span<int> indicesA = stackalloc int[numMoves];

            for (int j = 0; j < numMovesToSave; j++)
            {
                // Largest entry maps to exp(0) = 1; all others map into (0, 1].
                probsA[j] = MathF.Exp(probs[j].P - max);
                indicesA[j] = probs[j].Index;
            }

            CompressedPolicyVector.Initialize(ref policyRef, indicesA.Slice(0, numMovesToSave), probsA.Slice(0, numMovesToSave));
        }
示例#2
0
        /// <summary>
        /// Builds a CompressedPolicyVector as the weighted sum of several policies.
        /// Every move probability reported by policies[i] is multiplied by weights[i]
        /// and accumulated at that move's neural network index.
        /// </summary>
        /// <param name="policies">Policies to be combined.</param>
        /// <param name="weights">Weights indexed in parallel with <paramref name="policies"/>.</param>
        /// <returns>A policy initialized from the accumulated (unsorted) probabilities.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown if a policy reports a move whose raw value is one of the random-initialization markers.
        /// </exception>
        public static CompressedPolicyVector LinearlyCombined(CompressedPolicyVector[] policies, float[] weights)
        {
            // Accumulator with one entry per possible move index.
            Span<float> accumulated = stackalloc float[EncodedPolicyVector.POLICY_VECTOR_LENGTH];

            for (int policyIndex = 0; policyIndex < policies.Length; policyIndex++)
            {
                CompressedPolicyVector source = policies[policyIndex];

                foreach ((EncodedMove move, float probability) entry in source.ProbabilitySummary())
                {
                    var rawValue = entry.move.RawValue;
                    bool isRandomMarker = rawValue == SPECIAL_VALUE_RANDOM_NARROW
                                       || rawValue == SPECIAL_VALUE_RANDOM_WIDE;
                    if (isRandomMarker)
                    {
                        throw new NotImplementedException("Method LinearlyCombined probably not yet supported with random evaluations.");
                    }

                    // Weighted share of this move contributed by the current policy.
                    float weighted = weights[policyIndex] * entry.probability;
                    accumulated[entry.move.IndexNeuralNet] += weighted;
                }
            }

            // The accumulated values are not in sorted order, hence alreadySorted = false.
            CompressedPolicyVector combined = default;
            Initialize(ref combined, accumulated, false);
            return combined;
        }
示例#3
0
        /// <summary>
        /// Initializes values (bypassing readonly) by copying already-encoded move indices
        /// and probabilities directly into the policy's slots.
        /// Copying stops at the first index equal to SPECIAL_VALUE_SENTINEL_TERMINATOR,
        /// or after NUM_MOVE_SLOTS entries, whichever comes first.
        /// </summary>
        /// <param name="policy">Policy whose slots are written.</param>
        /// <param name="indices">Move indices to copy.</param>
        /// <param name="probsEncoded">Encoded probabilities, parallel to <paramref name="indices"/>.</param>
        /// <exception cref="ArgumentException">Thrown if the two spans differ in length.</exception>
        public static void Initialize(ref CompressedPolicyVector policy, Span<ushort> indices, Span<ushort> probsEncoded)
        {
            if (indices.Length != probsEncoded.Length)
            {
                throw new ArgumentException("Length of indicies and probabilities must be same");
            }

            // Decoded probability of the previously copied entry; used to verify
            // (in debug builds) that entries arrive in non-increasing order.
            float lastProb = float.MaxValue;

            // NOTE(review): no terminator is written after the last copied entry, so slots
            // beyond it keep whatever they held before - confirm callers rely on that.
            fixed (ushort* moveIndices = &policy.MoveIndex_0)
            {
                fixed (ushort* moveProbabilitiesEncoded = &policy.MoveProbEncoded_0)
                {
                    for (int i = 0; i < indices.Length && i < NUM_MOVE_SLOTS; i++)
                    {
                        if (indices[i] == SPECIAL_VALUE_SENTINEL_TERMINATOR)
                        {
                            break;
                        }

                        moveIndices[i] = indices[i];
                        moveProbabilitiesEncoded[i] = probsEncoded[i];

                        // Compare decoded against decoded. Previously the raw encoded value was
                        // stored in lastProb, which made the ordering check ineffective.
                        float decodedProb = DecodedProbability(probsEncoded[i]);
                        Debug.Assert(decodedProb <= lastProb);

                        lastProb = decodedProb;
                    }
                }
            }
        }
示例#4
0
 /// <summary>
 /// Marks the policy as one that is to be initialized randomly (for testing purposes)
 /// by means of a special encoding.
 ///
 /// The actual probabilities cannot be computed here since the move list is not yet known,
 /// therefore a special value is written into the first move index slot to indicate
 /// that it should be expanded in subsequent processing.
 /// </summary>
 /// <param name="policy">Policy whose first move index slot receives the marker value.</param>
 /// <param name="wide">
 /// If true, SPECIAL_VALUE_RANDOM_WIDE is written; otherwise SPECIAL_VALUE_RANDOM_NARROW.
 /// </param>
 public static void InitializeAsRandom(ref CompressedPolicyVector policy, bool wide)
 {
     ushort marker;
     if (wide)
     {
         marker = SPECIAL_VALUE_RANDOM_WIDE;
     }
     else
     {
         marker = SPECIAL_VALUE_RANDOM_NARROW;
     }

     // Only the first move index needs to be set.
     fixed (ushort* firstMoveIndex = &policy.MoveIndex_0)
     {
         *firstMoveIndex = marker;
     }
 }
示例#5
0
        /// <summary>
        /// Compares this policy with another CompressedPolicyVector and returns the
        /// magnitude of the largest absolute difference in probability across all moves.
        /// </summary>
        /// <param name="other">Policy to compare against.</param>
        /// <returns>The largest absolute per-move difference (0 if none is greater than 0).</returns>
        public float MaxProbDifferenceWith(CompressedPolicyVector other)
        {
            float[] mine = DecodedNoValidate;
            float[] theirs = other.DecodedNoValidate;

            float largest = 0;
            int moveIndex = 0;

            while (moveIndex < EncodedPolicyVector.POLICY_VECTOR_LENGTH)
            {
                float gap = Math.Abs(mine[moveIndex] - theirs[moveIndex]);

                // Strict comparison: a gap is adopted only if it exceeds the running maximum.
                largest = gap > largest ? gap : largest;
                moveIndex++;
            }

            return largest;
        }
示例#6
0
        /// <summary>
        /// Static method to initialize a CompressedPolicyVector from
        /// a specified array of expanded policy probabilities.
        /// Entries greater than HALF_INCREMENT are encoded and stored; once all NUM_MOVE_SLOTS
        /// slots are occupied, a new entry replaces the slot holding the smallest encoded
        /// probability if (and only if) the new encoded probability is larger.
        /// </summary>
        /// <param name="policy">Policy whose slots are written.</param>
        /// <param name="probabilities">
        /// Pointer to EncodedPolicyVector.POLICY_VECTOR_LENGTH probabilities, indexed by move index.
        /// </param>
        /// <param name="alreadySorted">If false, policy.Sort is invoked on the filled slots afterwards.</param>
        /// <exception cref="Exception">
        /// Thrown if the probabilities do not sum to a value within [0.995, 1.005] (including a NaN sum).
        /// </exception>
        public static void Initialize(ref CompressedPolicyVector policy, float* probabilities, bool alreadySorted)
        {
            float probabilityAcc = 0.0f;
            int numSlotsUsed = 0;

            fixed (ushort* moveIndices = &policy.MoveIndex_0)
            {
                fixed (ushort* moveProbabilitiesEncoded = &policy.MoveProbEncoded_0)
                {
                    // Move all the sufficiently large probabilities into our slots.
                    for (int i = 0; i < EncodedPolicyVector.POLICY_VECTOR_LENGTH; i++)
                    {
                        float thisProb = probabilities[i];
                        probabilityAcc += thisProb;

                        // Written as a negated "greater than" so NaN entries are skipped,
                        // exactly as a plain "thisProb > HALF_INCREMENT" test would.
                        if (!(thisProb > HALF_INCREMENT))
                        {
                            continue;
                        }

                        ushort encodedProb = EncodedProbability(thisProb);
                        if (numSlotsUsed < NUM_MOVE_SLOTS)
                        {
                            moveIndices[numSlotsUsed] = (ushort)i;
                            moveProbabilitiesEncoded[numSlotsUsed] = encodedProb;
                            numSlotsUsed++;
                            continue;
                        }

                        // All slots occupied: find the first slot holding the smallest encoded value.
                        // Seeding from slot 0 guarantees a valid index even if every slot holds
                        // the maximum encodable value.
                        int smallestIndex = 0;
                        ushort smallestValue = moveProbabilitiesEncoded[0];
                        for (int si = 1; si < NUM_MOVE_SLOTS; si++)
                        {
                            if (moveProbabilitiesEncoded[si] < smallestValue)
                            {
                                smallestIndex = si;
                                smallestValue = moveProbabilitiesEncoded[si];
                            }
                        }

                        // Replace the weakest slot only if the new entry is strictly stronger;
                        // otherwise the new entry is dropped (lost).
                        if (smallestValue < encodedProb)
                        {
                            moveIndices[smallestIndex] = (ushort)i;
                            moveProbabilitiesEncoded[smallestIndex] = encodedProb;
                        }
                    }

                    // Add terminator if not full (written once, after all slots are final).
                    if (numSlotsUsed < NUM_MOVE_SLOTS)
                    {
                        moveIndices[numSlotsUsed] = SPECIAL_VALUE_SENTINEL_TERMINATOR;
                    }

                    // Negated range test so that a NaN sum is rejected as well.
                    if (!(probabilityAcc >= 0.995 && probabilityAcc <= 1.005))
                    {
                        throw new Exception($"Internal error: NN probabilities sum to { probabilityAcc}");
                    }
                }
            }

            if (!alreadySorted)
            {
                policy.Sort(numSlotsUsed);
            }
        }
示例#7
0
 /// <summary>
 /// Initializes a CompressedPolicyVector from a span of expanded policy probabilities
 /// by forwarding to another Initialize overload with the result of Fixed(probabilities).
 /// </summary>
 /// <param name="policy">Policy to be initialized.</param>
 /// <param name="probabilities">
 /// Expanded probabilities; must contain at least EncodedPolicyVector.POLICY_VECTOR_LENGTH entries.
 /// </param>
 /// <param name="alreadySorted">Passed through unchanged to the callee.</param>
 /// <exception cref="ArgumentException">
 /// Thrown if <paramref name="probabilities"/> has fewer than EncodedPolicyVector.POLICY_VECTOR_LENGTH entries.
 /// </exception>
 public static void Initialize(ref CompressedPolicyVector policy, Span<float> probabilities, bool alreadySorted)
 {
     // NOTE(review): Fixed(...) presumably yields a float*, so this resolves to the pointer-based
     // overload, which reads POLICY_VECTOR_LENGTH entries without bounds checks - confirm.
     // Reject spans that are too short rather than reading past their end.
     if (probabilities.Length < EncodedPolicyVector.POLICY_VECTOR_LENGTH)
     {
         throw new ArgumentException(
             $"Expected at least {EncodedPolicyVector.POLICY_VECTOR_LENGTH} probabilities but received {probabilities.Length}",
             nameof(probabilities));
     }

     Initialize(ref policy, Fixed(probabilities), alreadySorted);
 }
示例#8
0
        /// <summary>
        /// Initializes values (bypassing readonly) with specified set of move indices and probabilities.
        /// Probabilities are encoded, entries whose encoding rounds to zero are not stored,
        /// and the stored probabilities are then rescaled so that they sum to 1.0.
        /// </summary>
        /// <param name="policy">Policy whose slots are written.</param>
        /// <param name="indices">Move indices (each is cast to ushort when stored).</param>
        /// <param name="probs">Probabilities, parallel to <paramref name="indices"/>.</param>
        /// <param name="alreadySorted">
        /// If true, the probabilities are expected in non-increasing order (verified in debug builds)
        /// and processing stops at the first entry that rounds to zero.
        /// If false, entries that round to zero are skipped and policy.Sort is invoked at the end.
        /// </param>
        /// <exception cref="ArgumentException">Thrown if the two spans differ in length.</exception>
        internal static void Initialize(ref CompressedPolicyVector policy,
                                        Span<int> indices, Span<float> probs, bool alreadySorted = true)
        {
            // TODO: the Span<int> can actually be shortened to Span<short>

            if (indices.Length != probs.Length)
            {
                throw new ArgumentException("Length of indicies and probabilities must be same");
            }

            float probabilityAcc = 0.0f;
            float priorProb = float.MaxValue; // only used in debug mode for verifying in order
            int numMovesUsed = 0;

            // Raw probabilities of the retained moves, by slot. Normalization reads from here so
            // that each slot is rescaled from its own value even if earlier entries were skipped.
            Span<float> keptProbs = stackalloc float[NUM_MOVE_SLOTS];

            fixed (ushort* moveIndices = &policy.MoveIndex_0)
            {
                fixed (ushort* moveProbabilitiesEncoded = &policy.MoveProbEncoded_0)
                {
                    // Move the probabilities into our slots until input or slots are exhausted.
                    for (int i = 0; i < indices.Length && numMovesUsed < NUM_MOVE_SLOTS; i++)
                    {
                        // Get this probability and make sure it is in expected sorted order.
                        float thisProb = probs[i];
                        Debug.Assert(!alreadySorted || thisProb <= priorProb);

                        ushort encoded = EncodedProbability(thisProb);
                        if (encoded == 0)
                        {
                            if (alreadySorted)
                            {
                                // Moves are sorted, so no later entry can encode to a nonzero value.
                                break;
                            }

                            // Unsorted input: later entries may still be significant, so
                            // skip only this one instead of abandoning the rest.
                            continue;
                        }

                        // Store into the next free slot (equal to i whenever nothing was skipped).
                        moveIndices[numMovesUsed] = (ushort)indices[i];
                        moveProbabilitiesEncoded[numMovesUsed] = encoded;
                        keptProbs[numMovesUsed] = thisProb;
                        probabilityAcc += thisProb;
                        numMovesUsed++;

                        priorProb = thisProb;
                    }

                    if (numMovesUsed < NUM_MOVE_SLOTS)
                    {
                        // Not full. Add terminator.
                        moveIndices[numMovesUsed] = SPECIAL_VALUE_SENTINEL_TERMINATOR;
                    }

                    // Normalize to sum to 1.0 (nothing to do, and no division, if no move was kept).
                    if (numMovesUsed > 0)
                    {
                        float adj = 1.0f / probabilityAcc;

                        for (int i = 0; i < numMovesUsed; i++)
                        {
                            moveProbabilitiesEncoded[i] = EncodedProbability(keptProbs[i] * adj);
                        }
                    }
                }

                if (!alreadySorted)
                {
                    policy.Sort(numMovesUsed);
                }
            }
        }