Ejemplo n.º 1
0
        /// <summary>
        /// Returns a CompressedPolicyVector array with the policy vectors
        /// extracted from all the positions in this batch.
        /// </summary>
        /// <param name="numPos">Number of positions in the batch.</param>
        /// <param name="topK">Number of (index, probability) entries stored per position.</param>
        /// <param name="indices">Flat span of move indices, topK contiguous entries per position.</param>
        /// <param name="probabilities">Flat span of probabilities, parallel to indices.</param>
        /// <param name="probType">Whether probabilities are direct or log probabilities.</param>
        /// <returns>One compressed policy vector per position, or null if no data supplied.</returns>
        static CompressedPolicyVector[] ExtractPoliciesTopK(int numPos, int topK, Span<int> indices, Span<float> probabilities, PolicyType probType)
        {
            if (probType == PolicyType.LogProbabilities)
            {
                // Not yet supported. When implemented, each entry would be exponentiated:
                //   probabilities[i] = MathF.Exp(probabilities[i]);
                // (The loop that did this sat unreachably after the throw and was removed.)
                throw new NotImplementedException();
            }

            if (indices == null && probabilities == null)
            {
                return null;
            }

            if (probabilities.Length != indices.Length)
            {
                throw new ArgumentException("Indices and probabilities expected to be same length");
            }

            CompressedPolicyVector[] retPolicies = new CompressedPolicyVector[numPos];

            // Each position occupies a contiguous run of topK entries in the flat spans.
            int offset = 0;
            for (int i = 0; i < numPos; i++)
            {
                CompressedPolicyVector.Initialize(ref retPolicies[i], indices.Slice(offset, topK), probabilities.Slice(offset, topK));
                offset += topK;
            }

            return retPolicies;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Constructor initializing all result fields.
 /// </summary>
 /// <param name="winP">Win probability value stored on the result.</param>
 /// <param name="lossP">Loss probability value stored on the result.</param>
 /// <param name="m">Value stored into M (moves-left head output — TODO confirm).</param>
 /// <param name="policy">Compressed policy vector stored on the result.</param>
 public NNEvaluatorResult(float winP, float lossP, float m, CompressedPolicyVector policy)
 {
     this.winP = winP;
     this.lossP = lossP;
     this.M = m;
     this.Policy = policy;
 }
Ejemplo n.º 3
0
        // --------------------------------------------------------------------------------------------
        /// <summary>
        /// Blends each child's prior policy P with the empirical visit-count fraction,
        /// optionally writing the blended values back into the tree nodes.
        /// NOTE(review): this excerpt is truncated — the cache-save path implied by
        /// saveToCache (and the method's closing brace) is not visible here.
        /// </summary>
        /// <param name="cache">Position cache; usage not visible in this excerpt.</param>
        /// <param name="node">Node whose children's policies are blended.</param>
        /// <param name="weightEmpirical">Weight of the empirical visit fraction (0 = pure prior).</param>
        /// <param name="saveToCache">Whether results should be saved to cache; handling not visible here.</param>
        /// <param name="rewriteNodeInTree">If true, blended P values are written back into the tree.</param>
        static void ProcessNode(PositionEvalCache cache, MCTSNode node, float weightEmpirical,
                                bool saveToCache, bool rewriteNodeInTree)
        {
            Span <MCTSNodeStructChild> children = node.Ref.Children;

            // TODO: optimize this away if saveToCache is false
            ushort[] probabilities = new ushort[node.NumPolicyMoves];
            ushort[] indices       = new ushort[node.NumPolicyMoves];

            // Compute empirical visit distribution
            // (fraction of the parent's visits received by each expanded child)
            float[] nodeFractions = new float[node.NumPolicyMoves];
            for (int i = 0; i < node.NumChildrenExpanded; i++)
            {
                nodeFractions[i] = (float)node.ChildAtIndex(i).N / (float)node.N;
            }

            // Determine P of first unexpanded node
            // We can't allow any child to have a new P less than this
            // since we need to keep them in order by P and the resorting logic below
            // can only operate over expanded nodes
            float minP = 0;

            if (node.NumChildrenExpanded < node.NumPolicyMoves)
            {
                minP = node.ChildAtIndexInfo(node.NumChildrenExpanded).p;
            }

            // Add each move to the policy vector with blend of prior and empirical values
            for (int i = 0; i < node.NumChildrenExpanded; i++)
            {
                (MCTSNode node, EncodedMove move, FP16 p)info = node.ChildAtIndexInfo(i);
                indices[i] = (ushort)info.move.IndexNeuralNet;

                // Linear blend of prior P and empirical visit fraction.
                float newValue = (1.0f - weightEmpirical) * info.p
                                 + weightEmpirical * nodeFractions[i];
                // Clamp so expanded children never fall below the first unexpanded P
                // (preserves the P-ordering invariant described above).
                if (newValue < minP)
                {
                    newValue = minP;
                }
                probabilities[i] = CompressedPolicyVector.EncodedProbability(newValue);

                if (rewriteNodeInTree && weightEmpirical != 0)
                {
                    MCTSNodeStructChild thisChild = children[i];
                    if (thisChild.IsExpanded)
                    {
                        // NOTE(review): childNodeRef is declared but never used;
                        // the write below goes through thisChild.ChildRef directly.
                        ref MCTSNodeStruct childNodeRef = ref thisChild.ChildRef;
                        thisChild.ChildRef.P = (FP16)newValue;
                    }
                    else
                    {
                        // Unexpanded child: store the new P alongside its move.
                        node.Ref.ChildAtIndex(i).SetUnexpandedPolicyValues(thisChild.Move, (FP16)newValue);
                    }
                }
            }
        /// <summary>
        /// Rebuilds a CompressedPolicyVector from a node's current children.
        /// NOTE(review): excerpt is truncated — the final step consuming the
        /// gathered spans (and the closing brace) is not visible here.
        /// </summary>
        /// <param name="node">Node whose children supply the policy entries.</param>
        /// <param name="policy">Target compressed policy vector to be rebuilt.</param>
        internal static void Repack(MCTSNode node, ref CompressedPolicyVector policy)
        {
            // NOTE(review): "indicies" is a misspelling of "indices" (local name only).
            Span <ushort> indicies      = stackalloc ushort[node.NumPolicyMoves];
            Span <ushort> probabilities = stackalloc ushort[node.NumPolicyMoves];

            // Gather (neural-net move index, encoded probability) for each child.
            for (int i = 0; i < node.NumPolicyMoves; i++)
            {
                ref MCTSNodeStructChild childRef = ref node.ChildAtIndexRef(i);
                indicies[i]      = (ushort)childRef.Move.IndexNeuralNet;
                probabilities[i] = CompressedPolicyVector.EncodedProbability(childRef.P);
            }
Ejemplo n.º 5
0
        /// <summary>
        /// Attempts to resolve a leaf evaluation via transposition lookup into
        /// another search context's tree.
        /// NOTE(review): excerpt is truncated below the terminal-node check.
        /// </summary>
        /// <param name="node">Node to evaluate.</param>
        /// <returns>Evaluation result (default returned on the visible miss path).</returns>
        protected override LeafEvaluationResult DoTryEvaluate(MCTSNode node)
        {
            VerifyCompatibleNetworkDefinition(node);

            // Look up this position by Zobrist hash in the other context's transposition roots.
            if (OtherContext.Tree.TranspositionRoots.TryGetValue(node.Ref.ZobristHash, out int nodeIndex))
            {
                using (new SearchContextExecutionBlock(OtherContext))
                {
                    ref MCTSNodeStruct       otherNodeRef = ref OtherContext.Tree.Store.Nodes.nodes[nodeIndex];
                    CompressedPolicyVector[] cpvArray     = new CompressedPolicyVector[1];

                    // Terminal nodes cannot be used here; count as a miss.
                    if (otherNodeRef.Terminal != Chess.GameResult.Unknown)
                    {
                        NumMisses++;
                        return(default);
Ejemplo n.º 6
0
        /// <summary>
        /// Evaluates the batch with synthetic (hash-derived pseudorandom) values
        /// and random-request policies.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Must be false; supplemental results are not implemented.</param>
        /// <returns>Batch of evaluation results.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            // Fail fast: the original checked this only AFTER evaluating the whole
            // batch, wasting all that work before throwing.
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            TimingStats timingStats = new TimingStats();

            using (new TimingBlock("EvalBatch", timingStats, TimingBlock.LoggingType.None))
            {
                CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];

                FP16[] w = new FP16[positions.NumPos];
                FP16[] l = IsWDL ? new FP16[positions.NumPos] : null;
                FP16[] m = IsWDL ? new FP16[positions.NumPos] : null;

                for (int i = 0; i < positions.NumPos; i++)
                {
                    // Derive a deterministic pseudo-random seed from the position planes.
                    int hashPos = HashInRange(positions.PosPlaneBitmaps, i * EncodedPositionWithHistory.NUM_PLANES_TOTAL, EncodedPositionWithHistory.NUM_PLANES_TOTAL);
                    hashPos = (Math.Abs(hashPos)) ^ 172854;

                    // Generate value
                    if (IsWDL)
                    {
                        GenerateRandValue(hashPos, ref w[i], ref l[i]);
                        m[i] = 30 + i % 7;
                    }
                    else
                    {
                        FP16 dummyL = 0;
                        GenerateRandValue(hashPos, ref w[i], ref dummyL);
                    }

                    // Initialize policies. Mark them as requests to be random
                    // (the actual randomization will be done during search, when we have the set of legal moves handy)
                    // TODO: if the batch also contains Positions already, we could do the assignment now
                    CompressedPolicyVector.InitializeAsRandom(ref policies[i], Type == RandomType.WidePolicy);
                }

                float[] supplemental = null;

                return new PositionEvaluationBatch(IsWDL, HasM, positions.NumPos, policies, w, l, m, supplemental, timingStats);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Evaluates specified batch into internal buffers.
        /// Also cross-checks value and policy outputs of the first two
        /// sub-evaluators and logs any significant discrepancies.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Whether supplemental results should also be retrieved.</param>
        /// <returns>The evaluation batch produced by the base implementation.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            IPositionEvaluationBatch result = base.EvaluateIntoBuffers(positions, retrieveSupplementalResults);

            int   numVOK        = 0;
            int   numPolicyOK   = 0;
            float maxPolicyDiff = 0;

            for (int i = 0; i < positions.NumPos; i++)
            {
                // Compare value heads as V = W - L.
                float v0 = subResults[0].GetWinP(i) - subResults[0].GetLossP(i);
                float v1 = subResults[1].GetWinP(i) - subResults[1].GetLossP(i);

                // Check W/D/L
                if (MathF.Abs(v0 - v1) > 0.02)
                {
                    Console.WriteLine($"WFEvalNetCompare V discrepancy: {i,6:F0} {v0,7:F3} {v1,7:F3}");
                }
                else
                {
                    numVOK++;
                }

                (Memory <CompressedPolicyVector> policiesArray0, int policyIndex0) = subResults[0].GetPolicy(i);
                CompressedPolicyVector thesePolicies0 = policiesArray0.Span[policyIndex0];
                (Memory <CompressedPolicyVector> policiesArray1, int policyIndex1) = subResults[1].GetPolicy(i);
                CompressedPolicyVector thesePolicies1 = policiesArray1.Span[policyIndex1];

                float[] policies0 = thesePolicies0.DecodedAndNormalized;
                float[] policies1 = thesePolicies1.DecodedAndNormalized;
                float   maxDiff   = 0;

                for (int p = 0; p < policies0.Length; p++)
                {
                    float diff      = MathF.Abs(policies0[p] - policies1[p]);
                    // Tolerance: at least 0.03 absolute, or 7% of the average of the two values.
                    // BUGFIX: parenthesize the sum so 0.5f scales the average rather
                    // than only policies1[p].
                    float tolerance = Math.Max(0.03f, 0.07f * MathF.Abs((policies0[p] + policies1[p]) * 0.5f));
                    if (diff > maxDiff && (diff > tolerance))
                    {
                        if (maxDiff == 0)
                        {
                            Console.WriteLine("WFEvalNetCompare policy discrepancies:");
                        }
                        // BUGFIX: track the absolute difference; the previous signed
                        // assignment could go negative, wrongly counting the position
                        // as "policy OK" and corrupting maxPolicyDiff.
                        maxDiff = diff;
                        Console.WriteLine($"  {p,6} {policies0[p], 6:F3} { policies1[p], 6:F3}");
                    }
                }

                if (maxDiff == 0)
                {
                    numPolicyOK++;
                }
                else if (maxDiff > maxPolicyDiff)
                {
                    maxPolicyDiff = maxDiff;
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine();
                Console.WriteLine($"{numVOK} of {positions.NumPos} had approximately equal W/D/L scores between the first two WFEvalNetCompare");
                Console.WriteLine($"{numPolicyOK} of {positions.NumPos} had all policies good, worst significant difference {maxPolicyDiff}");
            }

            return result;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Extracts compressed policy vectors from a flat buffer of per-position
        /// policy values, applying softmax normalization when the values are logits.
        /// </summary>
        /// <param name="numPos">Number of positions in the batch.</param>
        /// <param name="policyProbs">Flat buffer of POLICY_VECTOR_LENGTH values per position.</param>
        /// <param name="probType">Whether values are probabilities or logits (softmaxed here).</param>
        /// <param name="alreadySorted">Passed through to CompressedPolicyVector.Initialize.</param>
        /// <returns>One compressed policy vector per position, or null if no data supplied.</returns>
        static CompressedPolicyVector[] ExtractPoliciesBufferFlat(int numPos, float[] policyProbs, PolicyType probType, bool alreadySorted)
        {
            // TODO: possibly needs work.
            // Do we handle WDL correctly? Do we flip the moves if we are black (using positions) ?

            if (policyProbs == null)
            {
                return null;
            }

            // (The original duplicated this length check a second time after the
            // array allocation; the redundant copy has been removed.)
            if (policyProbs.Length != EncodedPolicyVector.POLICY_VECTOR_LENGTH * numPos)
            {
                throw new ArgumentException("Wrong policy size");
            }

            CompressedPolicyVector[] retPolicies = new CompressedPolicyVector[numPos];

            float[] buffer = new float[EncodedPolicyVector.POLICY_VECTOR_LENGTH];
            for (int i = 0; i < numPos; i++)
            {
                int startIndex = EncodedPolicyVector.POLICY_VECTOR_LENGTH * i;
                if (probType == PolicyType.Probabilities)
                {
                    Array.Copy(policyProbs, startIndex, buffer, 0, EncodedPolicyVector.POLICY_VECTOR_LENGTH);
                }
                else
                {
                    // Avoid overflow by subtracting off max
                    // (max starts at 0, so only positive logits shift the values;
                    // exp of negative logits cannot overflow anyway).
                    float max = 0.0f;
                    for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
                    {
                        float val = policyProbs[startIndex + j];
                        if (val > max)
                        {
                            max = val;
                        }
                    }

                    for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
                    {
                        buffer[j] = (float)Math.Exp(policyProbs[startIndex + j] - max); // TODO: make faster
                    }
                }

                // Sum of (unnormalized) values, accumulated in double for accuracy.
                double acc = 0;
                for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
                {
                    acc += buffer[j];
                }
                if (acc == 0.0)
                {
                    throw new Exception("Sum of unnormalized probabilities was zero.");
                }

                // As performance optimization, only adjust if significantly different from 1.0
                const float MAX_DEVIATION = 0.001f;
                if (acc < 1.0f - MAX_DEVIATION || acc > 1.0f + MAX_DEVIATION)
                {
                    for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
                    {
                        buffer[j] = (float)(buffer[j] / acc);
                    }
                }

                CompressedPolicyVector.Initialize(ref retPolicies[i], buffer, alreadySorted);
            }

            return retPolicies;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Extracts a compressed policy vector from a node's children.
        /// NOTE(review): excerpt is truncated inside the loop — how softmaxValue is
        /// applied and how the spans are consumed is not visible here.
        /// </summary>
        /// <param name="softmaxValue">Softmax-related parameter; usage not visible in this excerpt.</param>
        /// <param name="nodeRef">Node whose children supply the policy entries.</param>
        /// <param name="policy">Target compressed policy vector.</param>
        public static void ExtractPolicyVector(float softmaxValue, MCTSNodeStruct nodeRef, ref CompressedPolicyVector policy)
        {
            // NOTE(review): "indicies" is a misspelling of "indices" (local name only).
            Span <ushort> indicies      = stackalloc ushort[CompressedPolicyVector.NUM_MOVE_SLOTS];
            Span <ushort> probabilities = stackalloc ushort[CompressedPolicyVector.NUM_MOVE_SLOTS];

            for (int i = 0; i < nodeRef.NumPolicyMoves; i++)
            {
                MCTSNodeStructChild child = nodeRef.ChildAtIndex(i);
                if (child.IsExpanded)
                {
                    ref readonly MCTSNodeStruct childRef = ref child.ChildRef;
Ejemplo n.º 10
0
        /// <summary>
        /// Implementation of virtual method to actually evaluate the batch.
        /// Small batches go to a single preferred evaluator; larger batches are
        /// split across all sub-evaluators in parallel and the results merged.
        /// NOTE(review): excerpt is truncated — the method's closing brace is not visible.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Must be false; supplemental results are not implemented.</param>
        /// <returns>Combined (or single-evaluator) evaluation batch.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            if (positions.NumPos <= MinSplitSize)
            {
                // Too small to profitably split across multiple devices
                // NOTE(review): "indexPerferredEvalator" is a misspelled identifier
                // (presumably "indexPreferredEvaluator") declared elsewhere in the class.
                return(Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults));
            }
            else
            {
                // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
                //       and construct custom WFEvaluationBatch which are just using appropriate Memory slices
                //       Need to create a new constructor for WFEvaluationBatch
                IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];

                List <Task> tasks         = new List <Task>();
                int[]       subBatchSizes = new int[Evaluators.Length];
                for (int i = 0; i < Evaluators.Length; i++)
                {
                    // Copy loop variable so the lambda below captures a stable value.
                    int capI = i;
                    IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
                    subBatchSizes[capI] = thisSubBatch.NumPos;
                    tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
                }
                Task.WaitAll(tasks.ToArray());

                if (UseMergedBatch)
                {
                    return(new PositionsEvaluationBatchMerged(results, subBatchSizes));
                }
                else
                {
                    // Concatenate the per-evaluator results into single flat buffers.
                    CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
                    FP16[] w = new FP16[positions.NumPos];
                    FP16[] l = new FP16[positions.NumPos];
                    FP16[] m = new FP16[positions.NumPos];

                    bool isWDL = results[0].IsWDL;
                    bool hasM  = results[0].HasM;

                    // Copy each sub-batch's outputs into the combined buffers in order.
                    int nextPosIndex = 0;
                    for (int i = 0; i < Evaluators.Length; i++)
                    {
                        PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
                        int thisNumPos = resultI.NumPos;

                        resultI.Policies.CopyTo(new Memory <CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
                        resultI.W.CopyTo(new Memory <FP16>(w).Slice(nextPosIndex, thisNumPos));

                        if (isWDL)
                        {
                            resultI.L.CopyTo(new Memory <FP16>(l).Slice(nextPosIndex, thisNumPos));
                            resultI.M.CopyTo(new Memory <FP16>(m).Slice(nextPosIndex, thisNumPos));
                        }

                        nextPosIndex += thisNumPos;
                    }

                    TimingStats stats = new TimingStats();
                    return(new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats));
                }
            }