示例#1
0
        /// <summary>
        /// Copies evaluation results for a set of nodes out of a completed batch,
        /// stores them on each node under the requested target field, and
        /// optionally persists the raw values to the cache.
        /// TODO: this method similar to one below, try to unify them
        /// </summary>
        /// <param name="nodes">Nodes whose evaluations are to be retrieved (parallel to batch indices).</param>
        /// <param name="results">Batch containing the evaluation results.</param>
        /// <param name="resultTarget">Which field on the node receives the result.</param>
        void RetrieveResults(Span <MCTSNode> nodes, IPositionEvaluationBatch results, EvalResultTarget resultTarget)
        {
            for (int nodeIndex = 0; nodeIndex < nodes.Length; nodeIndex++)
            {
                MCTSNode thisNode = nodes[nodeIndex];

                // Pull the raw outputs for this position from the batch.
                FP16 rawM     = results.GetM(nodeIndex);
                FP16 rawWinP  = results.GetWinP(nodeIndex);
                FP16 rawLossP = results.GetLossP(nodeIndex);
                Debug.Assert(!float.IsNaN(rawWinP));
                Debug.Assert(!float.IsNaN(rawLossP));

                // A non-WDL style result can show up as a negative "win" probability
                // with zero loss; rewrite that case in an equivalent form that
                // avoids negative probabilities.
                FP16 winP;
                FP16 lossP;
                bool looksNonWDL = rawWinP < 0 && rawLossP == 0;
                if (looksNonWDL)
                {
                    winP  = 0;
                    lossP = -rawWinP;
                }
                else
                {
                    winP  = rawWinP;
                    lossP = rawLossP;
                }

                // Build the result (including policy) and route it to the requested target.
                LeafEvaluationResult evalResult = new LeafEvaluationResult(GameResult.Unknown, winP, lossP, rawM);
                evalResult.PolicyInArray = results.GetPolicy(nodeIndex);

                switch (resultTarget)
                {
                    case EvalResultTarget.PrimaryEvalResult:
                        thisNode.EvalResult = evalResult;
                        break;
                    case EvalResultTarget.SecondaryEvalResult:
                        thisNode.EvalResultSecondary = evalResult;
                        break;
                    default:
                        throw new Exception("Internal error: unexpected EvalResultTarget");
                }

                // Save back to cache.
                // NOTE(review): this reads EvalResult.PolicyRef even when the result was
                // written to EvalResultSecondary — confirm that is intended.
                if (SaveToCache)
                {
                    Cache.Store(thisNode.Annotation.PositionHashForCaching,
                                GameResult.Unknown, rawWinP, rawLossP, rawM,
                                in thisNode.EvalResult.PolicyRef);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Evaluates the batch by delegating to the evaluator whose turn it is,
        /// advancing the round-robin index for the next call.
        /// </summary>
        /// <param name="positions">Batch of positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">If supplemental results should also be retrieved.</param>
        /// <returns>The evaluation results from the selected evaluator.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            // Select the current evaluator, then advance the index (round-robin).
            var selectedEvaluator = Evaluators[nextIndex];
            nextIndex = (nextIndex + 1) % Evaluators.Length;

            return selectedEvaluator.EvaluateIntoBuffers(positions, retrieveSupplementalResults);
        }
示例#3
0
        /// <summary>
        /// Determines the nodes per second achieved at a specified batch size.
        /// </summary>
        /// <param name="evaluator">Evaluator used to run the test batch.</param>
        /// <param name="batchSize">Number of positions in the test batch.</param>
        /// <param name="latencyAdjustmentSecs">Fixed latency subtracted from the measured elapsed time.</param>
        /// <returns>Positions evaluated per second (latency-adjusted).</returns>
        static float NPSAtBatchSize(NNEvaluator evaluator, int batchSize, float latencyAdjustmentSecs)
        {
            EncodedPositionBatchFlat testBatch = MakeTestBatch(evaluator, batchSize);

            // Time a single evaluation of the batch.
            // (The using *statement* is deliberate: the TimingBlock must be disposed
            // before ElapsedTimeSecs is read below.)
            TimingStats timing = new TimingStats();
            using (new TimingBlock(timing, TimingBlock.LoggingType.None))
            {
                evaluator.EvaluateIntoBuffers(testBatch, false);
            }

            // Subtract the fixed latency so the figure reflects raw throughput.
            float adjustedSecs = (float)timing.ElapsedTimeSecs - latencyAdjustmentSecs;
            return batchSize / adjustedSecs;
        }
示例#4
0
        /// <summary>
        /// Processes the current set of batches by:
        ///   - aggregating them into one big batch
        ///   - evaluating that big batch all at once
        ///   - disaggregating the returned evaluations into sub-batch-results
        /// </summary>
        /// <param name="evaluator">Evaluator used for the combined batch.</param>
        /// <param name="retrieveSupplementalResults">If supplemental results should also be retrieved.</param>
        internal void ProcessPooledBatch(NNEvaluator evaluator, bool retrieveSupplementalResults)
        {
            // A single pending batch needs no aggregation; otherwise combine them all.
            IEncodedPositionBatchFlat fullBatch = pendingBatches.Count == 1
                                                ? pendingBatches[0]
                                                : AggregateBatches();

            // Evaluate everything in one call, then split the results back out
            // into per-sub-batch results.
            PositionEvaluationBatch batchDirect =
                (PositionEvaluationBatch)evaluator.EvaluateIntoBuffers(fullBatch, retrieveSupplementalResults);
            completedBatches = DisaggregateBatches(retrieveSupplementalResults, batchDirect, pendingBatches);
        }
示例#5
0
        /// <summary>
        /// Evaluates specified batch into internal buffers.
        /// After evaluation, compares the value (W/D/L) and policy outputs of the
        /// first two sub-evaluators and writes significant discrepancies to the console.
        /// </summary>
        /// <param name="positions">Batch of positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">If supplemental results should also be retrieved.</param>
        /// <returns>The evaluation result from the base evaluator.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            IPositionEvaluationBatch result = base.EvaluateIntoBuffers(positions, retrieveSupplementalResults);

            int   numVOK        = 0;
            int   numPolicyOK   = 0;
            float maxPolicyDiff = 0;

            for (int i = 0; i < positions.NumPos; i++)
            {
                // Value comparison: V = winP - lossP for each of the first two sub-evaluators.
                float v0 = subResults[0].GetWinP(i) - subResults[0].GetLossP(i);
                float v1 = subResults[1].GetWinP(i) - subResults[1].GetLossP(i);

                // Check W/D/L
                if (MathF.Abs(v0 - v1) > 0.02)
                {
                    Console.WriteLine($"WFEvalNetCompare V discrepancy: {i,6:F0} {v0,7:F3} {v1,7:F3}");
                }
                else
                {
                    numVOK++;
                }

                (Memory <CompressedPolicyVector> policiesArray0, int policyIndex0) = subResults[0].GetPolicy(i);
                CompressedPolicyVector thesePolicies0 = policiesArray0.Span[policyIndex0];
                (Memory <CompressedPolicyVector> policiesArray1, int policyIndex1) = subResults[1].GetPolicy(i);
                CompressedPolicyVector thesePolicies1 = policiesArray1.Span[policyIndex1];

                float[] policies0 = thesePolicies0.DecodedAndNormalized;
                float[] policies1 = thesePolicies1.DecodedAndNormalized;
                float   maxDiff   = 0;

                for (int p = 0; p < policies0.Length; p++)
                {
                    float diff = MathF.Abs(policies0[p] - policies1[p]);

                    // Tolerance: 3% absolute, or 7% of the average of the two probabilities.
                    // BUGFIX: the sum must be parenthesized so 0.5f computes the average;
                    // previously 0.5f applied only to policies1[p].
                    float tolerance = Math.Max(0.03f, 0.07f * MathF.Abs((policies0[p] + policies1[p]) * 0.5f));
                    if (diff > maxDiff && (diff > tolerance))
                    {
                        if (maxDiff == 0)
                        {
                            Console.WriteLine("WFEvalNetCompare policy discrepancies:");
                        }

                        // BUGFIX: track the absolute difference. The previous signed
                        // assignment could leave maxDiff negative, breaking both the
                        // running-max comparison above and the maxDiff == 0 check below.
                        maxDiff = diff;
                        Console.WriteLine($"  {p,6} {policies0[p], 6:F3} { policies1[p], 6:F3}");
                    }
                }

                if (maxDiff == 0)
                {
                    numPolicyOK++;
                }
                else if (maxDiff > maxPolicyDiff)
                {
                    maxPolicyDiff = maxDiff;
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine();
                Console.WriteLine($"{numVOK} of {positions.NumPos} had approximately equal W/D/L scores between the first two WFEvalNetCompare");
                Console.WriteLine($"{numPolicyOK} of {positions.NumPos} had all policies good, worst significant difference {maxPolicyDiff}");
            }

            return(result);
        }
示例#6
0
 /// <summary>
 /// Constructor.
 /// </summary>
 /// <param name="batch">Batch to which this member belongs.</param>
 /// <param name="index">Position of this member within the batch.</param>
 internal NNPositionEvaluationBatchMember(IPositionEvaluationBatch batch, int index)
 {
     this.index = index;
     this.batch = batch;
 }
示例#7
0
        /// <summary>
        /// Implementation of virtual method to actually evaluate the batch.
        /// Small batches are routed to a single preferred evaluator; larger batches
        /// are split across all evaluators and evaluated in parallel tasks.
        /// </summary>
        /// <param name="positions">Batch of positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Must be false; supplemental results are not supported here.</param>
        /// <returns>Combined evaluation results for the full batch.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            if (positions.NumPos <= MinSplitSize)
            {
                // Too small to profitably split across multiple devices
                return(Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults));
            }
            else
            {
                // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
                //       and construct custom WFEvaluationBatch which are just using appropriate Memory slices
                //       Need to create a new constructor for WFEvaluationBatch
                IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];

                // Launch one task per evaluator, each evaluating its own sub-batch.
                List <Task> tasks         = new List <Task>();
                int[]       subBatchSizes = new int[Evaluators.Length];
                for (int i = 0; i < Evaluators.Length; i++)
                {
                    // Capture the loop variable so the closure below sees a stable index.
                    int capI = i;
                    IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
                    subBatchSizes[capI] = thisSubBatch.NumPos;
                    tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
                }
                Task.WaitAll(tasks.ToArray());

                if (UseMergedBatch)
                {
                    // Wrap the sub-results without copying their buffers.
                    return(new PositionsEvaluationBatchMerged(results, subBatchSizes));
                }
                else
                {
                    // Concatenate the sub-results into single flat arrays.
                    CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
                    FP16[] w = new FP16[positions.NumPos];
                    FP16[] l = new FP16[positions.NumPos];
                    FP16[] m = new FP16[positions.NumPos];

                    // WDL/M availability is taken from the first result; presumably
                    // uniform across evaluators — TODO confirm.
                    bool isWDL = results[0].IsWDL;
                    bool hasM  = results[0].HasM;

                    int nextPosIndex = 0;
                    for (int i = 0; i < Evaluators.Length; i++)
                    {
                        PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
                        int thisNumPos = resultI.NumPos;

                        resultI.Policies.CopyTo(new Memory <CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
                        resultI.W.CopyTo(new Memory <FP16>(w).Slice(nextPosIndex, thisNumPos));

                        // NOTE(review): M is copied only when isWDL is true — confirm it
                        // should not instead be gated on hasM.
                        if (isWDL)
                        {
                            resultI.L.CopyTo(new Memory <FP16>(l).Slice(nextPosIndex, thisNumPos));
                            resultI.M.CopyTo(new Memory <FP16>(m).Slice(nextPosIndex, thisNumPos));
                        }

                        nextPosIndex += thisNumPos;
                    }

                    TimingStats stats = new TimingStats();
                    return(new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats));
                }
            }