Esempio n. 1
0
        /// <summary>
        /// Copies the value (win/loss/M) and policy outputs of an evaluated batch
        /// into the corresponding nodes (entry i of the batch belongs to nodes[i])
        /// and, if SaveToCache is set, also stores each result in the cache.
        ///
        /// TODO: this method similar to one below, try to unify them
        /// </summary>
        /// <param name="nodes">nodes that were evaluated, in the same order as the batch entries</param>
        /// <param name="results">evaluation results, indexed in parallel with <paramref name="nodes"/></param>
        /// <param name="resultTarget">selects whether EvalResult or EvalResultSecondary of each node is assigned</param>
        /// <exception cref="Exception">if <paramref name="resultTarget"/> is neither PrimaryEvalResult nor SecondaryEvalResult</exception>
        void RetrieveResults(Span <MCTSNode> nodes, IPositionEvaluationBatch results, EvalResultTarget resultTarget)
        {
            for (int i = 0; i < nodes.Length; i++)
            {
                MCTSNode node = nodes[i];

                FP16 winP;
                FP16 lossP;
                FP16 rawM = results.GetM(i);

                // Copy WinP
                FP16 rawWinP = results.GetWinP(i);
                Debug.Assert(!float.IsNaN(rawWinP));

                // Copy LossP
                FP16 rawLossP = results.GetLossP(i);
                Debug.Assert(!float.IsNaN(rawLossP));

                // Assign win and loss probabilities
                // If they look like non-WDL result, try to rewrite them
                // in equivalent way that avoids negative probabilities
                if (rawWinP < 0 && rawLossP == 0)
                {
                    winP  = 0;
                    lossP = -rawWinP;
                }
                else
                {
                    winP  = rawWinP;
                    lossP = rawLossP;
                }

                // Build the result (value plus policy) for this position
                LeafEvaluationResult evalResult = new LeafEvaluationResult(GameResult.Unknown, winP, lossP, rawM);
                evalResult.PolicyInArray = results.GetPolicy(i);

                // Attach the result to the requested slot of the node
                if (resultTarget == EvalResultTarget.PrimaryEvalResult)
                {
                    node.EvalResult = evalResult;
                }
                else if (resultTarget == EvalResultTarget.SecondaryEvalResult)
                {
                    node.EvalResultSecondary = evalResult;
                }
                else
                {
                    throw new Exception("Internal error: unexpected EvalResultTarget");
                }

                // Save back to cache (the raw, un-rewritten values are stored).
                // The policy is taken from the result just built rather than from
                // node.EvalResult, so that the stored values and policy always come
                // from the same evaluation, also when the secondary slot was targeted.
                if (SaveToCache)
                {
                    Cache.Store(node.Annotation.PositionHashForCaching,
                                GameResult.Unknown, rawWinP, rawLossP, rawM,
                                in evalResult.PolicyRef);
                }
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Returns a new array holding the LogisticValue of each entry of
 /// <paramref name="evals"/>, converted to FP16, in the same order.
 /// </summary>
 /// <param name="evals">source evaluations</param>
 /// <returns>array of the same length as <paramref name="evals"/></returns>
 public static FP16[] ToLogisticsArray(EncodedEvalLogistic[] evals)
 {
     int count = evals.Length;
     FP16[] logistics = new FP16[count];

     int index = 0;
     while (index < count)
     {
         logistics[index] = (FP16)evals[index].LogisticValue;
         index++;
     }

     return logistics;
 }
Esempio n. 3
0
        /// <summary>
        /// Evaluates the specified nodes with the local evaluator and copies
        /// the results into the nodes via RetrieveResults.
        ///
        /// If the evaluator requires inputs beyond the boards (positions, hashes
        /// and/or moves), the corresponding Batch arrays are allocated on first use
        /// and filled from the node annotations before evaluation.
        /// </summary>
        /// <param name="nodes">nodes to be evaluated</param>
        /// <param name="resultTarget">passed through to RetrieveResults to select the result slot</param>
        void RunLocal(Span <MCTSNode> nodes, EvalResultTarget resultTarget)
        {
            const bool retrieveSupplemental = false;

            var required = localEvaluator.InputsRequired;
            if (required > NNEvaluator.InputTypes.Boards)
            {
                bool wantPositions = required.HasFlag(NNEvaluator.InputTypes.Positions);
                bool wantHashes    = required.HasFlag(NNEvaluator.InputTypes.Hashes);
                bool wantMoves     = required.HasFlag(NNEvaluator.InputTypes.Moves);

                // Allocate the auxiliary input arrays only if not already present
                if (wantPositions && Batch.Positions is null)
                {
                    Batch.Positions = new MGPosition[Batch.MaxBatchSize];
                }
                if (wantHashes && Batch.PositionHashes is null)
                {
                    Batch.PositionHashes = new ulong[Batch.MaxBatchSize];
                }
                if (wantMoves && Batch.Moves is null)
                {
                    Batch.Moves = new MGMoveList[Batch.MaxBatchSize];
                }

                // Fill the auxiliary inputs, slot index matching node index
                int nodeCount = nodes.Length;
                for (int slot = 0; slot < nodeCount; slot++)
                {
                    MCTSNode current = nodes[slot];

                    if (wantPositions)
                    {
                        Batch.Positions[slot] = current.Annotation.PosMG;
                    }
                    if (wantHashes)
                    {
                        Batch.PositionHashes[slot] = current.Annotation.PositionHashForCaching;
                    }
                    if (wantMoves)
                    {
                        Batch.Moves[slot] = current.Annotation.Moves;
                    }
                }
            }

            // Note that we call EvaluateIntoBuffers instead of EvaluateBatch for performance reasons
            // (we immediately extract from buffers in RetrieveResults below)
            IPositionEvaluationBatch evaluated = localEvaluator.EvaluateIntoBuffers(Batch, retrieveSupplemental);
            Debug.Assert(!FP16.IsNaN(evaluated.GetWinP(0)) && !FP16.IsNaN(evaluated.GetLossP(0)));

            RetrieveResults(nodes, evaluated, resultTarget);

            //          if (MCTSParamsFixed.MONITORING) EventSourceCeres.Log.WriteMetric("MCTS.NodeEvaluatorNN_Local.Hit", counterNumHits++);
        }
Esempio n. 4
0
        /// <summary>
        /// Resets this instance to the specified value outputs,
        /// clearing any policy reference and any extra results.
        /// </summary>
        /// <param name="terminalStatus">game result status; must not be GameResult.NotInitialized (checked in debug builds)</param>
        /// <param name="winP">win probability</param>
        /// <param name="lossP">loss probability</param>
        /// <param name="m">value stored in M</param>
        public void Initialize(GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m)
        {
            Debug.Assert(terminalStatus != GameResult.NotInitialized);

            // Value outputs
            TerminalStatus = terminalStatus;
            WinP = winP;
            LossP = lossP;
            M = m;

            // No policy is associated with a result initialized this way
            policyArrayIndex = -1;
            policyArray = null;

            ExtraResults = null;
        }
Esempio n. 5
0
        /// <summary>
        /// Evaluates a batch by running the executor on the encoded positions
        /// and wrapping the raw outputs in an ONNXRuntimeExecutorResultBatch.
        /// </summary>
        /// <param name="isWDL">passed to the result batch; in debug builds the value output is checked
        /// to contain 3 entries per position if true, otherwise 1</param>
        /// <param name="positionEncoding">flat encoded inputs; unless <paramref name="alreadyConvertedToLZ0"/> is set
        /// they are validated and rebuilt via RebuildInputsForLC0Network before evaluation</param>
        /// <param name="numPositionsUsed">number of positions, used as the first entry of the
        /// dimensions array handed to the executor and passed to the result batch</param>
        /// <param name="debuggingDump">if true, the (possibly rebuilt) inputs are dumped via DumpDecoded before evaluation</param>
        /// <param name="alreadyConvertedToLZ0">if true, the length validation and input rebuild are skipped</param>
        /// <returns>the result batch built from the executor outputs</returns>
        /// <exception cref="ArgumentException">if the inputs need conversion and their length does not match the batch size</exception>
        /// <exception cref="NotImplementedException">if NetType is not LC0</exception>
        public ONNXRuntimeExecutorResultBatch Execute(bool isWDL, float[] positionEncoding, int numPositionsUsed,
                                                      bool debuggingDump = false, bool alreadyConvertedToLZ0 = false)
        {
            if (!alreadyConvertedToLZ0)
            {
                if (positionEncoding.Length / BatchSize != (64 * EncodedPositionBatchFlat.TOTAL_NUM_PLANES_ALL_HISTORIES))
                {
                    throw new ArgumentException(
                        $"Unexpected input length {positionEncoding.Length} for batch size {BatchSize}: "
                        + $"expected {64 * EncodedPositionBatchFlat.TOTAL_NUM_PLANES_ALL_HISTORIES} values per position.",
                        nameof(positionEncoding));
                }

                if (NetType != NetTypeEnum.LC0)
                {
                    throw new NotImplementedException();
                }

                positionEncoding = ONNXRuntimeExecutorResultBatch.RebuildInputsForLC0Network(positionEncoding, BatchSize); // Centralize this
            }

            // Optional dump of the inputs actually sent to the network (debugging aid)
            if (debuggingDump)
            {
                EncodedPositionBatchFlat.DumpDecoded(positionEncoding, 112 * 2);
            }

            // Ceres nets are not supported; fail before running the (expensive) evaluation
            // instead of discarding its outputs afterwards.
            if (NetType == NetTypeEnum.Ceres)
            {
                throw new NotImplementedException();
            }

            // NOTE(review): the dimensions array presumably describes the input tensor
            // shape (positions x 112 planes x 64 squares) - confirm against executor.Run.
            float[][] eval = executor.Run(positionEncoding, new int[] { numPositionsUsed, 112, 64 });

            // Output 0: values, output 1: policies, optional output 2: value_fc activations
            FP16[] values = FP16.ToFP16(eval[0]);
            Debug.Assert(values.Length == (isWDL ? 3 : 1) * numPositionsUsed);

            float[] policiesLogistics = eval[1];
            float[] value_fc_activations = eval.Length < 3 ? null : eval[2];

            return new ONNXRuntimeExecutorResultBatch(isWDL, values, policiesLogistics, value_fc_activations, numPositionsUsed);
        }
Esempio n. 6
0
        /// <summary>
        /// Creates a result from the specified value outputs together with
        /// a reference to its policy (an array plus an index).
        /// </summary>
        /// <param name="terminalStatus">game result status; must not be GameResult.NotInitialized (checked in debug builds)</param>
        /// <param name="winP">win probability</param>
        /// <param name="lossP">loss probability</param>
        /// <param name="m">value stored in M</param>
        /// <param name="policyArray">memory block of policy vectors referenced by this result</param>
        /// <param name="policyArrayIndex">index stored alongside <paramref name="policyArray"/>
        /// (presumably the position of this result's policy within it)</param>
        public LeafEvaluationResult(GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m, Memory <CompressedPolicyVector> policyArray, short policyArrayIndex)
        {
            Debug.Assert(terminalStatus != GameResult.NotInitialized);

            // Policy reference
            this.policyArray = policyArray;
            this.policyArrayIndex = policyArrayIndex;

            // Value outputs
            TerminalStatus = terminalStatus;
            WinP = winP;
            LossP = lossP;
            M = m;

            ExtraResults = null;
        }
Esempio n. 7
0
        /// <summary>
        /// Generates pseudorandom evaluation results for every position in the batch.
        /// The value outputs are derived from a hash of the position's plane bitmaps
        /// (so they depend only on the position), and each policy is marked
        /// as a request to be randomized later.
        /// </summary>
        /// <param name="positions">batch of encoded positions to evaluate</param>
        /// <param name="retrieveSupplementalResults">must be false; supplemental results are not supported</param>
        /// <returns>a PositionEvaluationBatch with one entry per position</returns>
        /// <exception cref="NotImplementedException">if <paramref name="retrieveSupplementalResults"/> is true</exception>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            // Reject the unsupported option before doing any work
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            TimingStats timingStats = new TimingStats();

            using (new TimingBlock("EvalBatch", timingStats, TimingBlock.LoggingType.None))
            {
                int numPos = positions.NumPos;

                CompressedPolicyVector[] policies = new CompressedPolicyVector[numPos];

                FP16[] w = new FP16[numPos];
                FP16[] l = IsWDL ? new FP16[numPos] : null;
                FP16[] m = IsWDL ? new FP16[numPos] : null;

                for (int i = 0; i < numPos; i++)
                {
                    int hashPos = HashInRange(positions.PosPlaneBitmaps, i * EncodedPositionWithHistory.NUM_PLANES_TOTAL, EncodedPositionWithHistory.NUM_PLANES_TOTAL);

                    // Math.Abs(int.MinValue) throws OverflowException (the magnitude is not
                    // representable as an int), so map that single value to int.MaxValue.
                    int absHash = hashPos == int.MinValue ? int.MaxValue : Math.Abs(hashPos);
                    hashPos = absHash ^ 172854;

                    // Generate value
                    if (IsWDL)
                    {
                        GenerateRandValue(hashPos, ref w[i], ref l[i]);
                        m[i] = 30 + i % 7;
                    }
                    else
                    {
                        // No loss array exists in non-WDL mode; the loss output is discarded
                        FP16 dummyL = 0;
                        GenerateRandValue(hashPos, ref w[i], ref dummyL);
                    }

                    // Initialize policies. Mark them as requests to be random
                    // (the actual randomization will be done during search, when we have the set of legal moves handy)
                    // TODO: if the batch also contains Positions already, we could do the assignment now
                    CompressedPolicyVector.InitializeAsRandom(ref policies[i], Type == RandomType.WidePolicy);
                }

                float[] supplemental = null;

                return new PositionEvaluationBatch(IsWDL, HasM, numPos, policies, w, l, m, supplemental, timingStats);
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Initializes the win/loss arrays with specified values from the value head.
        ///
        /// In WDL mode <paramref name="valueEvals"/> holds three consecutive entries per
        /// position; entry 0 is used for W and entry 2 for L. If the entries are logistic
        /// they are first converted via a softmax over the three entries.
        /// In non-WDL mode the values are copied into W unchanged.
        /// </summary>
        /// <param name="valueEvals">raw outputs of the value head</param>
        /// <param name="valsAreLogistic">if true (WDL mode only), the entries are exponentiated and normalized</param>
        void InitializeValueEvals(Span <FP16> valueEvals, bool valsAreLogistic)
        {
            if (!IsWDL)
            {
                W = valueEvals.ToArray();
                return;
            }

            FP16[] w = new FP16[NumPos];
            FP16[] l = new FP16[NumPos];

            W = w;
            L = l;

            for (int i = 0; i < NumPos; i++)
            {
                int baseIndex = i * 3;

                if (!valsAreLogistic)
                {
                    w[i] = valueEvals[baseIndex + 0];
                    l[i] = valueEvals[baseIndex + 2];

                    // Sanity check that the three entries add up to the expected total.
                    // The sum is parenthesized: without the parentheses only the first
                    // entry was subtracted and the other two were added.
                    // NOTE(review): the expected total of 100 is kept from the original - confirm the scale.
                    Debug.Assert(Math.Abs(100 - ((float)valueEvals[baseIndex + 0]
                                               + (float)valueEvals[baseIndex + 1]
                                               + (float)valueEvals[baseIndex + 2])) <= 0.001);
                }
                else
                {
                    // NOTE: Use min with 20 to deal with excessively large values (that would go to infinity)
                    double v1 = Math.Exp(Math.Min(20, valueEvals[baseIndex + 0]));
                    double v2 = Math.Exp(Math.Min(20, valueEvals[baseIndex + 1]));
                    double v3 = Math.Exp(Math.Min(20, valueEvals[baseIndex + 2]));

                    double totl = v1 + v2 + v3;
                    Debug.Assert(!double.IsNaN(totl));

                    w[i] = (FP16)(v1 / totl);
                    l[i] = (FP16)(v3 / totl);
                }
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Generates pseudorandom value outputs derived deterministically from a hash.
        /// The win output is the square of |hashPos % DIV1| / DIV1, hence in [0, 1).
        /// In WDL mode the loss output is what remains of 1 after subtracting the win
        /// output and a hash-derived share of (1 - win); otherwise the loss output is 0.
        /// </summary>
        /// <param name="hashPos">hash value from which the outputs are derived</param>
        /// <param name="w">win probability</param>
        /// <param name="l">loss probability (only if WDL)</param>
        void GenerateRandValue(int hashPos, ref FP16 w, ref FP16 l)
        {
            const int DIV1 = 1826743;
            const int DIV2 = 782743;

            float fraction1 = Math.Abs(((float)(hashPos % DIV1)) / (float)DIV1);

            // Force the q to typically have values closer to 0 by squaring
            float q = fraction1 * fraction1;

            w = (FP16)q;

            if (!IsWDL)
            {
                l = 0;
                return;
            }

            // Second, independent fraction selects how much of the remainder is not a loss
            float fraction2 = Math.Abs(((float)(hashPos % DIV2)) / (float)DIV2);
            float remainder = 1.0f - Math.Abs(q);
            float d         = fraction2 * remainder;

            l = (FP16)(1.0f - w - d);
        }
Esempio n. 10
0
 /// <summary>
 /// Constructor. Stores the sampler and sets MAX_M to 250.
 /// </summary>
 /// <param name="firstMoveSampler">sampler stored in FirstMoveSampler</param>
 public MCTSApply(MultinomialBayesianThompsonSampler firstMoveSampler)
 {
     MAX_M = 250;
     FirstMoveSampler = firstMoveSampler;
 }
Esempio n. 11
0
 public PositionEvalCacheEntry(GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m, in CompressedPolicyVector policy)
Esempio n. 12
0
        /// <summary>
        /// Implementation of virtual method to actually evaluate the batch.
        /// </summary>
        /// <param name="positions"></param>
        /// <param name="retrieveSupplementalResults"></param>
        /// <returns></returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            if (positions.NumPos <= MinSplitSize)
            {
                // Too small to profitably split across multiple devices
                return(Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults));
            }
            else
            {
                // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
                //       and construct custom WFEvaluationBatch which are just using approrpiate Memory slices
                //       Need to create a new constructor for WFEvaluationBatch
                IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];

                List <Task> tasks         = new List <Task>();
                int[]       subBatchSizes = new int[Evaluators.Length];
                for (int i = 0; i < Evaluators.Length; i++)
                {
                    int capI = i;
                    IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
                    subBatchSizes[capI] = thisSubBatch.NumPos;
                    tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
                }
                Task.WaitAll(tasks.ToArray());

                if (UseMergedBatch)
                {
                    return(new PositionsEvaluationBatchMerged(results, subBatchSizes));
                }
                else
                {
                    CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
                    FP16[] w = new FP16[positions.NumPos];
                    FP16[] l = new FP16[positions.NumPos];
                    FP16[] m = new FP16[positions.NumPos];

                    bool isWDL = results[0].IsWDL;
                    bool hasM  = results[0].HasM;

                    int nextPosIndex = 0;
                    for (int i = 0; i < Evaluators.Length; i++)
                    {
                        PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
                        int thisNumPos = resultI.NumPos;

                        resultI.Policies.CopyTo(new Memory <CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
                        resultI.W.CopyTo(new Memory <FP16>(w).Slice(nextPosIndex, thisNumPos));

                        if (isWDL)
                        {
                            resultI.L.CopyTo(new Memory <FP16>(l).Slice(nextPosIndex, thisNumPos));
                            resultI.M.CopyTo(new Memory <FP16>(m).Slice(nextPosIndex, thisNumPos));
                        }

                        nextPosIndex += thisNumPos;
                    }

                    TimingStats stats = new TimingStats();
                    return(new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats));
                }
            }
Esempio n. 13
0
 /// <summary>
 ///
 /// Note that it is possible and acceptable that the specified entry might already exist.
 /// For example, when processing a large batch there might be tranpositions resulting in
 /// multiple nodes having same position. It is harmless to store two or more times.
 /// </summary>
 /// <param name="hash"></param>
 /// <param name="value"></param>
 /// <param name="policy"></param>
 public void Store(ulong hash, GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m, in CompressedPolicyVector policy)