/// <summary>
/// Copies evaluation results from a network output batch back onto the
/// corresponding MCTS nodes (and optionally into the position cache).
///
/// TODO: this method similar to one below, try to unify them
/// </summary>
/// <param name="nodes">Nodes whose evaluations were requested, in batch order.</param>
/// <param name="results">Network outputs for the batch, indexed parallel to nodes.</param>
/// <param name="resultTarget">Which evaluation slot on the node receives the result.</param>
void RetrieveResults(Span<MCTSNode> nodes, IPositionEvaluationBatch results, EvalResultTarget resultTarget)
{
  for (int index = 0; index < nodes.Length; index++)
  {
    MCTSNode thisNode = nodes[index];

    FP16 rawM = results.GetM(index);

    FP16 rawWinP = results.GetWinP(index);
    Debug.Assert(!float.IsNaN(rawWinP));

    FP16 rawLossP = results.GetLossP(index);
    Debug.Assert(!float.IsNaN(rawLossP));

    // Assign win and loss probabilities.
    // A negative win probability with zero loss probability looks like a
    // non-WDL (single scalar) result; rewrite it in an equivalent way
    // that avoids negative probabilities.
    FP16 winP;
    FP16 lossP;
    if (rawWinP < 0 && rawLossP == 0)
    {
      winP = 0;
      lossP = -rawWinP;
    }
    else
    {
      winP = rawWinP;
      lossP = rawLossP;
    }

    // Build the evaluation result, attaching the policy for this position.
    LeafEvaluationResult evalResult = new LeafEvaluationResult(GameResult.Unknown, winP, lossP, rawM);
    evalResult.PolicyInArray = results.GetPolicy(index);

    switch (resultTarget)
    {
      case EvalResultTarget.PrimaryEvalResult:
        thisNode.EvalResult = evalResult;
        break;

      case EvalResultTarget.SecondaryEvalResult:
        thisNode.EvalResultSecondary = evalResult;
        break;

      default:
        throw new Exception("Internal error: unexpected EvalResultTarget");
    }

    // Optionally persist the raw (untransformed) values back to the cache.
    if (SaveToCache)
    {
      Cache.Store(thisNode.Annotation.PositionHashForCaching,
                  GameResult.Unknown, rawWinP, rawLossP, rawM,
                  in thisNode.EvalResult.PolicyRef);
    }
  }
}
/// <summary>
/// Extracts the logistic values from an array of encoded evaluations
/// into a newly allocated FP16 array.
/// </summary>
/// <param name="evals">Evaluations from which to extract logistic values.</param>
/// <returns>Array of logistic values, one per input evaluation.</returns>
public static FP16[] ToLogisticsArray(EncodedEvalLogistic[] evals)
{
  FP16[] logistics = new FP16[evals.Length];

  int index = 0;
  foreach (EncodedEvalLogistic eval in evals)
  {
    logistics[index++] = (FP16)eval.LogisticValue;
  }

  return logistics;
}
/// <summary>
/// Evaluates the specified nodes using the local evaluator,
/// first populating any auxiliary batch inputs (positions, hashes, moves)
/// that the evaluator declares it requires.
/// </summary>
/// <param name="nodes">Nodes to evaluate, in batch order.</param>
/// <param name="resultTarget">Which evaluation slot on each node receives the result.</param>
void RunLocal(Span<MCTSNode> nodes, EvalResultTarget resultTarget)
{
  const bool RETRIEVE_SUPPLEMENTAL = false;

  if (localEvaluator.InputsRequired > NNEvaluator.InputTypes.Boards)
  {
    bool wantsPositions = localEvaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Positions);
    bool wantsHashes = localEvaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Hashes);
    bool wantsMoves = localEvaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Moves);

    // Lazily allocate the side buffers at maximum batch size (reused across calls).
    if (wantsPositions && Batch.Positions == null)
    {
      Batch.Positions = new MGPosition[Batch.MaxBatchSize];
    }

    if (wantsHashes && Batch.PositionHashes == null)
    {
      Batch.PositionHashes = new ulong[Batch.MaxBatchSize];
    }

    if (wantsMoves && Batch.Moves == null)
    {
      Batch.Moves = new MGMoveList[Batch.MaxBatchSize];
    }

    // Fill the requested auxiliary inputs from each node's annotation.
    for (int index = 0; index < nodes.Length; index++)
    {
      MCTSNode thisNode = nodes[index];

      if (wantsPositions)
      {
        Batch.Positions[index] = thisNode.Annotation.PosMG;
      }

      if (wantsHashes)
      {
        Batch.PositionHashes[index] = thisNode.Annotation.PositionHashForCaching;
      }

      if (wantsMoves)
      {
        Batch.Moves[index] = thisNode.Annotation.Moves;
      }
    }
  }

  // Note that we call EvaluateIntoBuffers instead of EvaluateBatch for performance reasons
  // (we immediately extract from buffers in RetrieveResults below).
  IPositionEvaluationBatch result = localEvaluator.EvaluateIntoBuffers(Batch, RETRIEVE_SUPPLEMENTAL);
  Debug.Assert(!FP16.IsNaN(result.GetWinP(0)) && !FP16.IsNaN(result.GetLossP(0)));

  RetrieveResults(nodes, result, resultTarget);

  // if (MCTSParamsFixed.MONITORING) EventSourceCeres.Log.WriteMetric("MCTS.NodeEvaluatorNN_Local.Hit", counterNumHits++);
}
/// <summary>
/// Re-initializes from specified values (with no policy).
/// </summary>
/// <param name="terminalStatus">Terminal game status; must not be NotInitialized.</param>
/// <param name="winP">Win probability.</param>
/// <param name="lossP">Loss probability.</param>
/// <param name="m">M value (presumably a moves-remaining estimate — confirm).</param>
public void Initialize(GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m)
{
  Debug.Assert(terminalStatus != GameResult.NotInitialized);

  TerminalStatus = terminalStatus;
  M = m;
  WinP = winP;
  LossP = lossP;

  // No policy is associated with this result.
  policyArray = null;
  policyArrayIndex = -1;
  ExtraResults = null;
}
/// <summary>
/// Evaluates a batch of positions via the ONNX runtime executor.
/// </summary>
/// <param name="isWDL">If the network has a 3-valued (win/draw/loss) value head.</param>
/// <param name="positionEncoding">Flat encoding of the input planes for the full batch.</param>
/// <param name="numPositionsUsed">Number of positions actually populated in the batch.</param>
/// <param name="debuggingDump">If the decoded input planes should be dumped for debugging.</param>
/// <param name="alreadyConvertedToLZ0">If the planes are already in LC0 network input format.</param>
/// <returns>Batch of parsed network outputs (values, policy logits, optional FC activations).</returns>
public ONNXRuntimeExecutorResultBatch Execute(bool isWDL, float[] positionEncoding, int numPositionsUsed, bool debuggingDump = false, bool alreadyConvertedToLZ0 = false)
{
  if (!alreadyConvertedToLZ0)
  {
    if (positionEncoding.Length / BatchSize != (64 * EncodedPositionBatchFlat.TOTAL_NUM_PLANES_ALL_HISTORIES))
    {
      // Was a bare "throw new Exception()" with no diagnostic message.
      throw new Exception("Unexpected positionEncoding length inconsistent with BatchSize and TOTAL_NUM_PLANES_ALL_HISTORIES");
    }

    if (NetType == NetTypeEnum.LC0)
    {
      positionEncoding = ONNXRuntimeExecutorResultBatch.RebuildInputsForLC0Network(positionEncoding, BatchSize); // Centralize this
    }
    else
    {
      throw new NotImplementedException();
    }
  }

  // Optionally dump decoded input planes (useful when debugging input encoding).
  if (debuggingDump)
  {
    EncodedPositionBatchFlat.DumpDecoded(positionEncoding, 112 * 2);
  }

  float[][] eval = executor.Run(positionEncoding, new int[] { numPositionsUsed, 112, 64 });

  if (NetType == NetTypeEnum.Ceres)
  {
    throw new NotImplementedException();
    //nRunner = session.GetRunner().AddInput("input_1", inputTensor).Fetch("value_out/Tanh").Fetch("policy_out/Softmax").Fetch("draw_out/Sigmoid");
  }
  else
  {
    // Value head: 3 outputs per position if WDL, else 1.
    FP16[] values = FP16.ToFP16(eval[0]);
    Debug.Assert(values.Length == (isWDL ? 3 : 1) * numPositionsUsed);

    // Policy head outputs are raw logits (not yet exponentiated/normalized).
    float[] policiesLogistics = eval[1];

    // Value FC activations are optional (present only if the net exposes a third output).
    float[] value_fc_activations = eval.Length < 3 ? null : eval[2];

    ONNXRuntimeExecutorResultBatch result = new ONNXRuntimeExecutorResultBatch(isWDL, values, policiesLogistics, value_fc_activations, numPositionsUsed);
    return result;
  }
}
/// <summary>
/// Constructor from specified values (including policy reference).
/// </summary>
/// <param name="terminalStatus">Terminal game status; must not be NotInitialized.</param>
/// <param name="winP">Win probability.</param>
/// <param name="lossP">Loss probability.</param>
/// <param name="m">M value (presumably a moves-remaining estimate — confirm).</param>
/// <param name="policyArray">Backing memory holding the compressed policy vectors.</param>
/// <param name="policyArrayIndex">Index of this position's policy within the array.</param>
public LeafEvaluationResult(GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m, Memory<CompressedPolicyVector> policyArray, short policyArrayIndex)
{
  Debug.Assert(terminalStatus != GameResult.NotInitialized);

  TerminalStatus = terminalStatus;
  M = m;
  WinP = winP;
  LossP = lossP;
  ExtraResults = null;

  this.policyArray = policyArray;
  this.policyArrayIndex = policyArrayIndex;
}
/// <summary>
/// Evaluates a batch by generating deterministic pseudorandom values
/// (derived from position hashes) rather than running a real network.
/// </summary>
/// <param name="positions">Batch of encoded positions to evaluate.</param>
/// <param name="retrieveSupplementalResults">Not supported; must be false.</param>
/// <returns>Batch of pseudorandom evaluation results.</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  if (retrieveSupplementalResults)
  {
    throw new NotImplementedException();
  }

  TimingStats timingStats = new TimingStats();
  using (new TimingBlock("EvalBatch", timingStats, TimingBlock.LoggingType.None))
  {
    int numPos = positions.NumPos;

    CompressedPolicyVector[] policies = new CompressedPolicyVector[numPos];
    FP16[] w = new FP16[numPos];
    FP16[] l = IsWDL ? new FP16[numPos] : null;
    FP16[] m = IsWDL ? new FP16[numPos] : null;

    for (int pos = 0; pos < numPos; pos++)
    {
      // Derive a deterministic pseudorandom seed from the position planes.
      int hashPos = HashInRange(positions.PosPlaneBitmaps, pos * EncodedPositionWithHistory.NUM_PLANES_TOTAL, EncodedPositionWithHistory.NUM_PLANES_TOTAL);
      hashPos = (Math.Abs(hashPos)) ^ 172854;

      // Generate value.
      if (IsWDL)
      {
        GenerateRandValue(hashPos, ref w[pos], ref l[pos]);
        m[pos] = 30 + pos % 7;
      }
      else
      {
        FP16 ignoredLossP = 0;
        GenerateRandValue(hashPos, ref w[pos], ref ignoredLossP);
      }

      // Initialize policies. Mark them as requests to be random
      // (the actual randomization will be done during search, when we have the set of legal moves handy).
      // TODO: if the batch also contains Positions already, we could do the assignment now.
      CompressedPolicyVector.InitializeAsRandom(ref policies[pos], Type == RandomType.WidePolicy);
    }

    return new PositionEvaluationBatch(IsWDL, HasM, numPos, policies, w, l, m, null, timingStats);
  }
}
/// <summary>
/// Initializes the win/loss arrays with specified values from the value head.
/// </summary>
/// <param name="valueEvals">Raw value head outputs (3 entries per position if WDL, else 1).</param>
/// <param name="valsAreLogistic">If the values are logits requiring softmax normalization.</param>
void InitializeValueEvals(Span<FP16> valueEvals, bool valsAreLogistic)
{
  if (IsWDL)
  {
    FP16[] w = new FP16[NumPos];
    FP16[] l = new FP16[NumPos];
    W = w;
    L = l;

    for (int i = 0; i < NumPos; i++)
    {
      if (!valsAreLogistic)
      {
        // Values are already probabilities; extract win and loss directly
        // (draw is implicit as the remainder).
        w[i] = valueEvals[i * 3 + 0];
        l[i] = valueEvals[i * 3 + 2];

        // The three probabilities (win/draw/loss) must sum to approximately 1.
        // Fixed: prior assertion was Math.Abs(100 - v0 + v1 + v2) <= 0.001,
        // which is miscomputed (missing parentheses, and 100 rather than 1).
        Debug.Assert(Math.Abs(1 - (valueEvals[i * 3 + 0] + valueEvals[i * 3 + 1] + valueEvals[i * 3 + 2])) <= 0.001);
      }
      else
      {
        // Softmax over the three logits.
        // NOTE: Use min with 20 to deal with excessively large values (that would go to infinity)
        double v1 = Math.Exp(Math.Min(20, valueEvals[i * 3 + 0]));
        double v2 = Math.Exp(Math.Min(20, valueEvals[i * 3 + 1]));
        double v3 = Math.Exp(Math.Min(20, valueEvals[i * 3 + 2]));

        double totl = v1 + v2 + v3;
        Debug.Assert(!double.IsNaN(totl));

        w[i] = (FP16)(v1 / totl);
        l[i] = (FP16)(v3 / totl);
      }
    }
  }
  else
  {
    // Non-WDL network: the single value per position is the win score; no loss array.
    W = valueEvals.ToArray();
  }
}
/// <summary>
/// Generates a deterministic pseudorandom value score in range [-1.0, 1.0]
/// derived from a position hash.
/// </summary>
/// <param name="hashPos">Hash of the position, used as the pseudorandom seed.</param>
/// <param name="w">win probability</param>
/// <param name="l">loss probability (only if WDL)</param>
void GenerateRandValue(int hashPos, ref FP16 w, ref FP16 l)
{
  const int DIV1 = 1826743;

  // Map the hash onto [0, 1), then square so values cluster near 0.
  float q = Math.Abs(((float)(hashPos % DIV1)) / (float)DIV1);
  q *= q;

  if (!IsWDL)
  {
    // Non-WDL: only the win component is meaningful.
    w = (FP16)q;
    l = 0;
    return;
  }

  // WDL: derive an independent second pseudorandom number for the draw share.
  const int DIV2 = 782743;
  float hashPosInRange2 = Math.Abs(((float)(hashPos % DIV2)) / (float)DIV2);

  w = (FP16)q;
  float maxD = 1.0f - Math.Abs(q);
  float d = hashPosInRange2 * maxD;
  l = (FP16)(1.0f - w - d);
}
/// <summary>
/// Constructor.
/// </summary>
/// <param name="firstMoveSampler">Sampler used for selection at the first move.</param>
public MCTSApply(MultinomialBayesianThompsonSampler firstMoveSampler)
{
  FirstMoveSampler = firstMoveSampler;

  // Cap on M values. NOTE(review): presumably an upper bound on the
  // moves-remaining estimate — confirm against usage of MAX_M.
  MAX_M = 250;
}
public PositionEvalCacheEntry(GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m, in CompressedPolicyVector policy)
/// <summary>
/// Implementation of virtual method to actually evaluate the batch,
/// splitting it across the child evaluators when large enough to be profitable.
/// NOTE(review): the closing brace of this method lies beyond this chunk.
/// </summary>
/// <param name="positions">Batch of encoded positions to evaluate.</param>
/// <param name="retrieveSupplementalResults">Not supported; must be false.</param>
/// <returns>Combined evaluation results for the full batch.</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  if (retrieveSupplementalResults)
  {
    throw new NotImplementedException();
  }

  if (positions.NumPos <= MinSplitSize)
  {
    // Too small to profitably split across multiple devices
    return(Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults));
  }
  else
  {
    // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
    // and construct custom WFEvaluationBatch which are just using appropriate Memory slices
    // Need to create a new constructor for WFEvaluationBatch

    // Evaluate each sub-batch on its own evaluator in parallel.
    IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];
    List<Task> tasks = new List<Task>();
    int[] subBatchSizes = new int[Evaluators.Length];
    for (int i = 0; i < Evaluators.Length; i++)
    {
      // Capture loop variable for safe use inside the lambda below.
      int capI = i;
      IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
      subBatchSizes[capI] = thisSubBatch.NumPos;
      tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
    }
    Task.WaitAll(tasks.ToArray());

    if (UseMergedBatch)
    {
      // Wrap the sub-results without copying.
      return(new PositionsEvaluationBatchMerged(results, subBatchSizes));
    }
    else
    {
      // Concatenate the sub-results into freshly allocated full-batch arrays.
      CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
      FP16[] w = new FP16[positions.NumPos];
      FP16[] l = new FP16[positions.NumPos];
      FP16[] m = new FP16[positions.NumPos];

      bool isWDL = results[0].IsWDL;
      bool hasM = results[0].HasM;

      int nextPosIndex = 0;
      for (int i = 0; i < Evaluators.Length; i++)
      {
        PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
        int thisNumPos = resultI.NumPos;

        resultI.Policies.CopyTo(new Memory<CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
        resultI.W.CopyTo(new Memory<FP16>(w).Slice(nextPosIndex, thisNumPos));
        if (isWDL)
        {
          resultI.L.CopyTo(new Memory<FP16>(l).Slice(nextPosIndex, thisNumPos));
          resultI.M.CopyTo(new Memory<FP16>(m).Slice(nextPosIndex, thisNumPos));
        }
        nextPosIndex += thisNumPos;
      }

      TimingStats stats = new TimingStats();
      return(new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats));
    }
  }
/// <summary> /// /// Note that it is possible and acceptable that the specified entry might already exist. /// For example, when processing a large batch there might be tranpositions resulting in /// multiple nodes having same position. It is harmless to store two or more times. /// </summary> /// <param name="hash"></param> /// <param name="value"></param> /// <param name="policy"></param> public void Store(ulong hash, GameResult terminalStatus, FP16 winP, FP16 lossP, FP16 m, in CompressedPolicyVector policy)