/// <summary>
/// Returns a CompressedPolicyVector array with the policy vectors
/// extracted from all the positions in this batch.
/// </summary>
/// <param name="numPos">number of positions in the batch</param>
/// <param name="topK">number of (index, probability) entries per position</param>
/// <param name="indices">flat span of move indices, topK entries per position</param>
/// <param name="probabilities">flat span of probabilities, parallel to indices</param>
/// <param name="probType">encoding of the incoming probability values</param>
/// <returns>one CompressedPolicyVector per position, or null if no data supplied</returns>
static CompressedPolicyVector[] ExtractPoliciesTopK(int numPos, int topK,
                                                    Span<int> indices, Span<float> probabilities,
                                                    PolicyType probType)
{
  if (probType == PolicyType.LogProbabilities)
  {
    // Conversion from log probabilities is not yet supported.
    // (An exponentiation loop formerly followed this throw; it was unreachable dead code
    //  and has been removed.)
    throw new NotImplementedException();
  }

  if (indices == null && probabilities == null)
  {
    return null;
  }

  if (probabilities.Length != indices.Length)
  {
    throw new ArgumentException("Indices and probabilities expected to be same length");
  }

  CompressedPolicyVector[] retPolicies = new CompressedPolicyVector[numPos];

  // Each position occupies a contiguous window of topK entries in the flat spans.
  int offset = 0;
  for (int i = 0; i < numPos; i++)
  {
    CompressedPolicyVector.Initialize(ref retPolicies[i],
                                      indices.Slice(offset, topK),
                                      probabilities.Slice(offset, topK));
    offset += topK;
  }

  return retPolicies;
}
/// <summary>
/// Constructor, capturing the win/loss probabilities, M value and policy.
/// </summary>
/// <param name="winP">win probability</param>
/// <param name="lossP">loss probability</param>
/// <param name="m">M value (moves-left head output — presumably; confirm)</param>
/// <param name="policy">compressed policy vector for the position</param>
public NNEvaluatorResult(float winP, float lossP, float m, CompressedPolicyVector policy)
{
  Policy = policy;
  M = m;
  this.lossP = lossP;
  this.winP = winP;
}
// --------------------------------------------------------------------------------------------
// Blends each expanded child's prior policy (P) with the empirical visit distribution from
// search, optionally rewriting the blended values back into the tree nodes.
// NOTE(review): this block is truncated in this view; the method continues past the visible
// text (the gathered indices/probabilities arrays are presumably consumed further down).
static void ProcessNode(PositionEvalCache cache, MCTSNode node, float weightEmpirical,
                        bool saveToCache, bool rewriteNodeInTree)
{
  Span<MCTSNodeStructChild> children = node.Ref.Children;

  // TODO: optimize this away if saveToCache is false
  ushort[] probabilities = new ushort[node.NumPolicyMoves];
  ushort[] indices = new ushort[node.NumPolicyMoves];

  // Compute empirical visit distribution (fraction of parent's visits going to each child).
  float[] nodeFractions = new float[node.NumPolicyMoves];
  for (int i = 0; i < node.NumChildrenExpanded; i++)
  {
    nodeFractions[i] = (float)node.ChildAtIndex(i).N / (float)node.N;
  }

  // Determine P of first unexpanded node.
  // We can't allow any child to have a new P less than this
  // since we need to keep them in order by P and the resorting logic below
  // can only operate over expanded nodes.
  float minP = 0;
  if (node.NumChildrenExpanded < node.NumPolicyMoves)
  {
    minP = node.ChildAtIndexInfo(node.NumChildrenExpanded).p;
  }

  // Add each move to the policy vector with blend of prior and empirical values.
  for (int i = 0; i < node.NumChildrenExpanded; i++)
  {
    (MCTSNode node, EncodedMove move, FP16 p) info = node.ChildAtIndexInfo(i);
    indices[i] = (ushort)info.move.IndexNeuralNet;

    // Linear interpolation between network prior and empirical visit fraction.
    float newValue = (1.0f - weightEmpirical) * info.p + weightEmpirical * nodeFractions[i];

    // Clamp so expanded children never fall below the best unexpanded child's P
    // (preserves the ordering invariant described above).
    if (newValue < minP)
    {
      newValue = minP;
    }

    probabilities[i] = CompressedPolicyVector.EncodedProbability(newValue);

    if (rewriteNodeInTree && weightEmpirical != 0)
    {
      MCTSNodeStructChild thisChild = children[i];
      if (thisChild.IsExpanded)
      {
        // NOTE(review): childNodeRef is taken but unused; the write goes through
        // thisChild.ChildRef directly — possibly leftover from a refactoring.
        ref MCTSNodeStruct childNodeRef = ref thisChild.ChildRef;
        thisChild.ChildRef.P = (FP16)newValue;
      }
      else
      {
        node.Ref.ChildAtIndex(i).SetUnexpandedPolicyValues(thisChild.Move, (FP16)newValue);
      }
    }
  }
/// <summary>
/// Rebuilds a CompressedPolicyVector from a node's children, gathering each child's
/// neural-network move index and encoded probability.
/// NOTE(review): truncated in this view; the method continues past the visible text.
/// </summary>
/// <param name="node">node whose children supply the policy entries</param>
/// <param name="policy">destination policy vector</param>
internal static void Repack(MCTSNode node, ref CompressedPolicyVector policy)
{
  // NOTE(review): "indicies" is a misspelling of "indices" (local name only).
  Span<ushort> indicies = stackalloc ushort[node.NumPolicyMoves];
  Span<ushort> probabilities = stackalloc ushort[node.NumPolicyMoves];

  for (int i = 0; i < node.NumPolicyMoves; i++)
  {
    ref MCTSNodeStructChild childRef = ref node.ChildAtIndexRef(i);
    indicies[i] = (ushort)childRef.Move.IndexNeuralNet;
    probabilities[i] = CompressedPolicyVector.EncodedProbability(childRef.P);
  }
/// <summary>
/// Attempts to evaluate a leaf by looking up the position (via Zobrist hash) in another
/// search context's transposition table.
/// NOTE(review): truncated in this view; the method continues past the visible text.
/// </summary>
protected override LeafEvaluationResult DoTryEvaluate(MCTSNode node)
{
  VerifyCompatibleNetworkDefinition(node);

  if (OtherContext.Tree.TranspositionRoots.TryGetValue(node.Ref.ZobristHash, out int nodeIndex))
  {
    // Execute within the other context so its store/tree accessors resolve correctly.
    using (new SearchContextExecutionBlock(OtherContext))
    {
      ref MCTSNodeStruct otherNodeRef = ref OtherContext.Tree.Store.Nodes.nodes[nodeIndex];

      CompressedPolicyVector[] cpvArray = new CompressedPolicyVector[1];

      // A terminal node in the other tree is not usable here; count as a miss.
      if (otherNodeRef.Terminal != Chess.GameResult.Unknown)
      {
        NumMisses++;
        return (default);
/// <summary>
/// Evaluates the batch, producing deterministic pseudorandom value outputs
/// (hashed from the position planes) and policies flagged for later randomization.
/// </summary>
/// <param name="positions">batch of positions to evaluate</param>
/// <param name="retrieveSupplementalResults">if supplemental results should be retrieved (unsupported)</param>
/// <returns>the populated evaluation batch</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  TimingStats stats = new TimingStats();
  using (new TimingBlock("EvalBatch", stats, TimingBlock.LoggingType.None))
  {
    CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
    FP16[] w = new FP16[positions.NumPos];
    FP16[] l = IsWDL ? new FP16[positions.NumPos] : null;
    FP16[] m = IsWDL ? new FP16[positions.NumPos] : null;

    for (int i = 0; i < positions.NumPos; i++)
    {
      // Deterministic hash over this position's bit planes so results are reproducible.
      int posHash = HashInRange(positions.PosPlaneBitmaps,
                                i * EncodedPositionWithHistory.NUM_PLANES_TOTAL,
                                EncodedPositionWithHistory.NUM_PLANES_TOTAL);
      posHash = Math.Abs(posHash) ^ 172854;

      // Generate value outputs from the hash.
      if (!IsWDL)
      {
        FP16 discardedL = 0;
        GenerateRandValue(posHash, ref w[i], ref discardedL);
      }
      else
      {
        GenerateRandValue(posHash, ref w[i], ref l[i]);
        m[i] = 30 + i % 7;
      }

      // Initialize policies, marking them as requests to be randomized later
      // (the actual randomization happens during search, once legal moves are at hand).
      // TODO: if the batch also contains Positions already, we could do the assignment now
      CompressedPolicyVector.InitializeAsRandom(ref policies[i], Type == RandomType.WidePolicy);
    }

    if (retrieveSupplementalResults)
    {
      throw new NotImplementedException();
    }

    return new PositionEvaluationBatch(IsWDL, HasM, positions.NumPos, policies, w, l, m, null, stats);
  }
}
/// <summary>
/// Evaluates specified batch into internal buffers, then compares the results of the
/// first two sub-evaluators, reporting any W/D/L or policy discrepancies to the console.
/// </summary>
/// <param name="positions">batch of positions to evaluate</param>
/// <param name="retrieveSupplementalResults">if supplemental results should also be retrieved</param>
/// <returns>the evaluation batch produced by the base implementation</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  IPositionEvaluationBatch result = base.EvaluateIntoBuffers(positions, retrieveSupplementalResults);

  int numVOK = 0;
  int numPolicyOK = 0;
  float maxPolicyDiff = 0;
  for (int i = 0; i < positions.NumPos; i++)
  {
    // V is expressed as (win probability - loss probability) for each evaluator.
    float v0 = subResults[0].GetWinP(i) - subResults[0].GetLossP(i);
    float v1 = subResults[1].GetWinP(i) - subResults[1].GetLossP(i);

    // Check W/D/L
    if (MathF.Abs(v0 - v1) > 0.02)
    {
      Console.WriteLine($"WFEvalNetCompare V discrepancy: {i,6:F0} {v0,7:F3} {v1,7:F3}");
    }
    else
    {
      numVOK++;
    }

    (Memory<CompressedPolicyVector> policiesArray0, int policyIndex0) = subResults[0].GetPolicy(i);
    CompressedPolicyVector thesePolicies0 = policiesArray0.Span[policyIndex0];
    (Memory<CompressedPolicyVector> policiesArray1, int policyIndex1) = subResults[1].GetPolicy(i);
    CompressedPolicyVector thesePolicies1 = policiesArray1.Span[policyIndex1];

    float[] policies0 = thesePolicies0.DecodedAndNormalized;
    float[] policies1 = thesePolicies1.DecodedAndNormalized;
    float maxDiff = 0;

    for (int p = 0; p < policies0.Length; p++)
    {
      float diff = MathF.Abs(policies0[p] - policies1[p]);

      // Tolerance is 7% of the average of the two probabilities, floored at 0.03.
      // BUGFIX: parenthesize the sum before halving; previously computed
      // 0.07f * |p0 + p1 * 0.5f| due to operator precedence.
      float tolerance = Math.Max(0.03f, 0.07f * MathF.Abs((policies0[p] + policies1[p]) * 0.5f));

      if (diff > maxDiff && diff > tolerance)
      {
        if (maxDiff == 0)
        {
          Console.WriteLine("WFEvalNetCompare policy discrepancies:");
        }

        // BUGFIX: track the absolute difference. Previously the signed difference
        // was stored, so a negative value could both defeat the running-max logic
        // and suppress the maxPolicyDiff update below.
        maxDiff = diff;
        Console.WriteLine($"  {p,6} {policies0[p],6:F3} {policies1[p],6:F3}");
      }
    }

    if (maxDiff == 0)
    {
      numPolicyOK++;
    }
    else if (maxDiff > maxPolicyDiff)
    {
      maxPolicyDiff = maxDiff;
    }
  }

  if (VERBOSE)
  {
    Console.WriteLine();
    Console.WriteLine($"{numVOK} of {positions.NumPos} had approximately equal W/D/L scores between the first two WFEvalNetCompare");
    Console.WriteLine($"{numPolicyOK} of {positions.NumPos} had all policies good, worst significant difference {maxPolicyDiff}");
  }

  return result;
}
/// <summary>
/// Converts a flat buffer of policy values (probabilities or logits) into an array of
/// CompressedPolicyVector, one per position, normalizing each distribution to sum to 1.
/// </summary>
/// <param name="numPos">number of positions covered by the buffer</param>
/// <param name="policyProbs">flat buffer with POLICY_VECTOR_LENGTH entries per position</param>
/// <param name="probType">whether entries are probabilities or log probabilities (logits)</param>
/// <param name="alreadySorted">if the entries are already sorted by probability</param>
/// <returns>one CompressedPolicyVector per position, or null if policyProbs is null</returns>
static CompressedPolicyVector[] ExtractPoliciesBufferFlat(int numPos, float[] policyProbs, PolicyType probType, bool alreadySorted)
{
  // TODO: possibly needs work.
  //       Do we handle WDL correctly? Do we flip the moves if we are black (using positions)?

  if (policyProbs == null)
  {
    return null;
  }

  // (This length validation was previously duplicated; a single check suffices.)
  if (policyProbs.Length != EncodedPolicyVector.POLICY_VECTOR_LENGTH * numPos)
  {
    throw new ArgumentException("Wrong policy size");
  }

  CompressedPolicyVector[] retPolicies = new CompressedPolicyVector[numPos];

  float[] buffer = new float[EncodedPolicyVector.POLICY_VECTOR_LENGTH];
  for (int i = 0; i < numPos; i++)
  {
    int startIndex = EncodedPolicyVector.POLICY_VECTOR_LENGTH * i;
    if (probType == PolicyType.Probabilities)
    {
      Array.Copy(policyProbs, startIndex, buffer, 0, EncodedPolicyVector.POLICY_VECTOR_LENGTH);
    }
    else
    {
      // Softmax over logits: subtract off the max to avoid overflow in Exp.
      float max = 0.0f;
      for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
      {
        float val = policyProbs[startIndex + j];
        if (val > max)
        {
          max = val;
        }
      }

      for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
      {
        buffer[j] = (float)Math.Exp(policyProbs[startIndex + j] - max); // TODO: make faster
      }
    }

    // Accumulate in double for accuracy, then normalize if meaningfully off from 1.0.
    double acc = 0;
    for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
    {
      acc += buffer[j];
    }

    if (acc == 0.0)
    {
      throw new Exception("Sum of unnormalized probabilities was zero.");
    }

    // As performance optimization, only adjust if significantly different from 1.0.
    const float MAX_DEVIATION = 0.001f;
    if (acc < 1.0f - MAX_DEVIATION || acc > 1.0f + MAX_DEVIATION)
    {
      for (int j = 0; j < EncodedPolicyVector.POLICY_VECTOR_LENGTH; j++)
      {
        buffer[j] = (float)(buffer[j] / acc);
      }
    }

    CompressedPolicyVector.Initialize(ref retPolicies[i], buffer, alreadySorted);
  }

  return retPolicies;
}
/// <summary>
/// Extracts the policy distribution from a node's children into a CompressedPolicyVector,
/// using the supplied softmax value.
/// NOTE(review): truncated in this view; the method continues past the visible text.
/// </summary>
/// <param name="softmaxValue">softmax parameter applied to the extracted policy</param>
/// <param name="nodeRef">node whose children supply the policy entries</param>
/// <param name="policy">destination policy vector</param>
public static void ExtractPolicyVector(float softmaxValue, MCTSNodeStruct nodeRef, ref CompressedPolicyVector policy)
{
  // NOTE(review): "indicies" is a misspelling of "indices" (local name only).
  Span<ushort> indicies = stackalloc ushort[CompressedPolicyVector.NUM_MOVE_SLOTS];
  Span<ushort> probabilities = stackalloc ushort[CompressedPolicyVector.NUM_MOVE_SLOTS];

  for (int i = 0; i < nodeRef.NumPolicyMoves; i++)
  {
    MCTSNodeStructChild child = nodeRef.ChildAtIndex(i);
    if (child.IsExpanded)
    {
      ref readonly MCTSNodeStruct childRef = ref child.ChildRef;
/// <summary>
/// Implementation of virtual method to actually evaluate the batch.
/// Small batches go to a single preferred evaluator; larger batches are split across
/// all evaluators (by PreferredFractions), run in parallel, and recombined.
/// NOTE(review): truncated in this view; the method's closing brace lies past the visible text.
/// </summary>
/// <param name="positions">batch of positions to evaluate</param>
/// <param name="retrieveSupplementalResults">if supplemental results should be retrieved (unsupported)</param>
/// <returns>the combined evaluation batch</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  if (retrieveSupplementalResults)
  {
    throw new NotImplementedException();
  }

  if (positions.NumPos <= MinSplitSize)
  {
    // Too small to profitably split across multiple devices.
    // NOTE(review): "indexPerferredEvalator" is misspelled (presumably "indexPreferredEvaluator").
    return (Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults));
  }
  else
  {
    // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
    //       and construct custom WFEvaluationBatch which are just using appropriate Memory slices.
    //       Need to create a new constructor for WFEvaluationBatch.
    IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];

    List<Task> tasks = new List<Task>();
    int[] subBatchSizes = new int[Evaluators.Length];
    for (int i = 0; i < Evaluators.Length; i++)
    {
      // Copy loop variable so the lambda below captures a stable value.
      int capI = i;
      IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
      subBatchSizes[capI] = thisSubBatch.NumPos;
      tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
    }
    Task.WaitAll(tasks.ToArray());

    if (UseMergedBatch)
    {
      return (new PositionsEvaluationBatchMerged(results, subBatchSizes));
    }
    else
    {
      // Recombine the per-evaluator sub-results into contiguous full-batch arrays.
      CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
      FP16[] w = new FP16[positions.NumPos];
      FP16[] l = new FP16[positions.NumPos];
      FP16[] m = new FP16[positions.NumPos];

      bool isWDL = results[0].IsWDL;
      bool hasM = results[0].HasM;

      int nextPosIndex = 0;
      for (int i = 0; i < Evaluators.Length; i++)
      {
        PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
        int thisNumPos = resultI.NumPos;

        resultI.Policies.CopyTo(new Memory<CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
        resultI.W.CopyTo(new Memory<FP16>(w).Slice(nextPosIndex, thisNumPos));

        if (isWDL)
        {
          resultI.L.CopyTo(new Memory<FP16>(l).Slice(nextPosIndex, thisNumPos));
          resultI.M.CopyTo(new Memory<FP16>(m).Slice(nextPosIndex, thisNumPos));
        }

        nextPosIndex += thisNumPos;
      }

      TimingStats stats = new TimingStats();
      return (new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats));
    }
  }