/// <summary>
/// Copies the outputs of an evaluated batch onto the corresponding nodes
/// (as either the primary or secondary evaluation result) and, if
/// SaveToCache is set, also stores each result in the cache.
///
/// TODO: this method is similar to the one below, try to unify them.
/// </summary>
/// <param name="nodes">nodes to be updated; the node at index i receives the batch result at index i</param>
/// <param name="results">evaluated batch from which the M, win/loss and policy values are read</param>
/// <param name="resultTarget">selects which evaluation result field of each node is assigned</param>
void RetrieveResults(Span<MCTSNode> nodes, IPositionEvaluationBatch results, EvalResultTarget resultTarget)
{
  for (int nodeIndex = 0; nodeIndex < nodes.Length; nodeIndex++)
  {
    MCTSNode thisNode = nodes[nodeIndex];

    // Pull the raw network outputs for this position.
    FP16 rawM = results.GetM(nodeIndex);

    FP16 rawWinP = results.GetWinP(nodeIndex);
    Debug.Assert(!float.IsNaN(rawWinP));

    FP16 rawLossP = results.GetLossP(nodeIndex);
    Debug.Assert(!float.IsNaN(rawLossP));

    // Start from the raw values. If they look like a non-WDL result
    // (negative win, zero loss), rewrite them in an equivalent form
    // that avoids negative probabilities.
    FP16 winP = rawWinP;
    FP16 lossP = rawLossP;
    bool looksLikeNonWDL = rawWinP < 0 && rawLossP == 0;
    if (looksLikeNonWDL)
    {
      winP = 0;
      lossP = -rawWinP;
    }

    // Build the evaluation result, including the policy.
    LeafEvaluationResult evalResult = new LeafEvaluationResult(GameResult.Unknown, winP, lossP, rawM)
    {
      PolicyInArray = results.GetPolicy(nodeIndex)
    };

    // Attach the result to the requested slot of the node.
    switch (resultTarget)
    {
      case EvalResultTarget.PrimaryEvalResult:
        thisNode.EvalResult = evalResult;
        break;

      case EvalResultTarget.SecondaryEvalResult:
        thisNode.EvalResultSecondary = evalResult;
        break;

      default:
        throw new Exception("Internal error: unexpected EvalResultTarget");
    }

    // Save back to cache (note: the raw, unrewritten win/loss values are stored).
    if (SaveToCache)
    {
      Cache.Store(thisNode.Annotation.PositionHashForCaching,
                  GameResult.Unknown, rawWinP, rawLossP, rawM,
                  in thisNode.EvalResult.PolicyRef);
    }
  }
}
/// <summary>
/// Evaluates the specified batch using the next evaluator in
/// round-robin order, then advances to the following evaluator.
/// </summary>
/// <param name="positions">batch of positions, passed unchanged to the selected evaluator</param>
/// <param name="retrieveSupplementalResults">passed unchanged to the selected evaluator</param>
/// <returns>the evaluation batch returned by the selected evaluator</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  // Choose the evaluator for this call and advance (with wraparound)
  // BEFORE evaluating. If the evaluator throws, nextIndex is therefore
  // still a valid index for the subsequent call.
  int thisIndex = nextIndex;
  nextIndex = (thisIndex + 1) % Evaluators.Length;

  return Evaluators[thisIndex].EvaluateIntoBuffers(positions, retrieveSupplementalResults);
}
/// <summary>
/// Measures the throughput (nodes per second) of an evaluator
/// when given a single test batch of the specified size.
/// </summary>
/// <param name="evaluator">evaluator to be timed</param>
/// <param name="batchSize">number of positions in the test batch</param>
/// <param name="latencyAdjustmentSecs">seconds subtracted from the measured elapsed time before computing the rate</param>
/// <returns>batch size divided by the adjusted elapsed time in seconds</returns>
static float NPSAtBatchSize(NNEvaluator evaluator, int batchSize, float latencyAdjustmentSecs)
{
  TimingStats timing = new TimingStats();
  EncodedPositionBatchFlat testBatch = MakeTestBatch(evaluator, batchSize);

  // Time only the evaluation itself (the returned batch is not needed).
  using (new TimingBlock(timing, TimingBlock.LoggingType.None))
  {
    evaluator.EvaluateIntoBuffers(testBatch, false);
  }

  float adjustedElapsedSecs = (float)timing.ElapsedTimeSecs - latencyAdjustmentSecs;
  return batchSize / adjustedElapsedSecs;
}
/// <summary>
/// Handles the currently pending batches in three steps:
///   - combine them into a single large batch,
///   - evaluate that large batch in one call,
///   - split the returned evaluations back into per-sub-batch results
///     (stored in completedBatches).
/// </summary>
/// <param name="evaluator">evaluator used to evaluate the combined batch</param>
/// <param name="retrieveSupplementalResults">passed to the evaluator and to the disaggregation step</param>
internal void ProcessPooledBatch(NNEvaluator evaluator, bool retrieveSupplementalResults)
{
  // Build the combined batch; with exactly one pending batch
  // no aggregation is needed and it is used directly.
  IEncodedPositionBatchFlat combinedBatch;
  if (pendingBatches.Count != 1)
  {
    combinedBatch = AggregateBatches();
  }
  else
  {
    combinedBatch = pendingBatches[0];
  }

  // Evaluate everything at once (the result must be a PositionEvaluationBatch).
  PositionEvaluationBatch combinedResult =
    (PositionEvaluationBatch)evaluator.EvaluateIntoBuffers(combinedBatch, retrieveSupplementalResults);

  // Split the results back out per pending batch.
  completedBatches = DisaggregateBatches(retrieveSupplementalResults, combinedResult, pendingBatches);
}
/// <summary>
/// Evaluates specified batch into internal buffers (via the base class)
/// and then compares the outputs of the first two sub-evaluators,
/// writing any value or policy discrepancies to the Console.
/// </summary>
/// <param name="positions">batch of positions to be evaluated</param>
/// <param name="retrieveSupplementalResults">passed unchanged to the base implementation</param>
/// <returns>the batch returned by the base implementation (the comparison does not modify it)</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
  // NOTE(review): subResults is presumably populated by the base call — confirm.
  IPositionEvaluationBatch result = base.EvaluateIntoBuffers(positions, retrieveSupplementalResults);

  int numVOK = 0;
  int numPolicyOK = 0;
  float maxPolicyDiff = 0;

  for (int i = 0; i < positions.NumPos; i++)
  {
    // Value score (win minus loss) from each of the two evaluators.
    float v0 = subResults[0].GetWinP(i) - subResults[0].GetLossP(i);
    float v1 = subResults[1].GetWinP(i) - subResults[1].GetLossP(i);

    // Check W/D/L
    if (MathF.Abs(v0 - v1) > 0.02)
    {
      Console.WriteLine($"WFEvalNetCompare V discrepancy: {i,6:F0} {v0,7:F3} {v1,7:F3}");
    }
    else
    {
      numVOK++;
    }

    (Memory<CompressedPolicyVector> policiesArray0, int policyIndex0) = subResults[0].GetPolicy(i);
    CompressedPolicyVector thesePolicies0 = policiesArray0.Span[policyIndex0];
    (Memory<CompressedPolicyVector> policiesArray1, int policyIndex1) = subResults[1].GetPolicy(i);
    CompressedPolicyVector thesePolicies1 = policiesArray1.Span[policyIndex1];

    float[] policies0 = thesePolicies0.DecodedAndNormalized;
    float[] policies1 = thesePolicies1.DecodedAndNormalized;

    // Largest significant absolute policy difference seen so far for this position.
    float maxDiff = 0;
    for (int p = 0; p < policies0.Length; p++)
    {
      float diff = MathF.Abs(policies0[p] - policies1[p]);

      // Tolerance is the larger of an absolute floor and 7% of the
      // mean of the two policy values (parenthesized so that the
      // sum, not just the second term, is halved).
      float meanPolicy = (policies0[p] + policies1[p]) * 0.5f;
      float tolerance = Math.Max(0.03f, 0.07f * MathF.Abs(meanPolicy));

      if (diff > maxDiff && (diff > tolerance))
      {
        if (maxDiff == 0)
        {
          // Header is emitted once, before the first discrepancy of this position.
          Console.WriteLine("WFEvalNetCompare policy discrepancies:");
        }

        // Track the absolute difference. Storing the signed difference
        // would break the comparisons against maxDiff and maxPolicyDiff
        // whenever the second policy value is the larger one.
        maxDiff = diff;
        Console.WriteLine($" {p,6} {policies0[p], 6:F3} { policies1[p], 6:F3}");
      }
    }

    if (maxDiff == 0)
    {
      numPolicyOK++;
    }
    else if (maxDiff > maxPolicyDiff)
    {
      maxPolicyDiff = maxDiff;
    }
  }

  if (VERBOSE)
  {
    Console.WriteLine();
    Console.WriteLine($"{numVOK} of {positions.NumPos} had approximately equal W/D/L scores between the first two WFEvalNetCompare");
    Console.WriteLine($"{numPolicyOK} of {positions.NumPos} had all policies good, worse significant difference {maxPolicyDiff}");
  }

  return result;
}
/// <summary>
/// Constructor which records the batch and the index
/// that were supplied by the caller.
/// </summary>
/// <param name="batch">evaluation batch to be stored</param>
/// <param name="index">index value to be stored alongside the batch</param>
internal NNPositionEvaluationBatchMember(IPositionEvaluationBatch batch, int index)
  => (this.batch, this.index) = (batch, index);
/// <summary> /// Implementation of virtual method to actually evaluate the batch. /// </summary> /// <param name="positions"></param> /// <param name="retrieveSupplementalResults"></param> /// <returns></returns> public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false) { if (retrieveSupplementalResults) { throw new NotImplementedException(); } if (positions.NumPos <= MinSplitSize) { // Too small to profitably split across multiple devices return(Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults)); } else { // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice // and construct custom WFEvaluationBatch which are just using approrpiate Memory slices // Need to create a new constructor for WFEvaluationBatch IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length]; List <Task> tasks = new List <Task>(); int[] subBatchSizes = new int[Evaluators.Length]; for (int i = 0; i < Evaluators.Length; i++) { int capI = i; IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI); subBatchSizes[capI] = thisSubBatch.NumPos; tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults))); } Task.WaitAll(tasks.ToArray()); if (UseMergedBatch) { return(new PositionsEvaluationBatchMerged(results, subBatchSizes)); } else { CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos]; FP16[] w = new FP16[positions.NumPos]; FP16[] l = new FP16[positions.NumPos]; FP16[] m = new FP16[positions.NumPos]; bool isWDL = results[0].IsWDL; bool hasM = results[0].HasM; int nextPosIndex = 0; for (int i = 0; i < Evaluators.Length; i++) { PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i]; int thisNumPos = resultI.NumPos; resultI.Policies.CopyTo(new Memory 
<CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos)); resultI.W.CopyTo(new Memory <FP16>(w).Slice(nextPosIndex, thisNumPos)); if (isWDL) { resultI.L.CopyTo(new Memory <FP16>(l).Slice(nextPosIndex, thisNumPos)); resultI.M.CopyTo(new Memory <FP16>(m).Slice(nextPosIndex, thisNumPos)); } nextPosIndex += thisNumPos; } TimingStats stats = new TimingStats(); return(new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats)); } }