/// <summary>
/// Evaluates all positions in the specified batch by delegating to EvaluateBatch.
/// </summary>
/// <param name="batch">batch of encoded positions; all batch.NumPos positions are processed</param>
/// <param name="retrieveSupplementalResults">must equal RetrieveValueFCActivations, otherwise an exception is thrown</param>
/// <returns>the batch returned by EvaluateBatch</returns>
/// <exception cref="Exception">if retrieveSupplementalResults differs from RetrieveValueFCActivations</exception>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat batch, bool retrieveSupplementalResults = false) // ** MAKE VIRTUAL
{
    if (RetrieveValueFCActivations != retrieveSupplementalResults)
    {
        throw new Exception("Value of parameter " + retrieveSupplementalResults + " does not match constructor configuration");
    }

    return EvaluateBatch(batch, batch.NumPos, false, retrieveValueFCActivations: retrieveSupplementalResults);
}
PositionEvaluationBatch DoEvaluateBatch(IEncodedPositionBatchFlat batch, int numToProcess, bool verbose = false, bool retrieveValueFCActivations = false) // ** MAKE VIRTUAL { if (batch.NumPos > Config.MaxBatchSize) { throw new Exception($"Requested batch size {batch.NumPos} for TensorRT exceeds specified maximum {Config.MaxBatchSize}"); } if (batch.NumPos == 0) { throw new Exception("Empty batch"); } NNEvaluatorStats.UpdateStatsForBatch(GPUID, numToProcess); #if NOT if (Config.GPUID != TRT_GPU) { if (TRT_GPU != -1) { throw new NotImplementedException("Implementation restriction: DLL TRTRun call not multithreaded"); } TRT_GPU = Config.GPUID; } #endif if (numToProcess <= 0) { throw new ArgumentOutOfRangeException($"numToProcess must be greater than zero {numToProcess}"); } if (numToProcess > 2048) { throw new ArgumentOutOfRangeException("TensorRT engines are unlikely to be able to process >2048 positions"); } //LZTrainingPositionServerBatch batchCalib = null; // no longer used LZTrainingPositionServerBatch.GenBatchFromPositions(CalibPositions); int numToProcessPadded = (int)MathUtils.RoundedUp(numToProcess, PADDING_ALIGN); //TimingStats stats = new TimingStats(); //using (new TimingBlock(stats, TimingBlock.LoggingType.None)) slow { float[] floatsCalib = null; // batchCalib.EncodedPosExpandedAsFloats; float[] lz0FloatsCalib = null; // ChessNetTFExecutor.RebuildInputsForLZ0Network(floatsCalib, numToProcess); const bool USE_TOP_K = true; // !USE_TRT713; Span <float> rawResultsPolicy = USE_TOP_K ? stackalloc float[Config.MaxBatchSize * NUM_TOPK_POLICY * 2] // one 4 byte entry for each index (as int 8), one 4 byte entry for each probability : new float[numToProcessPadded * EncodedPolicyVector.POLICY_VECTOR_LENGTH]; int NUM_VALUE_OUTPUTS = (Config.IsWDL ? 
3 : 1); Span <FP16> results = stackalloc FP16[numToProcessPadded * NUM_VALUE_OUTPUTS]; if (retrieveValueFCActivations) { throw new Exception("The ONNX version of our TensorRT library does not expose inner layers"); } float[] rawResultsConvValFlat = retrieveValueFCActivations ? new float[numToProcessPadded * NUM_VALUE_OUTPUTS * 32 * 64] : new float[1]; // Note: can't be null or empty, since we use in fixed statement below #if NOTES NOTE : cudaMallocHost was 1.5 slower than just cudaAlloc for the buffers[]
/// <summary>
/// Evaluates a batch via DoEvaluateBatch while holding the lock object
/// registered for this session in sessionActiveLocks.
/// </summary>
/// <param name="batch">batch of encoded positions</param>
/// <param name="numToProcess">number of positions to process (passed through unchanged)</param>
/// <param name="verbose">passed through unchanged to DoEvaluateBatch</param>
/// <param name="retrieveValueFCActivations">passed through unchanged to DoEvaluateBatch</param>
/// <returns>the batch produced by DoEvaluateBatch</returns>
public PositionEvaluationBatch EvaluateBatch(IEncodedPositionBatchFlat batch, int numToProcess, bool verbose = false, bool retrieveValueFCActivations = false) // ** MAKE VIRTUAL
{
    // The executor does not support parallel operations,
    // so all evaluations for a given session are serialized on one lock.
    var sessionLock = sessionActiveLocks[SessionID];
    lock (sessionLock)
    {
        PositionEvaluationBatch evaluated = DoEvaluateBatch(batch, numToProcess, verbose, retrieveValueFCActivations);
        return evaluated;
    }
}
/// <summary>
/// Evaluates the batch using the next evaluator in round-robin order.
/// </summary>
/// <param name="positions">batch of encoded positions</param>
/// <param name="retrieveSupplementalResults">passed through unchanged to the selected evaluator</param>
/// <returns>the batch returned by the selected evaluator</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    // Pick the evaluator for this call and advance (with wraparound) BEFORE dispatching.
    // If the wraparound were applied only after the call returned, an exception thrown
    // by the evaluator could leave nextIndex equal to Evaluators.Length, making every
    // subsequent call fail with an index-out-of-range error.
    int thisIndex = nextIndex;
    nextIndex = (thisIndex + 1) % Evaluators.Length;

    return Evaluators[thisIndex].EvaluateIntoBuffers(positions, retrieveSupplementalResults);
}
/// <summary>
/// Overrides worker method to evaluate a specified batch into internal buffers.
/// The batch is first flattened into a temporary float array rented from the shared array pool.
/// </summary>
/// <param name="batch">batch of encoded positions</param>
/// <param name="retrieveSupplementalResults">passed through unchanged to DoEvaluateBatch</param>
/// <returns>the batch produced by DoEvaluateBatch</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat batch, bool retrieveSupplementalResults = false)
{
    // NOTE(review): 112 and 64 are presumably planes-per-position and squares-per-plane -- confirm.
    int bufferLength = 112 * batch.NumPos * 64;

    // Rent may hand back an array longer than requested; only the leading part is meaningful.
    float[] flatValues = ArrayPool<float>.Shared.Rent(bufferLength);
    try
    {
        batch.ValuesFlatFromPlanes(flatValues);
        return DoEvaluateBatch(flatValues, batch.NumPos, retrieveSupplementalResults);
    }
    finally
    {
        // Always give the buffer back, even if flattening or evaluation throws.
        ArrayPool<float>.Shared.Return(flatValues);
    }
}
/// <summary>
/// Implements virtual method to evaluate a specified batch.
/// The batch is appended to the current pooled batch and this call then
/// blocks until that pooled batch signals completion.
/// </summary>
/// <param name="positions">batch of encoded positions to add to the pool</param>
/// <param name="retrieveSupplementalResults">must equal RetrieveSupplementalResults, otherwise an exception is thrown</param>
/// <returns>the completed sub-batch corresponding to the positions passed to this call</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    if (retrieveSupplementalResults != RetrieveSupplementalResults)
    {
        throw new Exception("Internal error: Requested unexpected retrieveSupplementalResults");
    }

    // While the current pooled batch already exceeds the threshold number of positions,
    // launch it (avoids overflow; there is little benefit in accumulating more).
    // The lock is released and re-acquired between successive checks.
    bool launched;
    do
    {
        lock (lockObj)
        {
            launched = currentPooledBatch.NumPendingPositions > DEFAULT_BATCH_THRESHOLD;
            if (launched)
            {
                Launch();
            }
        }
    } while (launched);

    NNEvaluatorPoolBatch targetBatch;
    int slot;
    lock (lockObj)
    {
        // Capture a local reference, since currentPooledBatch may change upon the next set of batches.
        targetBatch = currentPooledBatch;
        slot = targetBatch.pendingBatches.Count;
        targetBatch.pendingBatches.Add(positions);
    }

    // Block until signalled that this pooled batch has completed processing.
    targetBatch.batchesDoneEvent.Wait();

    // Return just the sub-batch that was requested in this call.
    var completed = targetBatch.completedBatches[slot];
    Debug.Assert(!float.IsNaN(completed.GetV(0)));
    return completed;
}
/// <summary>
/// Evaluates the batch using a single evaluator chosen per call:
/// the index comes from DynamicEvaluatorIndexPredicate when one is set,
/// otherwise from the PreferredEvaluatorIndex of the batch.
/// </summary>
/// <param name="positions">batch of encoded positions</param>
/// <param name="retrieveSupplementalResults">passed through unchanged to the chosen evaluator</param>
/// <returns>the batch returned by the chosen evaluator</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    int evaluatorIndex = DynamicEvaluatorIndexPredicate != null
        ? DynamicEvaluatorIndexPredicate(positions)
        : positions.PreferredEvaluatorIndex;

    return Evaluators[evaluatorIndex].EvaluateIntoBuffers(positions, retrieveSupplementalResults);
}
/// <summary>
/// Produces a batch of pseudo-random evaluations. The value outputs for each
/// position are derived from a hash of that position's plane bitmaps, and
/// policies are marked as requests to be randomized later.
/// </summary>
/// <param name="positions">batch of encoded positions</param>
/// <param name="retrieveSupplementalResults">not supported; must be false</param>
/// <returns>a new PositionEvaluationBatch with one entry per position</returns>
/// <exception cref="NotImplementedException">if retrieveSupplementalResults is true</exception>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    // Reject the unsupported option up front rather than after doing all the work.
    if (retrieveSupplementalResults)
    {
        throw new NotImplementedException();
    }

    TimingStats timingStats = new TimingStats();
    using (new TimingBlock("EvalBatch", timingStats, TimingBlock.LoggingType.None))
    {
        CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
        FP16[] w = new FP16[positions.NumPos];
        FP16[] l = IsWDL ? new FP16[positions.NumPos] : null;
        FP16[] m = IsWDL ? new FP16[positions.NumPos] : null;

        for (int i = 0; i < positions.NumPos; i++)
        {
            int hashPos = HashInRange(positions.PosPlaneBitmaps,
                                      i * EncodedPositionWithHistory.NUM_PLANES_TOTAL,
                                      EncodedPositionWithHistory.NUM_PLANES_TOTAL);

            // Math.Abs(int.MinValue) throws OverflowException, so map that single
            // value to int.MaxValue; every other hash is treated exactly as before.
            int absHash = hashPos == int.MinValue ? int.MaxValue : Math.Abs(hashPos);
            hashPos = absHash ^ 172854;

            // Generate value
            if (IsWDL)
            {
                GenerateRandValue(hashPos, ref w[i], ref l[i]);
                m[i] = 30 + i % 7;
            }
            else
            {
                FP16 dummyL = 0;
                GenerateRandValue(hashPos, ref w[i], ref dummyL);
            }

            // Initialize policies. Mark them as requests to be random
            // (the actual randomization will be done during search, when we have the set of legal moves handy)
            // TODO: if the batch also contains Positions already, we could do the assignment now
            CompressedPolicyVector.InitializeAsRandom(ref policies[i], Type == RandomType.WidePolicy);
        }

        float[] supplemental = null;
        return new PositionEvaluationBatch(IsWDL, HasM, positions.NumPos, policies, w, l, m, supplemental, timingStats);
    }
}
/// <summary>
/// Evaluates the batch by calling Evaluator.EvaluateNN and then running
/// PreparePosition for every position index in parallel.
/// </summary>
/// <param name="positions">batch of encoded positions; its Moves must be non-null</param>
/// <param name="retrieveSupplementalResults">not supported; must be false</param>
/// <returns>a new PositionEvaluationBatch built over the policies/w/l/m members</returns>
/// <exception cref="Exception">if positions.Moves is null</exception>
/// <exception cref="NotImplementedException">if retrieveSupplementalResults is true</exception>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    if (positions.Moves == null)
    {
        throw new Exception("NNEvaluatorLC0NNEvaluator requires Moves to be provided");
    }

    if (retrieveSupplementalResults)
    {
        throw new NotImplementedException("retrieveSupplementalResults not supported");
    }

    Evaluator.EvaluateNN(positions, positions.Positions);

    // Run PreparePosition once per position index, with parallelism sized by the helper.
    const int POSITIONS_PER_WORKER = 40;
    ParallelOptions options = ParallelUtils.ParallelOptions(positions.NumPos, POSITIONS_PER_WORKER);
    Parallel.For(0, positions.NumPos, options, PreparePosition);

    return new PositionEvaluationBatch(IsWDL, HasM, positions.NumPos, policies, w, l, m, null, new TimingStats());
}
/// <summary>
/// Processes the current set of pending batches:
///   - combines them into a single batch (used directly if there is exactly one),
///   - evaluates that batch in one call,
///   - splits the evaluation back into per-sub-batch results stored in completedBatches.
/// </summary>
/// <param name="evaluator">evaluator used to evaluate the combined batch</param>
/// <param name="retrieveSupplementalResults">passed to the evaluator and to DisaggregateBatches</param>
internal void ProcessPooledBatch(NNEvaluator evaluator, bool retrieveSupplementalResults)
{
    // Exactly one pending batch is the easy case: no aggregation required.
    IEncodedPositionBatchFlat combined = pendingBatches.Count == 1
        ? pendingBatches[0]
        : AggregateBatches();

    // Evaluate everything at once.
    PositionEvaluationBatch combinedResult =
        (PositionEvaluationBatch)evaluator.EvaluateIntoBuffers(combined, retrieveSupplementalResults);

    completedBatches = DisaggregateBatches(retrieveSupplementalResults, combinedResult, pendingBatches);
}
/// <summary>
/// Returns the slice of a full batch assigned to one split.
/// Slice boundaries are computed by scaling the number of positions by the
/// values returned from ToCumulative(splitFracs); the last split always
/// extends to the end of the batch.
/// </summary>
/// <param name="fullBatch">batch to be sliced</param>
/// <param name="splitFracs">per-split fractions</param>
/// <param name="thisSplitIndex">index of the split whose slice is requested</param>
/// <returns>the result of fullBatch.GetSubBatchSlice for the computed range</returns>
IEncodedPositionBatchFlat GetSubBatch(IEncodedPositionBatchFlat fullBatch, float[] splitFracs, int thisSplitIndex)
{
    float[] cumulativeFracs = ToCumulative(splitFracs);

    int firstPos = (int)(fullBatch.NumPos * cumulativeFracs[thisSplitIndex]);

    // The final split absorbs any remainder left by truncation.
    int endPos = thisSplitIndex == splitFracs.Length - 1
        ? fullBatch.NumPos
        : (int)(fullBatch.NumPos * cumulativeFracs[thisSplitIndex + 1]);

    return fullBatch.GetSubBatchSlice(firstPos, endPos - firstPos);
}
/// <summary>
/// Evaluates batch of positions into the buffers local to this object.
///
/// Note that the batch returned is built over the local buffers
/// and may be overwritten upon next call to this method.
///
/// Therefore this method is intended only for low-level use;
/// callers should consume (or copy) the results before calling it again.
/// </summary>
/// <param name="positions">batch of encoded positions to be evaluated</param>
/// <param name="retrieveSupplementalResults">whether supplemental results are requested (support varies by implementation)</param>
/// <returns>the evaluation results for the batch</returns>
public abstract IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false);
/// <summary>
/// Evaluates specified batch into internal buffers via the base implementation,
/// then compares the outputs in subResults[0] and subResults[1] position by position,
/// writing any value or policy discrepancies to the console.
/// </summary>
/// <param name="positions">batch of encoded positions</param>
/// <param name="retrieveSupplementalResults">passed through unchanged to the base implementation</param>
/// <returns>the batch returned by the base implementation (the comparison does not alter it)</returns>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    IPositionEvaluationBatch result = base.EvaluateIntoBuffers(positions, retrieveSupplementalResults);

    int numVOK = 0;
    int numPolicyOK = 0;
    float maxPolicyDiff = 0;

    for (int i = 0; i < positions.NumPos; i++)
    {
        float v0 = subResults[0].GetWinP(i) - subResults[0].GetLossP(i);
        float v1 = subResults[1].GetWinP(i) - subResults[1].GetLossP(i);

        // Check W/D/L
        if (MathF.Abs(v0 - v1) > 0.02)
        {
            Console.WriteLine($"WFEvalNetCompare V discrepancy: {i,6:F0} {v0,7:F3} {v1,7:F3}");
        }
        else
        {
            numVOK++;
        }

        (Memory<CompressedPolicyVector> policiesArray0, int policyIndex0) = subResults[0].GetPolicy(i);
        CompressedPolicyVector thesePolicies0 = policiesArray0.Span[policyIndex0];
        (Memory<CompressedPolicyVector> policiesArray1, int policyIndex1) = subResults[1].GetPolicy(i);
        CompressedPolicyVector thesePolicies1 = policiesArray1.Span[policyIndex1];

        float[] policies0 = thesePolicies0.DecodedAndNormalized;
        float[] policies1 = thesePolicies1.DecodedAndNormalized;

        // Largest significant absolute policy difference seen so far for this position
        // (remains 0 if no entry exceeded its tolerance).
        float maxDiff = 0;
        for (int p = 0; p < policies0.Length; p++)
        {
            float diff = MathF.Abs(policies0[p] - policies1[p]);

            // Tolerance is the larger of an absolute floor and 7% of the AVERAGE of the two
            // probabilities (parenthesized so that the 0.5 factor applies to the sum).
            float tolerance = Math.Max(0.03f, 0.07f * MathF.Abs((policies0[p] + policies1[p]) * 0.5f));

            if (diff > maxDiff && (diff > tolerance))
            {
                if (maxDiff == 0)
                {
                    Console.WriteLine("WFEvalNetCompare policy discrepancies:");
                }

                // Track the absolute difference: a signed value here would break both the
                // running-maximum comparison above and the maxDiff == 0 checks.
                maxDiff = diff;
                Console.WriteLine($" {p,6} {policies0[p], 6:F3} { policies1[p], 6:F3}");
            }
        }

        if (maxDiff == 0)
        {
            numPolicyOK++;
        }
        else if (maxDiff > maxPolicyDiff)
        {
            maxPolicyDiff = maxDiff;
        }
    }

    if (VERBOSE)
    {
        Console.WriteLine();
        Console.WriteLine($"{numVOK} of {positions.NumPos} had approximately equal W/D/L scores between the first two WFEvalNetCompare");
        Console.WriteLine($"{numPolicyOK} of {positions.NumPos} had all policies good, worse significant difference {maxPolicyDiff}");
    }

    return result;
}
/// <summary>
/// Implementation of virtual method to actually evaluate the batch.
/// Batches no larger than MinSplitSize are sent whole to the preferred evaluator;
/// larger batches are divided among all evaluators (according to PreferredFractions),
/// evaluated concurrently, and the results recombined.
/// </summary>
/// <param name="positions">batch of encoded positions</param>
/// <param name="retrieveSupplementalResults">not supported; must be false</param>
/// <returns>
/// the preferred evaluator's batch (small input), a PositionsEvaluationBatchMerged
/// (when UseMergedBatch is set), or a new PositionEvaluationBatch holding copies
/// of all sub-batch results in evaluator order
/// </returns>
/// <exception cref="NotImplementedException">if retrieveSupplementalResults is true</exception>
public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
{
    if (retrieveSupplementalResults)
    {
        throw new NotImplementedException();
    }

    if (positions.NumPos <= MinSplitSize)
    {
        // Too small to profitably split across multiple devices
        return Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults);
    }
    else
    {
        // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
        //       and construct custom WFEvaluationBatch which are just using appropriate Memory slices
        //       Need to create a new constructor for WFEvaluationBatch
        IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];

        List<Task> tasks = new List<Task>();
        int[] subBatchSizes = new int[Evaluators.Length];
        for (int i = 0; i < Evaluators.Length; i++)
        {
            // Copy the loop variable so each task's closure captures its own index.
            int capI = i;
            IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
            subBatchSizes[capI] = thisSubBatch.NumPos;
            tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
        }

        Task.WaitAll(tasks.ToArray());

        if (UseMergedBatch)
        {
            return new PositionsEvaluationBatchMerged(results, subBatchSizes);
        }
        else
        {
            CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
            FP16[] w = new FP16[positions.NumPos];
            FP16[] l = new FP16[positions.NumPos];
            FP16[] m = new FP16[positions.NumPos];

            bool isWDL = results[0].IsWDL;
            bool hasM = results[0].HasM;

            int nextPosIndex = 0;
            for (int i = 0; i < Evaluators.Length; i++)
            {
                PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
                int thisNumPos = resultI.NumPos;

                resultI.Policies.CopyTo(new Memory<CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
                resultI.W.CopyTo(new Memory<FP16>(w).Slice(nextPosIndex, thisNumPos));

                if (isWDL)
                {
                    resultI.L.CopyTo(new Memory<FP16>(l).Slice(nextPosIndex, thisNumPos));
                }

                // Copy M whenever the results carry it, independent of WDL;
                // gating this on isWDL would silently drop M for non-WDL results.
                if (hasM)
                {
                    resultI.M.CopyTo(new Memory<FP16>(m).Slice(nextPosIndex, thisNumPos));
                }

                nextPosIndex += thisNumPos;
            }

            TimingStats stats = new TimingStats();
            return new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats);
        }
    }
}
/// <summary>
/// Constructs a slice over a specified flat batch by recording
/// the parent batch together with the start index and length of the slice.
/// </summary>
/// <param name="parent">batch from which the slice is taken</param>
/// <param name="startIndex">value stored in StartIndex</param>
/// <param name="length">value stored in Length</param>
public EncodedPositionBatchFlatSlice(IEncodedPositionBatchFlat parent, int startIndex, int length)
{
    (Parent, StartIndex, Length) = (parent, startIndex, length);
}