/// <summary>
/// Constructs a test batch of specified size.
/// </summary>
/// <param name="evaluator">evaluator whose required input types determine which batch fields are populated</param>
/// <param name="count">number of (identical) positions to place in the batch</param>
/// <param name="fen">FEN of the position to replicate; the start position is used if null</param>
/// <returns>a batch containing count copies of the specified position</returns>
public static EncodedPositionBatchFlat MakeTestBatch(NNEvaluator evaluator, int count, string fen = null)
{
  if (fen == null)
  {
    fen = Position.StartPosition.FEN;
  }

  Position rawPos = Position.FromFEN(fen);
  MGPosition mgPos = MGPosition.FromPosition(rawPos);

  EncodedPositionWithHistory position = EncodedPositionWithHistory.FromFEN(fen);
  EncodedPositionWithHistory[] positions = new EncodedPositionWithHistory[count];
  Array.Fill(positions, position);

  EncodedPositionBatchFlat batch = new EncodedPositionBatchFlat(positions, count);

  bool hasPositions = evaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Positions);
  bool hasMoves = evaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Moves);
  bool hasHashes = evaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Hashes);
  bool hasBoards = evaluator.InputsRequired.HasFlag(NNEvaluator.InputTypes.Boards);

  if (hasPositions)
  {
    batch.Positions = new MGPosition[count];
  }
  if (hasHashes)
  {
    batch.PositionHashes = new ulong[count];
  }
  if (hasMoves)
  {
    batch.Moves = new MGMoveList[count];
  }

  for (int i = 0; i < count; i++)
  {
    if (hasPositions)
    {
      batch.Positions[i] = MGChessPositionConverter.MGChessPositionFromFEN(fen);
    }

    if (hasHashes)
    {
      // Derive the hash from the local mgPos rather than batch.Positions[i]
      // so hashes can be filled even if Positions were not requested.
      batch.PositionHashes[i] = (ulong)i + (ulong)mgPos.GetHashCode();
    }

    if (hasMoves)
    {
      MGMoveList moves = new MGMoveList();
      MGMoveGen.GenerateMoves(in mgPos, moves);
      batch.Moves[i] = moves;
    }
  }

  return batch;
}
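// Minimal usage sketch (an illustration only; it assumes an NNEvaluator instance named 'evaluator'
// has already been constructed elsewhere, and the class hosting MakeTestBatch is not shown in this fragment):
//
//   EncodedPositionBatchFlat startBatch  = MakeTestBatch(evaluator, 16);   // 16 copies of the start position
//   EncodedPositionBatchFlat customBatch = MakeTestBatch(evaluator, 16,
//       "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 1");
//   IPositionEvaluationBatch eval = evaluator.EvaluateIntoBuffers(startBatch, false);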
/// <summary>
/// Evaluates a batch.
/// </summary>
/// <param name="isWDL">if the network has a WDL (win/draw/loss) value head rather than a single value</param>
/// <param name="positionEncoding">flat encoding of the input positions</param>
/// <param name="numPositionsUsed">number of positions actually populated in the batch</param>
/// <param name="debuggingDump">if a decoded dump of the input planes should be emitted for debugging</param>
/// <param name="alreadyConvertedToLZ0">if the input encoding is already in LC0 network layout</param>
/// <returns>the results of evaluating the batch</returns>
public ONNXRuntimeExecutorResultBatch Execute(bool isWDL, float[] positionEncoding, int numPositionsUsed,
                                              bool debuggingDump = false, bool alreadyConvertedToLZ0 = false)
{
  if (!alreadyConvertedToLZ0)
  {
    if (positionEncoding.Length / BatchSize != 64 * EncodedPositionBatchFlat.TOTAL_NUM_PLANES_ALL_HISTORIES)
    {
      throw new Exception("Unexpected length of positionEncoding");
    }

    if (NetType == NetTypeEnum.LC0)
    {
      // TODO: centralize this conversion.
      positionEncoding = ONNXRuntimeExecutorResultBatch.RebuildInputsForLC0Network(positionEncoding, BatchSize);
    }
    else
    {
      throw new NotImplementedException();
    }
  }

  // Optionally dump a decoded version of the input planes (useful for debugging).
  if (debuggingDump)
  {
    EncodedPositionBatchFlat.DumpDecoded(positionEncoding, 112 * 2);
  }

  float[][] eval = executor.Run(positionEncoding, new int[] { numPositionsUsed, 112, 64 });

  const int VALUE_FC_SIZE = 32 * 64;

  int numPlanes = NetType == NetTypeEnum.Ceres ? EncodedPositionBatchFlat.TOTAL_NUM_PLANES_ALL_HISTORIES : 112;

  if (NetType == NetTypeEnum.Ceres)
  {
    throw new NotImplementedException();
    //nRunner = session.GetRunner().AddInput("input_1", inputTensor).Fetch("value_out/Tanh").Fetch("policy_out/Softmax").Fetch("draw_out/Sigmoid");
  }
  else
  {
    FP16[] values = FP16.ToFP16(eval[0]);
    Debug.Assert(values.Length == (isWDL ? 3 : 1) * numPositionsUsed);

    float[] policiesLogistics = eval[1];
    //for (int j = 0; j < policies.Length; j++) policies[j] = (float)Math.Exp(policies[j]);

    //float[] draws = NetType == NetTypeEnum.Ceres ? ExtractFloats(result1[2], BatchSize) : null;
    float[] value_fc_activations = eval.Length < 3 ? null : eval[2];

    ONNXRuntimeExecutorResultBatch result = new ONNXRuntimeExecutorResultBatch(isWDL, values, policiesLogistics,
                                                                               value_fc_activations, numPositionsUsed);
    return result;
  }
}
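// Illustrative call (a sketch only; construction of the ONNXRuntimeExecutor instance is not shown in
// this fragment, and 'encoding' is a hypothetical float[] of length BatchSize * 112 * 64 laid out as
// this method expects before the LC0 conversion step):
//
//   ONNXRuntimeExecutorResultBatch results = onnxExecutor.Execute(isWDL: true, encoding,
//                                                                 numPositionsUsed: numPositions,
//                                                                 debuggingDump: false,
//                                                                 alreadyConvertedToLZ0: false);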
/// <summary>
/// Determines the nodes per second achieved at a specified batch size.
/// </summary>
/// <param name="evaluator">evaluator to benchmark</param>
/// <param name="batchSize">number of positions in the test batch</param>
/// <param name="latencyAdjustmentSecs">estimated fixed latency (in seconds) subtracted from the elapsed time</param>
/// <returns>estimated nodes per second at this batch size</returns>
static float NPSAtBatchSize(NNEvaluator evaluator, int batchSize, float latencyAdjustmentSecs)
{
  TimingStats statsBig = new TimingStats();
  EncodedPositionBatchFlat positions = MakeTestBatch(evaluator, batchSize);

  using (new TimingBlock(statsBig, TimingBlock.LoggingType.None))
  {
    IPositionEvaluationBatch result = evaluator.EvaluateIntoBuffers(positions, false);
  }

  float npsBatchBig = batchSize / ((float)statsBig.ElapsedTimeSecs - latencyAdjustmentSecs);
  return npsBatchBig;
}
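// Example (a sketch; the 0.0005f latency adjustment is an arbitrary illustrative value,
// not one taken from the surrounding code):
//
//   float nps64  = NPSAtBatchSize(evaluator, 64,  0.0005f);
//   float nps512 = NPSAtBatchSize(evaluator, 512, 0.0005f);
//   Console.WriteLine($"NPS @64: {nps64:F0}   NPS @512: {nps512:F0}");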
/// <summary>
/// Constructor for a NN evaluator (either local or remote) with specified parameters.
/// </summary>
/// <param name="evaluatorDef">definition of the evaluator (including its location, local or remote)</param>
/// <param name="evaluator">underlying NNEvaluator to which evaluation is delegated</param>
/// <param name="saveToCache">if evaluation results should be saved to the position cache</param>
/// <param name="lowPriority">if evaluation should run at low priority</param>
/// <param name="cache">position evaluation cache to be used</param>
/// <param name="batchEvaluatorIndexDynamicSelector">optional function that dynamically selects the evaluator index for a given search context</param>
public LeafEvaluatorNN(NNEvaluatorDef evaluatorDef, NNEvaluator evaluator, bool saveToCache, bool lowPriority,
                       PositionEvalCache cache, Func<MCTSIterator, int> batchEvaluatorIndexDynamicSelector)
{
  rawPosArray = posArrayPool.Rent(NNEvaluatorDef.MAX_BATCH_SIZE);

  EvaluatorDef = evaluatorDef;
  SaveToCache = saveToCache;
  LowPriority = lowPriority;
  Cache = cache;
  BatchEvaluatorIndexDynamicSelector = batchEvaluatorIndexDynamicSelector;

  Batch = new EncodedPositionBatchFlat(EncodedPositionType.PositionOnly, NNEvaluatorDef.MAX_BATCH_SIZE);

  if (evaluatorDef.Location == NNEvaluatorDef.LocationType.Local)
  {
    localEvaluator = evaluator;
  }
  else
  {
    throw new NotImplementedException();
  }

  // TODO: auto-estimate performance
#if SOMEDAY
  for (int i = 0; i < 10; i++)
  {
    // using (new TimingBlock("benchmark"))
    {
      float[] splits = WFEvalNetBenchmark.GetBigBatchNPSFractions(((WFEvalNetCompound)localEvaluator).Evaluators);
      Console.WriteLine(splits[0] + " " + splits[1] + " " + splits[2] + " " + splits[3]);

      (float estNPSSingletons, float estNPSBigBatch) = WFEvalNetBenchmark.EstNPS(localEvaluator);
      Console.WriteLine(estNPSSingletons + " " + estNPSBigBatch);
    }
  }
#endif
}
/// <summary>
/// Aggregates together all batches in the pending set into a single big batch.
/// </summary>
/// <returns>a single batch containing all pending positions</returns>
private IEncodedPositionBatchFlat AggregateBatches()
{
  // Concatenate all batches together into one big batch.
  // TODO: could we allocate these arrays once and then reuse for efficiency?
  int numPositions = NumPendingPositions;
  ulong[] posPlaneBitmaps = new ulong[numPositions * EncodedPositionWithHistory.NUM_PLANES_TOTAL];
  byte[] posPlaneValuesEncoded = new byte[numPositions * EncodedPositionWithHistory.NUM_PLANES_TOTAL];

  bool hasPositions = pendingBatches[0].Positions != null;
  bool hasMoves = pendingBatches[0].Moves != null;
  bool hasHashes = pendingBatches[0].PositionHashes != null;

  MGPosition[] positions = hasPositions ? new MGPosition[numPositions] : null;
  ulong[] positionHashes = hasHashes ? new ulong[numPositions] : null;
  MGMoveList[] moves = hasMoves ? new MGMoveList[numPositions] : null;

  int nextDestBitmapIndex = 0;
  int nextDestValueIndex = 0;
  int nextPositionIndex = 0;

  foreach (EncodedPositionBatchFlat thisBatch in pendingBatches)
  {
    // Each batch contributes NumPos * NUM_PLANES_TOTAL consecutive plane entries.
    int copyCount = thisBatch.NumPos * EncodedPositionWithHistory.NUM_PLANES_TOTAL;

    Array.Copy(thisBatch.PosPlaneBitmaps, 0, posPlaneBitmaps, nextDestBitmapIndex, copyCount);
    nextDestBitmapIndex += copyCount;

    Array.Copy(thisBatch.PosPlaneValues, 0, posPlaneValuesEncoded, nextDestValueIndex, copyCount);
    nextDestValueIndex += copyCount;

    if (hasPositions)
    {
      Array.Copy(thisBatch.Positions, 0, positions, nextPositionIndex, thisBatch.NumPos);
    }
    if (hasHashes)
    {
      Array.Copy(thisBatch.PositionHashes, 0, positionHashes, nextPositionIndex, thisBatch.NumPos);
    }
    if (hasMoves)
    {
      Array.Copy(thisBatch.Moves, 0, moves, nextPositionIndex, thisBatch.NumPos);
    }

    nextPositionIndex += thisBatch.NumPos;
  }

  EncodedPositionBatchFlat fullBatch = new EncodedPositionBatchFlat(posPlaneBitmaps, posPlaneValuesEncoded, null, null, null, numPositions);

  if (hasPositions)
  {
    fullBatch.Positions = positions;
  }
  if (hasHashes)
  {
    fullBatch.PositionHashes = positionHashes;
  }
  if (hasMoves)
  {
    fullBatch.Moves = moves;
  }

  return fullBatch;
}
/// <summary>
/// Estimates performance of evaluating either single positions or batches.
/// </summary>
/// <param name="evaluator">evaluator to benchmark</param>
/// <param name="computeBreaks">if the batch size break points should also be computed</param>
/// <param name="bigBatchSize">size of the large batch used for the big-batch estimate</param>
/// <param name="estimateSingletons">if the singleton (batch size 1) throughput should also be estimated</param>
/// <param name="numWarmups">number of warmup passes run before timing</param>
/// <returns>estimated NPS for singletons, estimated NPS for a big batch, and optionally the break points</returns>
public static (float NPSSingletons, float NPSBigBatch, int[] Breaks)
  EstNPS(NNEvaluator evaluator, bool computeBreaks = false, int bigBatchSize = 512,
         bool estimateSingletons = true, int numWarmups = 1)
{
  // NOTE: the test batches are created only once and reused across calls,
  //       so bigBatchSize is assumed to be the same on every call.
  if (batch1 == null)
  {
    batchBig = MakeTestBatch(evaluator, bigBatchSize);
    batch1 = MakeTestBatch(evaluator, 1);
  }

  IPositionEvaluationBatch result;

  // Run numerous batches to "warm up" the GPU (make sure it is in full power state).
  for (int i = 0; i < numWarmups; i++)
  {
    for (int j = 0; j < 100; j++)
    {
      evaluator.EvaluateIntoBuffers(batch1, false);
    }
    result = evaluator.EvaluateIntoBuffers(batchBig, false);
    for (int j = 0; j < 100; j++)
    {
      evaluator.EvaluateIntoBuffers(batch1, false);
    }
  }

  float npsSingletons = float.NaN;
  if (estimateSingletons)
  {
    // Singletons
    const int NUM_SINGLETONS = 20;
    TimingStats statsSingletons = new TimingStats();
    float accumulatedTimeSingletons = 0;

    for (int i = 0; i < NUM_SINGLETONS; i++)
    {
      using (new TimingBlock(statsSingletons, TimingBlock.LoggingType.None))
      {
        result = evaluator.EvaluateIntoBuffers(batch1, false);
        accumulatedTimeSingletons += (float)statsSingletons.ElapsedTimeSecs;
      }
    }
    npsSingletons = NUM_SINGLETONS / accumulatedTimeSingletons;
  }

  // Big batch
  TimingStats statsBig = new TimingStats();
  using (new TimingBlock(statsBig, TimingBlock.LoggingType.None))
  {
    // TODO: to defeat any possible caching, randomize the batch in some trivial way.
    result = evaluator.EvaluateIntoBuffers(batchBig, false);
  }
  float npsBatchBig = bigBatchSize / (float)statsBig.ElapsedTimeSecs;

  int[] breaks = computeBreaks ? FindBreaks(evaluator, 48, 432, 0) : null;

  return (npsSingletons, npsBatchBig, breaks);
}
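// Example benchmarking flow (a sketch; creation of the NNEvaluator from its definition is assumed
// to have happened elsewhere and is not shown in this fragment):
//
//   (float npsSingletons, float npsBigBatch, int[] breaks) = EstNPS(evaluator, computeBreaks: false, bigBatchSize: 512);
//   Console.WriteLine($"singletons: {npsSingletons:F0} NPS   big batch: {npsBigBatch:F0} NPS");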