示例#1
0
 /// <summary>
 /// Evaluates every position in the batch by delegating to EvaluateBatch.
 /// </summary>
 /// <param name="batch">Batch of encoded positions; all batch.NumPos positions are processed.</param>
 /// <param name="retrieveSupplementalResults">
 /// Must equal RetrieveValueFCActivations; a mismatch is rejected.
 /// </param>
 /// <returns>The evaluation results produced by EvaluateBatch.</returns>
 /// <exception cref="Exception">
 /// Thrown if retrieveSupplementalResults differs from RetrieveValueFCActivations.
 /// </exception>
 public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat batch, bool retrieveSupplementalResults = false) // ** MAKE VIRTUAL
 {
     if (RetrieveValueFCActivations != retrieveSupplementalResults)
     {
         // Report both the parameter name and the offending value
         // (the original message printed only the value and misspelled "match").
         throw new Exception("Value of parameter retrieveSupplementalResults (" + retrieveSupplementalResults + ") does not match constructor configuration");
     }

     return EvaluateBatch(batch, batch.NumPos, false, retrieveValueFCActivations: retrieveSupplementalResults);
 }
示例#2
0
        PositionEvaluationBatch DoEvaluateBatch(IEncodedPositionBatchFlat batch, int numToProcess, bool verbose = false,
                                                bool retrieveValueFCActivations = false) // ** MAKE VIRTUAL
        {
            if (batch.NumPos > Config.MaxBatchSize)
            {
                throw new Exception($"Requested batch size {batch.NumPos} for TensorRT exceeds specified maximum {Config.MaxBatchSize}");
            }
            if (batch.NumPos == 0)
            {
                throw new Exception("Empty batch");
            }

            NNEvaluatorStats.UpdateStatsForBatch(GPUID, numToProcess);

#if NOT
            if (Config.GPUID != TRT_GPU)
            {
                if (TRT_GPU != -1)
                {
                    throw new NotImplementedException("Implementation restriction: DLL TRTRun call not multithreaded");
                }
                TRT_GPU = Config.GPUID;
            }
#endif

            if (numToProcess <= 0)
            {
                throw new ArgumentOutOfRangeException($"numToProcess must be greater than zero {numToProcess}");
            }
            if (numToProcess > 2048)
            {
                throw new ArgumentOutOfRangeException("TensorRT engines are unlikely to be able to process >2048 positions");
            }
            //LZTrainingPositionServerBatch batchCalib = null; // no longer used LZTrainingPositionServerBatch.GenBatchFromPositions(CalibPositions);

            int numToProcessPadded = (int)MathUtils.RoundedUp(numToProcess, PADDING_ALIGN);

            //TimingStats stats = new TimingStats();
            //using (new TimingBlock(stats, TimingBlock.LoggingType.None)) slow
            {
                float[] floatsCalib    = null;                                              // batchCalib.EncodedPosExpandedAsFloats;
                float[] lz0FloatsCalib = null;                                              // ChessNetTFExecutor.RebuildInputsForLZ0Network(floatsCalib, numToProcess);

                const bool   USE_TOP_K        = true;                                       // !USE_TRT713;
                Span <float> rawResultsPolicy =
                    USE_TOP_K ? stackalloc float[Config.MaxBatchSize * NUM_TOPK_POLICY * 2] // one 4 byte entry for each index (as int 8), one 4 byte entry for each probability
                    : new float[numToProcessPadded * EncodedPolicyVector.POLICY_VECTOR_LENGTH];
                int         NUM_VALUE_OUTPUTS = (Config.IsWDL ? 3 : 1);
                Span <FP16> results           = stackalloc FP16[numToProcessPadded * NUM_VALUE_OUTPUTS];

                if (retrieveValueFCActivations)
                {
                    throw new Exception("The ONNX version of our TensorRT library does not expose inner layers");
                }
                float[] rawResultsConvValFlat = retrieveValueFCActivations ? new float[numToProcessPadded * NUM_VALUE_OUTPUTS * 32 * 64] : new float[1]; // Note: can't be null or empty, since we use in fixed statement below

#if NOTES
                NOTE              : cudaMallocHost was 1.5 slower than just cudaAlloc for the buffers[]
示例#3
0
 /// <summary>
 /// Evaluates a batch while holding the lock registered for this session,
 /// so that only one evaluation at a time runs against the executor.
 /// </summary>
 /// <param name="batch">Batch of encoded positions.</param>
 /// <param name="numToProcess">Number of positions to process (forwarded unchanged).</param>
 /// <param name="verbose">Forwarded unchanged to DoEvaluateBatch.</param>
 /// <param name="retrieveValueFCActivations">Forwarded unchanged to DoEvaluateBatch.</param>
 /// <returns>The result of DoEvaluateBatch.</returns>
 public PositionEvaluationBatch EvaluateBatch(IEncodedPositionBatchFlat batch, int numToProcess, bool verbose = false,
                                              bool retrieveValueFCActivations = false) // ** MAKE VIRTUAL
 {
     PositionEvaluationBatch evaluated;

     // The executor does not support parallel operations, so all access is serialized per session.
     lock (sessionActiveLocks[SessionID])
     {
         evaluated = DoEvaluateBatch(batch, numToProcess, verbose, retrieveValueFCActivations);
     }

     return evaluated;
 }
示例#4
0
        /// <summary>
        /// Evaluates the batch using the next evaluator in round-robin order.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Forwarded unchanged to the chosen evaluator.</param>
        /// <returns>The result returned by the chosen evaluator.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            // Select the evaluator for this call and advance (with wrap-around) BEFORE evaluating.
            // Previously the wrap-around happened only after the evaluation returned, so an exception
            // thrown by the last evaluator left nextIndex equal to Evaluators.Length and every
            // subsequent call failed with an out-of-range index.
            int thisIndex = nextIndex;
            nextIndex = (thisIndex + 1) % Evaluators.Length;

            return Evaluators[thisIndex].EvaluateIntoBuffers(positions, retrieveSupplementalResults);
        }
示例#5
0
        /// <summary>
        /// Overrides worker method to evaluate a specified batch into internal buffers.
        /// The batch is first flattened into a temporary float buffer rented from the shared array pool.
        /// </summary>
        /// <param name="batch">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Forwarded unchanged to DoEvaluateBatch.</param>
        /// <returns>The result of DoEvaluateBatch over the flattened values.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat batch, bool retrieveSupplementalResults = false)
        {
            // NOTE(review): 112 * 64 is presumably (input planes) * (squares) per position - confirm.
            int bufferLength = 112 * batch.NumPos * 64;

            // Rent may hand back an array longer than requested; only the first bufferLength entries are meaningful.
            float[] flatValues = ArrayPool <float> .Shared.Rent(bufferLength);
            try
            {
                batch.ValuesFlatFromPlanes(flatValues);
                PositionEvaluationBatch ret = DoEvaluateBatch(flatValues, batch.NumPos, retrieveSupplementalResults);
                return(ret);
            }
            finally
            {
                // Always hand the buffer back, even if flattening or evaluation throws,
                // so a failing evaluation does not drain the shared pool.
                ArrayPool <float> .Shared.Return(flatValues);
            }
        }
示例#6
0
        /// <summary>
        /// Implements virtual method to evaluate a specified batch.
        /// This may block for some time before executing,
        /// waiting for more additions to be made to the pooled batch.
        ///
        /// The supplied batch is appended to the current pooled batch; the calling thread
        /// then waits on that pooled batch's completion event and returns only
        /// the sub-batch result at the position where its batch was appended.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to add to the current pooled batch.</param>
        /// <param name="retrieveSupplementalResults">Must equal RetrieveSupplementalResults, otherwise an exception is thrown.</param>
        /// <returns>The entry of the pooled batch's completedBatches corresponding to this call.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            if (retrieveSupplementalResults != RetrieveSupplementalResults)
            {
                throw new Exception("Internal error: Requested unexpected retrieveSupplementalResults");
            }


            // Launch if the current batch already exceeds threshold number of positions
            // to avoid overflow and also because there is little benefit to accumulate more.
            // The check is repeated under the lock until the pending count is at or below the threshold.
            // NOTE(review): termination relies on Launch() reducing/replacing currentPooledBatch - confirm.
            while (true)
            {
                lock (lockObj)
                {
                    int currentPendingPositions = currentPooledBatch.NumPendingPositions;
                    if (currentPendingPositions > DEFAULT_BATCH_THRESHOLD)
                    {
                        Launch();
                    }
                    else
                    {
                        break;
                    }
                }
            }


            int batchIndex;
            NNEvaluatorPoolBatch poolBatch;

            // NOTE(review): the lock is released between the threshold check above and this registration,
            // so other threads may add batches in between - confirm this is acceptable.
            lock (lockObj)
            {
                poolBatch  = currentPooledBatch;// grab local copy of this since it may change upon next set of batches
                // Remember the slot this batch occupies so the matching result can be picked out afterwards.
                batchIndex = poolBatch.pendingBatches.Count;
                poolBatch.pendingBatches.Add(positions);

//        if (positions.Positions == null)
//          throw new Exception("missing **********************");
            }

            // Wait until we are signalled that this pooled batch has completed processing
            poolBatch.batchesDoneEvent.Wait();

            // Debug-only sanity check: the first value of our sub-batch result must not be NaN.
            Debug.Assert(!float.IsNaN(poolBatch.completedBatches[batchIndex].GetV(0)));

            // Now that the batch has finished, return just the sub-batch that was requested in this call.
            return(poolBatch.completedBatches[batchIndex]);
        }
示例#7
0
        /// <summary>
        /// Routes the batch to one of the underlying evaluators and returns its result.
        /// The evaluator is chosen by DynamicEvaluatorIndexPredicate when one is set,
        /// otherwise by the batch's own PreferredEvaluatorIndex.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Forwarded unchanged to the chosen evaluator.</param>
        /// <returns>The result returned by the chosen evaluator.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions,
                                                                     bool retrieveSupplementalResults = false)
        {
            int evaluatorIndex = DynamicEvaluatorIndexPredicate != null
                ? DynamicEvaluatorIndexPredicate(positions)
                : positions.PreferredEvaluatorIndex;

            return Evaluators[evaluatorIndex].EvaluateIntoBuffers(positions, retrieveSupplementalResults);
        }
示例#8
0
        /// <summary>
        /// Produces pseudo-random evaluations for a batch. The value outputs are derived from a hash
        /// of each position's planes (so equal positions receive equal values), and each policy is
        /// marked as a request for randomization.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Not supported; must be false.</param>
        /// <returns>A new PositionEvaluationBatch holding the generated values and policies.</returns>
        /// <exception cref="NotImplementedException">Thrown if retrieveSupplementalResults is true.</exception>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            // Reject the unsupported option up front rather than after generating the whole batch.
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            TimingStats timingStats = new TimingStats();

            using (new TimingBlock("EvalBatch", timingStats, TimingBlock.LoggingType.None))
            {
                CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];

                FP16[] w = new FP16[positions.NumPos];
                FP16[] l = IsWDL ? new FP16[positions.NumPos] : null;
                FP16[] m = IsWDL ? new FP16[positions.NumPos] : null;

                for (int i = 0; i < positions.NumPos; i++)
                {
                    int hashPos = HashInRange(positions.PosPlaneBitmaps, i * EncodedPositionWithHistory.NUM_PLANES_TOTAL, EncodedPositionWithHistory.NUM_PLANES_TOTAL);

                    // Math.Abs(int.MinValue) throws OverflowException, so map that single value
                    // to int.MaxValue; every other hash yields exactly the same result as before.
                    int absHash = hashPos == int.MinValue ? int.MaxValue : Math.Abs(hashPos);
                    hashPos = absHash ^ 172854;

                    // Generate value
                    if (IsWDL)
                    {
                        GenerateRandValue(hashPos, ref w[i], ref l[i]);
                        m[i] = 30 + i % 7;
                    }
                    else
                    {
                        // Loss output is not kept for non-WDL networks.
                        FP16 dummyL = 0;
                        GenerateRandValue(hashPos, ref w[i], ref dummyL);
                    }

                    // Initialize policies. Mark them as requests to be random
                    // (the actual randomization will be done during search, when we have the set of legal moves handy)
                    // TODO: if the batch also contains Positions already, we could do the assignment now
                    CompressedPolicyVector.InitializeAsRandom(ref policies[i], Type == RandomType.WidePolicy);
                }

                float[] supplemental = null;

                return(new PositionEvaluationBatch(IsWDL, HasM, positions.NumPos, policies, w, l, m, supplemental, timingStats));
            }
        }
示例#9
0
        /// <summary>
        /// Evaluates the batch through the wrapped Evaluator, then runs PreparePosition
        /// for every position index in parallel and packages the results.
        /// </summary>
        /// <param name="positions">Batch of encoded positions; its Moves must be non-null.</param>
        /// <param name="retrieveSupplementalResults">Not supported; must be false.</param>
        /// <returns>A new PositionEvaluationBatch built over policies, w, l and m.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            // Preconditions (checked in this order).
            if (positions.Moves == null)
            {
                throw new Exception("NNEvaluatorLC0NNEvaluator requires Moves to be provided");
            }

            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException("retrieveSupplementalResults not supported");
            }

            Evaluator.EvaluateNN(positions, positions.Positions);

            // Post-process each position, handing roughly this many positions to each worker.
            const int POSITIONS_PER_WORKER = 40;
            int       positionCount        = positions.NumPos;
            Parallel.For(0, positionCount, ParallelUtils.ParallelOptions(positionCount, POSITIONS_PER_WORKER), PreparePosition);

            TimingStats emptyStats = new TimingStats();
            return new PositionEvaluationBatch(IsWDL, HasM, positions.NumPos, policies, w, l, m, null, emptyStats);
        }
示例#10
0
        /// <summary>
        /// Runs the pending sub-batches through the evaluator in a single call:
        ///   - merges the pending batches into one batch (or uses the sole batch directly),
        ///   - evaluates that batch,
        ///   - splits the evaluation back into per-sub-batch results stored in completedBatches.
        /// </summary>
        /// <param name="evaluator">Evaluator used to process the combined batch.</param>
        /// <param name="retrieveSupplementalResults">Forwarded to the evaluator and to DisaggregateBatches.</param>
        internal void ProcessPooledBatch(NNEvaluator evaluator, bool retrieveSupplementalResults)
        {
            // With exactly one pending batch there is nothing to merge; otherwise aggregate.
            IEncodedPositionBatchFlat combined = pendingBatches.Count == 1
                ? pendingBatches[0]
                : AggregateBatches();

            // Evaluate everything at once; the result is expected to be a PositionEvaluationBatch.
            PositionEvaluationBatch combinedResult =
                (PositionEvaluationBatch)evaluator.EvaluateIntoBuffers(combined, retrieveSupplementalResults);

            completedBatches = DisaggregateBatches(retrieveSupplementalResults, combinedResult, pendingBatches);
        }
示例#11
0
            // Returns the slice of fullBatch assigned to split number thisSplitIndex.
            // Slice boundaries come from the cumulative form of splitFracs scaled by the number
            // of positions; the last split always extends to the end of the batch.
            IEncodedPositionBatchFlat GetSubBatch(IEncodedPositionBatchFlat fullBatch, float[] splitFracs, int thisSplitIndex)
            {
                float[] cumulativeFracs = ToCumulative(splitFracs);

                // Position index at which the given split begins.
                int OffsetOfSplit(int splitIndex) => (int)(fullBatch.NumPos * cumulativeFracs[splitIndex]);

                int firstPos = OffsetOfSplit(thisSplitIndex);

                // Only look up the next boundary when this is not the final split.
                int endPos = thisSplitIndex == splitFracs.Length - 1
                    ? fullBatch.NumPos
                    : OffsetOfSplit(thisSplitIndex + 1);

                return fullBatch.GetSubBatchSlice(firstPos, endPos - firstPos);
            }
示例#12
0
 /// <summary>
 /// Evaluates batch of positions into the buffers local to this object.
 ///
 /// Note that the batch returned is built over the local buffers
 /// and may be overwritten upon next call to this method.
 ///
 /// Therefore this method is intended only for low-level use,
 /// where the caller consumes (or copies) the results before evaluating again.
 /// </summary>
 /// <param name="positions">Batch of encoded positions to evaluate.</param>
 /// <param name="retrieveSupplementalResults">Whether supplemental results are requested; support is up to the implementing subclass.</param>
 /// <returns>The evaluation results for the batch.</returns>
 public abstract IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false);
示例#13
0
        /// <summary>
        /// Evaluates specified batch into internal buffers (via the base implementation)
        /// and then compares the outputs of the first two sub-evaluators (subResults[0] and subResults[1]),
        /// writing any value or policy discrepancies to the console.
        /// </summary>
        /// <param name="positions">Batch of encoded positions to evaluate.</param>
        /// <param name="retrieveSupplementalResults">Forwarded unchanged to the base implementation.</param>
        /// <returns>The result of the base implementation, unmodified.</returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            IPositionEvaluationBatch result = base.EvaluateIntoBuffers(positions, retrieveSupplementalResults);

            int   numVOK        = 0;
            int   numPolicyOK   = 0;
            float maxPolicyDiff = 0;

            for (int i = 0; i < positions.NumPos; i++)
            {
                // Value estimate of each evaluator expressed as (win probability - loss probability).
                float v0 = subResults[0].GetWinP(i) - subResults[0].GetLossP(i);
                float v1 = subResults[1].GetWinP(i) - subResults[1].GetLossP(i);

                // Check W/D/L
                if (MathF.Abs(v0 - v1) > 0.02)
                {
                    Console.WriteLine($"WFEvalNetCompare V discrepancy: {i,6:F0} {v0,7:F3} {v1,7:F3}");
                }
                else
                {
                    numVOK++;
                }

                (Memory <CompressedPolicyVector> policiesArray0, int policyIndex0) = subResults[0].GetPolicy(i);
                CompressedPolicyVector thesePolicies0 = policiesArray0.Span[policyIndex0];
                (Memory <CompressedPolicyVector> policiesArray1, int policyIndex1) = subResults[1].GetPolicy(i);
                CompressedPolicyVector thesePolicies1 = policiesArray1.Span[policyIndex1];

                float[] policies0 = thesePolicies0.DecodedAndNormalized;
                float[] policies1 = thesePolicies1.DecodedAndNormalized;

                // Largest significant (above-tolerance) absolute policy difference seen for this position.
                float maxDiff = 0;

                for (int p = 0; p < policies0.Length; p++)
                {
                    float diff = MathF.Abs(policies0[p] - policies1[p]);

                    // Tolerance is relative to the AVERAGE of the two probabilities, with an absolute floor.
                    // (Previously only policies1[p] was halved because the parentheses were missing.)
                    float average   = (policies0[p] + policies1[p]) * 0.5f;
                    float tolerance = Math.Max(0.03f, 0.07f * MathF.Abs(average));

                    if (diff > maxDiff && (diff > tolerance))
                    {
                        if (maxDiff == 0)
                        {
                            // First significant discrepancy for this position: emit the header once.
                            Console.WriteLine("WFEvalNetCompare policy discrepancies:");
                        }

                        // Track the absolute difference. Storing the signed difference (as before) could make
                        // maxDiff negative, which broke the running maximum and the summary statistics.
                        maxDiff = diff;
                        Console.WriteLine($"  {p,6} {policies0[p], 6:F3} { policies1[p], 6:F3}");
                    }
                }

                if (maxDiff == 0)
                {
                    numPolicyOK++;
                }
                else if (maxDiff > maxPolicyDiff)
                {
                    maxPolicyDiff = maxDiff;
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine();
                Console.WriteLine($"{numVOK} of {positions.NumPos} had approximately equal W/D/L scores between the first two WFEvalNetCompare");
                Console.WriteLine($"{numPolicyOK} of {positions.NumPos} had all policies good, worse significant difference {maxPolicyDiff}");
            }

            return(result);
        }
示例#14
0
        /// <summary>
        /// Implementation of virtual method to actually evaluate the batch.
        /// </summary>
        /// <param name="positions"></param>
        /// <param name="retrieveSupplementalResults"></param>
        /// <returns></returns>
        public override IPositionEvaluationBatch EvaluateIntoBuffers(IEncodedPositionBatchFlat positions, bool retrieveSupplementalResults = false)
        {
            if (retrieveSupplementalResults)
            {
                throw new NotImplementedException();
            }

            if (positions.NumPos <= MinSplitSize)
            {
                // Too small to profitably split across multiple devices
                return(Evaluators[indexPerferredEvalator].EvaluateIntoBuffers(positions, retrieveSupplementalResults));
            }
            else
            {
                // TODO: someday we could use the idea already used in LZTrainingPositionServerBatchSlice
                //       and construct custom WFEvaluationBatch which are just using approrpiate Memory slices
                //       Need to create a new constructor for WFEvaluationBatch
                IPositionEvaluationBatch[] results = new IPositionEvaluationBatch[Evaluators.Length];

                List <Task> tasks         = new List <Task>();
                int[]       subBatchSizes = new int[Evaluators.Length];
                for (int i = 0; i < Evaluators.Length; i++)
                {
                    int capI = i;
                    IEncodedPositionBatchFlat thisSubBatch = GetSubBatch(positions, PreferredFractions, capI);
                    subBatchSizes[capI] = thisSubBatch.NumPos;
                    tasks.Add(Task.Run(() => results[capI] = Evaluators[capI].EvaluateIntoBuffers(thisSubBatch, retrieveSupplementalResults)));
                }
                Task.WaitAll(tasks.ToArray());

                if (UseMergedBatch)
                {
                    return(new PositionsEvaluationBatchMerged(results, subBatchSizes));
                }
                else
                {
                    CompressedPolicyVector[] policies = new CompressedPolicyVector[positions.NumPos];
                    FP16[] w = new FP16[positions.NumPos];
                    FP16[] l = new FP16[positions.NumPos];
                    FP16[] m = new FP16[positions.NumPos];

                    bool isWDL = results[0].IsWDL;
                    bool hasM  = results[0].HasM;

                    int nextPosIndex = 0;
                    for (int i = 0; i < Evaluators.Length; i++)
                    {
                        PositionEvaluationBatch resultI = (PositionEvaluationBatch)results[i];
                        int thisNumPos = resultI.NumPos;

                        resultI.Policies.CopyTo(new Memory <CompressedPolicyVector>(policies).Slice(nextPosIndex, thisNumPos));
                        resultI.W.CopyTo(new Memory <FP16>(w).Slice(nextPosIndex, thisNumPos));

                        if (isWDL)
                        {
                            resultI.L.CopyTo(new Memory <FP16>(l).Slice(nextPosIndex, thisNumPos));
                            resultI.M.CopyTo(new Memory <FP16>(m).Slice(nextPosIndex, thisNumPos));
                        }

                        nextPosIndex += thisNumPos;
                    }

                    TimingStats stats = new TimingStats();
                    return(new PositionEvaluationBatch(isWDL, hasM, positions.NumPos, policies, w, l, m, null, stats));
                }
            }
示例#15
0
 /// <summary>
 /// Creates a slice that refers to a range of positions within an existing flat batch.
 /// The constructor only records its arguments; no validation or copying is performed here.
 /// </summary>
 /// <param name="parent">Batch from which the slice is taken.</param>
 /// <param name="startIndex">Index within the parent at which the slice begins.</param>
 /// <param name="length">Number of positions in the slice.</param>
 public EncodedPositionBatchFlatSlice(IEncodedPositionBatchFlat parent, int startIndex, int length)
 {
     this.Parent = parent;
     this.StartIndex = startIndex;
     this.Length = length;
 }