/// <summary>
/// Collects the active feature indices for both perspectives of a position.
/// </summary>
/// <param name="pos">Position whose piece list is converted into feature indices.</param>
/// <param name="active">
/// Span with at least two entries; entry 0 receives the indices for perspective 0
/// and entry 1 the indices for perspective 1.
/// </param>
protected void AppendActiveIndices(NnuePosition pos, Span<IndexList2> active)
{
    // Exactly two perspectives exist, so the calls are written out explicitly.
    HalfKpAppendActiveIndices(pos, 0, ref active[0]);
    HalfKpAppendActiveIndices(pos, 1, ref active[1]);
}
/// <summary>
/// Recomputes the accumulator of the current position (<c>pos.Nnue[0]</c>) from scratch:
/// each perspective starts from the feature-transformer biases and then adds the weight
/// column of every active feature.
/// </summary>
/// <param name="pos">Position whose accumulator is rebuilt.</param>
private unsafe void RefreshAccumulator(NnuePosition pos)
{
    // NOTE(review): the flag set at the end only persists if NnueAccumulator is a
    // reference type (or Nnue[0].accumulator returns by reference) - confirm.
    var accumulator = pos.Nnue[0].accumulator;

    Span<IndexList2> features = stackalloc IndexList2[2];
    AppendActiveIndices(pos, features);

    var biases = _parameters.FeatureTransformer.Biases;
    var weights = _parameters.FeatureTransformer.Weights;

    for (var color = 0; color < 2; color++)
    {
        var target = accumulator.accumulation[color];

        // Start from the biases ...
        Array.Copy(biases, target, kHalfDimensions);

        // ... then add one weight column of kHalfDimensions entries per active feature.
        for (uint n = 0; n < features[color].size; n++)
        {
            uint feature = features[color].values[n];
            uint column = kHalfDimensions * feature;
            for (uint j = 0; j < kHalfDimensions; j++)
            {
                target[j] += weights[column + j];
            }
        }
    }

    accumulator.computedAccumulation = true;
}
/// <summary>
/// Produces the feature-transformer output for the network using AVX2.
/// The accumulator is brought up to date first (incrementally if possible, otherwise by a
/// full refresh); then, for each perspective, pairs of 16-bit accumulator vectors are packed
/// into signed-saturated 8-bit vectors and written to <paramref name="output"/>.
/// </summary>
/// <param name="pos">Position to transform; <c>pos.Player</c> selects which accumulator half comes first.</param>
/// <param name="output">Receives the packed 8-bit values; the second perspective starts at offset <c>kHalfDimensions</c>.</param>
/// <param name="outMask">
/// Receives one 32-bit mask per packed vector; a bit is set when the corresponding
/// packed byte is strictly greater than zero.
/// </param>
private unsafe void TransformAvx2(NnuePosition pos, Span<sbyte> output, Span<uint> outMask)
{
    if (!UpdateAccumulator(pos))
    {
        RefreshAccumulatorAvx2(pos);
    }

    var accumulation = pos.Nnue[0].accumulator.accumulation;

    // Stack-allocated like the scalar Transform: avoids a heap allocation on every evaluation.
    Span<int> perspectives = stackalloc int[] { pos.Player, pos.Player ^ 1 };

    const uint numChunks = (16 * kHalfDimensions) / SimdWidth;
    var outputMaskIndex = 0;

    for (uint perspective = 0; perspective < 2; perspective++)
    {
        var offset = kHalfDimensions * perspective;

        fixed (sbyte* outputPtr = output)
        fixed (short* accumulationsPtr = accumulation[perspectives[(int)perspective]])
        {
            var source = (Vector256<short>*)accumulationsPtr;
            var outEntry = (Vector256<sbyte>*)&outputPtr[offset];

            // Each iteration consumes two 16-bit vectors and emits one 8-bit vector.
            for (uint i = 0; i < numChunks / 2; i++)
            {
                var packed = Avx2.PackSignedSaturate(source[i * 2], source[i * 2 + 1]);
                outEntry[i] = packed;

                // Bit n of the mask is set when packed byte n is > 0.
                outMask[outputMaskIndex++] =
                    (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(packed, Vector256<sbyte>.Zero));
            }
        }
    }
}
/// <summary>
/// Appends one feature index per listed piece to <paramref name="active"/>, as seen from
/// perspective <paramref name="c"/>.
/// </summary>
/// <param name="pos">Position providing the parallel <c>Pieces</c> / <c>Squares</c> lists.</param>
/// <param name="c">Perspective; also the slot in <c>pos.Squares</c> read as that side's king square.</param>
/// <param name="active">Index list that receives the computed indices; its <c>size</c> is advanced.</param>
private unsafe void HalfKpAppendActiveIndices(NnuePosition pos, int c, ref IndexList2 active)
{
    int rawKingSquare = pos.Squares[c];
    int kingSquare = orient(c, rawKingSquare);

    // Scanning starts at slot 2 and stops at the first zero entry in Pieces.
    // NOTE(review): slots 0 and 1 presumably hold the two kings - confirm against the caller.
    int slot = 2;
    while (pos.Pieces[slot] != 0)
    {
        int square = pos.Squares[slot];
        int piece = pos.Pieces[slot];
        active.values[active.size++] = make_index(c, square, piece, kingSquare);
        slot++;
    }
}
/// <summary>
/// Evaluates <paramref name="pos"/> with the scalar code path: feature transform,
/// two affine layers of 32 outputs each, and a final propagation step.
/// </summary>
/// <param name="pos">Position to evaluate.</param>
/// <returns>The raw network output divided by 16.</returns>
public override int Evaluate(NnuePosition pos)
{
    const int hiddenWidth = 32;

    Span<sbyte> transformed = stackalloc sbyte[FtOutDims];
    Span<sbyte> firstHidden = stackalloc sbyte[hiddenWidth];
    Span<sbyte> secondHidden = stackalloc sbyte[hiddenWidth];

    Transform(pos, transformed);

    AffineTransform(
        transformed, firstHidden, FtOutDims, hiddenWidth,
        _parameters.Hidden1.Biases, _parameters.Hidden1.Weights);

    AffineTransform(
        firstHidden, secondHidden, hiddenWidth, hiddenWidth,
        _parameters.Hidden2.Biases, _parameters.Hidden2.Weights);

    var raw = AffinePropagate(secondHidden, _parameters.Output.Biases, _parameters.Output.Weights);

    // The raw output is scaled down by a fixed factor of 16.
    return raw / 16;
}
/// <summary>
/// AVX2 variant of the full accumulator refresh. For each perspective the accumulator is
/// processed in tiles of <c>TileHeight</c> values: a tile is seeded with the biases, the
/// matching weight tile of every active feature is added, and the result is stored.
/// </summary>
/// <param name="pos">Position whose accumulator (<c>pos.Nnue[0]</c>) is rebuilt.</param>
private unsafe void RefreshAccumulatorAvx2(NnuePosition pos)
{
    // NOTE(review): the flag set at the end only persists if NnueAccumulator is a
    // reference type (or Nnue[0].accumulator returns by reference) - confirm.
    var accumulator = pos.Nnue[0].accumulator;

    Span<IndexList2> features = stackalloc IndexList2[2];
    AppendActiveIndices(pos, features);

    // Scratch registers holding one tile while it is being summed.
    var registers = stackalloc Vector256<short>[RegisterCount];

    fixed (short* biasPtr = _parameters.FeatureTransformer.Biases)
    fixed (short* weightsPtr = _parameters.FeatureTransformer.Weights)
    {
        for (int color = 0; color < 2; color++)
        {
            fixed (short* accumulationsPtr = accumulator.accumulation[color])
            {
                for (uint tile = 0; tile < kHalfDimensions / TileHeight; tile++)
                {
                    var biasTile = (Vector256<short>*)(&biasPtr[tile * TileHeight]);
                    var outTile = (Vector256<short>*)(&accumulationsPtr[tile * TileHeight]);

                    // Seed the tile with the biases.
                    for (var r = 0; r < RegisterCount; r++)
                    {
                        registers[r] = biasTile[r];
                    }

                    // Add the weight tile of every active feature.
                    for (var n = 0; n < features[color].size; n++)
                    {
                        uint feature = features[color].values[n];
                        uint start = kHalfDimensions * feature + tile * TileHeight;
                        Vector256<short>* weightTile = (Vector256<short>*)&weightsPtr[start];
                        for (uint r = 0; r < RegisterCount; r++)
                        {
                            registers[r] = Avx2.Add(registers[r], weightTile[r]);
                        }
                    }

                    // Write the finished tile back to the accumulator.
                    for (uint r = 0; r < RegisterCount; r++)
                    {
                        outTile[r] = registers[r];
                    }
                }
            }
        }
    }

    accumulator.computedAccumulation = true;
}
/// <summary>
/// Scalar feature transform. Ensures the accumulator is current (incremental update if
/// possible, full refresh otherwise) and writes each accumulator value, clamped to
/// [0, 127], into <paramref name="output"/>.
/// </summary>
/// <param name="pos">Position to transform; <c>pos.Player</c> selects which accumulator half is written first.</param>
/// <param name="output">Receives <c>kHalfDimensions</c> values per perspective, side-to-move half first.</param>
private void Transform(NnuePosition pos, Span<sbyte> output)
{
    bool updatedIncrementally = UpdateAccumulator(pos);
    if (!updatedIncrementally)
    {
        RefreshAccumulator(pos);
    }

    var accumulation = pos.Nnue[0].accumulator.accumulation;
    Span<int> perspectives = stackalloc int[] { pos.Player, pos.Player ^ 1 };

    for (var perspective = 0; perspective < 2; perspective++)
    {
        var source = accumulation[perspectives[perspective]];
        var offset = kHalfDimensions * perspective;

        for (var i = 0; i < kHalfDimensions; i++)
        {
            short sum = source[i];
            output[offset + i] = (sbyte)Clamp((int)sum, 0, 127);
        }
    }
}
/// <summary>
/// Determines, for both perspectives, which features were removed and added relative to
/// the accumulator that will be used as the base for an incremental update.
/// </summary>
/// <param name="pos">Position whose dirty-piece records are inspected.</param>
/// <param name="removed">Per-perspective list receiving the removed feature indices.</param>
/// <param name="added">Per-perspective list receiving the added feature indices.</param>
/// <param name="reset">
/// Per-perspective flag; set to <c>true</c> when the first dirty piece is that perspective's
/// king, in which case <paramref name="added"/> holds the complete active set instead.
/// </param>
protected void AppendChangedIndices(NnuePosition pos, Span<IndexList2> removed, Span<IndexList2> added, Span<bool> reset)
{
    var latest = pos.Nnue[0].dirtyPiece;
    Debug.Assert(latest.dirtyNum != 0);

    // Case 1: the accumulator one entry back is valid, so only the latest change matters.
    if (pos.Nnue[1].accumulator.computedAccumulation)
    {
        for (byte color = 0; color < 2; color++)
        {
            bool kingMoved = latest.pc[0] == GetKing(color);
            reset[color] = kingMoved;

            if (kingMoved)
            {
                HalfKpAppendActiveIndices(pos, color, ref added[color]);
                continue;
            }

            HaldKpAppendChangedIndices(pos, color, latest, ref removed[color], ref added[color]);
        }

        return;
    }

    // Case 2: otherwise the changes recorded in Nnue[0] and Nnue[1] are both applied.
    var earlier = pos.Nnue[1].dirtyPiece;
    for (byte color = 0; color < 2; color++)
    {
        bool kingMoved = latest.pc[0] == GetKing(color) || earlier.pc[0] == GetKing(color);
        reset[color] = kingMoved;

        if (kingMoved)
        {
            HalfKpAppendActiveIndices(pos, color, ref added[color]);
            continue;
        }

        HaldKpAppendChangedIndices(pos, color, latest, ref removed[color], ref added[color]);
        HaldKpAppendChangedIndices(pos, color, earlier, ref removed[color], ref added[color]);
    }
}
/// <summary>
/// Evaluates <paramref name="pos"/> with the AVX2 code path: feature transform,
/// two affine layers of 32 outputs each, and a final propagation step.
/// </summary>
/// <param name="pos">Position to evaluate.</param>
/// <returns>The raw network output divided by 16.</returns>
public override int Evaluate(NnuePosition pos)
{
    const int hiddenWidth = 32;

    // One mask bit per transformed input value.
    Span<uint> inputMask = stackalloc uint[FtOutDims / (8 * sizeof(uint))];
    Span<uint> hidden1Mask = stackalloc uint[8 / sizeof(uint)];
    // Empty span handed to the second layer as its output mask.
    Span<uint> unusedMask = stackalloc uint[0];

    Span<sbyte> transformed = stackalloc sbyte[FtOutDims];
    Span<sbyte> firstHidden = stackalloc sbyte[hiddenWidth];
    Span<sbyte> secondHidden = stackalloc sbyte[hiddenWidth];

    TransformAvx2(pos, transformed, inputMask);

    // NOTE(review): the trailing bool presumably tells the layer whether to fill its
    // output mask - confirm against AffineTransformAvx2.
    AffineTransformAvx2(
        transformed, firstHidden, FtOutDims,
        _parameters.Hidden1.Biases, _parameters.Hidden1.Weights,
        inputMask, hidden1Mask, true);

    AffineTransformAvx2(
        firstHidden, secondHidden, hiddenWidth,
        _parameters.Hidden2.Biases, _parameters.Hidden2.Weights,
        hidden1Mask, unusedMask, false);

    var raw = AffinePropagateAvx2(secondHidden, _parameters.Output.Biases, _parameters.Output.Weights);

    // The raw output is scaled down by a fixed factor of 16.
    return raw / 16;
}
/// <summary>
/// Translates one dirty-piece record into removed / added feature indices for the given
/// perspective. King entries are skipped.
/// </summary>
/// <param name="pos">Position providing the king square of <paramref name="color"/>.</param>
/// <param name="color">Perspective; also the slot in <c>pos.Squares</c> read as the king square.</param>
/// <param name="dirtyPiece">Record listing the changed pieces with their from / to squares.</param>
/// <param name="removed">Receives an index for every entry whose <c>from</c> square is not 64.</param>
/// <param name="added">Receives an index for every entry whose <c>to</c> square is not 64.</param>
private unsafe void HaldKpAppendChangedIndices(NnuePosition pos, byte color, NnueDirtyPiece dirtyPiece, ref IndexList2 removed, ref IndexList2 added)
{
    int rawKingSquare = pos.Squares[color];
    int kingSquare = orient(color, rawKingSquare);

    for (int entry = 0; entry < dirtyPiece.dirtyNum; entry++)
    {
        var piece = dirtyPiece.pc[entry];
        if (!IsKing(piece))
        {
            // NOTE(review): 64 appears to be the "no square" marker (one past the last
            // board square) - confirm against the code that fills the dirty-piece record.
            var origin = dirtyPiece.from[entry];
            if (origin != 64)
            {
                removed.values[removed.size++] = make_index(color, origin, piece, kingSquare);
            }

            var destination = dirtyPiece.to[entry];
            if (destination != 64)
            {
                added.values[added.size++] = make_index(color, destination, piece, kingSquare);
            }
        }
    }
}
/// <summary>
/// Tries to bring the accumulator of the current position up to date incrementally, using
/// an already computed accumulator one or two entries back as the starting point.
/// </summary>
/// <param name="pos">Position whose accumulator (<c>pos.Nnue[0]</c>) is updated.</param>
/// <returns>
/// <c>true</c> when the accumulator is valid on return (already computed or updated here);
/// <c>false</c> when no computed earlier accumulator is available and a full refresh is needed.
/// </returns>
private unsafe bool UpdateAccumulator(NnuePosition pos)
{
    // NOTE(review): the flag set at the end only persists if NnueAccumulator is a
    // reference type (or Nnue[0].accumulator returns by reference) - confirm.
    var accumulator = pos.Nnue[0].accumulator;
    if (accumulator.computedAccumulation)
    {
        return true;
    }

    // Pick the nearest earlier accumulator that has been computed.
    NnueAccumulator baseline;
    if (pos.NnueCount > 0 && pos.Nnue[1].accumulator.computedAccumulation)
    {
        baseline = pos.Nnue[1].accumulator;
    }
    else if (pos.NnueCount > 1 && pos.Nnue[2].accumulator.computedAccumulation)
    {
        baseline = pos.Nnue[2].accumulator;
    }
    else
    {
        return false;
    }

    Span<IndexList2> removedFeatures = stackalloc IndexList2[2];
    Span<IndexList2> addedFeatures = stackalloc IndexList2[2];
    Span<bool> reset = stackalloc bool[2];
    AppendChangedIndices(pos, removedFeatures, addedFeatures, reset);

    var weights = _parameters.FeatureTransformer.Weights;

    for (var color = 0; color < 2; color++)
    {
        var target = accumulator.accumulation[color];

        if (!reset[color])
        {
            // Start from the earlier accumulator and subtract the deactivated features.
            Array.Copy(baseline.accumulation[color], target, kHalfDimensions);

            for (uint n = 0; n < removedFeatures[color].size; n++)
            {
                uint feature = removedFeatures[color].values[n];
                uint column = kHalfDimensions * feature;
                for (uint j = 0; j < kHalfDimensions; j++)
                {
                    target[j] -= weights[column + j];
                }
            }
        }
        else
        {
            // Reset requested: start from the biases; the added list holds every active feature.
            Array.Copy(_parameters.FeatureTransformer.Biases, target, kHalfDimensions);
        }

        // Add the activated features.
        for (uint n = 0; n < addedFeatures[color].size; n++)
        {
            uint feature = addedFeatures[color].values[n];
            uint column = kHalfDimensions * feature;
            for (uint j = 0; j < kHalfDimensions; j++)
            {
                target[j] += weights[column + j];
            }
        }
    }

    accumulator.computedAccumulation = true;
    return true;
}
/// <summary>
/// Evaluates <paramref name="pos"/> by forwarding to the configured implementation.
/// </summary>
/// <param name="pos">Position to evaluate.</param>
/// <returns>The value returned by <c>_implementation.Evaluate</c>.</returns>
public int Evaluate(NnuePosition pos) => _implementation.Evaluate(pos);
/// <summary>
/// Computes an integer evaluation for <paramref name="position"/>.
/// The concrete computation is supplied by derived classes.
/// </summary>
/// <param name="position">Position to evaluate.</param>
/// <returns>The evaluation produced by the derived implementation.</returns>
public abstract int Evaluate(NnuePosition position);
/// <summary>
/// AVX2 variant of the incremental accumulator update. Uses an already computed accumulator
/// one or two entries back as the starting point and applies the removed / added feature
/// columns tile by tile.
/// </summary>
/// <param name="pos">Position whose accumulator (<c>pos.Nnue[0]</c>) is updated.</param>
/// <returns>
/// <c>true</c> when the accumulator is valid on return (already computed or updated here);
/// <c>false</c> when no computed earlier accumulator is available and a full refresh is needed.
/// </returns>
private unsafe bool UpdateAccumulator(NnuePosition pos)
{
    // NOTE(review): the flag set at the end only persists if NnueAccumulator is a
    // reference type (or Nnue[0].accumulator returns by reference) - confirm.
    var accumulator = pos.Nnue[0].accumulator;
    if (accumulator.computedAccumulation)
    {
        return true;
    }

    // Pick the nearest earlier accumulator that has been computed.
    NnueAccumulator baseline;
    if (pos.NnueCount > 0 && pos.Nnue[1].accumulator.computedAccumulation)
    {
        baseline = pos.Nnue[1].accumulator;
    }
    else if (pos.NnueCount > 1 && pos.Nnue[2].accumulator.computedAccumulation)
    {
        baseline = pos.Nnue[2].accumulator;
    }
    else
    {
        return false;
    }

    Span<IndexList2> removedFeatures = stackalloc IndexList2[2];
    Span<IndexList2> addedFeatures = stackalloc IndexList2[2];
    Span<bool> reset = stackalloc bool[2];
    AppendChangedIndices(pos, removedFeatures, addedFeatures, reset);

    // Scratch registers holding one tile while it is being updated.
    var registers = stackalloc Vector256<short>[RegisterCount];

    fixed (short* biasPtr = _parameters.FeatureTransformer.Biases)
    fixed (short* weightsPtr = _parameters.FeatureTransformer.Weights)
    {
        for (uint tile = 0; tile < kHalfDimensions / TileHeight; tile++)
        {
            for (int color = 0; color < 2; color++)
            {
                fixed (short* accumulationsPtr = accumulator.accumulation[color])
                fixed (short* baselinePtr = baseline.accumulation[color])
                {
                    var outTile = (Vector256<short>*)(&accumulationsPtr[tile * TileHeight]);

                    if (reset[color])
                    {
                        // Reset requested: seed with the biases; the added list holds every active feature.
                        var biasTile = (Vector256<short>*)(&biasPtr[tile * TileHeight]);
                        for (var r = 0; r < RegisterCount; r++)
                        {
                            registers[r] = biasTile[r];
                        }
                    }
                    else
                    {
                        // Seed with the earlier accumulator, then subtract the deactivated features.
                        var baselineTile = (Vector256<short>*)(&baselinePtr[tile * TileHeight]);
                        for (var r = 0; r < RegisterCount; r++)
                        {
                            registers[r] = baselineTile[r];
                        }

                        for (uint n = 0; n < removedFeatures[color].size; n++)
                        {
                            uint feature = removedFeatures[color].values[n];
                            uint start = kHalfDimensions * feature + tile * TileHeight;
                            Vector256<short>* weightTile = (Vector256<short>*)&weightsPtr[start];
                            for (uint r = 0; r < RegisterCount; r++)
                            {
                                registers[r] = Avx2.Subtract(registers[r], weightTile[r]);
                            }
                        }
                    }

                    // Add the activated features.
                    for (uint n = 0; n < addedFeatures[color].size; n++)
                    {
                        uint feature = addedFeatures[color].values[n];
                        uint start = kHalfDimensions * feature + tile * TileHeight;
                        Vector256<short>* weightTile = (Vector256<short>*)&weightsPtr[start];
                        for (uint r = 0; r < RegisterCount; r++)
                        {
                            registers[r] = Avx2.Add(registers[r], weightTile[r]);
                        }
                    }

                    // Write the finished tile back to the accumulator.
                    for (uint r = 0; r < RegisterCount; r++)
                    {
                        outTile[r] = registers[r];
                    }
                }
            }
        }
    }

    accumulator.computedAccumulation = true;
    return true;
}