/// <summary>
/// Computes softmax over <paramref name="input"/> into <paramref name="output"/>:
/// output[i] = exp(input[i]) / sum(exp(input)).
/// </summary>
/// <param name="input">Source tensor; its xmm vectors are read but not modified.</param>
/// <param name="output">Destination tensor; must have the same xmm length as input.
/// NOTE(review): every lane of every vector is folded into the sum, so the tensor is
/// assumed to be exactly filled (no padding lanes) — confirm against Tensor's layout.</param>
public static void Activation_Softmax(Tensor input, Tensor output) {
    int n = output.xmm.Length;
    if (n == 0) {
        return; // empty tensor: nothing to normalize, and avoids a 1/0 below
    }

    // Numerical stability fix: subtract the global maximum before exponentiating.
    // softmax(x) == softmax(x - max(x)) exactly, but without the shift exp256_ps
    // overflows to +Inf for inputs beyond ~88, yielding NaN after normalization.
    __m256 vmax = input.xmm[0];
    for (int i = 1; i < n; ++i) {
        vmax = Avx.Max(vmax, input.xmm[i]);
    }
    float max = vmax.GetElement(0);
    for (int lane = 1; lane < 8; ++lane) {
        max = Math.Max(max, vmax.GetElement(lane));
    }
    __m256 shift = Vector256.Create(max);

    // Exponentiate the shifted inputs and accumulate the scalar sum of all lanes.
    float sum = 0.0f;
    for (int i = 0; i < n; ++i) {
        output.xmm[i] = NN_utils.exp256_ps(Avx.Subtract(input.xmm[i], shift));
        sum += NN_utils.hsums(output.xmm[i]).GetElement <float>(0);
    }

    // Normalize in place: multiply once by the reciprocal instead of dividing per element.
    output.mul(1.0f / sum, output);
}
/// <summary>
/// Chunked dot product between this tensor's xmm vectors and <paramref name="small"/>'s,
/// writing one horizontal sum per (small-chunk, big-chunk) pair into <paramref name="o"/>
/// in row-major order (small chunk outer, big chunk inner).
/// </summary>
/// <param name="kept_dim">Unused by this implementation; kept for interface compatibility.</param>
/// <param name="big_matrix_vec">Unused by this implementation; kept for interface compatibility.</param>
/// <param name="big_reserve_size">Unused by this implementation; kept for interface compatibility.</param>
/// <param name="small">Second operand; its xmm length must be a multiple of chunk_range.</param>
/// <param name="chunk_range">Number of consecutive xmm vectors accumulated per output element.</param>
/// <param name="o">Destination tensor receiving one scalar per chunk pair via setElement.</param>
public void dot_product(int kept_dim, List <float> big_matrix_vec, int big_reserve_size, Tensor small, int chunk_range, Tensor o) {
    var write_pos = 0;
    for (var s = 0; s < small.xmm.Length; s += chunk_range) {
        for (var b = 0; b < xmm.Length; b += chunk_range) {
            // Fused multiply-add across the chunk, then collapse lanes to a scalar.
            var acc = Vector256 <float> .Zero;
            for (var k = 0; k < chunk_range; ++k) {
                acc = Fma.MultiplyAdd(xmm[b + k], small.xmm[s + k], acc);
            }
            o.setElement(write_pos, NN_utils.hsums(acc).GetElement <float>(0));
            ++write_pos;
        }
    }
}