public QuantizedMaxPool2dLayerArgument Convert(QuantizedMaxPool2d layer, ConvertContext context)
{
    // Max pooling only selects among already-quantized values, so the output
    // shares the input's quantization. The scale/bias below are computed for
    // symmetry with the other converters, but no requantization parameters
    // end up in the returned argument.
    var inputRange = context.Quantization.Distributions[layer.Input.Connection.From].Global;
    var outputRange = context.Quantization.Distributions[layer.Output].Global;

    (var sa, var ba) = inputRange.GetScaleBias(8);
    (var so, var bo) = outputRange.GetScaleBias(8);
    (var mulO, var shiftO) = Quantizer.ExtractValueAndShift(so / sa, 32, 32);

    return new QuantizedMaxPool2dLayerArgument
    {
        InputWidth = (uint)layer.Input.Dimensions[3],
        InputHeight = (uint)layer.Input.Dimensions[2],
        InputChannels = (uint)layer.Input.Dimensions[1],
        OutputWidth = (uint)layer.Output.Dimensions[3],
        OutputHeight = (uint)layer.Output.Dimensions[2],
        OutputChannels = (uint)layer.Output.Dimensions[1],
        KernelWidth = (uint)layer.FilterWidth,
        KernelHeight = (uint)layer.FilterHeight,
        StrideWidth = (uint)layer.StrideWidth,
        StrideHeight = (uint)layer.StrideHeight,
        PaddingWidth = (uint)Layer.GetPadding(layer.Input.Dimensions[3], layer.Output.Dimensions[3], layer.StrideWidth, 1, layer.FilterWidth),
        PaddingHeight = (uint)Layer.GetPadding(layer.Input.Dimensions[2], layer.Output.Dimensions[2], layer.StrideHeight, 1, layer.FilterHeight)
    };
}
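// The converters in this file lean on Quantizer.ExtractValueAndShift to turn
// a floating-point factor into a fixed-point (mul, shift) pair. A minimal
// sketch of that contract follows; it is an assumption inferred from the call
// sites, not the project's actual implementation: find the largest
// shift <= maxShift such that value * 2^shift still fits in a signed
// maxBits-bit integer, so callers can round the mantissa to int and recover
//   value ≈ mul * 2^(-shift).
static (double value, int shift) ExtractValueAndShiftSketch(double value, int maxBits, int maxShift)
{
    var limit = Math.Pow(2, maxBits - 1); // signed-range bound for the mantissa
    var mul = value;
    var shift = 0;
    while (shift < maxShift && Math.Abs(mul * 2) < limit)
    {
        mul *= 2;
        shift++;
    }
    return (mul, shift);
}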
public QuantizedAddLayerArgument Convert(QuantizedAdd layer, ConvertContext context)
{
    var inputARange = context.Quantization.Distributions[layer.InputA.Connection.From].Global;
    var inputBRange = context.Quantization.Distributions[layer.InputB.Connection.From].Global;
    var outputRange = context.Quantization.Distributions[layer.Output].Global;

    (var sa, var ba) = inputARange.GetScaleBias(8);
    (var sb, var bb) = inputBRange.GetScaleBias(8);
    (var so, var bo) = outputRange.GetScaleBias(8);

    // Bring the two differently quantized inputs into a common fixed-point
    // domain by cross-multiplying each by the other's scale (a * sb + b * sa),
    // then rescale the sum by so / (sa * sb) to land on the output scale.
    (var mulA, var shiftA) = Quantizer.ExtractValueAndShift(sb, 32, 32);
    (var mulB, var shiftB) = Quantizer.ExtractValueAndShift(sa, 32, 32);
    (var mulO, var shiftO) = Quantizer.ExtractValueAndShift(so / (sa * sb), 32, 32);

    return new QuantizedAddLayerArgument
    {
        InputAOffset = (int)ba,
        InputAMul = (int)Math.Round(mulA),
        InputAShift = shiftA,
        InputBOffset = (int)bb,
        InputBMul = (int)Math.Round(mulB),
        InputBShift = shiftB,
        OutputOffset = (int)(-bo),
        OutputMul = (int)Math.Round(mulO),
        OutputShift = shiftO,
        Count = (uint)layer.Output.Dimensions.GetSize()
    };
}
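// GetScaleBias(bits) is used throughout these converters. Below is a
// hypothetical sketch of its contract, inferred from how the callers combine
// the two values (e.g. Add = (b * so - bo) in QuantizeBiasAndOutput):
// q = x * scale - bias maps the recorded [Min, Max] range onto the unsigned
// [0, 2^bits - 1] grid. The struct name and layout here are assumptions for
// illustration, not the project's actual QuantizationRange.
struct QuantizationRangeSketch
{
    public double Min;
    public double Max;

    public (double scale, double bias) GetScaleBias(int bits)
    {
        var scale = ((1 << bits) - 1) / (Max - Min); // codes per unit of input
        var bias = Min * scale;                      // so that Min maps to 0
        return (scale, bias);
    }
}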
private static (double scale, double bias) QuantizeInput(QuantizationRange range, K210ConvLayerConfig config)
{
    // Quantize the input range to 8 bits and encode its bias as a fixed-point
    // multiplier (24-bit mantissa, shift capped at 15) for the hardware.
    (var scale, var bias) = range.GetScaleBias(8);
    (var mul, var shift) = Quantizer.ExtractValueAndShift(bias, 24, 15);
    config.ArgW = (int)Math.Round(mul);
    config.ShiftW = shift;
    return (scale, bias);
}
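// Worked example (hypothetical numbers) of the encoding QuantizeInput
// performs, using the two sketches above: an input range of [-1, 1]
// quantized to 8 bits.
static void QuantizeInputExample()
{
    var range = new QuantizationRangeSketch { Min = -1, Max = 1 };
    (var scale, var bias) = range.GetScaleBias(8);
    // scale = 255 / 2 = 127.5, bias = -127.5
    (var mul, var shift) = ExtractValueAndShiftSketch(bias, 24, 15);
    // mul = -127.5 * 2^15 = -4177920, shift = 15; the hardware can then
    // recover the bias as ArgW * 2^(-ShiftW) = -4177920 / 32768.
}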
public static (double[] scale, double bias) QuantizeWeights(bool isConv2d, Tensor<float> weights, K210ConvLayerConfig config, int weightsBits)
{
#if CHANNEL_WISE
    var kernels = weights.ToDenseTensor().Buffer.Span;
    var channels = weights.Dimensions[isConv2d ? 0 : 1];
    var channelSize = weights.Dimensions.GetSize() / channels;

    var totalRange = Quantizer.GetRange(kernels);
    var scales = new double[channels];

    // Per-channel pre-scaling: stretch each channel's weights so its range
    // fills the global range as far as possible without overflowing it.
    for (int i = 0; i < channels; i++)
    {
        double s;
        var buffer = kernels.Slice(i * channelSize, channelSize);
        var range = Quantizer.GetRange(buffer);

        // Take the tighter of the two stretch factors; if either ratio is
        // negative (the channel and global ranges disagree in sign on that
        // side), fall back to the positive one.
        var s1 = totalRange.Max / range.Max;
        var s2 = totalRange.Min / range.Min;
        s = (s1 < 0 || s2 < 0) ? Math.Max(s1, s2) : Math.Min(s1, s2);

        Debug.Assert(s > 0);
        for (int j = 0; j < buffer.Length; j++)
            buffer[j] = (float)(buffer[j] * s);
        scales[i] = s;
    }

    (var scale, var bias) = Quantizer.GetRange(kernels).GetScaleBias(weightsBits);
    (var mul, var shift) = Quantizer.ExtractValueAndShift(bias, 24, 15);
    config.Weights = Quantizer.Quantize(kernels, scale, bias, weightsBits);
    config.ArgX = (int)Math.Round(mul);
    config.ShiftX = shift;

    // Fold the shared quantization scale into the per-channel scales.
    for (int i = 0; i < scales.Length; i++)
        scales[i] *= scale;
    return (scales, bias);
#else
    var buffer = weights.ToDenseTensor().Buffer.Span;
    (var scale, var bias) = Quantizer.GetRange(buffer).GetScaleBias(weightsBits);
    (var mul, var shift) = Quantizer.ExtractValueAndShift(bias, 24, 15);
    config.Weights = Quantizer.Quantize(buffer, scale, bias, weightsBits);
    config.ArgX = (int)Math.Round(mul);
    config.ShiftX = shift;
    return (Enumerable.Repeat(scale, weights.Dimensions[0]).ToArray(), bias);
#endif
}
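// Quantizer.Quantize(span, scale, bias, bits) is assumed to apply the affine
// map from the QuantizationRangeSketch above and clamp to the unsigned bit
// range. A hypothetical sketch (the signature and ushort storage are
// assumptions, chosen so weightsBits up to 16 fit):
static ushort[] QuantizeSketch(ReadOnlySpan<float> data, double scale, double bias, int bits)
{
    var max = (1 << bits) - 1;
    var result = new ushort[data.Length];
    for (int i = 0; i < data.Length; i++)
    {
        var q = (long)Math.Round(data[i] * scale - bias);
        result[i] = (ushort)Math.Clamp(q, 0, max);
    }
    return result;
}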
private static void QuantizeActivation(K210Conv2d layer, double postMul, QuantizationRange range, QuantizationRange beforeActRange, K210ConvLayerConfig config)
{
    if (layer.NonTrivialActivation == null)
    {
        switch (layer.FusedActivationFunction)
        {
            case ActivationFunctionType.Linear:
            case ActivationFunctionType.Relu:
            case ActivationFunctionType.Relu6:
                break;
            default:
                throw new NotSupportedException($"Activation of {layer.FusedActivationFunction} is not supported.");
        }

        // Segment start points of the 16-entry piecewise-linear activation
        // table (36-bit two's-complement). Segment 0 starts at the most
        // negative value, segment 10 at zero, and the trailing 0x7FFFFFFFx
        // entries are near-maximum sentinels padding the table. Only the
        // x >= 0 segment gets a non-zero multiplier below.
        var starts = new ulong[]
        {
            0x800000000,
            0xf7d4cf4b8, 0xf8ed5a20c, 0xfa05e4f60,
            0xfb2e05baa, 0xfc46908fe, 0xfd5f1b652,
            0xfe77a63a6, 0xff9fc6ff0, 0xfffd4a9b7,
            0,
            0x7FFFFFFF0, 0x7FFFFFFF1, 0x7FFFFFFF2,
            0x7FFFFFFF3, 0x7FFFFFFF4
        };

        for (int i = 0; i < starts.Length; i++)
        {
            var param = config.ActConfigs[i] = new K210LayerActConfig();
            param.StartX = starts[i];

            if (i == 10)
            {
                // The x >= 0 segment scales by 1 / postMul to undo the
                // temporary precision gain applied in the BN stage.
                (var mul, var shift) = Quantizer.ExtractValueAndShift(1 / postMul, 16, 20);
                param.Mul = (int)Math.Round(mul);
                param.Shift = shift;
            }
        }
    }
    else if (layer.NonTrivialActivation is LeakyRelu leakyRelu)
    {
        (var scale, var bias) = range.GetScaleBias(8);
        var zero = (long)(Quantizer.Quantize(0, scale, bias) * postMul);
        var yTable = Generator.IntegerStep(0, (int)-bias, 15).Take(14).ToArray();

        for (int i = 0; i < 16; i++)
        {
            var param = config.ActConfigs[i] = new K210LayerActConfig();
            if (i == 0)
            {
                param.StartX = 0x800000000;
            }
            else if (i == 15)
            {
                // Identity segment for x at or above the zero point.
                (var mul, var shift) = Quantizer.ExtractValueAndShift(1 / postMul, 16, 20);
                param.StartX = (ulong)zero;
                param.Mul = (int)Math.Round(mul);
                param.Shift = shift;
                param.Add = (byte)(-bias);
            }
            else
            {
                // Negative side, approximated with 14 linear segments:
                // f(x) = (1 - slope) * zero + x * slope
                // f(x1) - f(x0) = (x1 - x0) * slope
                // x0 = zero - (zero - y0) / slope
                var add = (byte)yTable[i - 1];
                var y0 = add * postMul;
                var x0 = zero - (zero - y0) / leakyRelu.Slope;
                (var mul, var shift) = Quantizer.ExtractValueAndShift(1 / postMul * leakyRelu.Slope, 16, 20);
                param.StartX = (ulong)(long)Math.Floor(x0);
                param.Mul = (int)Math.Round(mul);
                param.Shift = shift;
                param.Add = add;
            }
        }
    }
    else
    {
        throw new NotSupportedException($"Activation of {layer.NonTrivialActivation.GetType().Name} is not supported.");
    }
}
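// A hypothetical sketch of how the KPU consumes the 16-entry table
// configured above (assumed hardware semantics, for orientation only):
// pick the last segment whose StartX <= x and apply the affine map
//   y = (x - StartX) * Mul >> Shift + Add,
// where x is the 36-bit pre-activation value produced by the BN stage.
// With this reading, the i == 10 / i == 15 segments above reduce to
// y = x / postMul on the non-negative side, as intended.
static long ActivateSketch(long x, K210LayerActConfig[] segments)
{
    // StartX holds a 36-bit two's-complement value; sign-extend bit 35.
    static long ToSigned36(ulong v) => ((long)v << 28) >> 28;

    var seg = segments[0];
    foreach (var s in segments)
    {
        if (ToSigned36(s.StartX) <= x)
            seg = s;
        else
            break;
    }

    return ((x - ToSigned36(seg.StartX)) * seg.Mul >> seg.Shift) + seg.Add;
}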
private static void QuantizeBiasAndOutput(K210Conv2d layer, Tensor<float> bias, ChannelwiseRange range, ChannelwiseRange beforeActRange, double[] scale, K210ConvLayerConfig config)
{
    // Work at 2^10 extra precision through the BN stage; the activation
    // table divides this gain back out (see QuantizeActivation).
    var upshift = 10;
    var postMul = Math.Pow(2, upshift);

    if (layer.IsChannelwiseOutput)
    {
        for (int i = 0; i < config.BNConfigs.Length; i++)
        {
            (var so, var bo) = range.Channels[i].GetScaleBias(8);
            var b = bias[i];
            var scomb = so * postMul / scale[i];
            (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 15);
            config.BNConfigs[i] = new K210LayerBNConfig
            {
                Mul = (int)Math.Round(mul),
                Shift = shift,
                Add = (int)Math.Round((b * so - bo) * postMul)
            };
        }
    }
    else
    {
        (var so, var bo) = range.Global.GetScaleBias(8);
#if CHANNEL_WISE
        for (int i = 0; i < config.BNConfigs.Length; i++)
        {
            var b = bias[i];
            var scomb = so * postMul / scale[i];
            (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 15);
            config.BNConfigs[i] = new K210LayerBNConfig
            {
                Mul = (int)Math.Round(mul),
                Shift = shift,
                Add = (int)Math.Round((b * so - bo) * postMul)
            };
        }
#else
        var scomb = so / scale[0];
        (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 255);
        var upscale = shift - 15;
        Debug.Assert(upscale >= 0);
        // Reassign (not redeclare) postMul so QuantizeActivation sees the
        // effective gain actually applied by the fixed Shift = 15 below.
        postMul = Math.Round(mul) / mul * Math.Pow(2, upscale);

        for (int i = 0; i < config.BNConfigs.Length; i++)
        {
            var b = bias[i];
            config.BNConfigs[i] = new K210LayerBNConfig
            {
                Mul = (int)Math.Round(mul),
                Shift = 15,
                Add = (int)Math.Round((b * so - bo) * postMul)
            };
        }
#endif
    }

    QuantizeActivation(layer, postMul, range.Global, beforeActRange.Global, config);
}
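// A hypothetical end-to-end sketch (assumed semantics) tying the pieces
// together: each 36-bit conv accumulator first passes through the BN stage
// configured above, which folds the layer bias and output scale at 2^upshift
// extra gain, and then through the activation table (ActivateSketch above),
// whose Mul/Shift remove that gain again.
static long RequantizeSketch(long acc, K210LayerBNConfig bn, K210LayerActConfig[] act)
{
    var x = (acc * (long)bn.Mul >> bn.Shift) + bn.Add; // bias + output scale, x 2^upshift
    return ActivateSketch(x, act);                     // piecewise activation, / 2^upshift
}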