Esempio n. 1
0
        /// <summary>
        /// Builds the argument block for a quantized 2-D max-pooling layer.
        /// </summary>
        /// <param name="layer">Layer whose input/output dimensions, filter size and strides are copied into the result.</param>
        /// <param name="context">Conversion context that supplies the quantization distributions.</param>
        /// <returns>A populated <see cref="QuantizedMaxPool2dLayerArgument"/>.</returns>
        public QuantizedMaxPool2dLayerArgument Convert(QuantizedMaxPool2d layer, ConvertContext context)
        {
            // NOTE(review): none of the quantization values computed here reach the returned
            // argument. They are still evaluated so the distribution lookups behave exactly as
            // before (a failing lookup would still surface here) - confirm whether they can go.
            var distributions = context.Quantization.Distributions;
            var inputGlobal   = distributions[layer.Input.Connection.From].Global;
            var outputGlobal  = distributions[layer.Output].Global;

            var (inputScale, _)  = inputGlobal.GetScaleBias(8);
            var (outputScale, _) = outputGlobal.GetScaleBias(8);
            _ = Quantizer.ExtractValueAndShift(outputScale / inputScale, 32, 32);

            // Dimension indices 3, 2 and 1 are read as width, height and channels respectively.
            var inDims  = layer.Input.Dimensions;
            var outDims = layer.Output.Dimensions;

            var paddingW = Layer.GetPadding(inDims[3], outDims[3], layer.StrideWidth, 1, layer.FilterWidth);
            var paddingH = Layer.GetPadding(inDims[2], outDims[2], layer.StrideHeight, 1, layer.FilterHeight);

            var argument = new QuantizedMaxPool2dLayerArgument
            {
                InputWidth     = (uint)inDims[3],
                InputHeight    = (uint)inDims[2],
                InputChannels  = (uint)inDims[1],
                OutputWidth    = (uint)outDims[3],
                OutputHeight   = (uint)outDims[2],
                OutputChannels = (uint)outDims[1],
                KernelWidth    = (uint)layer.FilterWidth,
                KernelHeight   = (uint)layer.FilterHeight,
                StrideWidth    = (uint)layer.StrideWidth,
                StrideHeight   = (uint)layer.StrideHeight,
                PaddingWidth   = (uint)paddingW,
                PaddingHeight  = (uint)paddingH
            };

            return argument;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the argument block for a quantized element-wise add layer.
        /// </summary>
        /// <param name="layer">Add layer providing the two inputs and the output.</param>
        /// <param name="context">Conversion context that supplies the quantization distributions.</param>
        /// <returns>A populated <see cref="QuantizedAddLayerArgument"/>.</returns>
        public QuantizedAddLayerArgument Convert(QuantizedAdd layer, ConvertContext context)
        {
            var distributions = context.Quantization.Distributions;
            var rangeA   = distributions[layer.InputA.Connection.From].Global;
            var rangeB   = distributions[layer.InputB.Connection.From].Global;
            var rangeOut = distributions[layer.Output].Global;

            var (scaleA, biasA)     = rangeA.GetScaleBias(8);
            var (scaleB, biasB)     = rangeB.GetScaleBias(8);
            var (scaleOut, biasOut) = rangeOut.GetScaleBias(8);

            // Input A's multiplier comes from B's scale and vice versa; the output multiplier is
            // scaleOut / (scaleA * scaleB). Presumably this puts both operands on a common scale
            // before they are summed - confirm against the consuming kernel.
            var (multiplierA, shiftForA)     = Quantizer.ExtractValueAndShift(scaleB, 32, 32);
            var (multiplierB, shiftForB)     = Quantizer.ExtractValueAndShift(scaleA, 32, 32);
            var (multiplierOut, shiftForOut) = Quantizer.ExtractValueAndShift(scaleOut / (scaleA * scaleB), 32, 32);

            var elementCount = (uint)(layer.Output.Dimensions.GetSize());

            var argument = new QuantizedAddLayerArgument
            {
                InputAOffset = (int)biasA,
                InputAMul    = (int)Math.Round(multiplierA),
                InputAShift  = shiftForA,
                InputBOffset = (int)biasB,
                InputBMul    = (int)Math.Round(multiplierB),
                InputBShift  = shiftForB,
                // The output offset is the negated output bias, unlike the two input offsets.
                OutputOffset = (int)(-biasOut),
                OutputMul    = (int)Math.Round(multiplierOut),
                OutputShift  = shiftForOut,
                Count        = elementCount
            };

            return argument;
        }
Esempio n. 3
0
 /// <summary>
 /// Derives the 8-bit scale and bias of the input range and stores the bias, split into a
 /// multiplier and a shift, in <c>config.ArgW</c> / <c>config.ShiftW</c>.
 /// </summary>
 /// <param name="range">Quantization range of the layer input.</param>
 /// <param name="config">Layer configuration that receives <c>ArgW</c> and <c>ShiftW</c>.</param>
 /// <returns>The scale and bias obtained from <paramref name="range"/>.</returns>
 private static (double scale, double bias) QuantizeInput(QuantizationRange range, K210ConvLayerConfig config)
 {
     var (inputScale, inputBias) = range.GetScaleBias(8);

     // Only the bias is decomposed here; the scale is handed back to the caller untouched.
     var (biasMultiplier, biasShift) = Quantizer.ExtractValueAndShift(inputBias, 24, 15);

     config.ArgW   = (int)Math.Round(biasMultiplier);
     config.ShiftW = biasShift;

     return (inputScale, inputBias);
 }
Esempio n. 4
0
        /// <summary>
        /// Quantizes the convolution weights, stores the result in <c>config.Weights</c>,
        /// <c>config.ArgX</c> and <c>config.ShiftX</c>, and returns one scale per channel together
        /// with the weight bias.
        /// </summary>
        /// <param name="isConv2d">Selects which weight dimension is the channel count (0 when true, 1 otherwise). Only read in the CHANNEL_WISE build.</param>
        /// <param name="weights">Floating-point weights to quantize.</param>
        /// <param name="config">Layer configuration that receives the quantized weights and the bias multiplier/shift.</param>
        /// <param name="weightsBits">Bit width used for weight quantization. Only read in the CHANNEL_WISE build.</param>
        /// <returns>The per-channel scales and the bias of the quantized weights.</returns>
        public static (double[] scale, double bias) QuantizeWeights(bool isConv2d, Tensor <float> weights, K210ConvLayerConfig config, int weightsBits)
        {
#if CHANNEL_WISE
            // NOTE(review): the loop below writes into this span. Whether that also modifies the
            // caller's tensor depends on whether ToDenseTensor() copies - confirm.
            var kernels     = weights.ToDenseTensor().Buffer.Span;
            var channels    = weights.Dimensions[isConv2d ? 0 : 1];
            var channelSize = weights.Dimensions.GetSize() / channels;

            var totalRange = Quantizer.GetRange(kernels);
            var scales     = new double[channels];

            // Stretch every channel so that its own range fills the range of the whole tensor.
            for (int i = 0; i < channels; i++)
            {
                double s;
                // Channel i is taken as the i-th contiguous run of channelSize elements.
                // NOTE(review): assumes each channel's weights are contiguous in the buffer, also
                // when the channel count comes from dimension 1 - confirm.
                var    buffer = kernels.Slice(i * channelSize, channelSize);
                var    range  = Quantizer.GetRange(buffer);

                // Ratio of the tensor-wide bound to this channel's bound, for each end of the range.
                // NOTE(review): a channel whose Max or Min is 0 makes the ratio non-finite
                // (assuming floating-point members) - confirm GetRange guards against that.
                var s1 = totalRange.Max / range.Max;
                var s2 = totalRange.Min / range.Min;
                // If either ratio is negative take the larger one, otherwise the smaller one.
                s = (s1 < 0 || s2 < 0) ? Math.Max(s1, s2) : Math.Min(s1, s2);

                // Sanity check only; the code continues with s regardless in builds without asserts.
                Debug.Assert(s > 0);
                for (int j = 0; j < buffer.Length; j++)
                {
                    buffer[j] = (float)(buffer[j] * s);
                }
                scales[i] = s;
            }

            // The range is recomputed because the weights were rescaled in place above.
            (var scale, var bias) = Quantizer.GetRange(kernels).GetScaleBias(weightsBits);

            (var mul, var shift) = Quantizer.ExtractValueAndShift(bias, 24, 15);
            config.Weights       = Quantizer.Quantize(kernels, scale, bias, weightsBits);
            config.ArgX          = (int)Math.Round(mul);
            config.ShiftX        = shift;

            // Fold the tensor-wide scale into each per-channel stretch factor.
            for (int i = 0; i < scales.Length; i++)
            {
                scales[i] *= scale;
            }
            return(scales, bias);
#else
            // Non channel-wise build: a single scale is shared by all channels.
            // NOTE(review): this branch calls GetRange / ExtractValueAndShift / Quantize without
            // the Quantizer qualifier and with different argument lists, and ignores isConv2d and
            // weightsBits - confirm it still compiles against the current helpers.
            var buffer = weights.ToDenseTensor().Buffer.Span;
            (var scale, var bias) = GetRange(buffer).GetScaleBias();

            (var mul, var shift) = ExtractValueAndShift(bias, 24, 15);
            config.Weights       = Quantize(buffer, scale, bias);
            config.ArgX          = (int)Math.Round(mul);
            config.ShiftX        = shift;
            // The same scale is repeated once per entry of weight dimension 0.
            return(Enumerable.Repeat(scale, weights.Dimensions[0]).ToArray(), bias);
#endif
        }
Esempio n. 5
0
        /// <summary>
        /// Fills the 16 entries of <c>config.ActConfigs</c> for the layer's activation.
        /// </summary>
        /// <remarks>
        /// Without a non-trivial activation only Linear, Relu and Relu6 fused activations are
        /// accepted. The only non-trivial activation handled is <see cref="LeakyRelu"/>.
        /// </remarks>
        /// <param name="layer">Layer whose activation settings are read.</param>
        /// <param name="postMul">Factor applied upstream; its reciprocal is encoded as the segment multiplier.</param>
        /// <param name="range">Output quantization range; only read on the LeakyRelu path.</param>
        /// <param name="beforeActRange">Not referenced by this method.</param>
        /// <param name="config">Layer configuration whose <c>ActConfigs</c> entries are overwritten.</param>
        /// <exception cref="NotSupportedException">The fused or non-trivial activation is not one of the handled kinds.</exception>
        private static void QuantizeActivation(K210Conv2d layer, double postMul, QuantizationRange range, QuantizationRange beforeActRange, K210ConvLayerConfig config)
        {
            if (layer.NonTrivialActivation is LeakyRelu leaky)
            {
                var (outScale, outBias) = range.GetScaleBias(8);
                var zeroPoint = (long)(Quantizer.Quantize(0, outScale, outBias) * postMul);
                var addTable  = Generator.IntegerStep(0, (int)-outBias, 15).Take(14).ToArray();

                for (int index = 0; index < 16; index++)
                {
                    var segment = config.ActConfigs[index] = new K210LayerActConfig();

                    switch (index)
                    {
                    case 0:
                        // First segment: only the start point is set.
                        segment.StartX = 0x800000000;
                        break;

                    case 15:
                        // Last segment starts at the quantized zero and uses the plain 1 / postMul factor.
                        var (unitMul, unitShift) = Quantizer.ExtractValueAndShift(1 / postMul, 16, 20);
                        segment.StartX = (ulong)zeroPoint;
                        segment.Mul    = (int)Math.Round(unitMul);
                        segment.Shift  = unitShift;
                        segment.Add    = (byte)(-outBias);
                        break;

                    default:
                        // f(x) = (1 - slope) * zero + x * slope
                        // f(x1) - f(x0) = (x1 - x0) * slope
                        // x0 = zero - (zero - y0) / slope
                        var offset  = (byte)addTable[index - 1];
                        var targetY = offset * postMul;
                        var startX  = zeroPoint - (zeroPoint - targetY) / leaky.Slope;

                        var (slopeMul, slopeShift) = Quantizer.ExtractValueAndShift(1 / postMul * leaky.Slope, 16, 20);
                        segment.StartX = (ulong)(long)Math.Floor(startX);
                        segment.Mul    = (int)Math.Round(slopeMul);
                        segment.Shift  = slopeShift;
                        segment.Add    = offset;
                        break;
                    }
                }

                return;
            }

            if (layer.NonTrivialActivation != null)
            {
                throw new NotSupportedException($"Activation of {layer.NonTrivialActivation.GetType().Name} is not supported.");
            }

            // Fused activation path: reject anything other than Linear / Relu / Relu6 before
            // touching the configuration.
            var fused = layer.FusedActivationFunction;
            var isSupported = fused == ActivationFunctionType.Linear
                              || fused == ActivationFunctionType.Relu
                              || fused == ActivationFunctionType.Relu6;
            if (!isSupported)
            {
                throw new NotSupportedException($"Activation of {fused} is not supported.");
            }

            var segmentStarts = new ulong[]
            {
                0x800000000, 0xf7d4cf4b8, 0xf8ed5a20c, 0xfa05e4f60,
                0xfb2e05baa, 0xfc46908fe, 0xfd5f1b652, 0xfe77a63a6,
                0xff9fc6ff0, 0xfffd4a9b7, 0, 0x7FFFFFFF0,
                0x7FFFFFFF1, 0x7FFFFFFF2, 0x7FFFFFFF3, 0x7FFFFFFF4
            };

            for (int index = 0; index < segmentStarts.Length; index++)
            {
                var segment = config.ActConfigs[index] = new K210LayerActConfig();
                segment.StartX = segmentStarts[index];

                // Only the segment that starts at 0 (index 10) gets a multiplier and shift;
                // every other segment keeps the defaults of a fresh K210LayerActConfig.
                if (index != 10)
                {
                    continue;
                }

                var (unitMul, unitShift) = Quantizer.ExtractValueAndShift(1 / postMul, 16, 20);
                segment.Mul   = (int)Math.Round(unitMul);
                segment.Shift = unitShift;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Fills <c>config.BNConfigs</c> with a multiplier, shift and additive term per entry,
        /// then delegates to <c>QuantizeActivation</c> to fill the activation table.
        /// </summary>
        /// <param name="layer">Layer being converted; <c>IsChannelwiseOutput</c> selects per-channel or global output ranges.</param>
        /// <param name="bias">Floating-point bias, indexed by the same index as <c>config.BNConfigs</c>.</param>
        /// <param name="range">Output quantization ranges (per channel and global).</param>
        /// <param name="beforeActRange">Pre-activation ranges; only its global range is forwarded to <c>QuantizeActivation</c>.</param>
        /// <param name="scale">Combined input/weight scale per channel.</param>
        /// <param name="config">Layer configuration whose <c>BNConfigs</c> entries are overwritten.</param>
        private static void QuantizeBiasAndOutput(K210Conv2d layer, Tensor <float> bias, ChannelwiseRange range, ChannelwiseRange beforeActRange, double[] scale, K210ConvLayerConfig config)
        {
            // The values are scaled up by 2^10 here; QuantizeActivation is given the same
            // factor and encodes its reciprocal.
            var upshift = 10;
            var postMul = Math.Pow(2, upshift);

            if (layer.IsChannelwiseOutput)
            {
                // Per-channel output: every entry uses the scale/bias of its own output range.
                for (int i = 0; i < config.BNConfigs.Length; i++)
                {
                    (var so, var bo) = range.Channels[i].GetScaleBias(8);

                    var b = bias[i];

                    // Output scale times postMul, divided by this channel's accumulated scale.
                    var scomb = so * postMul / scale[i];

                    (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 15);

                    config.BNConfigs[i] = new K210LayerBNConfig
                    {
                        Mul   = (int)Math.Round(mul),
                        Shift = shift,
                        Add   = (int)Math.Round((b * so - bo) * postMul)
                    };
                }
            }
            else
            {
                // Global output: one output scale/bias shared by all entries.
                (var so, var bo) = range.Global.GetScaleBias(8);
#if CHANNEL_WISE
                // Same computation as the per-channel branch above, but with the shared so/bo.
                for (int i = 0; i < config.BNConfigs.Length; i++)
                {
                    var b = bias[i];

                    var scomb = so * postMul / scale[i];

                    (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 15);

                    config.BNConfigs[i] = new K210LayerBNConfig
                    {
                        Mul   = (int)Math.Round(mul),
                        Shift = shift,
                        Add   = (int)Math.Round((b * so - bo) * postMul)
                    };
                }
#else
                // NOTE(review): this branch declares a second local named postMul while the one
                // declared at the top of the method is still in scope, which C# rejects (CS0136).
                // It also calls ExtractValueAndShift without the Quantizer qualifier. It looks
                // like it is not maintained for builds without CHANNEL_WISE - confirm.
                var scomb = so / scale[0];

                (var mul, var shift) = ExtractValueAndShift(scomb, 22, 255);
                var upscale = shift - 15;
                Debug.Assert(upscale >= 0);
                var postMul = Math.Round(mul) / mul * Math.Pow(2, upscale);

                for (int i = 0; i < config.BNConfigs.Length; i++)
                {
                    var b = bias[i];

                    config.BNConfigs[i] = new K210LayerBNConfig
                    {
                        Mul   = (int)Math.Round(mul),
                        Shift = 15,
                        Add   = (int)Math.Round((b * so - bo) * postMul)
                    };
                }
#endif
            }

            // The activation table is always built from the global ranges, even for
            // channel-wise output.
            QuantizeActivation(layer, postMul, range.Global, beforeActRange.Global, config);
        }