/// <summary>
/// Serializes the per-channel batch-normalization arguments of <paramref name="layer"/>
/// into the model binary, recording the table's aligned offset in <paramref name="paramAddress"/>.
/// </summary>
/// <param name="bw">Writer positioned on the output model stream.</param>
/// <param name="layer">Layer whose <c>BNConfigs</c> are emitted, one entry per output channel.</param>
/// <param name="paramAddress">Receives the stream offset of the BN table.</param>
/// <param name="context">Generation context used to align the stream position.</param>
private void GenerateBinBn(BinaryWriter bw, K210ConvLayerConfig layer, K210Conv2dParamAddress paramAddress, K210BinGenerationContext context)
{
    // The KPU expects the BN parameter table on a 128-byte boundary.
    paramAddress.Bn = context.AlignStreamPosition(128);

    foreach (var bn in layer.BNConfigs)
    {
        var argument = new kpu_batchnorm_argument_t
        {
            norm_add = (uint)bn.Add,
            norm_mul = (uint)bn.Mul,
            norm_shift = (byte)bn.Shift
        };

        // kpu_batchnorm_argument_t packs the three fields into a single register word.
        bw.Write(argument.Value);
    }
}
/// <summary>
/// Serializes the 16-segment activation table of <paramref name="layer"/> into the model
/// binary, recording the table's 256-byte-aligned offset in <paramref name="paramAddress"/>.
/// </summary>
/// <param name="bw">Writer positioned on the output model stream (written by the part of this method not visible here).</param>
/// <param name="layer">Layer whose <c>ActConfigs</c> segments are packed into the hardware table.</param>
/// <param name="paramAddress">Receives the stream offset of the activation table.</param>
/// <param name="context">Generation context used to align the stream position.</param>
private void GenerateBinActivation(BinaryWriter bw, K210ConvLayerConfig layer, K210Conv2dParamAddress paramAddress, K210BinGenerationContext context)
{
    // The KPU expects the activation table on a 256-byte boundary.
    paramAddress.Activation = context.AlignStreamPosition(256);

    var reg = new kpu_activate_table_t();
    var configs = layer.ActConfigs;
    for (int i = 0; i < configs.Length; i++)
    {
        var config = configs[i];
        // Pack each software-side segment (start threshold, multiplier, shift)
        // into the corresponding hardware activate_para entry.
        ref var param = ref reg.activate_para[i];
        param.x_start = config.StartX;
        param.y_mul = (ushort)config.Mul;
        param.shift_number = (byte)config.Shift;
    }
    // NOTE(review): this method is truncated in the current view — the code that
    // writes `reg` to `bw` and the closing brace are beyond this chunk.
/// <summary>
/// Serializes the quantized kernel weights of <paramref name="layer"/> into the model
/// binary, recording the 128-byte-aligned offset in <paramref name="paramAddress"/>.
/// </summary>
/// <param name="bw">Writer positioned on the output model stream.</param>
/// <param name="layer">Layer whose quantized <c>Weights</c> are emitted.</param>
/// <param name="paramAddress">Receives the stream offset of the weight blob.</param>
/// <param name="context">Supplies the stream alignment helper and the weight bit width (8 or 16).</param>
private void GenerateBinWeights(BinaryWriter bw, K210ConvLayerConfig layer, K210Conv2dParamAddress paramAddress, K210BinGenerationContext context)
{
    paramAddress.Weights = context.AlignStreamPosition(128);

    // 8-bit models store each weight as a single byte; otherwise the
    // quantized value is written at its full width.
    var narrow = context.WeightsBits == 8;
    foreach (var weight in layer.Weights)
    {
        if (narrow)
            bw.Write((byte)weight);
        else
            bw.Write(weight);
    }
}
/// <summary>
/// Converts a <see cref="K210Conv2d"/> graph layer into the hardware layer argument:
/// quantizes weights/input/bias, then fills in geometry, pooling, padding and
/// kernel-load scheduling fields of the KPU layer config.
/// </summary>
/// <param name="layer">The source conv2d layer (normal or depthwise).</param>
/// <param name="context">Conversion context carrying quantization distributions and the weight bit width.</param>
/// <returns>A fully populated <see cref="K210Conv2dLayerArgument"/> with a fresh (unresolved) parameter address block.</returns>
public K210Conv2dLayerArgument Convert(K210Conv2d layer, ConvertContext context)
{
    // One BN entry per output channel; the KPU activation table always has 16 segments.
    var config = new K210ConvLayerConfig
    {
        BNConfigs = new K210LayerBNConfig[layer.OutputChannels],
        ActConfigs = new K210LayerActConfig[16]
    };

    // sw: per-channel weight scales, bw: weight bias; sx/bx: input scale/bias.
    (var sw, var bw) = QuantizeWeights(layer.Conv2dType == K210Conv2dType.Conv2d, layer.Weights, config, context.WeightsBits);
    (var sx, var bx) = QuantizeInput(context.Quantization.Distributions[layer.Input.Connection.From].Global, config);

    // Cross-term correction accumulated per output pixel: bias_w * bias_x summed
    // over the whole kernel window.
    config.ArgAdd = (long)Math.Round(bw * bx * layer.KernelWidth * layer.KernelHeight);

    // Combined per-output-channel scale feeding the BN/bias quantization below.
    var scale = new double[layer.OutputChannels];
    for (int i = 0; i < scale.Length; i++)
    {
        scale[i] = sw[i] * sx;
    }

    QuantizeBiasAndOutput(layer, layer.Bias, context.Quantization.Distributions[layer.Output], context.Quantization.AdditionalDistributions[layer.OutputBeforeActivation], scale, config);

    config.InputChannels = layer.InputChannels;
    config.OutputChannels = layer.OutputChannels;

    // Dimensions indexed as [3] = width, [2] = height (NCHW-style layout — TODO confirm).
    config.InputWidth = layer.Input.Dimensions[3];
    config.InputHeight = layer.Input.Dimensions[2];
    (config.InputGroups, config.InputRowLength) = K210Helper.GetRowLayout(config.InputWidth);
    config.OutputWidth = layer.Output.Dimensions[3];
    config.OutputHeight = layer.Output.Dimensions[2];
    (config.OutputGroups, config.OutputRowLength) = K210Helper.GetRowLayout(config.OutputWidth);

    // Hardware kernel selector: 1 for 3x3, 0 otherwise (1x1).
    config.KernelType = layer.KernelWidth == 3 ? 1 : 0;
    config.IsDepthwise = layer.Conv2dType == K210Conv2dType.DepthwiseConv2d;
    config.PoolType = (int)layer.PoolType;

    // Pad with the quantized representation of real-valued zero (-bx).
    config.PadValue = (int)Math.Round(-bx);

    if (layer.Conv2dType == K210Conv2dType.Conv2d)
    {
        // NOTE(review): kernelSize is computed but never used below.
        var kernelSize = (int)layer.Weights.Length * context.WeightsBits / 8;
        var oneChannelSize = layer.KernelWidth * layer.KernelHeight * layer.InputChannels * context.WeightsBits / 8;
        // On-chip kernel buffer budget in KB: 30 for 8-bit weights, 60 otherwise
        // — presumably derived from KPU SRAM limits; verify against the SDK.
        var sizeLimit = context.WeightsBits == 8 ? 30 : 60;
        // Split the output channels into batches that fit the budget and record
        // how many load passes the runtime needs.
        var oneLoadChannels = Math.Min(layer.OutputChannels, (int)Math.Floor(sizeLimit * 1024.0 / oneChannelSize));
        config.OneLoadKernelsSize = oneChannelSize * oneLoadChannels;
        config.LoadTimes = (int)Math.Ceiling(layer.OutputChannels / (double)oneLoadChannels);
        config.OutputChannelsOnTime = oneLoadChannels;
    }
    else
    {
        // Depthwise kernels are small enough to be loaded in a single pass.
        config.OneLoadKernelsSize = (int)layer.Weights.Length * context.WeightsBits / 8;
        config.LoadTimes = 1;
        config.OutputChannelsOnTime = layer.OutputChannels;
    }

    // Row-layout groups pack several channels per line; size is the total
    // feature-map footprint in that packed layout.
    var inputOneLineChannels = Math.Min(layer.InputChannels, config.InputGroups);
    config.InputSize = config.InputRowLength * config.InputHeight * config.InputChannels / inputOneLineChannels;
    var outputOneLineChannels = Math.Min(layer.OutputChannels, config.OutputGroups);
    config.OutputSize = config.OutputRowLength * config.OutputHeight * config.OutputChannels / outputOneLineChannels;

    return (new K210Conv2dLayerArgument
    {
        Config = config,
        ParamAddress = new K210Conv2dParamAddress()
    });
}
/// <summary>
/// Builds the 16-segment piecewise-linear activation table for the layer.
/// Trivial activations (linear/relu/relu6) use a fixed start-point table with a
/// single active segment; LeakyRelu is approximated by 14 sloped segments below
/// the quantized zero plus an identity segment above it.
/// </summary>
/// <param name="layer">Layer whose activation is being quantized.</param>
/// <param name="postMul">Post-BN multiplier (2^upshift) that the table must divide back out.</param>
/// <param name="range">Quantization range of the layer output.</param>
/// <param name="beforeActRange">Range before activation. NOTE(review): unused in this body — confirm whether it is intentionally ignored.</param>
/// <param name="config">Receives the 16 populated <c>ActConfigs</c> entries.</param>
/// <exception cref="NotSupportedException">Thrown for unsupported fused or non-trivial activations.</exception>
private static void QuantizeActivation(K210Conv2d layer, double postMul, QuantizationRange range, QuantizationRange beforeActRange, K210ConvLayerConfig config)
{
    if (layer.NonTrivialActivation == null)
    {
        // Only activations expressible by this fixed table are accepted.
        switch (layer.FusedActivationFunction)
        {
            case ActivationFunctionType.Linear:
            case ActivationFunctionType.Relu:
            case ActivationFunctionType.Relu6:
                break;
            default:
                throw new NotSupportedException($"Activation of {layer.FusedActivationFunction} is not supported.");
        }

        // Fixed segment start thresholds; presumably 36-bit signed hardware
        // values (index 10 = 0 is the first "positive" segment) — verify against
        // the KPU activation-table register layout.
        var starts = new ulong[]
        {
            0x800000000, 0xf7d4cf4b8, 0xf8ed5a20c, 0xfa05e4f60,
            0xfb2e05baa, 0xfc46908fe, 0xfd5f1b652, 0xfe77a63a6,
            0xff9fc6ff0, 0xfffd4a9b7, 0, 0x7FFFFFFF0,
            0x7FFFFFFF1, 0x7FFFFFFF2, 0x7FFFFFFF3, 0x7FFFFFFF4
        };

        for (int i = 0; i < starts.Length; i++)
        {
            var param = config.ActConfigs[i] = new K210LayerActConfig();
            param.StartX = starts[i];

            // Only the segment starting at x = 0 applies the 1/postMul rescale;
            // all other segments keep Mul = 0 (output clamped/zero).
            if (i == 10)
            {
                (var mul, var shift) = Quantizer.ExtractValueAndShift(1 / postMul, 16, 20);
                param.Mul = (int)Math.Round(mul);
                param.Shift = shift;
            }
        }
    }
    else if (layer.NonTrivialActivation is LeakyRelu leakyRelu)
    {
        (var scale, var bias) = range.GetScaleBias(8);
        // Quantized representation of real 0, scaled into the post-BN domain.
        var zero = (long)(Quantizer.Quantize(0, scale, bias) * postMul);
        // 14 evenly spaced output levels below zero, one per sloped segment.
        var yTable = Generator.IntegerStep(0, (int)-bias, 15).Take(14).ToArray();
        for (int i = 0; i < 16; i++)
        {
            var param = config.ActConfigs[i] = new K210LayerActConfig();
            if (i == 0)
            {
                // Catch-all lowest segment: most negative start, zero output.
                param.StartX = 0x800000000;
            }
            else if (i == 15)
            {
                // Identity segment for x >= zero: undo postMul and re-add the bias offset.
                (var mul, var shift) = Quantizer.ExtractValueAndShift(1 / postMul, 16, 20);
                param.StartX = (ulong)zero;
                param.Mul = (int)Math.Round(mul);
                param.Shift = shift;
                param.Add = (byte)(-bias);
            }
            else
            {
                // f(x) = (1 - slope) * zero + x * slope
                // f(x1) - f(x0) = (x1 - x0) * slope
                // x0 = zero - (zero - y0) / slope
                var add = (byte)yTable[i - 1];
                var y0 = add * postMul;
                var x0 = zero - (zero - y0) / leakyRelu.Slope;
                (var mul, var shift) = Quantizer.ExtractValueAndShift(1 / postMul * leakyRelu.Slope, 16, 20);
                param.StartX = (ulong)(long)Math.Floor(x0);
                param.Mul = (int)Math.Round(mul);
                param.Shift = shift;
                param.Add = add;
            }
        }
    }
    else
    {
        throw new NotSupportedException($"Activation of {layer.NonTrivialActivation.GetType().Name} is not supported.");
    }
}
/// <summary>
/// Quantizes the layer bias into the per-channel BN table (Mul/Shift/Add) and then
/// delegates activation-table generation. Outputs are kept upshifted by 2^10 through
/// BN so the activation stage can divide the headroom back out.
/// </summary>
/// <param name="layer">Layer being converted.</param>
/// <param name="bias">Float bias tensor, one value per output channel.</param>
/// <param name="range">Output quantization ranges (global + per-channel).</param>
/// <param name="beforeActRange">Ranges before activation, forwarded to <see cref="QuantizeActivation"/>.</param>
/// <param name="scale">Combined weight*input scale per output channel.</param>
/// <param name="config">Receives the populated <c>BNConfigs</c>.</param>
private static void QuantizeBiasAndOutput(K210Conv2d layer, Tensor<float> bias, ChannelwiseRange range, ChannelwiseRange beforeActRange, double[] scale, K210ConvLayerConfig config)
{
    // Keep 10 bits of extra precision through the BN stage.
    var upshift = 10;
    var postMul = Math.Pow(2, upshift);

    if (layer.IsChannelwiseOutput)
    {
        // Each output channel has its own quantization range.
        for (int i = 0; i < config.BNConfigs.Length; i++)
        {
            (var so, var bo) = range.Channels[i].GetScaleBias(8);
            var b = bias[i];
            // Combined rescale from the conv accumulator domain into the
            // upshifted output domain.
            var scomb = so * postMul / scale[i];
            (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 15);
            config.BNConfigs[i] = new K210LayerBNConfig
            {
                Mul = (int)Math.Round(mul),
                Shift = shift,
                Add = (int)Math.Round((b * so - bo) * postMul)
            };
        }
    }
    else
    {
        (var so, var bo) = range.Global.GetScaleBias(8);
#if CHANNEL_WISE
        // Shared output range but still per-channel weight scales.
        for (int i = 0; i < config.BNConfigs.Length; i++)
        {
            var b = bias[i];
            var scomb = so * postMul / scale[i];
            (var mul, var shift) = Quantizer.ExtractValueAndShift(scomb, 22, 15);
            config.BNConfigs[i] = new K210LayerBNConfig
            {
                Mul = (int)Math.Round(mul),
                Shift = shift,
                Add = (int)Math.Round((b * so - bo) * postMul)
            };
        }
#else
        // NOTE(review): this branch redeclares `postMul` (already declared above)
        // and calls ExtractValueAndShift without the Quantizer qualifier — it
        // would not compile if CHANNEL_WISE were undefined. Presumably dead code
        // kept for reference; confirm the build always defines CHANNEL_WISE.
        var scomb = so / scale[0];
        (var mul, var shift) = ExtractValueAndShift(scomb, 22, 255);
        var upscale = shift - 15;
        Debug.Assert(upscale >= 0);
        var postMul = Math.Round(mul) / mul * Math.Pow(2, upscale);
        for (int i = 0; i < config.BNConfigs.Length; i++)
        {
            var b = bias[i];
            config.BNConfigs[i] = new K210LayerBNConfig
            {
                Mul = (int)Math.Round(mul),
                Shift = 15,
                Add = (int)Math.Round((b * so - bo) * postMul)
            };
        }
#endif
    }

    // The activation table must undo the 2^upshift headroom applied above.
    QuantizeActivation(layer, postMul, range.Global, beforeActRange.Global, config);
}
/// <summary>
/// Derives the 8-bit input quantization parameters from <paramref name="range"/> and
/// stores the bias correction (as mul/shift fixed-point) into the layer config.
/// </summary>
/// <param name="range">Global quantization range of the layer input.</param>
/// <param name="config">Receives <c>ArgW</c>/<c>ShiftW</c> for the input bias term.</param>
/// <returns>The input scale and bias used elsewhere in the conversion.</returns>
private static (double scale, double bias) QuantizeInput(QuantizationRange range, K210ConvLayerConfig config)
{
    var (scale, bias) = range.GetScaleBias(8);

    // Encode the bias as a 24-bit mantissa with up to 15 bits of shift.
    var (mul, shift) = Quantizer.ExtractValueAndShift(bias, 24, 15);
    config.ArgW = (int)Math.Round(mul);
    config.ShiftW = shift;

    return (scale, bias);
}
/// <summary>
/// Quantizes the weight tensor. In the CHANNEL_WISE build each channel is first
/// rescaled (in place, mutating the dense buffer) so its range matches the tensor's
/// total range, then a single global scale/bias is applied; the per-channel rescale
/// factors are folded into the returned scales.
/// </summary>
/// <param name="isConv2d">True for normal conv2d (channels on dim 0); false for depthwise (dim 1).</param>
/// <param name="weights">Float weight tensor; its dense buffer is modified in the CHANNEL_WISE path.</param>
/// <param name="config">Receives the quantized <c>Weights</c> and the bias mul/shift (<c>ArgX</c>/<c>ShiftX</c>).</param>
/// <param name="weightsBits">Target bit width (8 or 16).</param>
/// <returns>Per-channel effective scales and the shared bias.</returns>
public static (double[] scale, double bias) QuantizeWeights(bool isConv2d, Tensor<float> weights, K210ConvLayerConfig config, int weightsBits)
{
#if CHANNEL_WISE
    var kernels = weights.ToDenseTensor().Buffer.Span;
    var channels = weights.Dimensions[isConv2d ? 0 : 1];
    var channelSize = weights.Dimensions.GetSize() / channels;
    var totalRange = Quantizer.GetRange(kernels);
    var scales = new double[channels];
    for (int i = 0; i < channels; i++)
    {
        double s;
        var buffer = kernels.Slice(i * channelSize, channelSize);
        var range = Quantizer.GetRange(buffer);
        // Largest factor that stretches this channel to the total range without
        // overflowing either endpoint; when one ratio is negative (the channel
        // lies on one side of zero) take the other.
        var s1 = totalRange.Max / range.Max;
        var s2 = totalRange.Min / range.Min;
        s = (s1 < 0 || s2 < 0) ? Math.Max(s1, s2) : Math.Min(s1, s2);
        Debug.Assert(s > 0);
        // Pre-scale the channel in place so one shared quantizer fits all channels.
        for (int j = 0; j < buffer.Length; j++)
        {
            buffer[j] = (float)(buffer[j] * s);
        }
        scales[i] = s;
    }
    // Global quantization over the rescaled buffer.
    (var scale, var bias) = Quantizer.GetRange(kernels).GetScaleBias(weightsBits);
    (var mul, var shift) = Quantizer.ExtractValueAndShift(bias, 24, 15);
    config.Weights = Quantizer.Quantize(kernels, scale, bias, weightsBits);
    config.ArgX = (int)Math.Round(mul);
    config.ShiftX = shift;
    // Fold the global scale into the per-channel pre-scales for the caller.
    for (int i = 0; i < scales.Length; i++)
    {
        scales[i] *= scale;
    }
    return (scales, bias);
#else
    // NOTE(review): this branch calls GetRange/ExtractValueAndShift/Quantize
    // without the Quantizer qualifier and GetScaleBias without a bit width —
    // likely stale; confirm the build always defines CHANNEL_WISE.
    var buffer = weights.ToDenseTensor().Buffer.Span;
    (var scale, var bias) = GetRange(buffer).GetScaleBias();
    (var mul, var shift) = ExtractValueAndShift(bias, 24, 15);
    config.Weights = Quantize(buffer, scale, bias);
    config.ArgX = (int)Math.Round(mul);
    config.ShiftX = shift;
    return (Enumerable.Repeat(scale, weights.Dimensions[0]).ToArray(), bias);
#endif
}