Example #1
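EncConvLayer constructor: builds two separable-convolution blocks (convolution, activation, dropout), each followed by its own layer norm, using a padding of kernelSize / 2, and registers the submodules.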

        public EncConvLayer(int channel, int kernelSize, double dropoutRate, string activationFn,
                            double activationDropoutRate) : base(nameof(EncConvLayer))
        {
            Conv1 = torch.nn.Sequential(
                ("conv", new ConvSeparable(channel, channel, kernelSize, kernelSize / 2, dropoutRate)),
                ("activation", new ActivationFunction(activationFn)),
                ("dropout", torch.nn.Dropout(activationDropoutRate))
                );
            LayerNorm1 = torch.nn.LayerNorm(new long[] { channel });

            Conv2 = torch.nn.Sequential(
                ("conv", new ConvSeparable(channel, channel, kernelSize, kernelSize / 2, dropoutRate)),
                ("activation", new ActivationFunction(activationFn)),
                ("dropout", torch.nn.Dropout(activationDropoutRate))
                );
            LayerNorm2 = torch.nn.LayerNorm(new long[] { channel });

            RegisterComponents();
        }
Example #2
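FeedForwardLayer constructor: builds the position-wise feed-forward stack (linear, activation, dropout, linear, dropout) plus a final layer norm, initializes the linear weights with a normal distribution (std 0.02) and zero biases, and can optionally rescale the dropout rates when dynamicDropout is enabled.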


        public FeedForwardLayer(
            int embeddingDim             = 768,
            int ffnEmbeddingDim          = 3072,
            double dropoutRate           = 0.1,
            double activationDropoutRate = 0.1,
            string activationFn          = "relu",
            bool dynamicDropout          = false)
            : base(nameof(FeedForwardLayer))
        {
            // Optionally rescale the dropout rates based on the embedding size and the largest hidden size in the search space
            if (dynamicDropout)
            {
                dropoutRate = CalculateDropout(dropoutRate, embeddingDim,
                                               SearchSpace.HiddenSizeChoices[SearchSpace.HiddenSizeChoices.Length - 1]);
                activationDropoutRate = CalculateDropout(activationDropoutRate, embeddingDim,
                                                         SearchSpace.HiddenSizeChoices[SearchSpace.HiddenSizeChoices.Length - 1]);
            }

            // Position-wise feed-forward network: fc1 -> activation -> dropout -> fc2 -> dropout
            var fullConnected1         = torch.nn.Linear(embeddingDim, ffnEmbeddingDim);
            var activation             = new ActivationFunction(activationFn);
            var activationDropoutLayer = torch.nn.Dropout(activationDropoutRate);
            var fullConnected2         = torch.nn.Linear(ffnEmbeddingDim, embeddingDim);
            var dropoutLayer           = torch.nn.Dropout(dropoutRate);

            ModelUtils.InitNormal(fullConnected1.weight, mean: 0.0, std: 0.02);
            ModelUtils.InitZeros(fullConnected1.bias);
            ModelUtils.InitNormal(fullConnected2.weight, mean: 0.0, std: 0.02);
            ModelUtils.InitZeros(fullConnected2.bias);

            FullConnects = torch.nn.Sequential(
                ("fc1", fullConnected1),
                ("activation", activation),
                ("dropout1", activationDropoutLayer),
                ("fc2", fullConnected2),
                ("dropout2", dropoutLayer)
                );
            FinalLayerNorm = torch.nn.LayerNorm(new long[] { embeddingDim });

            RegisterComponents();
        }
Example #3
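SelfAttentionLayer constructor: wraps a MultiHeadAttention module configured for self-attention, together with a dropout layer and a layer norm.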


        public SelfAttentionLayer(
            int embeddingDim            = 768,
            int numAttentionHeads       = 8,
            double dropoutRate          = 0.1f,
            double attentionDropoutRate = 0.1f,
            bool addBiasKv              = false,
            bool addZeroAttention       = false)
            : base(nameof(SelfAttentionLayer))
        {
            SelfAttention = new MultiHeadAttention(
                embeddingDim,
                numAttentionHeads,
                dropout: attentionDropoutRate,
                addBiasKv: addBiasKv,
                addZeroAttention: addZeroAttention,
                selfAttention: true);
            DropoutLayer = torch.nn.Dropout(dropoutRate);

            // Layer norm associated with the self attention layer
            LayerNorm = torch.nn.LayerNorm(new long[] { embeddingDim });

            RegisterComponents();
        }
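The same example continues with the TransformerEncoder constructor, which sets up token, positional, and segment embeddings, a stack of discrete transformer cells, hidden-size transfer modules between blocks, and optional freezing of embeddings and layers.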
        public TransformerEncoder(
            int paddingIdx,
            int vocabSize,
            double dropout                = 0.1f,
            double attentionDropout       = 0.1f,
            double activationDropout      = 0.1f,
            string activationFn           = "relu",
            bool dynamicDropout           = false,
            bool addBiasKv                = false,
            bool addZeroAttention         = false,
            int maxSeqLen                 = 256,
            bool learnedPositionEmbedding = true,
            int embedSize                 = -1,
            int? embedScale               = null,
            IList<int> arches             = null,
            bool usePositionEmbedding     = true,
            bool offsetPositionsByPadding = true,
            int numSegments               = 2,
            bool encoderNormalizeBefore   = false,
            int numEncoderLayers          = 6,
            bool applyBertInit            = false,
            bool freezeEmbeddings         = false,
            bool freezeLayers             = false,
            bool freezeTransfer           = false,
            int nTransLayersToFreeze      = 0)
            : base(nameof(TransformerEncoder))
        {
            Contracts.AssertValue(arches);
            Contracts.AssertNonEmpty(arches);

            PaddingIdx     = paddingIdx;
            DiscreteArches = arches.ToList();
            DistillBlocks  = 4;

            // Embedding modules
            EmbedScale          = embedScale;
            TokenEmbedding      = torch.nn.Embedding(vocabSize, embedSize, paddingIdx);
            PositionalEmbedding = usePositionEmbedding
                ? PositionalEmbedding.GetPositionalEmbedding(maxSeqLen, embedSize,
                                                             paddingIdx, learnedPositionEmbedding)
                : null;
            SegmentEmbedding = numSegments > 0
                ? torch.nn.Embedding(numSegments, embedSize)
                : null;
            EmbeddingLayerNorm = encoderNormalizeBefore
                ? torch.nn.LayerNorm(new long[] { embedSize })
                : null;
            DropoutLayer = torch.nn.Dropout(dropout);

            ModelUtils.InitNormal(TokenEmbedding.weight, mean: 0.0, std: 0.02);
            ModelUtils.InitZeros(TokenEmbedding.weight[paddingIdx]);
            if (SegmentEmbedding != null)
            {
                ModelUtils.InitNormal(SegmentEmbedding.weight, mean: 0.0, std: 0.02);
            }

            // Encoder layers
            var layers = Enumerable.Range(0, numEncoderLayers)
                         .Select(i => new TransformerCellDiscrete(
                                     arches[i],
                                     dropout,
                                     attentionDropout,
                                     activationDropout,
                                     activationFn,
                                     addBiasKv,
                                     addZeroAttention,
                                     dynamicDropout) as torch.nn.Module)
                         .ToArray();

            Layers = new ModuleList(layers);

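            // Group the encoder layers into DistillBlocks blocks and set up hidden-size
            // transfer modules between consecutive blocks (the last block transfers to itself)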
            var blockPerLayer = numEncoderLayers / DistillBlocks;

            HiddenSizePerBlock = CheckBlockHiddenSize(blockPerLayer);

            EmbedTransfer = new EmbedTransferDiscrete(embedSize, HiddenSizePerBlock[0]);
            var hiddenSizePerBlockExtend = HiddenSizePerBlock.Append(HiddenSizePerBlock[HiddenSizePerBlock.Count - 1]).ToList();
            var hiddenTransferList       = Enumerable.Range(0, HiddenSizePerBlock.Count)
                                           .Select(i => new HiddenTransferDiscrete(hiddenSizePerBlockExtend[i],
                                                                                   hiddenSizePerBlockExtend[i + 1]) as torch.nn.Module)
                                           .ToArray();

            HiddenTransferList = new ModuleList(hiddenTransferList);

            if (freezeEmbeddings)
            {
                ModelUtils.FreezeModuleParams(TokenEmbedding);
                ModelUtils.FreezeModuleParams(PositionalEmbedding);
                ModelUtils.FreezeModuleParams(SegmentEmbedding);
                ModelUtils.FreezeModuleParams(EmbeddingLayerNorm);
            }

            if (freezeLayers)
            {
                ModelUtils.FreezeModuleParams(Layers);
                ModelUtils.FreezeModuleParams(HiddenTransferList);
            }

            if (freezeTransfer)
            {
                ModelUtils.FreezeModuleParams(HiddenTransferList);
            }

            for (var i = 0; i < nTransLayersToFreeze; ++i)
            {
                ModelUtils.FreezeModuleParams(Layers[i]);
            }

            RegisterComponents();
        }