Esempi in C# (CSharp) per Seq2SeqSharp LayerNormalization.AddNorm

Linguaggio di programmazione: C# (CSharp)

Spazio dei nomi/nome del pacchetto: Seq2SeqSharp

Classe/tipologia: LayerNormalization

Metodo/funzione: AddNorm

Esempi su hotexamples.com: 1

Seq2SeqSharp LayerNormalization.AddNorm in C# (CSharp): 1 esempio trovato. Questo è il miglior esempio reale in C# (CSharp) per Seq2SeqSharp.LayerNormalization.AddNorm, estratto da progetti open source. Lo puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Norm(14)

Load(11)

Save(11)

getParams(8)

Process(5)

GetParams(4)

AddNorm(1)

Esempio n. 1

Mostra file

File: SelfAttention.cs Progetto: awesomedotnetcore/Seq2SeqSharp

        /// <summary>
        /// Scaled multi-heads attention component with skip connectioned feed forward layers
        /// </summary>
        /// <param name="input">The input tensor</param>
        /// <param name="g">The instance of computing graph</param>
        /// <returns></returns>
        public IWeightTensor Perform(IWeightTensor input, int batchSize, IComputeGraph graph)
        {
            using (IComputeGraph g = graph.CreateSubGraph(m_name))
            {
                int           seqLen = input.Rows / batchSize;
                IWeightTensor nInput = layerNorm1.Norm(input, g);

                //Input projections
                IWeightTensor allQ = g.View(g.Affine(nInput, Q, Qb), batchSize, seqLen, m_multiHeadNum, m_d);
                IWeightTensor allK = g.View(g.Affine(nInput, K, Kb), batchSize, seqLen, m_multiHeadNum, m_d);
                IWeightTensor allV = g.View(g.Affine(nInput, V, Vb), batchSize, seqLen, m_multiHeadNum, m_d);

                //Multi-head attentions
                IWeightTensor Qs = g.View(g.Permute(allQ, 2, 0, 1, 3), m_multiHeadNum * batchSize, seqLen, m_d);
                IWeightTensor Ks = g.View(g.Permute(allK, 2, 0, 3, 1), m_multiHeadNum * batchSize, m_d, seqLen);
                IWeightTensor Vs = g.View(g.Permute(allV, 2, 0, 1, 3), m_multiHeadNum * batchSize, seqLen, m_d);

                // Scaled softmax
                float         scale = 1.0f / (float)Math.Sqrt(m_d);
                IWeightTensor attn  = g.MulBatch(Qs, Ks, m_multiHeadNum * batchSize, scale);
                IWeightTensor attn2 = g.View(attn, m_multiHeadNum * batchSize * seqLen, seqLen);

                IWeightTensor softmax  = g.Softmax(attn2, inPlace: true);
                IWeightTensor softmax2 = g.View(softmax, m_multiHeadNum * batchSize, seqLen, seqLen);
                IWeightTensor o        = g.View(g.MulBatch(softmax2, Vs, m_multiHeadNum * batchSize), m_multiHeadNum, batchSize, seqLen, m_d);
                IWeightTensor W        = g.View(g.Permute(o, 1, 2, 0, 3), batchSize * seqLen, m_multiHeadNum * m_d);

                // Output projection
                IWeightTensor finalAttResults = g.Dropout(g.Affine(W, W0, b0), batchSize, m_dropoutRatio, inPlace: true);

                //Skip connection and layer normaliztion
                IWeightTensor normAddedAttResult = layerNorm2.AddNorm(finalAttResults, input, g);

                //Feed forward
                IWeightTensor ffnResult     = feedForwardLayer1.Process(normAddedAttResult, batchSize, g);
                IWeightTensor reluFFNResult = g.Relu(ffnResult);
                IWeightTensor ffn2Result    = feedForwardLayer2.Process(reluFFNResult, batchSize, g);

                //Skip connection and layer normaliztion
                IWeightTensor addFFNResult = graph.Add(ffn2Result, normAddedAttResult);

                return(addFFNResult);
            }
        }