Example 1
        //public IWeightTensor Perform(IWeightTensor inputQ, IWeightTensor keyMask, int batchSize, IComputeGraph graph)
        //{
        //    if (m_sharedQKV == false)
        //    {
        //        throw new ArgumentException($"Layer '{m_name}' is not in shared QKV mode; please call the Perform overload that takes three separate input tensors.");
        //    }

        //    using (IComputeGraph g = graph.CreateSubGraph($"{m_name}_MultiHeadAttention_SharedQKV"))
        //    {
        //        int seqLenQ = inputQ.Rows / batchSize;
        //        IWeightTensor inputQNorm = layerNormQ.Norm(inputQ, g);

        //        //Input projections
        //        float scale = 1.0f; // / (float)(m_inputDim);
        //        IWeightTensor mulQ, mulK, mulV;

        //        using (IWeightTensor inputQNormView = g.View(inputQNorm, dims: new long[] { 1, inputQ.Rows, inputQ.Columns }))
        //        {
        //            using (IWeightTensor inputQNormViewExp = g.Expand(inputQNormView, dims: new long[] { 3, inputQ.Rows, inputQ.Columns }))
        //            {
        //                using (IWeightTensor mulQKV = g.MulBatch(inputQNormViewExp, QKV, null, 3, scale))
        //                {
        //                    mulQ = g.Select(mulQKV, 0, 0);
        //                    mulK = g.Select(mulQKV, 0, 1);
        //                    mulV = g.Select(mulQKV, 0, 2);
        //                }
        //            }
        //        }

        //        IWeightTensor allQ = g.View(mulQ, dims: new long[] { batchSize, seqLenQ, m_multiHeadNum, m_d });
        //        IWeightTensor allK = g.View(mulK, dims: new long[] { batchSize, seqLenQ, m_multiHeadNum, m_d });
        //        IWeightTensor allV = g.View(mulV, dims: new long[] { batchSize, seqLenQ, m_multiHeadNum, m_d });

        //        //Multi-head attentions
        //        IWeightTensor Qs = g.View(g.Permute(allQ, 2, 0, 1, 3), dims: new long[] { m_multiHeadNum * batchSize, seqLenQ, m_d });
        //        IWeightTensor Ks = g.View(g.Permute(allK, 2, 0, 3, 1), dims: new long[] { m_multiHeadNum * batchSize, m_d, seqLenQ });
        //        IWeightTensor Vs = g.View(g.Permute(allV, 2, 0, 1, 3), dims: new long[] { m_multiHeadNum * batchSize, seqLenQ, m_d });

        //        // Scaled softmax
        //        scale = 1.0f / (float)(Math.Sqrt(m_d));
        //        IWeightTensor attn = g.MulBatch(Qs, Ks, null, m_multiHeadNum * batchSize, scale);
        //        IWeightTensor softmax = g.Softmax(attn, keyMask, inPlace: true);
        //        IWeightTensor o = g.View(g.MulBatch(softmax, Vs, null, m_multiHeadNum * batchSize), dims: new long[] { m_multiHeadNum, batchSize, seqLenQ, m_d });

        //        IWeightTensor W = g.View(g.Permute(o, 1, 2, 0, 3), dims: new long[] { batchSize * seqLenQ, m_multiHeadNum * m_d });

        //        // Output projection
        //        IWeightTensor finalAttResults = g.Dropout(g.Affine(W, W0, b0), batchSize, m_dropoutRatio, inPlace: true);

        //        return graph.Add(finalAttResults, inputQ);
        //    }
        //}
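
For reference, the core computation in the commented-out Perform above is scaled dot-product attention, softmax(Q · K^T / sqrt(d)) · V, evaluated once per head. The block below is a minimal, framework-free sketch of that step for a single head; it works on plain float arrays rather than IWeightTensor, and all names in it (AttentionSketch, Attend) are illustrative assumptions, not part of the library.

        using System;

        static class AttentionSketch
        {
            // Q: [n, d] queries, K: [m, d] keys, V: [m, d] values -> [n, d] output.
            public static float[,] Attend(float[,] Q, float[,] K, float[,] V)
            {
                int n = Q.GetLength(0), d = Q.GetLength(1), m = K.GetLength(0);
                float scale = 1.0f / (float)Math.Sqrt(d);   // same 1/sqrt(m_d) scaling as above
                var output = new float[n, d];
                var scores = new float[m];

                for (int i = 0; i < n; i++)
                {
                    // Scaled dot product between query i and every key.
                    for (int j = 0; j < m; j++)
                    {
                        float dot = 0f;
                        for (int k = 0; k < d; k++)
                        {
                            dot += Q[i, k] * K[j, k];
                        }
                        scores[j] = dot * scale;
                    }

                    // Numerically stable softmax over the scores.
                    float max = float.NegativeInfinity;
                    for (int j = 0; j < m; j++)
                    {
                        max = Math.Max(max, scores[j]);
                    }
                    float sum = 0f;
                    for (int j = 0; j < m; j++)
                    {
                        scores[j] = (float)Math.Exp(scores[j] - max);
                        sum += scores[j];
                    }

                    // Weighted sum of the value rows.
                    for (int j = 0; j < m; j++)
                    {
                        float w = scores[j] / sum;
                        for (int k = 0; k < d; k++)
                        {
                            output[i, k] += w * V[j, k];
                        }
                    }
                }

                return output;
            }
        }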


        // Returns every trainable tensor owned by this attention layer: the
        // output projection (W0, b0), the Q/K/V projections and their biases,
        // and the query layer-norm parameters.
        public virtual List<IWeightTensor> getParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>
            {
                W0,
                b0
            };

            //if (m_sharedQKV)
            //{
            //    response.Add(QKV);
            //}
            //else
            //{
            response.Add(Q);
            response.Add(Qb);

            response.Add(K);
            response.Add(Kb);

            response.Add(V);
            response.Add(Vb);
            //}

            response.AddRange(layerNormQ.getParams());

            return response;
        }
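
A list returned by getParams is typically handed to an optimizer, which should visit each trainable tensor exactly once. The sketch below shows one hypothetical way to gather parameters from several layers, de-duplicating by reference so a tensor shared between layers (for example a shared QKV projection) is not updated twice; ParameterCollector and its de-duplication policy are assumptions for illustration, not part of the library.

        using System.Collections.Generic;

        static class ParameterCollector
        {
            // Flattens the parameters of several attention layers into one list.
            // Assumes IWeightTensor uses default reference equality, so the
            // HashSet recognizes the *same* tensor object exposed by two layers.
            public static List<IWeightTensor> Collect(IEnumerable<MultiHeadAttention> layers)
            {
                var seen = new HashSet<IWeightTensor>();
                var all = new List<IWeightTensor>();

                foreach (var layer in layers)
                {
                    foreach (var p in layer.getParams())
                    {
                        if (seen.Add(p))   // Add returns false for duplicates
                        {
                            all.Add(p);
                        }
                    }
                }

                return all;
            }
        }
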
Example 2
        public virtual List<IWeightTensor> getParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>();

            response.AddRange(layerNorm2.getParams());
            response.AddRange(feedForwardLayer1.GetParams());
            response.AddRange(feedForwardLayer2.GetParams());

            return response;
        }
Example 3
        public List<IWeightTensor> getParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>();

            response.Add(m_Wxhc);
            response.Add(m_b);

            response.AddRange(m_layerNorm1.getParams());
            response.AddRange(m_layerNorm2.getParams());

            return response;
        }

        public List<IWeightMatrix> getParams()
        {
            List<IWeightMatrix> response = new List<IWeightMatrix>();

            response.Add(Wxhc);
            response.Add(b);

            response.AddRange(layerNorm1.getParams());
            response.AddRange(layerNorm2.getParams());

            return response;
        }
Example 5
        public virtual List<IWeightTensor> getParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>
            {
                m_Wxh,
                m_b
            };

            response.AddRange(m_layerNorm1.getParams());
            response.AddRange(m_layerNorm2.getParams());

            return response;
        }
Example 6
        // Aggregates the parameters of every encoder sub-layer: each
        // multi-head attention block, each position-wise feed-forward block,
        // and the final layer norm.
        public List<IWeightTensor> GetParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>();

            foreach (MultiHeadAttention item in m_encoders)
            {
                response.AddRange(item.getParams());
            }

            foreach (var item in m_posFFNs)
            {
                response.AddRange(item.getParams());
            }

            response.AddRange(layerNorm.getParams());

            return response;
        }

        public virtual List<IWeightTensor> getParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>
            {
                W0,
                b0
            };

            response.Add(Q);
            response.Add(Qb);

            response.Add(K);
            response.Add(Kb);

            response.Add(V);
            response.Add(Vb);

            response.AddRange(layerNormQ.getParams());

            return response;
        }
Example 8
        public virtual List<IWeightTensor> getParams()
        {
            List<IWeightTensor> response = new List<IWeightTensor>
            {
                Q,
                Qb,

                K,
                Kb,

                V,
                Vb,

                W0,
                b0
            };

            response.AddRange(layerNorm1.getParams());

            return response;
        }