Beispiel #1
0
        /// <summary>
        /// Initializes the flat <c>parameters</c> buffer from the model: for every neural link of
        /// <paramref name="dnn"/> the link's weights (and, when <c>ParameterSetting.UpdateBias</c>
        /// is set, its biases) are added into <c>parameters</c> at consecutive offsets.
        /// Finally <c>parameters</c> is copied out from CUDA and <c>GradIdx</c> is reset to 0.
        /// </summary>
        /// <param name="dnn">Model whose <c>neurallinks</c> supply the weights and biases.</param>
        public override void Init(DNN dnn)
        {
            // Clear the destination first; the offset-add calls below then accumulate onto zeros.
            MathOperatorManager.GlobalInstance.Zero(parameters, Num);

            var links = dnn.neurallinks;
            int offset = 0;

            for (int linkIdx = 0; linkIdx < links.Count; linkIdx++)
            {
                var link = links[linkIdx];
                link.weight.CopyOutFromCuda();

                // Number of weight entries for this link: inputs * window size * outputs.
                int weightCount = link.Neural_In.Number * link.N_Winsize * link.Neural_Out.Number;
                MathOperatorManager.GlobalInstance.Matrix_Add_OFFSET(parameters, offset, link.weight, 0, weightCount, 1.0f);
                offset += weightCount;

                if (!ParameterSetting.UpdateBias)
                {
                    continue;
                }

                // Biases follow the link's weights directly: one entry per output unit.
                int biasCount = link.Neural_Out.Number;
                MathOperatorManager.GlobalInstance.Matrix_Add_OFFSET(parameters, offset, link.bias, 0, biasCount, 1.0f);
                offset += biasCount;
            }

            parameters.CopyOutFromCuda();
            GradIdx = 0;
        }
Beispiel #2
0
        /// <summary>
        /// Processes one batch with the NCE objective: forward pass and negative sampling, similarity and
        /// alpha computation, optional loss reporting, and (when requested) back-propagation followed by
        /// a weight update of both the query and the doc model.
        /// </summary>
        /// <param name="query_batch">Query-side batch; its <c>batchsize</c> is the number of samples processed.</param>
        /// <param name="doc_batch">Doc-side batch paired with <paramref name="query_batch"/>.</param>
        /// <param name="train_update">
        /// When <c>true</c>, error derivatives are computed, back-propagated and both models' weights are updated.
        /// </param>
        /// <param name="srNCEProbDist">
        /// Optional reader supplying one log-probability per line (at most <c>ParameterSetting.BATCH_SIZE</c>
        /// lines are consumed per call). Values are parsed with the invariant culture and capped at the
        /// <c>maxlogpd</c> value from <c>ParameterSetting.reserved_settings</c> (0 when not configured).
        /// When <c>null</c>, every entry is set to <c>-log(LearningParameters.total_doc_num)</c>.
        /// </param>
        /// <returns>
        /// The loss summed over the batch when <c>ParameterSetting.LOSS_REPORT == 1</c>; otherwise 0.
        /// </returns>
        /// <exception cref="Exception">A per-sample loss evaluated to NaN or infinity.</exception>
        /// <exception cref="FormatException">A setting value or input line is not a valid number.</exception>
        public unsafe float feedstream_batch(BatchSample_Input query_batch, BatchSample_Input doc_batch, bool train_update, StreamReader srNCEProbDist)
        {
            // Forward (query, doc, negdoc) streaming.
            Forward_CalSimilarityScore(query_batch, doc_batch);
            Negative_Sampling(query_batch.batchsize);

            MathOperatorManager.GlobalInstance.Cosine_Similarity_EX_Full(dnn_model_query.neurallayers.Last().Output,
                                                                         dnn_model_doc.neurallayers.Last().Output, GPU_negative_index_Array,
                                                                         alphaCudaPiece, ParameterSetting.NTRIAL, ParameterSetting.BATCH_SIZE,
                                                                         query_batch.batchsize, dnn_model_query.OutputLayerSize, ParameterSetting.DSSMEpsilon);

            // Upper bound applied to the log-probabilities read from srNCEProbDist.
            // Configured as "..._maxlogpd_<value>_..." inside reserved_settings; defaults to 0.
            float maxlogpD = 0;

            if (ParameterSetting.reserved_settings.Contains("_maxlogpd_"))
            {
                string[] spl = ParameterSetting.reserved_settings.Split('_');
                for (int i = 0; i < spl.Length; i++)
                {
                    if (spl[i] == "maxlogpd" && i < spl.Length - 1)
                    {
                        // Settings are machine-readable text: parse independently of the thread culture.
                        maxlogpD = float.Parse(spl[i + 1], System.Globalization.CultureInfo.InvariantCulture);
                        break;
                    }
                }
            }

            if (srNCEProbDist != null)
            {
                // NOTE(review): if the reader runs out before BATCH_SIZE lines were read, the remaining
                // doc_dist entries keep whatever they held before this call - confirm that is intended.
                for (int i = 0; i < ParameterSetting.BATCH_SIZE; i++)
                {
                    string line = srNCEProbDist.ReadLine();
                    if (line == null)
                    {
                        break;
                    }

                    // Data file values use '.' as decimal separator: parse independently of the thread culture.
                    float logprob = float.Parse(line.Trim(), System.Globalization.CultureInfo.InvariantCulture);
                    if (logprob > maxlogpD)
                    {
                        logprob = maxlogpD;
                    }
                    doc_dist[i] = logprob;
                }
            }
            else
            {
                // No distribution supplied: use the same value -log(total_doc_num) for every entry.
                for (int i = 0; i < ParameterSetting.BATCH_SIZE; i++)
                {
                    doc_dist[i] = (float)(-Math.Log(LearningParameters.total_doc_num));
                }
            }

            distCudaPiece.CopyIntoCuda(ParameterSetting.BATCH_SIZE); //this sets D+

            MathOperatorManager.GlobalInstance.FillOut_Dist_NCE_Full(distCudaPiece, GPU_negative_index_Array, ParameterSetting.NTRIAL, ParameterSetting.BATCH_SIZE, doc_batch.batchsize);
            MathOperatorManager.GlobalInstance.Calculate_Alpha_NCE(alphaCudaPiece, distCudaPiece, ParameterSetting.NTRIAL + 1, ParameterSetting.BATCH_SIZE, query_batch.batchsize, ParameterSetting.PARM_GAMMA);

            float error = 0;

            if (ParameterSetting.LOSS_REPORT == 1)
            {
                alphaCudaPiece.CopyOutFromCuda();
                for (int i = 0; i < query_batch.batchsize; i++)
                {
                    // Entry i is the first term; entries nt * BATCH_SIZE + i (nt = 1..NTRIAL) are the remaining terms.
                    // Math.Max(float.Epsilon, ...) keeps the argument of the logarithm strictly positive.
                    float mlambda = -(float)Math.Log(Math.Max(float.Epsilon, 1 - first_alpha[i] / ParameterSetting.PARM_GAMMA));
                    for (int nt = 1; nt <= ParameterSetting.NTRIAL; nt++)
                    {
                        mlambda += -(float)Math.Log(Math.Max(float.Epsilon, 1 - first_alpha[nt * ParameterSetting.BATCH_SIZE + i] / ParameterSetting.PARM_GAMMA));
                    }

                    if (float.IsNaN(mlambda))
                    {
                        throw new Exception("Error! NaN.");
                    }
                    if (float.IsInfinity(mlambda))
                    {
                        throw new Exception("Error! IsInfinity.");
                    }
                    error += mlambda;
                }
            }

            if (train_update)
            {
                Negative_Sampling_Transpose(query_batch.batchsize);

                // Calculate the error derivatives on the top layer outputs.
                calculate_deltaQD_TOP(Pos_QD_Pair_TOP, query_batch.batchsize);

                // Only support GPU version now.
                calculate_deltaQD_TOPEX_Full(Neg_QD_Pair_TOP, query_batch.batchsize);

                // Query derivative merge.
                MathOperatorManager.GlobalInstance.Matrix_WeightAdd(dnn_model_query.neurallayers.Last().ErrorDeriv, Pos_QD_Pair_TOP.cuda_layer_Deriv_Q,
                                                                    query_batch.batchsize, dnn_model_query.OutputLayerSize, alphaCudaPiece, 0, 0);

                MathOperatorManager.GlobalInstance.Matrix_WeightAdd_Full(dnn_model_query.neurallayers.Last().ErrorDeriv, Neg_QD_Pair_TOP.cuda_layer_Deriv_Q,
                                                                         ParameterSetting.NTRIAL, ParameterSetting.BATCH_SIZE,
                                                                         query_batch.batchsize, dnn_model_query.OutputLayerSize, alphaCudaPiece, ParameterSetting.BATCH_SIZE, -1);

                // Doc derivative merge.
                MathOperatorManager.GlobalInstance.Matrix_WeightAdd(dnn_model_doc.neurallayers.Last().ErrorDeriv, Pos_QD_Pair_TOP.cuda_layer_Deriv_D,
                                                                    doc_batch.batchsize, dnn_model_doc.OutputLayerSize, alphaCudaPiece, 0, 0);

                MathOperatorManager.GlobalInstance.Matrix_WeightAdd_EX_Full(dnn_model_doc.neurallayers.Last().ErrorDeriv, Neg_QD_Pair_TOP.cuda_layer_Deriv_D,
                                                                            GPU_Inver_negative_index_Array,
                                                                            GPU_Inver_negative_value_Array, ParameterSetting.NTRIAL, ParameterSetting.BATCH_SIZE, doc_batch.batchsize,
                                                                            dnn_model_doc.OutputLayerSize, alphaCudaPiece, ParameterSetting.BATCH_SIZE, -1);

                // Back-propagate through both models.
                dnn_model_query.backward_propagate_deriv(query_batch);
                dnn_model_doc.backward_propagate_deriv(doc_batch);

                // Update only after ALL back-propagation is done: the models' current weights take part in the
                // backprop computation, so updating earlier would corrupt it. This ordering should also
                // support shared models.
                // The learning rate is scaled by (actual batch size / configured BATCH_SIZE).
                dnn_model_query.update_weight(LearningParameters.momentum, LearningParameters.learning_rate * query_batch.batchsize / ParameterSetting.BATCH_SIZE);
                dnn_model_doc.update_weight(LearningParameters.momentum, LearningParameters.learning_rate * query_batch.batchsize / ParameterSetting.BATCH_SIZE);
            }

            return error;
        }
Beispiel #3
0
 /// <summary>
 /// Invokes <c>CopyOutFromCuda()</c> on <c>weight</c> and then on <c>bias</c>
 /// (presumably transferring both buffers from the device to host memory).
 /// </summary>
 public unsafe void CopyOutFromCuda()
 {
     // Weights first, then biases.
     weight.CopyOutFromCuda();
     bias.CopyOutFromCuda();
 }