Example No. 1
 /// <summary>
 /// Merge the given <c>Cost</c> data with the data in this instance.
 /// </summary>
 /// <param name="otherCost">Partial cost data (e.g. computed by another worker thread) to merge into this instance.</param>
 public virtual void Merge(Classifier.Cost otherCost)
 {
     this.cost           += otherCost.GetCost();
     this.percentCorrect += otherCost.GetPercentCorrect();
     ArrayMath.AddInPlace(this.gradW1, otherCost.GetGradW1());
     ArrayMath.PairwiseAddInPlace(this.gradb1, otherCost.GetGradb1());
     ArrayMath.AddInPlace(this.gradW2, otherCost.GetGradW2());
     ArrayMath.AddInPlace(this.gradE, otherCost.GetGradE());
 }
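
Merge accumulates the scalar statistics and adds the other instance's gradient arrays element-wise into this one. Below is a minimal, self-contained sketch of that accumulation pattern; the AddInPlace helper here is a hypothetical stand-in written for illustration, not the library's ArrayMath.AddInPlace.

using System;

// Illustration only: element-wise, in-place gradient accumulation of the kind
// Merge performs via ArrayMath.AddInPlace / PairwiseAddInPlace.
public static class MergeSketch
{
    // Hypothetical stand-in for ArrayMath.AddInPlace on jagged matrices.
    private static void AddInPlace(double[][] target, double[][] other)
    {
        for (int i = 0; i < target.Length; i++)
        {
            for (int j = 0; j < target[i].Length; j++)
            {
                target[i][j] += other[i][j];
            }
        }
    }

    public static void Main()
    {
        double[][] gradA = { new[] { 1.0, 2.0 }, new[] { 3.0, 4.0 } };
        double[][] gradB = { new[] { 0.5, 0.5 }, new[] { 0.5, 0.5 } };
        AddInPlace(gradA, gradB);          // gradA now holds the summed gradients
        Console.WriteLine(gradA[1][1]);    // 4.5
    }
}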
Example No. 2
        /// <summary>
        /// Determine the total cost on the dataset associated with this
        /// classifier using the current learned parameters.
        /// </summary>
        /// <remarks>
        /// The cost is evaluated over a randomly sampled mini-batch and is used for
        /// mini-batch adaptive gradient (AdaGrad) training; see
        /// <see cref="TakeAdaGradientStep(Cost, double, double)"/>.
        /// This method launches multiple threads, each of which evaluates the
        /// training cost on a partition of the mini-batch.
        /// </remarks>
        /// <param name="batchSize">Number of training examples to sample for the mini-batch</param>
        /// <param name="regParameter">Regularization parameter (lambda)</param>
        /// <param name="dropOutProb">
        /// Drop-out probability. Hidden-layer units in the neural network will be
        /// randomly turned off with this probability while training a particular example.
        /// </param>
        /// <returns>
        /// A <see cref="Cost"/> object which describes the total cost of the given
        /// weights, and includes gradients to be used for further training
        /// </returns>
        public virtual Classifier.Cost ComputeCostFunction(int batchSize, double regParameter, double dropOutProb)
        {
            ValidateTraining();
            IList<Example> examples = Edu.Stanford.Nlp.Parser.Nndep.Util.GetRandomSubList(dataset.examples, batchSize);
            // Redo precomputations for only those features which are triggered
            // by examples in this mini-batch.
            ICollection<int> toPreCompute = GetToPreCompute(examples);

            PreCompute(toPreCompute);
            // Set up parameters for feedforward
            Classifier.FeedforwardParams @params = new Classifier.FeedforwardParams(batchSize, dropOutProb);
            // Zero out saved-embedding gradients
            gradSaved = new double[][] {  };
            int numChunks = config.trainingThreads;
            IList<IList<Example>> chunks = CollectionUtils.PartitionIntoFolds(examples, numChunks);

            // Submit chunks for processing on separate threads
            foreach (ICollection<Example> chunk in chunks)
            {
                jobHandler.Put(new Pair<ICollection<Example>, Classifier.FeedforwardParams>(chunk, @params));
            }
            }
            jobHandler.Join(false);
            // Join costs from each chunk
            Classifier.Cost cost = null;
            while (jobHandler.Peek())
            {
                Classifier.Cost otherCost = jobHandler.Poll();
                if (cost == null)
                {
                    cost = otherCost;
                }
                else
                {
                    cost.Merge(otherCost);
                }
            }
            if (cost == null)
            {
                return null;
            }
            // Backpropagate gradients on saved pre-computed values to actual
            // embeddings
            cost.BackpropSaved(toPreCompute);
            cost.AddL2Regularization(regParameter);
            return cost;
        }
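
The fan-out/fan-in shape of ComputeCostFunction (partition the mini-batch, compute a partial cost per chunk in parallel, then merge the results) can be sketched in isolation as follows. This is a toy illustration under assumed names and a toy squared-value "cost"; it is not the library's jobHandler or CollectionUtils.PartitionIntoFolds.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

// Toy sketch of the partition -> parallel partial cost -> merge pattern used
// by ComputeCostFunction. All names and the per-example cost are assumptions.
public static class MiniBatchCostSketch
{
    public static void Main()
    {
        List<double> miniBatch = Enumerable.Range(1, 1000).Select(x => (double)x).ToList();
        int numChunks = 4;  // plays the role of config.trainingThreads

        // Partition into roughly equal folds (stand-in for CollectionUtils.PartitionIntoFolds).
        List<List<double>> chunks = Enumerable.Range(0, numChunks)
            .Select(k => miniBatch.Where((x, i) => i % numChunks == k).ToList())
            .ToList();

        // One task per chunk, mirroring the worker threads fed through jobHandler.
        Task<double>[] partialCosts = chunks
            .Select(chunk => Task.Run(() => chunk.Sum(x => x * x)))
            .ToArray();
        Task.WaitAll(partialCosts);

        // "Merge" step: combine the partial results, as Cost.Merge does for the
        // real cost, accuracy and gradient values.
        double totalCost = partialCosts.Sum(t => t.Result);
        Console.WriteLine(totalCost);
    }
}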
Example No. 3
 /// <summary>
 /// Update classifier weights using the given training cost
 /// information.
 /// </summary>
 /// <param name="cost">
 /// Cost information as returned by
 /// <see cref="ComputeCostFunction(int, double, double)"/>
 /// .
 /// </param>
 /// <param name="adaAlpha">Global AdaGrad learning rate</param>
 /// <param name="adaEps">
 /// Epsilon value for numerical stability in AdaGrad's division
 /// </param>
 public virtual void TakeAdaGradientStep(Classifier.Cost cost, double adaAlpha, double adaEps)
 {
     ValidateTraining();
     double[][] gradW1 = cost.GetGradW1();
     double[][] gradW2 = cost.GetGradW2();
     double[][] gradE  = cost.GetGradE();
     double[]   gradb1 = cost.GetGradb1();
     // AdaGrad: for every parameter, accumulate the squared gradient and scale
     // the global learning rate by 1 / sqrt(accumulated squared gradient + eps).
     for (int i = 0; i < W1.Length; ++i)
     {
         for (int j = 0; j < W1[i].Length; ++j)
         {
             eg2W1[i][j] += gradW1[i][j] * gradW1[i][j];
             W1[i][j]    -= adaAlpha * gradW1[i][j] / System.Math.Sqrt(eg2W1[i][j] + adaEps);
         }
     }
     for (int i = 0; i < b1.Length; ++i)
     {
         eg2b1[i] += gradb1[i] * gradb1[i];
         b1[i]    -= adaAlpha * gradb1[i] / System.Math.Sqrt(eg2b1[i] + adaEps);
     }
     for (int i = 0; i < W2.Length; ++i)
     {
         for (int j = 0; j < W2[i].Length; ++j)
         {
             eg2W2[i][j] += gradW2[i][j] * gradW2[i][j];
             W2[i][j]    -= adaAlpha * gradW2[i][j] / System.Math.Sqrt(eg2W2[i][j] + adaEps);
         }
     }
     if (config.doWordEmbeddingGradUpdate)
     {
         for (int i = 0; i < E.Length; ++i)
         {
             for (int j = 0; j < E[i].Length; ++j)
             {
                 eg2E[i][j] += gradE[i][j] * gradE[i][j];
                 E[i][j]    -= adaAlpha * gradE[i][j] / System.Math.Sqrt(eg2E[i][j] + adaEps);
             }
         }
     }
 }
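
The per-parameter update above is plain AdaGrad: keep a running sum of squared gradients for each weight and divide the global learning rate by the square root of that sum (plus epsilon). A self-contained sketch of the same rule on a single toy weight vector follows; the hyperparameter values and the fixed gradient are assumptions for illustration.

using System;

// AdaGrad sketch on a toy weight vector; mirrors the eg2 / weight updates above.
public static class AdaGradSketch
{
    public static void Main()
    {
        double adaAlpha = 0.01;  // global learning rate (assumed value)
        double adaEps   = 1e-6;  // numerical-stability epsilon (assumed value)

        double[] w   = { 0.1, -0.2, 0.3 };    // toy parameters
        double[] eg2 = new double[w.Length];  // running sum of squared gradients

        for (int step = 0; step < 100; ++step)
        {
            double[] grad = { 0.5, -0.1, 0.05 };  // toy (fixed) gradient
            for (int i = 0; i < w.Length; ++i)
            {
                eg2[i] += grad[i] * grad[i];
                w[i]   -= adaAlpha * grad[i] / Math.Sqrt(eg2[i] + adaEps);
            }
        }
        // Parameters with consistently larger gradients take proportionally
        // smaller effective steps as their accumulators grow.
        Console.WriteLine(string.Join(", ", w));
    }
}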