Ejemplo n.º 1
0
        public virtual double[] ParamsToVector()
        {
            int totalSize = TotalParamSize();

            return(NeuralUtils.ParamsToVector(totalSize, binaryTransform.ValueIterator(), binaryClassification.ValueIterator(), SimpleTensor.IteratorSimpleMatrix(binaryTensors.ValueIterator()), unaryClassification.Values.GetEnumerator(), wordVectors.Values
                                              .GetEnumerator()));
        }
Ejemplo n.º 2
0
 public virtual void VectorToParams(double[] theta)
 {
     NeuralUtils.VectorToParams(theta, binaryTransform.ValueIterator(), binaryClassification.ValueIterator(), SimpleTensor.IteratorSimpleMatrix(binaryTensors.ValueIterator()), unaryClassification.Values.GetEnumerator(), wordVectors.Values.GetEnumerator
                                    ());
 }
        protected internal override void Calculate(double[] theta)
        {
            model.VectorToParams(theta);
            SentimentCostAndGradient.ModelDerivatives derivatives;
            if (model.op.trainOptions.nThreads == 1)
            {
                derivatives = ScoreDerivatives(trainingBatch);
            }
            else
            {
                // TODO: because some addition operations happen in different
                // orders now, this results in slightly different values, which
                // over time add up to significantly different models even when
                // given the same random seed.  Probably not a big deal.
                // To be more specific, for trees T1, T2, T3, ... Tn,
                // when using one thread, we sum the derivatives T1 + T2 ...
                // When using multiple threads, we first sum T1 + ... + Tk,
                // then sum Tk+1 + ... + T2k, etc, for split size k.
                // The splits are then summed in order.
                // This different sum order results in slightly different numbers.
                MulticoreWrapper <IList <Tree>, SentimentCostAndGradient.ModelDerivatives> wrapper = new MulticoreWrapper <IList <Tree>, SentimentCostAndGradient.ModelDerivatives>(model.op.trainOptions.nThreads, new SentimentCostAndGradient.ScoringProcessor(this
                                                                                                                                                                                                                                                                  ));
                // use wrapper.nThreads in case the number of threads was automatically changed
                foreach (IList <Tree> chunk in CollectionUtils.PartitionIntoFolds(trainingBatch, wrapper.NThreads()))
                {
                    wrapper.Put(chunk);
                }
                wrapper.Join();
                derivatives = new SentimentCostAndGradient.ModelDerivatives(model);
                while (wrapper.Peek())
                {
                    SentimentCostAndGradient.ModelDerivatives batchDerivatives = wrapper.Poll();
                    derivatives.Add(batchDerivatives);
                }
            }
            // scale the error by the number of sentences so that the
            // regularization isn't drowned out for large training batchs
            double scale = (1.0 / trainingBatch.Count);

            value      = derivatives.error * scale;
            value     += ScaleAndRegularize(derivatives.binaryTD, model.binaryTransform, scale, model.op.trainOptions.regTransformMatrix, false);
            value     += ScaleAndRegularize(derivatives.binaryCD, model.binaryClassification, scale, model.op.trainOptions.regClassification, true);
            value     += ScaleAndRegularizeTensor(derivatives.binaryTensorTD, model.binaryTensors, scale, model.op.trainOptions.regTransformTensor);
            value     += ScaleAndRegularize(derivatives.unaryCD, model.unaryClassification, scale, model.op.trainOptions.regClassification, false, true);
            value     += ScaleAndRegularize(derivatives.wordVectorD, model.wordVectors, scale, model.op.trainOptions.regWordVector, true, false);
            derivative = NeuralUtils.ParamsToVector(theta.Length, derivatives.binaryTD.ValueIterator(), derivatives.binaryCD.ValueIterator(), SimpleTensor.IteratorSimpleMatrix(derivatives.binaryTensorTD.ValueIterator()), derivatives.unaryCD.Values.GetEnumerator
                                                        (), derivatives.wordVectorD.Values.GetEnumerator());
        }