/// <summary>
/// Runs the forward and backward pass of the neural classifier over one
/// mini-batch of training examples.
/// </summary>
/// <param name="input">Pair of (batch examples, feed-forward parameters such as batch size / dropout).</param>
/// <returns>
/// A <see cref="Classifier.Cost"/> holding the batch-averaged negative log-likelihood,
/// the fraction of correctly predicted examples, and the accumulated gradients
/// for W1, b1, W2 and the embedding matrix E.
/// </returns>
public virtual Classifier.Cost Process(Pair<ICollection<Example>, Classifier.FeedforwardParams> input)
{
    ICollection<Example> examples = input.First();
    Classifier.FeedforwardParams @params = input.Second();
    // We can't fix the seed used with ThreadLocalRandom
    // TODO: Is this a serious problem?
    ThreadLocalRandom random = ThreadLocalRandom.Current();

    // Gradient accumulators, shaped to match the corresponding parameters.
    // BUGFIX: `new double[n][]` creates a jagged array whose rows are all null;
    // the accumulation below (`gradW2[i][nodeIndex] += ...`) would throw a
    // NullReferenceException. Allocate every row up front.
    this.gradW1 = new double[this._enclosing.W1.Length][];
    for (int r = 0; r < this.gradW1.Length; ++r)
    {
        this.gradW1[r] = new double[this._enclosing.W1[r].Length];
    }
    this.gradb1 = new double[this._enclosing.b1.Length];
    this.gradW2 = new double[this._enclosing.W2.Length][];
    for (int r = 0; r < this.gradW2.Length; ++r)
    {
        this.gradW2[r] = new double[this._enclosing.W2[r].Length];
    }
    this.gradE = new double[this._enclosing.E.Length][];
    for (int r = 0; r < this.gradE.Length; ++r)
    {
        this.gradE[r] = new double[this._enclosing.E[r].Length];
    }

    double cost = 0.0;
    double correct = 0.0;
    foreach (Example ex in examples)
    {
        IList<int> feature = ex.GetFeature();
        IList<int> label = ex.GetLabel();
        double[] scores = new double[this._enclosing.numLabels];
        double[] hidden = new double[this._enclosing.config.hiddenSize];
        double[] hidden3 = new double[this._enclosing.config.hiddenSize];

        // Run dropout: randomly drop some hidden-layer units. `ls`
        // contains the indices of those units which are still active.
        // BUGFIX: the predicate was lost in conversion (`Filter(null)` would
        // throw and `random` went unused); a unit survives when a uniform draw
        // exceeds the dropout probability, matching the original implementation.
        // NOTE(review): assumes FeedforwardParams exposes GetDropOutProb() — confirm.
        IList<int> activeUnits = new List<int>();
        for (int n = 0; n < this._enclosing.config.hiddenSize; ++n)
        {
            if (random.NextDouble() > @params.GetDropOutProb())
            {
                activeUnits.Add(n);
            }
        }
        int[] ls = new int[activeUnits.Count];
        activeUnits.CopyTo(ls, 0);

        // --- Forward pass: input features -> hidden layer ---
        int offset = 0;
        for (int j = 0; j < this._enclosing.config.numTokens; ++j)
        {
            int tok = feature[j];
            int index = tok * this._enclosing.config.numTokens + j;
            if (this._enclosing.preMap.Contains(index))
            {
                // Unit activations for this input feature value have been
                // precomputed.
                int id = this._enclosing.preMap[index];
                // Only extract activations for those nodes which are still
                // activated (`ls`).
                foreach (int nodeIndex in ls)
                {
                    hidden[nodeIndex] += this._enclosing.saved[id][nodeIndex];
                }
            }
            else
            {
                foreach (int nodeIndex in ls)
                {
                    for (int k = 0; k < this._enclosing.config.embeddingSize; ++k)
                    {
                        hidden[nodeIndex] += this._enclosing.W1[nodeIndex][offset + k] * this._enclosing.E[tok][k];
                    }
                }
            }
            offset += this._enclosing.config.embeddingSize;
        }

        // Add bias term and apply the cube activation function.
        foreach (int nodeIndex in ls)
        {
            hidden[nodeIndex] += this._enclosing.b1[nodeIndex];
            hidden3[nodeIndex] = Math.Pow(hidden[nodeIndex], 3);
        }

        // --- Feed forward to softmax layer (no activation yet) ---
        int optLabel = -1;
        for (int i = 0; i < this._enclosing.numLabels; ++i)
        {
            if (label[i] >= 0)
            {
                // BUGFIX: originally indexed with `nodeIndex_1`, a variable
                // from an earlier foreach whose scope had already closed;
                // use the current loop's own index.
                foreach (int nodeIndex in ls)
                {
                    scores[i] += this._enclosing.W2[i][nodeIndex] * hidden3[nodeIndex];
                }
                if (optLabel < 0 || scores[i] > scores[optLabel])
                {
                    optLabel = i;
                }
            }
        }

        // Softmax, shifted by the max score for numerical stability.
        double sum1 = 0.0;
        double sum2 = 0.0;
        double maxScore = scores[optLabel];
        for (int i = 0; i < this._enclosing.numLabels; ++i)
        {
            if (label[i] >= 0)
            {
                scores[i] = Math.Exp(scores[i] - maxScore);
                if (label[i] == 1)
                {
                    sum1 += scores[i];
                }
                sum2 += scores[i];
            }
        }

        // Negative log-likelihood and accuracy, each averaged over the batch.
        cost += (Math.Log(sum2) - Math.Log(sum1)) / @params.GetBatchSize();
        if (label[optLabel] == 1)
        {
            correct += 1.0 / @params.GetBatchSize();
        }

        // --- Backward pass: softmax -> hidden-layer cube activations ---
        double[] gradHidden3 = new double[this._enclosing.config.hiddenSize];
        for (int i = 0; i < this._enclosing.numLabels; ++i)
        {
            if (label[i] >= 0)
            {
                double delta = -(label[i] - scores[i] / sum2) / @params.GetBatchSize();
                // BUGFIX: was indexed with the out-of-scope `nodeIndex_1`.
                foreach (int nodeIndex in ls)
                {
                    this.gradW2[i][nodeIndex] += delta * hidden3[nodeIndex];
                    gradHidden3[nodeIndex] += delta * this._enclosing.W2[i][nodeIndex];
                }
            }
        }

        // Back through the cube activation: d(h^3)/dh = 3 h^2.
        double[] gradHidden = new double[this._enclosing.config.hiddenSize];
        foreach (int nodeIndex in ls)
        {
            gradHidden[nodeIndex] = gradHidden3[nodeIndex] * 3 * hidden[nodeIndex] * hidden[nodeIndex];
            this.gradb1[nodeIndex] += gradHidden[nodeIndex];
        }

        // --- Backward pass: hidden layer -> W1 / embeddings / saved activations ---
        offset = 0;
        for (int j = 0; j < this._enclosing.config.numTokens; ++j)
        {
            int tok = feature[j];
            int index = tok * this._enclosing.config.numTokens + j;
            if (this._enclosing.preMap.Contains(index))
            {
                int id = this._enclosing.preMap[index];
                // BUGFIX: was indexed with the out-of-scope `nodeIndex_2`.
                foreach (int nodeIndex in ls)
                {
                    this._enclosing.gradSaved[id][nodeIndex] += gradHidden[nodeIndex];
                }
            }
            else
            {
                foreach (int nodeIndex in ls)
                {
                    for (int k = 0; k < this._enclosing.config.embeddingSize; ++k)
                    {
                        this.gradW1[nodeIndex][offset + k] += gradHidden[nodeIndex] * this._enclosing.E[tok][k];
                        this.gradE[tok][k] += gradHidden[nodeIndex] * this._enclosing.W1[nodeIndex][offset + k];
                    }
                }
            }
            offset += this._enclosing.config.embeddingSize;
        }
    }
    return new Classifier.Cost(this, cost, correct, this.gradW1, this.gradb1, this.gradW2, this.gradE);
}