/// <summary>
/// Gets the per-layer loss list by back-propagating from the output layer.
/// </summary>
/// <param name="data"></param>
/// <returns>The loss tensor of each layer, indexed from the input layer.</returns>
public List<Tensor> GetLossList(ProcessData data)
{
    Tensor[] lossList = new Tensor[this.Count];
    List<Tensor> pushes = this.GetAllOutputs(data.Data);
    Tensor loss;

    // The softmax layer gets special treatment here; how should this be unified?
    if (layers[layerCount - 1].Sign == LayerSign.SoftMaxLayer)
    {
        loss = data.Label - pushes[this.layerCount - 1];
    }
    else
    {
        loss = LossFunc.Loss(data.Label, pushes[this.layerCount - 1]);
    }

    // Seed the backward pass with an identity matrix so the output layer's
    // loss passes through unchanged on the first iteration.
    Tensor nextWeight = TensorBuilder.FromMatrix(Matrix<double>.Build.DenseIdentity(data.Label[0, 0].RowCount));
    LayerSign nextType = LayerSign.Nothing;

    // Walk backwards through the layers; layer 0 is handled separately
    // because its input is the raw sample rather than a previous layer's output.
    for (int i = this.layerCount - 1; i >= 1; i--)
    {
        loss = layers[i].ComputeLoss(loss, pushes[i - 1], nextWeight, nextType);
        lossList[i] = loss;
        nextWeight = layers[i].Weight;
        nextType = layers[i].Sign;
    }

    loss = layers[0].ComputeLoss(loss, data.Data, nextWeight, nextType);
    lossList[0] = loss;

    return lossList.ToList();
}
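// The comment above asks how to unify the softmax special case. One option,
// sketched here, is to hoist the branch into a shared helper; the branch
// exists because softmax combined with cross-entropy has the closed-form
// output error (label - output), so no LossFunc call is needed for it.
// "OutputError" is a hypothetical name, not part of the original code;
// GetGradientList below could call it too.
private Tensor OutputError(ProcessData data, List<Tensor> pushes)
{
    Tensor output = pushes[this.layerCount - 1];
    return layers[layerCount - 1].Sign == LayerSign.SoftMaxLayer
        ? data.Label - output
        : LossFunc.Loss(data.Label, output);
}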
/// <summary>
/// Gets the gradient list; to save recomputation it returns the loss list as well.
/// </summary>
/// <param name="data"></param>
/// <returns>Item1 is the gradient list, Item2 is the loss list.</returns>
public Tuple<Tensor[], Tensor[]> GetGradientList(ProcessData data)
{
    Tensor[] gradientList = new Tensor[this.Count];
    Tensor[] lossList = new Tensor[this.Count];
    List<Tensor> pushes = this.GetAllOutputs(data.Data);
    Tensor loss;

    // The softmax layer gets special treatment here; how should this be unified?
    if (layers[layerCount - 1].Sign == LayerSign.SoftMaxLayer) // would reflection be too expensive here?
    {
        loss = data.Label - pushes[this.layerCount - 1];
    }
    else
    {
        loss = LossFunc.Loss(data.Label, pushes[this.layerCount - 1]);
    }

    // Seed the backward pass with an identity matrix, as in GetLossList.
    Tensor nextWeight = TensorBuilder.FromMatrix(Matrix<double>.Build.DenseIdentity(data.Label[0, 0].RowCount));
    LayerSign nextType = LayerSign.Nothing;

    // Walk backwards through the layers; layer 0 is handled separately
    // because its input is the raw sample rather than a previous layer's output.
    for (int i = this.layerCount - 1; i >= 1; i--)
    {
        (gradientList[i], loss) = layers[i].GetGradient(loss, pushes[i - 1], nextWeight, nextType);
        lossList[i] = loss;
        nextWeight = layers[i].Weight;
        nextType = layers[i].Sign;
    }

    (gradientList[0], lossList[0]) = layers[0].GetGradient(loss, data.Data, nextWeight, nextType);

    return new Tuple<Tensor[], Tensor[]>(gradientList, lossList);
}
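// Hypothetical usage sketch: one plain gradient-descent step driven by the
// tuple returned above. Everything here beyond GetGradientList itself is an
// assumption: "learningRate", a writable layers[i].Weight, and Tensor
// supporting double * Tensor (operator- is confirmed by the code above,
// operator* is not).
public void TrainStep(ProcessData data, double learningRate)
{
    Tensor[] gradients = this.GetGradientList(data).Item1;
    for (int i = 0; i < this.layerCount; i++)
    {
        layers[i].Weight -= learningRate * gradients[i];
    }
}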
/// <summary>
/// Computes the gradient (also returns the loss, to save recomputation downstream).
/// </summary>
/// <param name="nextLoss"></param>
/// <param name="preOutput"></param>
/// <param name="nextWeight"></param>
/// <param name="nextType"></param>
/// <returns>Item1 is the gradient, Item2 is the loss.</returns>
public abstract Tuple<Tensor, Tensor> GetGradient(Tensor nextLoss, Tensor preOutput, Tensor nextWeight, LayerSign nextType);
/// <summary>
/// Computes this layer's back-propagated loss from the next layer's loss.
/// </summary>
public abstract Tensor ComputeLoss(Tensor nextLoss, Tensor preOutput, Tensor nextWeight, LayerSign nextType);
public Layer(NoiseType _type, LayerSign _sign)
{
    type = _type;
    sign = _sign;
}
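// Hypothetical concrete layer: a fully connected layer with a sigmoid
// activation, sketched only to illustrate the contract of the two abstract
// members. TensorBuilder.FromMatrix, the Tensor[0, 0] indexer, the Weight
// property, and the MathNet.Numerics matrix API all appear in the code
// above; the class name, the bias-free weights, and ignoring nextType
// (the real layers presumably branch on it for the softmax case) are
// assumptions made for illustration.
public class DemoFullConnectLayer : Layer
{
    public DemoFullConnectLayer(NoiseType _type, LayerSign _sign)
        : base(_type, _sign) { }

    private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-x));

    // Standard backprop recurrence: delta = (W_next^T * delta_next) .* f'(W * input).
    // With the identity nextWeight seeded by the callers above, the output
    // layer's error passes through unchanged.
    public override Tensor ComputeLoss(Tensor nextLoss, Tensor preOutput, Tensor nextWeight, LayerSign nextType)
    {
        Matrix<double> z = this.Weight[0, 0] * preOutput[0, 0];
        Matrix<double> fPrime = z.Map(v => Sigmoid(v) * (1.0 - Sigmoid(v)));
        Matrix<double> delta = (nextWeight[0, 0].Transpose() * nextLoss[0, 0]).PointwiseMultiply(fPrime);
        return TensorBuilder.FromMatrix(delta);
    }

    // Weight gradient dL/dW = delta * input^T; the loss is returned alongside
    // it so callers such as GetGradientList avoid recomputing it.
    public override Tuple<Tensor, Tensor> GetGradient(Tensor nextLoss, Tensor preOutput, Tensor nextWeight, LayerSign nextType)
    {
        Tensor loss = ComputeLoss(nextLoss, preOutput, nextWeight, nextType);
        Matrix<double> gradient = loss[0, 0] * preOutput[0, 0].Transpose();
        return new Tuple<Tensor, Tensor>(TensorBuilder.FromMatrix(gradient), loss);
    }
}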