// Verifies the dimensions reported by a default-constructed tensor and by a tensor
// constructed with an explicit 2x3x4x5 shape, plus the CPU view of the latter.
public void Blob_Initialization()
{
    // Without a shape every dimension is reported as zero and OnCpu() is expected to throw.
    var emptyTensor = new Tensor();
    Assert.Equal(0, emptyTensor.Num);
    Assert.Equal(0, emptyTensor.Count);
    Assert.Equal(0, emptyTensor.Channels);
    Assert.Equal(0, emptyTensor.Height);
    Assert.Equal(0, emptyTensor.Width);
    Assert.Throws<InvalidOperationException>(() => emptyTensor.OnCpu());
    // Disabled checks kept from the original test:
    // Assert.Throws<InvalidOperationException>(() => emptyTensor.Data);
    // Assert.Throws<InvalidOperationException>(() => emptyTensor.Diff);

    // With an explicit shape the dimensions are echoed back and Count is their product.
    var shapedTensor = new Tensor(2, 3, 4, 5);
    Assert.Equal(2, shapedTensor.Num);
    Assert.Equal(3, shapedTensor.Channels);
    Assert.Equal(4, shapedTensor.Height);
    Assert.Equal(5, shapedTensor.Width);
    Assert.Equal(120, shapedTensor.Count);

    // The CPU view must expose data and diff buffers with one entry per element.
    using (var shapedCpu = shapedTensor.OnCpu())
    {
        Assert.NotNull(shapedCpu.Data);
        Assert.NotNull(shapedCpu.Diff);
        Assert.Equal(shapedTensor.Count, shapedCpu.Data.Count);
        Assert.Equal(shapedTensor.Count, shapedCpu.Diff.Count);
    }
}
// Runs AveragePoolingLayer(3, 1, 1) forward over a 1x1x3x3 input filled with 2.0 and
// checks the 3x3 output element by element. The output tensor `top` is declared outside
// this method.
public void AveragePoolingLayer_Forward()
{
    var input = new Tensor(1, 1, 3, 3);
    var constantFiller = new ConstantFiller(2.0d);
    constantFiller.Fill(input);

    var pooling = new AveragePoolingLayer(3, 1, 1);
    pooling.Setup(input, top);

    Assert.Equal(1, top.Num);
    Assert.Equal(1, top.Channels);
    Assert.Equal(3, top.Height);
    Assert.Equal(3, top.Width);

    pooling.Forward(input, top);

    // Expected output in storage order. NOTE(review): the values match a 3x3 window with
    // zero padding of 1 (corners see 4 cells, edges 6, centre 9) - confirm against the layer.
    double[] expected =
    {
        8.0d / 9, 4.0d / 3, 8.0d / 9,
        4.0d / 3, 2.0d,     4.0d / 3,
        8.0d / 9, 4.0d / 3, 8.0d / 9,
    };

    using (var topCpu = top.OnCpu())
    {
        var pooled = topCpu.Data;
        for (int i = 0; i < expected.Length; i++)
        {
            AssertInRange(expected[i], pooled[i]);
        }
    }
}
// Fills a 2x3x4x5 tensor with GaussianFiller and checks that the sample mean lies within
// five configured standard deviations of the configured mean, and that the mean squared
// deviation from the configured mean is within a factor of five of Std^2.
public void Filler_GaussianDense(double meanParam, double stdParam)
{
    var tensor = new Tensor(2, 3, 4, 5);
    var config = new GaussianFillerConfiguration(meanParam, stdParam);
    var gaussian = new GaussianFiller(config);
    gaussian.Fill(tensor);

    double sampleMean = 0;
    double sampleVariance = 0;
    int total = tensor.Count;

    using (var tensorCpu = tensor.OnCpu())
    {
        for (int i = 0; i < total; i++)
        {
            var value = tensorCpu.DataAt(i);
            var deviation = value - config.Mean;

            sampleMean += value;
            // Deviation is measured against the configured mean, not the sample mean.
            sampleVariance += deviation * deviation;
        }

        sampleMean /= total;
        sampleVariance /= total;

        Assert.True(sampleMean >= config.Mean - config.Std * 5);
        Assert.True(sampleMean <= config.Mean + config.Std * 5);

        double expectedVariance = config.Std * config.Std;
        Assert.True(sampleVariance >= (expectedVariance / 5.0d));
        Assert.True(sampleVariance <= (expectedVariance * 5.0d));
    }
}
// Test fixture setup: creates two 10x5x1x1 tensors (data and label), fills both with a
// default-configured GaussianFiller and registers them, in that order, in `bottom`.
public EuclideanLossLayerTests()
{
    var gaussian = new GaussianFiller();

    // Data blob.
    blobBottomData = new Tensor(10, 5, 1, 1);
    gaussian.Fill(blobBottomData);
    bottom.Add(blobBottomData);

    // Label blob, same shape as the data blob.
    blobBottomLabel = new Tensor(10, 5, 1, 1);
    gaussian.Fill(blobBottomLabel);
    bottom.Add(blobBottomLabel);
}
// Checks that ConstantFiller writes the configured value (10.0) into every element of a
// 2x3x4x5 tensor.
public void Filler_Constant()
{
    const double expectedValue = 10.0d;

    var blob = new Tensor(2, 3, 4, 5);
    var config = new ConstantFillerConfiguration(expectedValue);
    var filler = new ConstantFiller(config);
    filler.Fill(blob);

    using (var blobCpu = blob.OnCpu())
    {
        int count = blobCpu.Count;
        var data = blobCpu.Data;
        for (int i = 0; i < count; i++)
        {
            // Expected value goes first so that a failure message reports
            // "Expected: 10 / Actual: <element>" instead of the reverse.
            Assert.Equal(expectedValue, data[i]);
        }
    }
}
// Reshapes a default-constructed tensor to 2x3x4x5 and verifies the reported dimensions
// and the sizes of the CPU data/diff buffers.
public void Blob_Reshape()
{
    var tensor = new Tensor();
    tensor.Reshape(2, 3, 4, 5);

    Assert.Equal(2, tensor.Num);
    Assert.Equal(3, tensor.Channels);
    Assert.Equal(4, tensor.Height);
    Assert.Equal(5, tensor.Width);
    Assert.Equal(120, tensor.Count);

    using (var tensorCpu = tensor.OnCpu())
    {
        // Both buffers must exist and hold exactly one entry per element.
        Assert.NotNull(tensorCpu.Data);
        Assert.NotNull(tensorCpu.Diff);
        Assert.Equal(tensorCpu.Count, tensorCpu.Data.Count);
        Assert.Equal(tensorCpu.Count, tensorCpu.Diff.Count);
    }
}
/// <summary>
/// Sums <paramref name="x"/> over one or more dimensions.
/// </summary>
/// <param name="x">Tensor to reduce. Tensors of dtype <c>torch.@bool</c> are rejected.</param>
/// <param name="dim">
/// A single dimension index or an array of indices. Negative indices are offset by the
/// number of dimensions of <paramref name="x"/>. The caller's array is never modified.
/// </param>
/// <param name="keepdim">
/// When <c>true</c> each reduced dimension stays in the result shape with size 1;
/// otherwise the reduced dimensions are dropped from the result shape.
/// </param>
/// <returns>
/// A new tensor with the reduced shape and the dtype of <paramref name="x"/>. It requires
/// grad only when <paramref name="x"/> does and <c>no_grad</c> is not active; a backward
/// function is attached only for the float16/float32/float64 cases.
/// </returns>
/// <exception cref="TorchException">Thrown when <paramref name="x"/> is a bool tensor.</exception>
public static Tensor sum(this Tensor x, Union<int, int[]> dim, bool keepdim = false)
{
    if (x.dtype == torch.@bool)
    {
        throw new TorchException("TorchException: torch.sum is not implemented for bool tensors.");
    }

    int rank = x.__shape.Length;

    // Build a private list of non-negative dimension indices. The indices are copied
    // instead of normalized in place so the array passed by the caller is left untouched.
    int[] dims;
    int[] requested = (int[])dim;
    if (requested != null)
    {
        dims = new int[requested.Length];
        for (int i = 0; i < requested.Length; i++)
        {
            dims[i] = (requested[i] >= 0) ? requested[i] : (rank + requested[i]);
        }
    }
    else
    {
        int single = (int)dim;
        dims = new int[] { (single >= 0) ? single : (rank + single) };
    }

    // Permutation that keeps the surviving dimensions first (in order) and moves the
    // reduced dimensions to the end; y_shape is filled in the same pass.
    var shape_permutation = new int[rank];
    int p1 = 0;
    int p2 = 0;
    int p3 = 0;
    var y_shape = new int[keepdim ? rank : (rank - dims.Length)];
    var reduce_size = 1;
    for (int i = 0; i < rank; i++)
    {
        if (dims.Contains(i))
        {
            reduce_size *= x.__shape[i];
            if (keepdim)
            {
                y_shape[p3++] = 1;
            }
            shape_permutation[p2++ + (rank - dims.Length)] = i;
        }
        else
        {
            shape_permutation[p1++] = i;
            y_shape[p3++] = x.__shape[i];
        }
    }

    var xp = x.permute(shape_permutation);
    var y = new Tensor(y_shape, x.dtype, (!torch.autograd.grad_mode.no_grad.prev) && x.requires_grad);

    // NOTE(review): MKL.Sum presumably folds every run of reduce_size consecutive elements
    // of the permuted buffer into one output element - confirm against MKL.Sum.
    // NOTE(review): the backward closures hand x.grad (not a permuted view) to MKL.dSum;
    // verify that this matches the permuted forward layout.
    switch (x.dtype)
    {
        case torch.float16:
        {
            MKL.Sum(xp.__half, reduce_size, y.__half);
            if (y.requires_grad)
            {
                y.__backward_fn = () =>
                {
                    MKL.dSum(x.grad.__half, reduce_size, y.grad.__half);
                    if (x.__backward_fn != null)
                    {
                        x.__backward_fn();
                    }
                };
            }
            break;
        }
        case torch.float32:
        {
            MKL.Sum(xp.__float, reduce_size, y.__float);
            if (y.requires_grad)
            {
                y.__backward_fn = () =>
                {
                    MKL.dSum(x.grad.__float, reduce_size, y.grad.__float);
                    if (x.__backward_fn != null)
                    {
                        x.__backward_fn();
                    }
                };
            }
            break;
        }
        case torch.float64:
        {
            MKL.Sum(xp.__double, reduce_size, y.__double);
            if (y.requires_grad)
            {
                y.__backward_fn = () =>
                {
                    MKL.dSum(x.grad.__double, reduce_size, y.grad.__double);
                    if (x.__backward_fn != null)
                    {
                        x.__backward_fn();
                    }
                };
            }
            break;
        }
        case torch.int8:
        {
            MKL.Sum(xp.__int8, reduce_size, y.__int8);
            break;
        }
        case torch.uint8:
        {
            MKL.Sum(xp.__uint8, reduce_size, y.__uint8);
            break;
        }
        case torch.int16:
        {
            MKL.Sum(xp.__int16, reduce_size, y.__int16);
            break;
        }
        case torch.int32:
        {
            MKL.Sum(xp.__int32, reduce_size, y.__int32);
            break;
        }
        case torch.int64:
        {
            MKL.Sum(xp.__int64, reduce_size, y.__int64);
            break;
        }
    }

    return y;
}
/// <summary>
/// Returns the pooled <c>CudaBlas</c> object for a tensor: the tensor's device id is
/// obtained from <c>CudaHelpers.GetDeviceId</c> and passed on to <c>BlasForDevice</c>.
/// </summary>
/// <param name="tensor">The tensor whose device id is looked up.</param>
/// <returns>The value returned by <c>BlasForDevice</c> for that device id.</returns>
public PooledObject<CudaBlas> BlasForTensor(Tensor tensor)
{
    var deviceId = CudaHelpers.GetDeviceId(tensor);
    return BlasForDevice(deviceId);
}
/// <summary>
/// Creates a new <c>Tensor</c> from <c>inputTensorDims</c> and stores it in <c>inputTensor</c>,
/// replacing whatever was stored there before.
/// </summary>
public void inputTensorMemAlloc()
{
    var allocated = new Tensor(inputTensorDims);
    inputTensor = allocated;
}
/// <summary>
/// Runs the training loop inside a new <c>Session</c> until <paramref name="cancellation"/>
/// is signalled: builds the loss/optimizer/sampling graph, restores
/// <paramref name="checkpoint"/>, then repeatedly runs one optimizer step per iteration,
/// periodically saving checkpoints and writing generated samples.
/// </summary>
/// <param name="checkpoint">Checkpoint passed to <c>saver.restore</c> before training starts.</param>
/// <param name="run">Run name; used as the sub-directory for checkpoints, samples and the counter file.</param>
/// <param name="counter">
/// Starting step number. When <c>null</c>, the value stored in the run's "counter" file plus one
/// is used if that file exists, otherwise 1.
/// </param>
/// <param name="cancellation">Checked once per iteration; when set, the loop ends and a final save is made.</param>
public void Train(string checkpoint, string run, int?counter, CancellationToken cancellation)
{
    new Session().UseSelf(session =>
    {
        // Token placeholder with batchSize rows and an unspecified second dimension.
        var context = tf.placeholder(tf.int32, new TensorShape(this.batchSize, null));
        var output = Gpt2Model.Model(this.hParams, input: context);
        // Labels drop the first column of the context and logits drop the last column,
        // so logits at position i are scored against the token at position i + 1.
        Tensor labels = context[Range.All, Range.StartAt(1)];
        Tensor logits = output["logits"][Range.All, Range.EndAt(new Index(1, fromEnd: true))];
        var loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits_dyn(
                labels: labels, logits: logits));

        // Graph node used by GenerateSamples() below.
        var sample = Gpt2Sampler.SampleSequence(
            this.hParams, length: this.sampleLength, context: context, batchSize: this.batchSize, temperature: 1.0f, topK: 40);

        // Only variables whose name contains "model" are optimized and saved.
        var trainVars = tf.trainable_variables().Where((dynamic var) => var.name.Contains("model"));
        var optimizer = new AdamOptimizer(learning_rate: 0.0002).minimize(loss, var_list: trainVars);

        var saver = new Saver(
            var_list: trainVars, max_to_keep: 5, keep_checkpoint_every_n_hours: 1);

        // Initialize first, then overwrite the saved variables from the checkpoint.
        session.run(tf.global_variables_initializer());

        Console.WriteLine("Loading checkpoint " + checkpoint);
        saver.restore(session, checkpoint);

        Console.WriteLine("Loading dataset...");
        var sampler = new TrainingSampler(this.dataset, this.random);
        Console.WriteLine($"Dataset has {sampler.TokenCount} tokens");

        // Resume step numbering from the run's counter file unless the caller supplied a counter.
        string counterFile = Path.Combine(Gpt2Checkpoints.CheckpointDir, run, "counter");
        if (counter == null && File.Exists(counterFile))
        {
            counter = int.Parse(File.ReadAllText(counterFile), CultureInfo.InvariantCulture) + 1;
        }
        counter = counter ??
            1;

        string runCheckpointDir = Path.Combine(Gpt2Checkpoints.CheckpointDir, run);
        string runSampleDir = Path.Combine(SampleDir, run);

        // Saves the model under the run's checkpoint directory with the current counter as
        // global step, and records the counter in the counter file.
        void Save()
        {
            Directory.CreateDirectory(runCheckpointDir);
            Console.WriteLine("Saving " + Path.Combine(runCheckpointDir, Invariant($"model-{counter}")));
            saver.save(session, Path.Combine(runCheckpointDir, "model"), global_step: counter.Value);
            File.WriteAllText(path: counterFile, contents: Invariant($"{counter}"));
        }

        // Generates SampleNum samples in batches, each seeded with the encoder's
        // end-of-text token, and writes them to "samples-{counter}" in the run's sample directory.
        void GenerateSamples()
        {
            var contextTokens = np.array(new[] { this.encoder.EncodedEndOfText });
            var allText = new List <string>();
            int index = 0;
            string text = null;
            while (index < this.SampleNum)
            {
                var @out = session.run(sample, feed_dict: new PythonDict <object, object> { [context] = Enumerable.Repeat(contextTokens, this.batchSize), });
                // The last batch may be partial: take only as many rows as are still needed.
                foreach (int i in Enumerable.Range(0, Math.Min(this.SampleNum - index, this.batchSize)))
                {
                    text = this.encoder.Decode(@out[i]);
                    text = Invariant($"======== SAMPLE {index + 1} ========\n{text}\n");
                    allText.Add(text);
                    index++;
                }
            }
            // Only the most recently generated sample is echoed to the console.
            Console.WriteLine(text);
            Directory.CreateDirectory(runSampleDir);
            File.WriteAllLines(
                path: Path.Combine(runSampleDir, Invariant($"samples-{counter}")),
                contents: allText);
        }

        // Running average of the loss: Item1 is the decayed loss sum, Item2 the decayed weight
        // sum; their ratio is the value printed as "avg".
        var avgLoss = (0.0, 0.0);
        var startTime = DateTime.Now;

        while (!cancellation.IsCancellationRequested)
        {
            if (counter % this.SaveEvery == 0)
            {
                Save();
            }
            if (counter % this.SampleEvery == 0)
            {
                GenerateSamples();
            }

            // One batch = batchSize draws of sampler.Sample(1024).
            // NOTE(review): 1024 is presumably the sample length in tokens - confirm against TrainingSampler.
            var batch = Enumerable.Range(0, this.batchSize)
                .Select(_ => sampler.Sample(1024))
                .ToArray();

            var placeholderValues = new PythonDict <object, object> { [context] = batch.ToPythonList(), };
            // Runs the optimizer step and fetches the loss in the same call.
            var tuple = session.run_dyn((optimizer, loss), feed_dict: placeholderValues);

            var lv = tuple.Item2;

            avgLoss = (avgLoss.Item1 * 0.99 + lv, avgLoss.Item2 * 0.99 + 1);

            Console.WriteLine($"[{counter} | {DateTime.Now-startTime}] loss={lv} avg={avgLoss.Item1/avgLoss.Item2}");

            counter++;
        }

        // Cancellation requested: persist the latest state before leaving the session.
        Console.WriteLine("Interrupted");
        Save();
    });
}
/// <summary>Executes the operation.</summary>
/// <remarks>
/// Packs the three tensors into an array, in the order given, and forwards to the
/// array-taking <c>Execute</c> overload.
/// </remarks>
public void Execute(Tensor inmap, Tensor infilter, Tensor outmap)
{
    Tensor[] tensors = { inmap, infilter, outmap };
    Execute(tensors);
}
// Runs MaxPoolingLayer(2, 1, 0) forward over a 2x2x3x5 input in which every 3x5 plane holds
// the same pattern, then checks the pooled values in the first top tensor and, when a
// second top tensor was requested, the values stored in it.
public void MaxPoolingLayer_ForwardRectangularWithSquareKernel(int topLayer)
{
    Contract.Requires(topLayer > 0);

    const int imageCount = 2;
    const int channelCount = 2;
    const int planeCount = imageCount * channelCount;

    var bottom = new Tensor(imageCount, channelCount, 3, 5);

    var topList = new Tensor[topLayer];
    for (int t = 0; t < topLayer; t++)
    {
        topList[t] = new Tensor();
    }

    // Input: 2x 2 channels of:
    //     [1 2 5 2 3]
    //     [9 4 1 4 8]
    //     [1 2 5 2 3]
    int[] inputPlane =
    {
        1, 2, 5, 2, 3,
        9, 4, 1, 4, 8,
        1, 2, 5, 2, 3,
    };

    using (var bottomCpu = bottom.OnCpu())
    {
        var bottomData = bottomCpu.Data;
        for (int plane = 0; plane < planeCount; plane++)
        {
            int offset = plane * inputPlane.Length;
            for (int j = 0; j < inputPlane.Length; j++)
            {
                bottomData[offset + j] = inputPlane[j];
            }
        }
    }

    var pooling = new MaxPoolingLayer(2, 1, 0);
    pooling.Setup(new TensorCollection { bottom }, topList);

    foreach (var top in topList)
    {
        Assert.Equal(imageCount, top.Num);
        Assert.Equal(channelCount, top.Channels);
        Assert.Equal(2, top.Height);
        Assert.Equal(4, top.Width);
    }

    pooling.Forward(new TensorCollection { bottom }, topList);

    // Expected output: 2x 2 channels of:
    //     [9 5 5 8]
    //     [9 5 5 8]
    int[] expectedMax =
    {
        9, 5, 5, 8,
        9, 5, 5, 8,
    };

    for (int plane = 0; plane < planeCount; plane++)
    {
        int offset = plane * expectedMax.Length;
        using (var topCpu = topList[0].OnCpu())
        {
            var topData = topCpu.Data;
            for (int j = 0; j < expectedMax.Length; j++)
            {
                Assert.Equal(expectedMax[j], topData[offset + j]);
            }
        }
    }

    if (topList.Length > 1)
    {
        // Expected mask output: 2x 2 channels of:
        //     [5  2  2 9]
        //     [5 12 12 9]
        int[] expectedMask =
        {
            5, 2, 2, 9,
            5, 12, 12, 9,
        };

        for (int plane = 0; plane < planeCount; plane++)
        {
            int offset = plane * expectedMask.Length;
            using (var topCpu = topList[1].OnCpu())
            {
                var topData = topCpu.Data;
                for (int j = 0; j < expectedMask.Length; j++)
                {
                    Assert.Equal(expectedMask[j], topData[offset + j]);
                }
            }
        }
    }
}
// Returns tf.cond(a < b, a, b): the comparison result is the predicate, `a` and `b` are
// passed as the two branch arguments.
Tensor Condition(Tensor a, Tensor b)
{
    var predicate = a < b;
    return tf.cond(predicate, a, b);
}
/// <summary>
/// Accuracy function that ignores both tensors and always reports 0.
/// </summary>
/// <param name="target">Unused.</param>
/// <param name="output">Unused.</param>
/// <returns>Always 0.</returns>
public static int AccNone(Tensor target, Tensor output)
{
    const int noAccuracy = 0;
    return noAccuracy;
}
// Same statistical check as the dense Gaussian test, but with IsSparse enabled: elements
// that are exactly zero are skipped, and mean / squared deviation are averaged over the
// non-zero elements only.
public void Filler_GaussianSparse(double meanParam, double stdParam)
{
    var tensor = new Tensor(2, 3, 4, 5);
    var config = new GaussianFillerConfiguration(meanParam, stdParam) { IsSparse = true };
    var gaussian = new GaussianFiller(config);
    gaussian.Fill(tensor);

    double sampleMean = 0;
    double sampleVariance = 0;

    using (var tensorCpu = tensor.OnCpu())
    {
        int total = tensor.Count;
        int zeroCount = 0;

        for (int i = 0; i < total; i++)
        {
            var value = tensorCpu.DataAt(i);
            if (value == 0.0d)
            {
                zeroCount++;
                continue;
            }

            var deviation = value - config.Mean;
            sampleMean += value;
            sampleVariance += deviation * deviation;
        }

        int nonZeroCount = total - zeroCount;
        sampleMean /= nonZeroCount;
        sampleVariance /= nonZeroCount;

        Assert.True(sampleMean >= config.Mean - config.Std * 5);
        Assert.True(sampleMean <= config.Mean + config.Std * 5);

        double expectedVariance = config.Std * config.Std;
        Assert.True(sampleVariance >= (expectedVariance / 5.0d));
        Assert.True(sampleVariance <= (expectedVariance * 5.0d));
    }
}
// Fills a 2x3x4x5 tensor with XavierFiller and checks that every element lies in
// [-scale, scale] with scale = sqrt(3 / fanIn), where fanIn = Count / Num.
public void Filler_Xavier()
{
    var blob = new Tensor(2, 3, 4, 5);
    var filler = new XavierFiller();
    filler.Fill(blob);

    int fanIn = blob.Count / blob.Num;

    // The numerator must be floating point: with an int numerator, 3 / fanIn is integer
    // division and yields 0 for any fanIn > 3, which collapses the allowed range to {0}.
    double scale = Math.Sqrt(3.0d / fanIn);

    using (var blobCpu = blob.OnCpu())
    {
        int count = blob.Count;
        var data = blobCpu.Data;
        for (int i = 0; i < count; i++)
        {
            Assert.True(data[i] >= -scale);
            Assert.True(data[i] <= scale);
        }
    }
}
// Fills a 2x3x4x5 tensor with UniformFiller(min, max) and checks that every element lies
// in the closed interval [min, max].
public void Filler_Uniform(double min, double max)
{
    var tensor = new Tensor(2, 3, 4, 5);
    var config = new UniformFillerConfiguration(min, max);
    var uniform = new UniformFiller(config);
    uniform.Fill(tensor);

    using (var tensorCpu = tensor.OnCpu())
    {
        int total = tensor.Count;
        var values = tensorCpu.Data;

        int i = 0;
        while (i < total)
        {
            Assert.True(values[i] >= min);
            Assert.True(values[i] <= max);
            i++;
        }
    }
}
// Fills a 2x3x4x5 tensor with PositiveUnitballFiller and checks that every element is in
// [0, 1] and that the elements belonging to each of the Num leading slices sum to 1
// (within +/- 0.001).
public void Filler_PositiveUnitball()
{
    var tensor = new Tensor(2, 3, 4, 5);
    var unitball = new PositiveUnitballFiller();
    unitball.Fill(tensor);

    int sliceCount = tensor.Num;
    int total = tensor.Count;
    int sliceSize = total / sliceCount;

    using (var tensorCpu = tensor.OnCpu())
    {
        var values = tensorCpu.Data;
        for (int i = 0; i < total; i++)
        {
            Assert.True(values[i] >= 0.0d);
            Assert.True(values[i] <= 1.0d);
        }

        for (int slice = 0; slice < sliceCount; slice++)
        {
            double sum = 0;
            for (int j = 0; j < sliceSize; j++)
            {
                sum += tensorCpu.DataAt(slice * sliceSize + j);
            }

            Assert.True(sum >= 0.999f);
            Assert.True(sum <= 1.001f);
        }
    }
}
/// <summary>
/// If the wrapped value is a <c>Tensor&lt;T&gt;</c>, pins its dense buffer and reports the
/// pointer, byte length, shape, rank and native element type of that buffer.
/// </summary>
/// <param name="pinnedMemoryHandle">Handle returned by pinning the dense buffer; <c>default</c> on failure.</param>
/// <param name="dataBufferPointer">Pointer of the pinned handle; <c>IntPtr.Zero</c> on failure.</param>
/// <param name="dataBufferLength">Buffer length multiplied by the element size; 0 on failure or for an unrecognized <typeparamref name="T"/>.</param>
/// <param name="shape">Dimensions of the dense tensor.</param>
/// <param name="rank">Rank of the dense tensor.</param>
/// <param name="nativeElementType">Element type matching <typeparamref name="T"/>, or <c>TensorElementType.DataTypeMax</c> as a sentinel when there is no match.</param>
/// <returns><c>true</c> when the wrapped value is a <c>Tensor&lt;T&gt;</c>; otherwise <c>false</c>.</returns>
/// <exception cref="NotSupportedException">Thrown when the tensor reports <c>IsReversedStride</c>.</exception>
private bool TryPinAsTensor<T>(
    out MemoryHandle pinnedMemoryHandle,
    out IntPtr dataBufferPointer,
    out int dataBufferLength,
    out ReadOnlySpan<int> shape,
    out int rank,
    out TensorElementType nativeElementType)
{
    // Values reported when the wrapped value is not a Tensor<T>.
    pinnedMemoryHandle = default;
    dataBufferPointer = IntPtr.Zero;
    dataBufferLength = 0;
    shape = null;
    rank = 0;
    nativeElementType = TensorElementType.DataTypeMax; // invalid

    Debug.Assert(typeof(T) != typeof(string), "NamedOnnxValue.TryPinAsTensor() must not be called with a string Tensor value");

    if (!(_value is Tensor<T> tensor))
    {
        return false;
    }

    if (tensor.IsReversedStride)
    {
        //TODO: not sure how to support reverse stride. may be able to calculate the shape differently
        throw new NotSupportedException(nameof(Tensor<T>) + " of reverseStride is not supported");
    }

    // Use the value directly when it is already dense, otherwise convert it.
    DenseTensor<T> dense = tensor as DenseTensor<T> ?? tensor.ToDenseTensor();

    shape = dense.Dimensions; // does not work for reverse stride
    rank = dense.Rank;
    pinnedMemoryHandle = dense.Buffer.Pin();
    unsafe
    {
        dataBufferPointer = (IntPtr)pinnedMemoryHandle.Pointer;
    }

    // Map T to the native element type and its size in bytes. A size of 0 means
    // "unrecognized type" and leaves the reported byte length at 0.
    Type elementType = typeof(T);
    int elementSize = 0;
    if (elementType == typeof(float))
    {
        nativeElementType = TensorElementType.Float;
        elementSize = sizeof(float);
    }
    else if (elementType == typeof(double))
    {
        nativeElementType = TensorElementType.Double;
        elementSize = sizeof(double);
    }
    else if (elementType == typeof(int))
    {
        nativeElementType = TensorElementType.Int32;
        elementSize = sizeof(int);
    }
    else if (elementType == typeof(uint))
    {
        nativeElementType = TensorElementType.UInt32;
        elementSize = sizeof(uint);
    }
    else if (elementType == typeof(long))
    {
        nativeElementType = TensorElementType.Int64;
        elementSize = sizeof(long);
    }
    else if (elementType == typeof(ulong))
    {
        nativeElementType = TensorElementType.UInt64;
        elementSize = sizeof(ulong);
    }
    else if (elementType == typeof(short))
    {
        nativeElementType = TensorElementType.Int16;
        elementSize = sizeof(short);
    }
    else if (elementType == typeof(ushort))
    {
        nativeElementType = TensorElementType.UInt16;
        elementSize = sizeof(ushort);
    }
    else if (elementType == typeof(byte))
    {
        nativeElementType = TensorElementType.UInt8;
        elementSize = sizeof(byte);
    }
    else if (elementType == typeof(sbyte))
    {
        nativeElementType = TensorElementType.Int8;
        elementSize = sizeof(sbyte);
    }
    else if (elementType == typeof(string))
    {
        nativeElementType = TensorElementType.String;
        elementSize = IntPtr.Size;
    }
    else if (elementType == typeof(bool))
    {
        nativeElementType = TensorElementType.Bool;
        elementSize = sizeof(bool); // Assumes sizeof(BOOL) is always 1 byte in native
    }
    else
    {
        //TODO: may extend the supported types
        // do not throw exception, rather assign the sentinel value
        nativeElementType = TensorElementType.DataTypeMax;
    }

    dataBufferLength = dense.Buffer.Length * elementSize;
    return true;
}
/// <summary>
///    Computes exponential linear: <c>exp(features) - 1</c> if &lt; 0, <c>features</c> otherwise.
/// </summary>
/// <param name="features">
///    Input passed to the "Elu" op as its <c>features</c> argument.
/// </param>
/// <param name="name">
///    Name given to the created operation; defaults to 'Elu'.
/// </param>
/// <returns>
///    The <c>output</c> of the created operation.
/// </returns>
/// <remarks>
///    See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
///    ](http://arxiv.org/abs/1511.07289)
/// </remarks>
public static Tensor elu(Tensor features, string name = "Elu")
{
    return _op_def_lib._apply_op_helper("Elu", name: name, args: new { features }).output;
}
/// <summary>
/// Checks the gradients that <c>layer.Backward</c> writes into the bottom blobs against a
/// central finite-difference estimate of the objective, element by element.
/// </summary>
/// <param name="layer">Layer under test; its <c>Forward</c> and <c>Backward</c> are invoked repeatedly.</param>
/// <param name="bottom">Input blobs of the layer. Their data is perturbed in place and restored.</param>
/// <param name="top">Output blobs of the layer.</param>
/// <param name="checkBottom">Index of the single bottom blob to check, or a negative value to check all of them.</param>
/// <param name="topId">Forwarded to <c>GetObjectiveAndGradient</c>; must be non-negative when <paramref name="elementwise"/> is set.</param>
/// <param name="topDataId">Forwarded to <c>GetObjectiveAndGradient</c>; must be non-negative when <paramref name="elementwise"/> is set.</param>
/// <param name="elementwise">
/// When <c>true</c>, every bottom blob must have the same element count as <c>top[topId]</c>, and
/// finite differencing is done only for the element at <paramref name="topDataId"/>; for all other
/// elements the estimated gradient is taken to be 0.
/// </param>
public void CheckSingle(Layer layer, TensorCollection bottom, TensorCollection top, int checkBottom, int topId, int topDataId, bool elementwise = false)
{
    //TODO If implemented at all the ability of the layer to access stored blobs, we need to recheck this.
    if ( elementwise )
    {
        Assert.True(topId >= 0);
        Assert.True(topDataId >= 0);

        // Element-wise layers must map each bottom element to exactly one top element.
        int topCount = top[topId].Count;
        for (int blobId = 0; blobId < bottom.Count; blobId++)
            Assert.Equal(topCount, bottom[blobId].Count);
    }

    // First, figure out what blobs we need to check against.
    var blobsToCheck = new TensorCollection();
    // propagateDown starts as all-true when every bottom is checked, all-false otherwise.
    var propagateDown = new List<bool>().Repeated(bottom.Count, checkBottom < 0);
    if ( checkBottom < 0 )
    {
        // No specific bottom was requested: check every bottom blob.
        for (int i = 0; i < bottom.Count; i++)
            blobsToCheck.Add(bottom[i]);
    }
    else
    {
        // A specific bottom was requested, therefore we must ensure that the blob checked exists.
        Assert.True(checkBottom < bottom.Count);
        blobsToCheck.Add(bottom[checkBottom]);
        propagateDown[checkBottom] = true;
    }

    //TODO Add a general random generator that layers should use, to ensure we always apply it when layers are non-deterministic.

    // Compute the gradient analytically using Backward

    // Get any loss from the layer
    double computedObjective = layer.Forward(bottom, top);

    // Get additional loss from the objective
    computedObjective += GetObjectiveAndGradient(top, topId, topDataId);
    layer.Backward(top, propagateDown, bottom);

    // Store computed gradients for all checked blobs. The diff of each checked blob is copied
    // into the data buffer of a separate tensor, so the forward passes below cannot disturb
    // the analytic result.
    var computedGradientsBlob = new Tensor[blobsToCheck.Count];
    for ( int blobId = 0; blobId < blobsToCheck.Count; blobId++ )
    {
        var currentBlob = blobsToCheck[blobId];
        computedGradientsBlob[blobId] = new Tensor(currentBlob);

        using (var currentBlobCpu = currentBlob.OnCpu())
        using (var computedGradientsBlobCpu = computedGradientsBlob[blobId].OnCpu())
        {
            var currentDiff = currentBlobCpu.Diff;
            var computedGradients = computedGradientsBlobCpu.Data;
            currentDiff.CopyTo(computedGradients);
        }
    }

    // Compute derivative of top w.r.t.
    // each bottom and parameter input using
    // finite differencing.
    for (int blobId = 0; blobId < blobsToCheck.Count; blobId++ )
    {
        var currentBlob = blobsToCheck[blobId];

        using (var currentBlobCpu = currentBlob.OnCpu())
        using (var computedGradientsBlobCpu = computedGradientsBlob[blobId].OnCpu())
        {
            var computedGradients = computedGradientsBlobCpu.Data;
            for (int featId = 0; featId < currentBlob.Count; featId++)
            {
                // For an element-wise layer, we only need to do finite differencing to
                // compute the derivative of topData[top_id][top_data_id] w.r.t.
                // bottomData[blob_id][i] only for i == top_data_id. For any other
                // i != top_data_id, we know the derivative is 0 by definition, and simply
                // check that that's true.
                double estimatedGradient = 0;
                if (!elementwise || featId == topDataId)
                {
                    //TODO Add a general random generator that layers should use, to ensure we always apply it when layers are non-deterministic.

                    // Do finite differencing.
                    // Compute loss with step-size added to input.
                    currentBlobCpu.Data[featId] += step;
                    double positiveObjective = layer.Forward(bottom, top);
                    positiveObjective += GetObjectiveAndGradient(top, topId, topDataId);

                    // Compute loss with step-size subtracted from input
                    // (the value currently sits at +step, hence the 2 * step).
                    currentBlobCpu.Data[featId] -= step * 2;

                    //TODO Add a general random generator that layers should use, to ensure we always apply it when layers are non-deterministic.

                    double negativeObjective = layer.Forward(bottom, top);
                    negativeObjective += GetObjectiveAndGradient(top, topId, topDataId);

                    // Recover original input value.
                    currentBlobCpu.Data[featId] += step;
                    // Central difference: (f(x + step) - f(x - step)) / (2 * step).
                    estimatedGradient = (positiveObjective - negativeObjective) / step / 2.0d;
                }

                double computedGradient = computedGradients[featId];
                double feature = currentBlobCpu.Data[featId];
                // Skip the comparison when |feature| lies within kinkRange of the kink value.
                if (kink - kinkRange > Math.Abs(feature) || Math.Abs(feature) > kink + kinkRange)
                {
                    // We check relative accuracy, but for too small values, we threshold
                    // the scale factor by 1
                    double scale = Math.Max(Math.Max(Math.Abs(computedGradient), Math.Abs(estimatedGradient)), 1.0d);
                    Assert.InRange(computedGradient - estimatedGradient, -threshold * scale, threshold * scale);
                }
            }
        }
    }
}
/// <summary>
///    Computes softmax cross entropy cost and gradients to backpropagate.
/// </summary>
/// <param name="features">Passed to the "SoftmaxCrossEntropyWithLogits" op as <c>features</c>.</param>
/// <param name="labels">Passed to the "SoftmaxCrossEntropyWithLogits" op as <c>labels</c>.</param>
/// <param name="name">Name given to the created operation; may be <c>null</c>.</param>
/// <returns>A tuple holding the first and the second output of the created operation, in that order.</returns>
public static (Tensor, Tensor) softmax_cross_entropy_with_logits(Tensor features, Tensor labels, string name = null)
{
    var operation = _op_def_lib._apply_op_helper("SoftmaxCrossEntropyWithLogits", name: name, args: new { features, labels });

    var first = operation.outputs[0];
    var second = operation.outputs[1];
    return (first, second);
}
/// <summary>
/// Multiplies two tensors with the <c>*</c> operator.
/// A method carrying the AutoGraph attribute is converted to a FuncGraph
/// the first time it is invoked (the attribute is currently commented out here).
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand.</param>
/// <returns>The result of <c>a * b</c>.</returns>
// [AutoGraph]
Tensor Mul(Tensor a, Tensor b)
{
    var product = a * b;
    return product;
}
/// <summary>
///    Computes softmax cross entropy cost and gradients to backpropagate.
/// </summary>
/// <param name="features">
///    batch_size x num_classes matrix
/// </param>
/// <param name="labels">
///    batch_size vector with values in [0, num_classes).
///    This is the label for the given minibatch entry.
/// </param>
/// <param name="name">
///    Name given to the created operation; defaults to 'SparseSoftmaxCrossEntropyWithLogits'.
/// </param>
/// <returns>
///    A tuple built from the first two outputs of the created operation:
///    loss : Per example loss (batch_size vector).
///    backprop : backpropagated gradients (batch_size x num_classes matrix).
///    The Operation can be fetched from either returned Tensor through its Operation property.
/// </returns>
/// <remarks>
///    Unlike <c>SoftmaxCrossEntropyWithLogits</c>, this operation does not accept
///    a matrix of label probabilities, but rather a single label per row
///    of features. This label is considered to have probability 1.0 for the
///    given row.
///
///    Inputs are the logits, not probabilities.
/// </remarks>
public static (Tensor loss, Tensor backprop) sparse_softmax_cross_entropy_with_logits(Tensor features, Tensor labels, string name = "SparseSoftmaxCrossEntropyWithLogits")
{
    var operation = _op_def_lib._apply_op_helper("SparseSoftmaxCrossEntropyWithLogits", name: name, args: new { features, labels });

    // Output 0 is the loss, output 1 the backprop tensor.
    var loss = operation.outputs[0];
    var backprop = operation.outputs[1];
    return (loss, backprop);
}
// Exercises NotEqual on tensors and on graph nodes (node/node, node/input, input/node and
// a node compared with itself) and compares the result with a per-element reference
// computed on the host arrays.
public void ExecuteTest()
{
    const int length = 256, ch = 8;
    const int count = length * ch;

    Random rd = new Random(1234);

    int[] idxes = Enumerable.Range(0, count).ToArray();
    // x1 is generated completely before x2 so the random sequence is consumed in the same order.
    float[] x1 = Enumerable.Range(0, count).Select(_ => (float)rd.Next(5)).ToArray();
    float[] x2 = Enumerable.Range(0, count).Select(_ => (float)rd.Next(5)).ToArray();

    // Reference results, evaluated at the point of each assertion.
    Func<float[]> expectedX1VsX2 = () => idxes.Select(idx => x1[idx] != x2[idx] ? 1f : 0f).ToArray();
    Func<float[]> expectedX1VsX1 = () => idxes.Select(idx => x1[idx] != x1[idx] ? 1f : 0f).ToArray();

    // Tensor vs. tensor.
    {
        Tensor t1 = new Tensor(Shape.Map1D(ch, length), x1);
        Tensor t2 = new Tensor(Shape.Map1D(ch, length), x2);

        Tensor o = NotEqual(t1, t2);

        AssertError.Tolerance(expectedX1VsX2(), o.State, 1e-7f, 1e-5f);
    }

    // Tensor vs. itself.
    {
        Tensor t1 = new Tensor(Shape.Map1D(ch, length), x1);

        Tensor o = NotEqual(t1, t1);

        AssertError.Tolerance(expectedX1VsX1(), o.State, 1e-7f, 1e-5f);
    }

    // Node vs. node.
    {
        InputNode t1 = new Tensor(Shape.Map1D(ch, length), x1);
        InputNode t2 = new Tensor(Shape.Map1D(ch, length), x2);

        var n1 = t1 + 0;
        var n2 = t2 + 0;

        OutputNode o = VariableNode.NotEqual(n1, n2).Save();

        Flow flow = Flow.FromOutputs(o);
        flow.Execute();

        AssertError.Tolerance(expectedX1VsX2(), o.State, 1e-7f, 1e-5f);
    }

    // Node vs. input node.
    {
        InputNode t1 = new Tensor(Shape.Map1D(ch, length), x1);
        InputNode t2 = new Tensor(Shape.Map1D(ch, length), x2);

        var n1 = t1 + 0;

        OutputNode o = VariableNode.NotEqual(n1, t2).Save();

        Flow flow = Flow.FromOutputs(o);
        flow.Execute();

        AssertError.Tolerance(expectedX1VsX2(), o.State, 1e-7f, 1e-5f);
    }

    // Input node vs. node.
    {
        InputNode t1 = new Tensor(Shape.Map1D(ch, length), x1);
        InputNode t2 = new Tensor(Shape.Map1D(ch, length), x2);

        var n2 = t2 + 0;

        OutputNode o = VariableNode.NotEqual(t1, n2).Save();

        Flow flow = Flow.FromOutputs(o);
        flow.Execute();

        AssertError.Tolerance(expectedX1VsX2(), o.State, 1e-7f, 1e-5f);
    }

    // Node vs. itself.
    {
        InputNode t1 = new Tensor(Shape.Map1D(ch, length), x1);

        var n1 = t1 + 0;

        OutputNode o = VariableNode.NotEqual(n1, n1).Save();

        Flow flow = Flow.FromOutputs(o);
        flow.Execute();

        AssertError.Tolerance(expectedX1VsX1(), o.State, 1e-7f, 1e-5f);
    }
}
/// <summary>
/// Launches a CUDA reduction kernel that reduces <paramref name="src"/> along dimension
/// <paramref name="dim"/> and writes the outcome into a tensor whose size along that
/// dimension is 1.
/// </summary>
/// <param name="reduceKernels">Source of the compiled PTX for the reduction kernels.</param>
/// <param name="kernelName">Base kernel name; it is mangled with the specialization and prefixed with "contig_" or "noncontig_".</param>
/// <param name="init">Initial value, converted through <c>ReduceInitConverter.GetInitValue</c>.</param>
/// <param name="initType">How <paramref name="init"/> is interpreted by <c>ReduceInitConverter.GetInitValue</c>.</param>
/// <param name="result">Optional destination handed to <c>TensorResultBuilder.GetWriteTarget</c>.</param>
/// <param name="src">Tensor to reduce.</param>
/// <param name="dim">Dimension of <paramref name="src"/> to reduce over.</param>
/// <param name="extraArg">Optional additional kernel argument, appended as the last argument when not <c>null</c>.</param>
/// <returns>
/// The write target that received the reduction, or <paramref name="result"/> unchanged when
/// <paramref name="src"/> has no dimensions.
/// </returns>
public static Tensor Invoke(CudaReduceKernels reduceKernels, string kernelName, float init, ReduceInitType initType, Tensor result, Tensor src, int dim, object extraArg = null)
{
    if (src.DimensionCount == 0)
    {
        return result;
    }

    var context = CudaHelpers.TSContextForTensor(src);
    var cudaContext = context.CudaContextForTensor(src);

    // The output has the shape of src with the reduced dimension collapsed to 1.
    var requiredOutputSize = (long[])src.Sizes.Clone();
    requiredOutputSize[dim] = 1;
    var writeTarget = TensorResultBuilder.GetWriteTarget(result, src, false, requiredOutputSize);
    ThrowIfAnyTensorInvalid(writeTarget, src);

    var inElements = src.ElementCount();
    var reductionSize = src.Sizes[dim];
    var reductionStride = src.Strides[dim];
    var outElements = inElements / reductionSize;
    var contigReduction = reductionStride == 1;

    // We must make sure that when the tensor is passed to the kernel, src.Sizes[dim] is set to 1
    // This includes for the purposes of determining which tensor specializations to use (changing
    // the dimension size to 1 may make the tensor non-contiguous
    var newSizes = (long[])src.Sizes.Clone();
    newSizes[dim] = 1;
    var srcSlim = new Tensor(newSizes, src.Strides, src.Storage, src.StorageOffset);

    var config = new ApplySpecialization(writeTarget, srcSlim);

    // Box the index arguments with the width selected by the specialization. The (object)
    // cast is required: without it the conditional operator unifies uint and ulong to ulong,
    // so a boxed ulong would be produced even when 32-bit indices are requested.
    object totalSlices = config.Use32BitIndices ? (object)(uint)outElements : (ulong)outElements;
    object reductionSizeTyped = config.Use32BitIndices ? (object)(uint)reductionSize : (ulong)reductionSize;
    object reductionStrideTyped = config.Use32BitIndices ? (object)(uint)reductionStride : (ulong)reductionStride;
    object initValueTyped = ReduceInitConverter.GetInitValue(init, initType, src.ElementType);

    var ptx = reduceKernels.GetPtx(context.Compiler);

    if (contigReduction)
    {
        // Stride 1 along the reduced dimension: use the "contig_" kernel, which is given
        // one element of shared memory per thread of the block.
        var block = GetContigReduceBlock(cudaContext, outElements, reductionSize);
        var grid = GetContigReduceGrid(outElements);
        uint smemSize = (uint)src.ElementType.Size() * block.x;

        var fullName = "contig_" + PermutationGenerator.GetMangledName(kernelName, config);
        if (extraArg == null)
        {
            InvokeReduce(context, cudaContext, ptx, fullName, grid, block, smemSize, config, writeTarget, srcSlim, reductionSizeTyped, totalSlices, initValueTyped);
        }
        else
        {
            InvokeReduce(context, cudaContext, ptx, fullName, grid, block, smemSize, config, writeTarget, srcSlim, reductionSizeTyped, totalSlices, initValueTyped, extraArg);
        }
    }
    else
    {
        // Any other stride: use the "noncontig_" kernel, which additionally receives the
        // reduction stride and uses no shared memory.
        var deviceProps = context.DeviceInfoForContext(cudaContext);
        var block = GetNonContigReduceBlock(deviceProps);
        var grid = GetNoncontigReduceGrid(deviceProps, outElements);
        uint smemSize = 0;

        var fullName = "noncontig_" + PermutationGenerator.GetMangledName(kernelName, config);
        if (extraArg == null)
        {
            InvokeReduce(context, cudaContext, ptx, fullName, grid, block, smemSize, config, writeTarget, srcSlim, reductionStrideTyped, reductionSizeTyped, totalSlices, initValueTyped);
        }
        else
        {
            InvokeReduce(context, cudaContext, ptx, fullName, grid, block, smemSize, config, writeTarget, srcSlim, reductionStrideTyped, reductionSizeTyped, totalSlices, initValueTyped, extraArg);
        }
    }

    return writeTarget;
}
/// <summary>
/// Forwards to <c>RunVarOp</c> with the given arguments and <c>true</c> as its final argument.
/// </summary>
/// <param name="result">Forwarded to <c>RunVarOp</c> as the result tensor.</param>
/// <param name="src">Forwarded to <c>RunVarOp</c> as the source tensor.</param>
/// <param name="dimension">Forwarded to <c>RunVarOp</c>.</param>
/// <param name="normByN">Forwarded to <c>RunVarOp</c>.</param>
/// <returns>The tensor returned by <c>RunVarOp</c>.</returns>
public Tensor Std(Tensor result, Tensor src, int dimension, bool normByN)
{
    // NOTE(review): the trailing `true` presumably selects standard deviation over variance - confirm in RunVarOp.
    var output = this.RunVarOp(result, src, dimension, normByN, true);
    return output;
}
/// <summary>
/// Not implemented: every call throws.
/// </summary>
/// <param name="x">Unused.</param>
/// <param name="y">Unused; defaults to <c>null</c>.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool CanHandle(Tensor x, Tensor y = null)
{
    var notImplemented = new NotImplementedException();
    throw notImplemented;
}
/// <summary>
/// Returns the CUDA context for a tensor: the tensor's device id is obtained from
/// <c>CudaHelpers.GetDeviceId</c> and passed on to <c>CudaContextForDevice</c>.
/// </summary>
/// <param name="tensor">The tensor whose device id is looked up.</param>
/// <returns>The <c>CudaContext</c> returned by <c>CudaContextForDevice</c> for that device id.</returns>
public CudaContext CudaContextForTensor(Tensor tensor)
{
    var deviceId = CudaHelpers.GetDeviceId(tensor);
    return CudaContextForDevice(deviceId);
}
/// <summary>
/// Appends a trailing axis to <paramref name="vec"/> (<c>expand_dims(vec, -1)</c>) and
/// multiplies the result with <paramref name="mat"/> using the <c>*</c> operator.
/// </summary>
/// <param name="vec">Tensor that is expanded before the multiplication.</param>
/// <param name="mat">Right operand of the multiplication.</param>
/// <returns>The product of the expanded <paramref name="vec"/> and <paramref name="mat"/>.</returns>
private static Tensor _BroadcastMul(Tensor vec, Tensor mat)
{
    Tensor expanded = array_ops.expand_dims(vec, -1);
    return expanded * mat;
}
/// <summary>
/// Gets a pooled cuDNN context for a tensor: the tensor's device id is obtained from
/// <c>CudaHelpers.GetDeviceId</c> and used to index <c>devices</c>, whose
/// <c>DnnHandles</c> pool supplies the returned object.
/// </summary>
/// <param name="tensor">The tensor whose device id is looked up.</param>
/// <returns>The object returned by <c>DnnHandles.Get()</c> for that device.</returns>
public PooledObject<ManagedCuda.CudaDNN.CudaDNNContext> DNNForTensor(Tensor tensor)
{
    return devices[CudaHelpers.GetDeviceId(tensor)].DnnHandles.Get();
}
/// <summary>
/// Creates a wrapper around the <c>PyObject</c> of the given tensor.
/// </summary>
/// <param name="t">Tensor whose <c>PyObject</c> is stored in <c>_pobj</c>. Must not be <c>null</c>.</param>
public PythonObject(Tensor t)
{
    var pyObject = t.PyObject;
    this._pobj = pyObject;
}
/// <summary>
/// Abstract correction step; the behaviour is defined entirely by the implementing class.
/// </summary>
/// <param name="weights">NOTE(review): presumably the weights to be updated - confirm in implementations.</param>
/// <param name="gradients">NOTE(review): presumably the gradients belonging to <paramref name="weights"/> - confirm in implementations.</param>
/// <param name="parameters">Tensors keyed by name. NOTE(review): presumably per-optimizer state - confirm in implementations.</param>
/// <param name="resetDw">NOTE(review): presumably requests that the gradients be reset after use - confirm in implementations.</param>
/// <param name="iteration">NOTE(review): presumably the current iteration number - confirm in implementations.</param>
public abstract void Correct(Tensor weights, Tensor gradients, Dictionary <string, Tensor> parameters, bool resetDw, int iteration);
/// <summary>
/// Runs the detection graph on <paramref name="image"/> and converts the raw output
/// tensors into one <c>RecognitionResult</c> per detection.
/// </summary>
/// <param name="image">Tensor fed to the graph's "image_tensor" input.</param>
/// <returns>
/// One result per detection reported by "num_detections", each carrying class, label,
/// probability, region (four box values) and mask.
/// </returns>
/// <exception cref="InvalidCastException">
/// Thrown when an output tensor does not hold data of the expected array type.
/// </exception>
public RecognitionResult[] Recognize(Tensor image)
{
    Output input = _graph["image_tensor"];
    Output[] outputs = new Output[]
    {
        _graph["detection_boxes"],
        _graph["detection_scores"],
        _graph["detection_classes"],
        _graph["num_detections"],
        _graph["detection_masks"]
    };

    Tensor[] finalTensor = _session.Run(new Output[] { input }, new Tensor[] { image }, outputs);

    // Direct casts: an unexpected output type fails here with a descriptive
    // InvalidCastException instead of a NullReferenceException further down.
    int numDetections = (int)((float[])finalTensor[3].Data)[0];
    float[,,] detectionBoxes = (float[,,])finalTensor[0].JaggedData;
    float[,] detectionScores = (float[,])finalTensor[1].JaggedData;
    float[,] detectionClasses = (float[,])finalTensor[2].JaggedData;
    float[,,,] detectionMask = (float[,,,])finalTensor[4].JaggedData;

    int maskHeight = detectionMask.GetLength(2);
    int maskWidth = detectionMask.GetLength(3);

    List<RecognitionResult> results = new List<RecognitionResult>();
    for (int i = 0; i < numDetections; i++)
    {
        RecognitionResult r = new RecognitionResult();
        r.Class = (int)detectionClasses[0, i];
        // Labels is indexed with Class - 1, i.e. class ids are treated as 1-based.
        r.Label = Labels[r.Class - 1];
        r.Probability = detectionScores[0, i];
        r.Region = new float[]
        {
            detectionBoxes[0, i, 0],
            detectionBoxes[0, i, 1],
            detectionBoxes[0, i, 2],
            detectionBoxes[0, i, 3]
        };

        // Copy the mask of detection i out of the 4-D mask tensor.
        float[,] mask = new float[maskHeight, maskWidth];
        for (int j = 0; j < maskHeight; j++)
        {
            for (int k = 0; k < maskWidth; k++)
            {
                mask[j, k] = detectionMask[0, i, j, k];
            }
        }
        r.Mask = mask;

        // Added only once fully populated, so the stored entry is complete even if
        // RecognitionResult is a value type.
        results.Add(r);
    }

    return results.ToArray();
}
/// <summary>
/// Factory that wraps a tensor and its name in a new <see cref="NamedOnnxValue"/>.
/// </summary>
/// <typeparam name="T">Element type of the tensor.</typeparam>
/// <param name="name">Name passed to the <see cref="NamedOnnxValue"/> constructor.</param>
/// <param name="value">Tensor passed to the <see cref="NamedOnnxValue"/> constructor.</param>
/// <returns>The newly constructed value.</returns>
public static NamedOnnxValue CreateFromTensor<T>(string name, Tensor<T> value)
    => new NamedOnnxValue(name, value);
/// <summary>
/// Single-tensor convenience overload: wraps <paramref name="bottom"/> and <paramref name="top"/>
/// in one-element collections and forwards to the collection-based overload.
/// </summary>
/// <param name="layer">Layer under test.</param>
/// <param name="bottom">The only bottom tensor.</param>
/// <param name="top">The only top tensor.</param>
/// <param name="checkBottom">Forwarded unchanged; defaults to -1.</param>
public void CheckExhaustive(Layer layer, Tensor bottom, Tensor top, int checkBottom = -1)
{
    var bottoms = new TensorCollection { bottom };
    var tops = new TensorCollection { top };

    CheckExhaustive(layer, bottoms, tops, checkBottom);
}
/// <summary>Greater than or equal, applied to two tensor operands.</summary>
/// <param name="x1">First operand; its shape is used to construct the operator.</param>
/// <param name="x2">Second operand.</param>
/// <returns>The tensor produced by <c>BinaryArithmetric</c> for this operator.</returns>
public static Tensor GreaterThanOrEqual(Tensor x1, Tensor x2)
{
    var comparison = new Operators.LogicalArithmetric.GreaterThanOrEqual(x1.Shape);
    return BinaryArithmetric(x1, x2, comparison);
}
// Sum of the two tensors, computed with Tensor's + operator.
Tensor Add(Tensor a, Tensor b) => a + b;
/// <summary>
/// Single-tensor convenience overload: wraps <paramref name="bottom"/> and <paramref name="top"/>
/// in one-element collections and forwards to the collection-based overload.
/// </summary>
/// <param name="layer">Layer under test.</param>
/// <param name="bottom">The only bottom tensor.</param>
/// <param name="top">The only top tensor.</param>
public void CheckEltwise(Layer layer, Tensor bottom, Tensor top)
{
    var bottoms = new TensorCollection { bottom };
    var tops = new TensorCollection { top };

    CheckEltwise(layer, bottoms, tops);
}
/// <summary>Greater than or equal, with a scalar constant as the left operand.</summary>
/// <param name="c">Constant on the left-hand side.</param>
/// <param name="x">Tensor operand; its shape is used to construct the operator.</param>
/// <returns>The tensor produced by <c>BinaryLeftConstantArithmetric</c> for this operator.</returns>
public static Tensor GreaterThanOrEqual(float c, Tensor x)
{
    var comparison = new Operators.LogicalArithmetric.GreaterThanOrEqualLeftConstant(c, x.Shape);
    return BinaryLeftConstantArithmetric(x, comparison);
}
/// <summary>
/// Single-tensor convenience overload: wraps <paramref name="bottom"/> and <paramref name="top"/>
/// in one-element collections and forwards to the collection-based overload.
/// </summary>
/// <param name="layer">Layer under test.</param>
/// <param name="bottom">The only bottom tensor.</param>
/// <param name="top">The only top tensor.</param>
/// <param name="checkBottom">Forwarded unchanged.</param>
/// <param name="topId">Forwarded unchanged.</param>
/// <param name="topDataId">Forwarded unchanged.</param>
/// <param name="elementWise">Forwarded unchanged; defaults to <c>false</c>.</param>
public void CheckSingle(
    Layer layer,
    Tensor bottom,
    Tensor top,
    int checkBottom,
    int topId,
    int topDataId,
    bool elementWise = false)
{
    var bottoms = new TensorCollection { bottom };
    var tops = new TensorCollection { top };

    CheckSingle(layer, bottoms, tops, checkBottom, topId, topDataId, elementWise);
}
/// <summary>
/// Builds the list of backend update operations for one optimisation step and stores it in
/// <c>this.updates</c>. For every parameter an accumulator holding a moving average of the
/// squared gradient is updated, and the parameter is moved by the learning rate times the
/// gradient divided by the square root of that accumulator (plus epsilon).
/// </summary>
/// <param name="parameters">Tensors to optimise.</param>
/// <param name="constraints">
/// Optional per-parameter constraints; when an entry exists for a parameter it is applied to
/// the updated value before the update is recorded.
/// </param>
/// <param name="loss">Loss tensor whose gradients with respect to the parameters are taken.</param>
/// <returns>The same list that is stored in <c>this.updates</c>.</returns>
public List<Tensor> get_updates(List<Tensor> parameters, Dictionary<Tensor, IWeightConstraint> constraints, Tensor loss)
{
    // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/optimizers.py#L221
    List<Tensor> grads = this.get_gradients(loss, parameters);
    List<int?[]> shapes = parameters.Select(p => K.get_variable_shape(p)).ToList();
    List<Tensor> accumulators = shapes.Select(shape => K.zeros(shape)).ToList();
    this.weights = accumulators;
    this.updates = new List<Tensor>();

    Tensor lr = this.lr;
    if (this.initial_decay > 0)
    {
        // Decay the learning rate with the iteration counter and schedule its increment.
        lr = lr * (1.0 / (1.0 + this.decay * this.iterations));
        this.updates.Add(K.update_add(this.iterations, 1));
    }

    for (int i = 0; i < parameters.Count; i++)
    {
        Tensor p = parameters[i];
        Tensor g = grads[i];
        Tensor a = accumulators[i];

        // update accumulator
        Tensor new_a = this.rho * a + (1.0 - this.rho) * K.square(g);
        this.updates.Add(K.update(a, new_a));

        Tensor new_p = p - lr * g / (K.sqrt(new_a) + this.epsilon);

        // apply constraints (single lookup instead of ContainsKey + indexer)
        IWeightConstraint c;
        if (constraints.TryGetValue(p, out c))
        {
            new_p = c.Call(new_p);
        }

        this.updates.Add(K.update(p, new_p));
    }

    return this.updates;
}
/// <summary>
/// Builds the whole VDCNN graph: placeholders, character embedding, an initial convolution,
/// four convolution blocks, k-max pooling (k = 8), three dense layers, and the loss with an
/// Adam minimisation step.
/// </summary>
/// <param name="alphabet_size">Number of rows of the embedding matrix.</param>
/// <param name="document_max_len">Second dimension of the "x" input placeholder.</param>
/// <param name="num_class">Width of the final dense layer and depth of the one-hot labels.</param>
public VdCnn(int alphabet_size, int document_max_len, int num_class)
{
    // Hyper-parameters.
    embedding_size = 16;
    filter_sizes = new[] { 3, 3, 3, 3, 3 };
    num_filters = new[] { 64, 64, 128, 256, 512 };
    num_blocks = new[] { 2, 2, 2, 2 };
    learning_rate = 0.001f;
    cnn_initializer = tensorflow.keras.initializers.he_normal();
    fc_initializer = tf.truncated_normal_initializer(stddev: 0.05f);

    // Graph inputs and the step counter.
    x = tf.placeholder(tf.int32, new TensorShape(-1, document_max_len), name: "x");
    y = tf.placeholder(tf.int32, new TensorShape(-1), name: "y");
    is_training = tf.placeholder(tf.@bool, new TensorShape(), name: "is_training");
    global_step = tf.Variable(0, trainable: false);

    // ---- Embedding ----
    tf_with(tf.name_scope("embedding"), embeddingScope =>
    {
        var initialEmbeddings = tf.random_uniform(new[] { alphabet_size, embedding_size }, -1.0f, 1.0f);
        embeddings = tf.get_variable("embeddings", initializer: initialEmbeddings);
        x_emb = tf.nn.embedding_lookup(embeddings, x);
        x_expanded = tf.expand_dims(x_emb, -1);
    });

    // Intermediate tensors are captured by the lambdas below, so they need an initial value.
    Tensor conv0 = null, conv1 = null, conv2 = null, conv3 = null, conv4 = null;
    Tensor h_flat = null, fc1_out = null, fc2_out = null;

    // ---- First convolution ----
    tf_with(tf.variable_scope("conv-0"), convScope =>
    {
        var kernelSize = new[] { filter_sizes[0], embedding_size };
        conv0 = tf.layers.conv2d(
            x_expanded,
            filters: num_filters[0],
            kernel_size: kernelSize,
            kernel_initializer: cnn_initializer,
            activation: tf.nn.relu());
        conv0 = tf.transpose(conv0, new[] { 0, 1, 3, 2 });
    });

    // ---- Convolution blocks (the last one skips max pooling) ----
    tf_with(tf.name_scope("conv-block-1"), blockScope => { conv1 = conv_block(conv0, 1); });
    tf_with(tf.name_scope("conv-block-2"), blockScope => { conv2 = conv_block(conv1, 2); });
    tf_with(tf.name_scope("conv-block-3"), blockScope => { conv3 = conv_block(conv2, 3); });
    tf_with(tf.name_scope("conv-block-4"), blockScope => { conv4 = conv_block(conv3, 4, max_pool: false); });

    // ---- k-max pooling ----
    tf_with(tf.name_scope("k-max-pooling"), poolScope =>
    {
        var squeezed = tf.squeeze(conv4, new[] { -1 });
        var transposed = tf.transpose(squeezed, new[] { 0, 2, 1 });
        var topValues = tf.nn.top_k(transposed, k: 8, sorted: false)[0];
        h_flat = tf.reshape(topValues, new[] { -1, 512 * 8 });
    });

    // ---- Fully connected layers ----
    tf_with(tf.name_scope("fc-1"), fcScope =>
    {
        fc1_out = tf.layers.dense(h_flat, 2048, activation: tf.nn.relu(), kernel_initializer: fc_initializer);
    });

    tf_with(tf.name_scope("fc-2"), fcScope =>
    {
        fc2_out = tf.layers.dense(fc1_out, 2048, activation: tf.nn.relu(), kernel_initializer: fc_initializer);
    });

    tf_with(tf.name_scope("fc-3"), fcScope =>
    {
        logits = tf.layers.dense(fc2_out, num_class, activation: null, kernel_initializer: fc_initializer);
        predictions = tf.argmax(logits, -1, output_type: tf.int32);
    });

    // ---- Loss and optimiser ----
    tf_with(tf.name_scope("loss"), lossScope =>
    {
        var oneHotLabels = tf.one_hot(y, num_class);
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits: logits, labels: oneHotLabels));

        // The minimise op is created under a control dependency on the UPDATE_OPS collection.
        var pendingUpdates = tf.get_collection<object>(tf.GraphKeys.UPDATE_OPS);
        var updateOperations = pendingUpdates.Select(op => (Operation)op).ToArray();
        tf_with(tf.control_dependencies(updateOperations), dependencyScope =>
        {
            var optimizer = tf.train.AdamOptimizer(learning_rate);
            optimizer.minimize(loss, global_step: global_step);
        });
    });
}
/// <summary>
/// Builds a fully connected ReLU network with 2, 3 or 4 hidden layers in a fresh graph and
/// session, trains it with Adam on the shuffled training set in minibatches, and then fills the
/// unscaled copies of the train/test inputs, targets and predictions.
/// </summary>
/// <param name="flowsheet">
/// When not null, progress messages go to <c>flowsheet.ShowMessage</c>; otherwise they are
/// appended to <paramref name="ta"/> on the UI thread.
/// </param>
/// <param name="ta">Text area receiving progress text when <paramref name="flowsheet"/> is null.</param>
/// <param name="plot">
/// Plot whose series 0 and 1 receive the training and testing cost when
/// <paramref name="flowsheet"/> is null.
/// </param>
/// <exception cref="NotSupportedException">
/// <c>Parameters.NumberOfLayers</c> is not 2, 3 or 4.
/// </exception>
public void Train(IFlowsheet flowsheet, TextArea ta = null, Eto.OxyPlot.Plot plot = null)
{
    var nl = Environment.NewLine;

    var g = tf.Graph();
    g.as_default();

    // Release any session left over from a previous training run.
    if (session != null)
    {
        session.Dispose();
        session = null;
    }
    session = tf.Session(graph: g);

    tf_with(tf.variable_scope("Train"), delegate
    {
        if (flowsheet != null)
        {
            flowsheet.ShowMessage("Training Started...", IFlowsheet.MessageType.Information);
        }
        else
        {
            Application.Instance.Invoke(() =>
            {
                ta.Append("Training Started..." + nl, true);
            });
        }

        // tf Graph Input
        var X = tf.placeholder(tf.float32, shape: (-1, n_x), name: "X");
        var Y = tf.placeholder(tf.float32, shape: (-1, n_y), name: "Y");

        Tensor outlayer = null;

        var sigma = 1.0f;
        var weight_initializer = tf.variance_scaling_initializer(mode: "FAN_AVG", uniform: true, factor: sigma);
        var bias_initializer = tf.zeros_initializer;

        // Each hidden layer has half as many neurons as the previous one.
        var n_neurons_1 = Parameters.NumberOfNeuronsOnFirstLayer;
        var n_neurons_2 = n_neurons_1 / 2;
        var n_neurons_3 = n_neurons_2 / 2;
        var n_neurons_4 = n_neurons_3 / 2;

        RefVariable W_hidden_1, W_hidden_2, W_hidden_3, W_hidden_4, W_out;
        RefVariable bias_hidden_1, bias_hidden_2, bias_hidden_3, bias_hidden_4, bias_out;
        Tensor hidden_1, hidden_2, hidden_3, hidden_4;

        switch (Parameters.NumberOfLayers)
        {
            case 2:
                // Hidden weights
                W_hidden_1 = tf.Variable(weight_initializer.call(new int[] { n_x, n_neurons_1 }, dtype: TF_DataType.TF_FLOAT), name: "W1");
                bias_hidden_1 = tf.Variable(bias_initializer.call(n_neurons_1, dtype: TF_DataType.TF_FLOAT), name: "b1");
                W_hidden_2 = tf.Variable(weight_initializer.call(new int[] { n_neurons_1, n_neurons_2 }, dtype: TF_DataType.TF_FLOAT), name: "W2");
                bias_hidden_2 = tf.Variable(bias_initializer.call(n_neurons_2, dtype: TF_DataType.TF_FLOAT), name: "b2");
                // Output weights
                W_out = tf.Variable(weight_initializer.call(new int[] { n_neurons_2, n_y }, dtype: TF_DataType.TF_FLOAT), name: "Wout");
                bias_out = tf.Variable(bias_initializer.call(n_y, dtype: TF_DataType.TF_FLOAT), name: "bout");
                // Hidden layer
                hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1), name: "h1");
                hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2), name: "h2");
                // Output layer
                outlayer = tf.add(tf.matmul(hidden_2, W_out), bias_out, name: "out");
                break;

            case 3:
                // Hidden weights
                W_hidden_1 = tf.Variable(weight_initializer.call(new int[] { n_x, n_neurons_1 }, dtype: TF_DataType.TF_FLOAT), name: "W1");
                bias_hidden_1 = tf.Variable(bias_initializer.call(n_neurons_1, dtype: TF_DataType.TF_FLOAT), name: "b1");
                W_hidden_2 = tf.Variable(weight_initializer.call(new int[] { n_neurons_1, n_neurons_2 }, dtype: TF_DataType.TF_FLOAT), name: "W2");
                bias_hidden_2 = tf.Variable(bias_initializer.call(n_neurons_2, dtype: TF_DataType.TF_FLOAT), name: "b2");
                W_hidden_3 = tf.Variable(weight_initializer.call(new int[] { n_neurons_2, n_neurons_3 }, dtype: TF_DataType.TF_FLOAT), name: "W3");
                bias_hidden_3 = tf.Variable(bias_initializer.call(n_neurons_3, dtype: TF_DataType.TF_FLOAT), name: "b3");
                // Output weights
                W_out = tf.Variable(weight_initializer.call(new int[] { n_neurons_3, n_y }, dtype: TF_DataType.TF_FLOAT), name: "Wout");
                bias_out = tf.Variable(bias_initializer.call(n_y, dtype: TF_DataType.TF_FLOAT), name: "bout");
                // Hidden layer
                hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1), name: "h1");
                hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2), name: "h2");
                hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, W_hidden_3), bias_hidden_3), name: "h3");
                // Output layer
                outlayer = tf.add(tf.matmul(hidden_3, W_out), bias_out, name: "out");
                break;

            case 4:
                // Hidden weights
                W_hidden_1 = tf.Variable(weight_initializer.call(new int[] { n_x, n_neurons_1 }, dtype: TF_DataType.TF_FLOAT), name: "W1");
                bias_hidden_1 = tf.Variable(bias_initializer.call(n_neurons_1, dtype: TF_DataType.TF_FLOAT), name: "b1");
                W_hidden_2 = tf.Variable(weight_initializer.call(new int[] { n_neurons_1, n_neurons_2 }, dtype: TF_DataType.TF_FLOAT), name: "W2");
                bias_hidden_2 = tf.Variable(bias_initializer.call(n_neurons_2, dtype: TF_DataType.TF_FLOAT), name: "b2");
                W_hidden_3 = tf.Variable(weight_initializer.call(new int[] { n_neurons_2, n_neurons_3 }, dtype: TF_DataType.TF_FLOAT), name: "W3");
                bias_hidden_3 = tf.Variable(bias_initializer.call(n_neurons_3, dtype: TF_DataType.TF_FLOAT), name: "b3");
                W_hidden_4 = tf.Variable(weight_initializer.call(new int[] { n_neurons_3, n_neurons_4 }, dtype: TF_DataType.TF_FLOAT), name: "W4");
                bias_hidden_4 = tf.Variable(bias_initializer.call(n_neurons_4, dtype: TF_DataType.TF_FLOAT), name: "b4");
                // Output weights
                W_out = tf.Variable(weight_initializer.call(new int[] { n_neurons_4, n_y }, dtype: TF_DataType.TF_FLOAT), name: "Wout");
                bias_out = tf.Variable(bias_initializer.call(n_y, dtype: TF_DataType.TF_FLOAT), name: "bout");
                // Hidden layer
                hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1), name: "h1");
                hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2), name: "h2");
                hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, W_hidden_3), bias_hidden_3), name: "h3");
                hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3, W_hidden_4), bias_hidden_4), name: "h4");
                // Output layer
                outlayer = tf.add(tf.matmul(hidden_4, W_out), bias_out, name: "out");
                break;

            default:
                // Without an output layer the cost below cannot be built; fail with a clear message.
                throw new NotSupportedException(
                    $"Unsupported number of layers: {Parameters.NumberOfLayers}. Supported values are 2, 3 and 4.");
        }

        // Cost: sum of squared errors (the op is named "mse").
        var mse = tf.reduce_sum(tf.pow(outlayer - Y, 2.0f), name: "mse");

        var learn_rate = tf.constant(Parameters.LearningRate);
        var opt = tf.train.AdamOptimizer(learn_rate).minimize(mse);

        // Fit neural net
        var batch_size = Parameters.BatchSize;
        var mse_train = new List<float>();
        var mse_test = new List<float>();

        // Initialize the variables (i.e. assign their default value)
        var init = tf.global_variables_initializer();

        // Run the initializer
        session.run(init);

        // Start training
        var epochs = Parameters.NumberOfEpochs;
        foreach (var e in range(epochs))
        {
            // Shuffle training data
            var shuffle_indices = np.random.permutation(np.arange(len(x_train)));
            var shuffled_x = new NDArray(np.float32, x_train.shape);
            var shuffled_y = new NDArray(np.float32, y_train.shape);
            int i0 = 0;
            foreach (var idx0 in shuffle_indices)
            {
                shuffled_x[i0] = x_train[idx0];
                shuffled_y[i0] = y_train[idx0];
                i0 += 1;
            }

            // Minibatch training
            foreach (var i in range(0, len(y_train) / batch_size))
            {
                var start = i * batch_size;
                var batch_x = shuffled_x[start.ToString() + ":" + (start + batch_size).ToString(), Slice.All];
                var batch_y = shuffled_y[start.ToString() + ":" + (start + batch_size).ToString(), Slice.All];

                // Run optimizer with batch
                session.run(opt, (X, batch_x), (Y, batch_y));

                // Show progress on every fifth epoch (evaluated after each minibatch of that epoch).
                if (e % 5 == 0)
                {
                    // MSE train and test
                    mse_train.Add(session.run(mse, (X, x_train), (Y, y_train)));
                    mse_test.Add(session.run(mse, (X, x_test), (Y, y_test)));

                    if (flowsheet != null)
                    {
                        flowsheet.ShowMessage("Epoch: " + e.ToString(), IFlowsheet.MessageType.Information);
                        flowsheet.ShowMessage("MSE (training): " + mse_train.Last().ToString(), IFlowsheet.MessageType.Information);
                        flowsheet.ShowMessage("MSE (testing): " + mse_test.Last().ToString(), IFlowsheet.MessageType.Information);
                    }
                    else
                    {
                        Application.Instance.Invoke(() =>
                        {
                            ta.Append("Epoch: " + e.ToString() + nl, true);
                            ta.Append("MSE (training): " + mse_train.Last().ToString() + nl, true);
                            ta.Append("MSE (testing): " + mse_test.Last().ToString() + nl, true);
                            (plot.Model.Series[0] as OxyPlot.Series.LineSeries).Points.Add(new DataPoint(e, mse_train.Last()));
                            (plot.Model.Series[1] as OxyPlot.Series.LineSeries).Points.Add(new DataPoint(e, mse_test.Last()));
                            plot.Model.InvalidatePlot(true);
                        });
                    }

                    // NOTE(review): this break only leaves the minibatch loop; the epoch loop keeps
                    // running. If the intent is to stop training on convergence, the outer loop
                    // must be exited as well - left unchanged because it alters training results.
                    if (e > 10 && (Math.Abs(mse_train.Last() - mse_train[mse_train.Count - 2]) / mse_train[mse_train.Count - 2] < Parameters.RelativeMSETolerance))
                    {
                        break;
                    }
                }
            }
        }

        if (flowsheet != null)
        {
            flowsheet.ShowMessage("Training Finished!", IFlowsheet.MessageType.Information);
        }
        else
        {
            Application.Instance.Invoke(() =>
            {
                ta.Append("Training Finished!" + nl, true);
            });
        }

        // Unscale the inputs; input column j uses MinValues[j] / MaxValues[j].
        x_test_unscaled = new NDArray(np.float32, x_test.shape);
        x_train_unscaled = new NDArray(np.float32, x_train.shape);

        for (var i = 0; i < x_test.shape[0]; i++)
        {
            for (var j = 0; j < x_test.shape[1]; j++)
            {
                x_test_unscaled[i][j] = Classes.Utils.UnScale(x_test[i][j], Parameters.MinValues[j], Parameters.MaxValues[j], Parameters.MinScale, Parameters.MaxScale);
            }
        }

        for (var i = 0; i < x_train.shape[0]; i++)
        {
            for (var j = 0; j < x_train.shape[1]; j++)
            {
                x_train_unscaled[i][j] = Classes.Utils.UnScale(x_train[i][j], Parameters.MinValues[j], Parameters.MaxValues[j], Parameters.MinScale, Parameters.MaxScale);
            }
        }

        // Output column j uses the min/max entry at (index of the first output label) + j.
        var idx = Parameters.Labels.IndexOf(Parameters.Labels_Outputs.First());

        y_test_unscaled = new NDArray(np.float32, y_test.shape);
        y_train_unscaled = new NDArray(np.float32, y_train.shape);

        for (var i = 0; i < y_test.shape[0]; i++)
        {
            for (var j = 0; j < y_test.shape[1]; j++)
            {
                y_test_unscaled[i][j] = Classes.Utils.UnScale(y_test[i][j], Parameters.MinValues[idx + j], Parameters.MaxValues[idx + j], Parameters.MinScale, Parameters.MaxScale);
            }
        }

        for (var i = 0; i < y_train.shape[0]; i++)
        {
            for (var j = 0; j < y_train.shape[1]; j++)
            {
                y_train_unscaled[i][j] = Classes.Utils.UnScale(y_train[i][j], Parameters.MinValues[idx + j], Parameters.MaxValues[idx + j], Parameters.MinScale, Parameters.MaxScale);
            }
        }

        // Predictions of the trained network, then their unscaled copies.
        yp_test = session.run(outlayer, (X, x_test));
        yp_train = session.run(outlayer, (X, x_train));

        yp_test_unscaled = new NDArray(np.float32, yp_test.shape);
        yp_train_unscaled = new NDArray(np.float32, yp_train.shape);

        for (var i = 0; i < yp_test.shape[0]; i++)
        {
            for (var j = 0; j < yp_test.shape[1]; j++)
            {
                yp_test_unscaled[i][j] = Classes.Utils.UnScale(yp_test[i][j], Parameters.MinValues[idx + j], Parameters.MaxValues[idx + j], Parameters.MinScale, Parameters.MaxScale);
            }
        }

        for (var i = 0; i < yp_train.shape[0]; i++)
        {
            for (var j = 0; j < yp_train.shape[1]; j++)
            {
                yp_train_unscaled[i][j] = Classes.Utils.UnScale(yp_train[i][j], Parameters.MinValues[idx + j], Parameters.MaxValues[idx + j], Parameters.MinScale, Parameters.MaxScale);
            }
        }

        // Final costs on the full training and testing sets.
        var training_cost = session.run(mse, (X, x_train), (Y, y_train));
        var testing_cost = session.run(mse, (X, x_test), (Y, y_test));
        var diff = Math.Abs((float)training_cost - (float)testing_cost);

        if (flowsheet != null)
        {
            flowsheet.ShowMessage($"Training Cost = {training_cost}", IFlowsheet.MessageType.Information);
            flowsheet.ShowMessage($"Testing Cost = {testing_cost}", IFlowsheet.MessageType.Information);
            flowsheet.ShowMessage($"Absolute MSE = {diff}", IFlowsheet.MessageType.Information);
        }
        else
        {
            Application.Instance.Invoke(() =>
            {
                ta.Append($"Training Cost = {training_cost}" + nl, true);
                ta.Append($"Testing Cost = {testing_cost}" + nl, true);
                ta.Append($"Absolute MSE = {diff}" + nl, true);
            });
        }
    });
}
/// <summary>
/// Backward pass of a 2x2, stride-1, unpadded max pooling layer over a constant 3x5 input:
/// seeds the first row of every top channel with a diff of 1 and checks the diff that arrives
/// at the bottom tensor.
/// </summary>
/// <param name="topLayer">Number of top tensors handed to the layer; must be positive.</param>
public void MaxPoolingLayer_BackwardsRectangularWithSquareKernel(int topLayer)
{
    Contract.Requires(topLayer > 0);

    const int num = 2;
    const int channels = 2;
    const int topPlaneSize = 8;      // each top plane is 2 x 4
    const int bottomPlaneSize = 15;  // each bottom plane is 3 x 5

    var bottom = new Tensor(num, channels, 3, 5);

    var topList = new Tensor[topLayer];
    for (int t = 0; t < topList.Length; t++)
    {
        topList[t] = new Tensor();
    }

    // Bottom data: 2 x 2 channels, every plane filled with
    //   [2 2 2 2 2]
    //   [2 2 2 2 2]
    //   [2 2 2 2 2]
    var filler = new ConstantFiller(2);
    filler.Fill(bottom);

    var layer = new MaxPoolingLayer(2, 1, 0);
    layer.Setup(new TensorCollection { bottom }, topList);
    layer.Forward(new TensorCollection { bottom }, topList);

    // Top diff: 2 x 2 channels, every plane set to
    //   [1 1 1 1]
    //   [0 0 0 0]
    using (var topCpu = topList[0].OnCpu())
    {
        var topDiff = topCpu.Diff;
        for (int plane = 0; plane < topPlaneSize * num * channels; plane += topPlaneSize)
        {
            for (int col = 0; col < 4; col++)
            {
                topDiff[plane + col] = 1;
            }
        }
    }

    layer.Backward(topList, new[] { true }, new TensorCollection { bottom });

    // Expected bottom diff for every plane:
    //   [1 2 2 2 1]
    //   [1 2 2 2 1]
    //   [0 0 0 0 0]
    int[] expectedPlane =
    {
        1, 2, 2, 2, 1,
        1, 2, 2, 2, 1,
        0, 0, 0, 0, 0
    };

    using (var bottomCpu = bottom.OnCpu())
    {
        var bottomDiff = bottomCpu.Diff;
        for (int plane = 0; plane < bottomPlaneSize * num * channels; plane += bottomPlaneSize)
        {
            for (int k = 0; k < bottomPlaneSize; k++)
            {
                Assert.Equal(expectedPlane[k], bottomDiff[plane + k]);
            }
        }
    }
}
// Selects a when the a < b comparison holds and b otherwise, via tf.cond.
Tensor Min(Tensor a, Tensor b)
{
    var aIsSmaller = a < b;
    return tf.cond(aIsSmaller, () => a, () => b);
}