Esempio n. 1
0
        /// <summary>
        /// Builds the convolutional classifier on top of the 28x28x1 image input and initialises
        /// the loss, evaluation metric, Adam learner, trainer and evaluator fields.
        /// </summary>
        void create_network()
        {
            var device_description = computeDevice.AsString();
            Console.WriteLine("Compute Device: " + device_description);

            // Input placeholders for the image and for the 10-entry label vector.
            imageVariable       = Util.inputVariable(new[] { 28, 28, 1 }, "image_tensor");
            categoricalVariable = Util.inputVariable(new[] { 10 }, "label_tensor");

            // Three 3x3 ReLU convolutions (32, 64, 64 filters) with max pooling after the first two.
            network = imageVariable;
            network = Layers.Convolution2D(network, 32, new[] { 3, 3 }, computeDevice, CC.ReLU);
            network = CC.Pooling(network, C.PoolingType.Max, new[] { 2, 2 }, new[] { 2 });
            network = Layers.Convolution2D(network, 64, new[] { 3, 3 }, computeDevice, CC.ReLU);
            network = CC.Pooling(network, C.PoolingType.Max, new[] { 2, 2 }, new[] { 2 });
            network = Layers.Convolution2D(network, 64, new[] { 3, 3 }, computeDevice, CC.ReLU);

            // Dense head: 64 ReLU units, then 10 outputs with no activation argument.
            network = Layers.Dense(network, 64, computeDevice, activation: CC.ReLU);
            network = Layers.Dense(network, 10, computeDevice);

            Logging.detailed_summary(network);
            Logging.log_number_of_parameters(network);

            // Both the loss and the metric compare the network output with the label input.
            loss_function = CC.CrossEntropyWithSoftmax(network, categoricalVariable);
            eval_function = CC.ClassificationError(network, categoricalVariable);

            // Adam learner over every parameter of the network.
            var trainable_parameters       = new C.ParameterVector(network.Parameters().ToArray());
            var learning_rate_schedule     = new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size);
            var momentum_schedule          = new C.TrainingParameterScheduleDouble(0.9);
            var variance_momentum_schedule = new C.TrainingParameterScheduleDouble(0.99);
            learner = CC.AdamLearner(
                trainable_parameters,
                learning_rate_schedule,
                momentum_schedule,
                true,
                variance_momentum_schedule);

            var learners = new C.LearnerVector(new C.Learner[] { learner });
            trainer   = CC.CreateTrainer(network, loss_function, eval_function, learners);
            evaluator = CC.CreateEvaluator(eval_function);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds a convolutional Q-network: a stack of convolution + SELU layers (each optionally followed by
        /// 2x2 max pooling), a flatten step, dense hidden layers and a bias-free dense output of size
        /// <paramref name="actionSize"/>. The resulting output variable is stored in OutputQs.
        /// </summary>
        /// <param name="inputWidth">First entry of the input shape.</param>
        /// <param name="inputHeight">Second entry of the input shape.</param>
        /// <param name="inputDepth">Third entry of the input shape.</param>
        /// <param name="actionSize">Size of the dense output layer; also stored in ActionSize.</param>
        /// <param name="filterSizes">Square kernel size of each convolution layer; its length defines the number of convolution layers.</param>
        /// <param name="filterDepths">Output depth of each convolution layer; must have the same length as <paramref name="filterSizes"/>.</param>
        /// <param name="strides">Stride of each convolution layer; must have the same length as <paramref name="filterSizes"/>.</param>
        /// <param name="pooling">Per convolution layer, whether a 2x2 max pooling follows it; needs at least one entry per convolution layer.</param>
        /// <param name="densehiddenLayers">Forwarded to LayerDefineHelper.DenseLayers as the hidden layer count.</param>
        /// <param name="densehiddenSize">Forwarded to LayerDefineHelper.DenseLayers as the hidden layer size.</param>
        /// <param name="denseUseBias">Forwarded to LayerDefineHelper.DenseLayers; the output layer itself never uses a bias.</param>
        /// <param name="device">Device handed to every layer that is created here.</param>
        /// <param name="denseInitialWeightScale">Initial weight scale used for the dense hidden layers and the output layer.</param>
        public QNetworkConvSimple(int inputWidth, int inputHeight, int inputDepth, int actionSize,
                                  int[] filterSizes, int[] filterDepths, int[] strides, bool[] pooling,
                                  int densehiddenLayers, int densehiddenSize, bool denseUseBias, DeviceDescriptor device, float denseInitialWeightScale = 0.01f)
        {
            Device         = device;
            StateSize      = inputWidth * inputHeight * inputDepth;
            ActionSize     = actionSize;
            InputDimension = new int[3] {
                inputWidth, inputHeight, inputDepth
            };

            //create the input of the Q network
            InputState = CNTKLib.InputVariable(InputDimension, DataType.Float);

            Debug.Assert(filterSizes.Length == strides.Length && filterDepths.Length == filterSizes.Length, "Length of filterSizes,strides and filterDepth are not the same");
            //pooling is indexed by the same loop counter below, so it needs one flag per convolution layer as well
            Debug.Assert(filterSizes.Length == 0 || (pooling != null && pooling.Length >= filterSizes.Length), "pooling must contain one flag per convolution layer");

            var lastLayer = InputState;

            for (int i = 0; i < filterSizes.Length; ++i)
            {
                //conv layer followed by SELU activation. The last argument is sqrt(1 / (size * size));
                //presumably the initial weight scale for the SELU initialization - confirm against Layers.Convolution2D
                lastLayer = Layers.Convolution2D(lastLayer, filterDepths[i],
                                                 filterSizes[i], filterSizes[i], device, strides[i], true, true, "QConv_" + i, Mathf.Sqrt((1.0f / (filterSizes[i] * filterSizes[i]))));
                lastLayer = new SELUDef().BuildNew(lastLayer, device, "");
                //optional 2x2 max pooling with stride 2
                //NOTE(review): every pooling node gets the same name "pool2" regardless of the layer index
                if (pooling[i])
                {
                    lastLayer = CNTKLib.Pooling(lastLayer, PoolingType.Max, new int[] { 2, 2 }, new int[] { 2, 2 }, BoolVector.Repeat(true, 2), false, true, "pool2");
                }
            }

            lastLayer = CNTKLib.Flatten(lastLayer, new Axis(3), "Flatten");

            //dense layers on top of the flattened convolution output
            var inputA  = new InputLayerCNTKVar(lastLayer);
            var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);

            outputA.HasBias            = false;
            outputA.InitialWeightScale = denseInitialWeightScale;

            //The network object itself is not kept; presumably its constructor connects the layers so that
            //outputA can expose its output variable below - confirm against SequentialNetworkDense
            new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(densehiddenLayers, densehiddenSize, denseUseBias, NormalizationMethod.None, 0, denseInitialWeightScale, new ReluDef()), outputA, device);

            OutputQs = outputA.GetOutputVariable();
        }
Esempio n. 3
0
        /// <summary>
        /// Builds the primary capsule layer: a 2D convolution with dim_capsule * n_channels filters whose
        /// output is reshaped to rows of dim_capsule values and then passed through squash along axis 1.
        /// </summary>
        /// <param name="inputs">Function feeding the convolution.</param>
        /// <param name="dim_capsule">Number of values per capsule (second dimension after the reshape).</param>
        /// <param name="n_channels">Multiplied with dim_capsule to get the convolution filter count.</param>
        /// <param name="kernel_size">Kernel size forwarded to the convolution.</param>
        /// <param name="strides">Strides forwarded to the convolution.</param>
        /// <param name="pad">Forwarded to the convolution as use_padding.</param>
        /// <returns>The squashed capsule tensor.</returns>
        C.Function create_primary_cap(C.Function inputs, int dim_capsule, int n_channels, int[] kernel_size, int[] strides, bool pad)
        {
            var conv = Layers.Convolution2D(
                inputs, dim_capsule * n_channels, kernel_size, computeDevice,
                strides: strides, use_padding: pad, name: "primarycap_conv2d");
            var conv_dims = conv.Output.Shape.Dimensions;

            // Debug-only sanity check: the convolution output is expected to have dimensions 6, 6, 256.
            System.Diagnostics.Debug.Assert(!((conv_dims[2] != 256) || (conv_dims[1] != 6) || (conv_dims[0] != 6)));

            // Every capsule takes dim_capsule values, so the row count is the element count divided by dim_capsule.
            var element_count = Util.np_prod(conv_dims.ToArray());
            var capsule_count = (int)(element_count / dim_capsule);

            var reshaped = CC.Reshape(conv, new int[] { capsule_count, dim_capsule }, name: "primarycap_reshape");

            return squash(reshaped, name: "primarycap_squash", axis: 1);
        }
        /// <summary>
        /// Create a Universal style transfer decoder. There are 5 decoders in UST, use index to specify which one to create.
        /// </summary>
        /// <param name="imageDimension">Image dimension for the decoder to work with. Note that it might be different from the decoder input dimensions: the input size is obtained by halving it (rounding up) once for every pass below the requested one.</param>
        /// <param name="index">Which decoder to create. It selects the number of input channels and how many decoder stages are chained.</param>
        /// <returns>The decoder function. Its input variable is named "input" and its final ReLU node is named "output".</returns>
        protected Function CreateDecoders(Vector2Int imageDimension, PassIndex index)
        {
            int        ind       = (int)index + 1; //1 based index for the names
            string     prefix    = "de" + ind;     //every convolution name starts with "de<ind>"
            Vector2Int inputDims = imageDimension;

            //xOffsets[i] / yOffsets[i] become 1 when the size was odd before the i-th halving;
            //they are handed to the upsample step of the matching decoder stage
            int[] xOffsets = new int[5];
            int[] yOffsets = new int[5];

            for (int i = 1; i < ind; ++i)
            {
                if (inputDims.x % 2 != 0)
                {
                    xOffsets[i] = 1;
                }
                if (inputDims.y % 2 != 0)
                {
                    yOffsets[i] = 1;
                }
                inputDims = new Vector2Int(Mathf.CeilToInt(inputDims.x / 2.0f), Mathf.CeilToInt(inputDims.y / 2.0f));
            }

            //set the channel number
            int inputChannels;

            switch (index)
            {
            case PassIndex.PassTwo:
                inputChannels = 128;
                break;

            case PassIndex.PassThree:
                inputChannels = 256;
                break;

            case PassIndex.PassFour:
            case PassIndex.PassFive:
                inputChannels = 512;
                break;

            default: //PassIndex.PassOne and any other value
                inputChannels = 64;
                break;
            }

            //input variables
            Variable prev = Variable.InputVariable(new int[] { inputDims.x, inputDims.y, inputChannels }, DataType.Float, "input");

            //The stages below are cumulative: decoder N runs stage N first and then every lower stage.

            if (ind >= 5)
            {
                //decoder 5
                prev = AppendDecoderConv(prev, 512, prefix + "conv5_1");
                prev = Layers.Upsample2D2(prev, xOffsets[4], yOffsets[4]);
                prev = AppendDecoderConv(prev, 512, prefix + "conv4_4");
                prev = AppendDecoderConv(prev, 512, prefix + "conv4_3");
                prev = AppendDecoderConv(prev, 512, prefix + "conv4_2");
            }

            if (ind >= 4)
            {
                //decoder 4
                prev = AppendDecoderConv(prev, 256, prefix + "conv4_1");
                prev = Layers.Upsample2D2(prev, xOffsets[3], yOffsets[3]);
                prev = AppendDecoderConv(prev, 256, prefix + "conv3_4");
                prev = AppendDecoderConv(prev, 256, prefix + "conv3_3");
                prev = AppendDecoderConv(prev, 256, prefix + "conv3_2");
            }

            if (ind >= 3)
            {
                //decoder 3
                prev = AppendDecoderConv(prev, 128, prefix + "conv3_1");
                prev = Layers.Upsample2D2(prev, xOffsets[2], yOffsets[2]);
                prev = AppendDecoderConv(prev, 128, prefix + "conv2_2");
            }

            if (ind >= 2)
            {
                //decoder 2
                prev = AppendDecoderConv(prev, 64, prefix + "conv2_1");
                prev = Layers.Upsample2D2(prev, xOffsets[1], yOffsets[1]);
                prev = AppendDecoderConv(prev, 64, prefix + "conv1_2");
            }

            //decoder 1: back to 3 channels, the final ReLU is the named output node
            prev = AppendDecoderConv(prev, 3, prefix + "conv1_1", "output");

            return prev;
        }

        /// <summary>
        /// Appends one decoder step: reflection padding of one element on both sides of the first two axes,
        /// a 3x3 convolution with stride 1 and a ReLU.
        /// </summary>
        /// <param name="input">Variable the step is attached to.</param>
        /// <param name="outputChannels">Output channel count of the convolution.</param>
        /// <param name="convName">Name given to the convolution.</param>
        /// <param name="reluName">Name given to the ReLU node; when null the ReLU is created without a name.</param>
        /// <returns>The output of the ReLU.</returns>
        private Variable AppendDecoderConv(Variable input, int outputChannels, string convName, string reluName = null)
        {
            Variable padded    = CNTKLib.Pad(input, PaddingMode.REFLECTPAD, new SizeTVector(new uint[] { 1, 1, 0 }), new SizeTVector(new uint[] { 1, 1, 0 }));
            Variable convolved = Layers.Convolution2D(padded, outputChannels, 3, 3, device, 1, false, true, convName);

            if (reluName == null)
            {
                return CNTKLib.ReLU(convolved);
            }
            return CNTKLib.ReLU(convolved, reluName);
        }
        /// <summary>
        /// Create the vgg19 convolutional encoders for UST model
        /// </summary>
        /// <param name="imageDimension">Width and height of the 3-channel input image.</param>
        /// <returns>A combined function whose outputs are, in order, relu1_1, relu2_1, relu3_1, relu4_1 and relu5_1. Its input variable is named "input".</returns>
        protected Function CreateEncoders(Vector2Int imageDimension)
        {
            VariableVector outputs = new VariableVector();
            //input variables
            Variable prev = Variable.InputVariable(new int[] { imageDimension.x, imageDimension.y, 3 }, DataType.Float, "input");

            //vgg preprocessing: 1x1 convolution from 3 to 3 channels, without padding or activation
            prev = Layers.Convolution2D(prev, 3, 1, 1, device, 1, false, true, "conv0_preprocessing");

            //----conv1----
            prev = AppendEncoderConv(prev, 64, "1_1");
            outputs.Add(prev); //relu1_1
            prev = AppendEncoderConv(prev, 64, "1_2");
            prev = AppendEncoderMaxPool(prev, "pool1");

            //----conv2----
            prev = AppendEncoderConv(prev, 128, "2_1");
            outputs.Add(prev); //relu2_1
            prev = AppendEncoderConv(prev, 128, "2_2");
            prev = AppendEncoderMaxPool(prev, "pool2");

            //----conv3----
            prev = AppendEncoderConv(prev, 256, "3_1");
            outputs.Add(prev); //relu3_1
            prev = AppendEncoderConv(prev, 256, "3_2");
            prev = AppendEncoderConv(prev, 256, "3_3");
            prev = AppendEncoderConv(prev, 256, "3_4");
            prev = AppendEncoderMaxPool(prev, "pool3");

            //----conv4----
            prev = AppendEncoderConv(prev, 512, "4_1");
            outputs.Add(prev); //relu4_1
            prev = AppendEncoderConv(prev, 512, "4_2");
            prev = AppendEncoderConv(prev, 512, "4_3");
            prev = AppendEncoderConv(prev, 512, "4_4");
            prev = AppendEncoderMaxPool(prev, "pool4");

            //----conv5----
            prev = AppendEncoderConv(prev, 512, "5_1");
            outputs.Add(prev); //relu5_1

            //expose the five collected relu outputs as one function
            return CNTKLib.Combine(outputs);
        }

        /// <summary>
        /// Appends one encoder step: reflection padding of one element on both sides of the first two axes,
        /// a 3x3 convolution with stride 1 named "conv" + stage and a ReLU named "relu" + stage.
        /// </summary>
        /// <param name="input">Variable the step is attached to.</param>
        /// <param name="outputChannels">Output channel count of the convolution.</param>
        /// <param name="stage">Name suffix such as "1_1".</param>
        /// <returns>The output of the ReLU.</returns>
        private Variable AppendEncoderConv(Variable input, int outputChannels, string stage)
        {
            Variable padded    = CNTKLib.Pad(input, PaddingMode.REFLECTPAD, new SizeTVector(new uint[] { 1, 1, 0 }), new SizeTVector(new uint[] { 1, 1, 0 }));
            Variable convolved = Layers.Convolution2D(padded, outputChannels, 3, 3, device, 1, false, true, "conv" + stage);

            return CNTKLib.ReLU(convolved, "relu" + stage);
        }

        /// <summary>
        /// Appends a 2x2 max pooling with stride 2 and the given node name.
        /// </summary>
        /// <param name="input">Variable the pooling is attached to.</param>
        /// <param name="name">Name given to the pooling node.</param>
        /// <returns>The output of the pooling.</returns>
        private Variable AppendEncoderMaxPool(Variable input, string name)
        {
            return CNTKLib.Pooling(input, PoolingType.Max, new int[] { 2, 2 }, new int[] { 2, 2 }, BoolVector.Repeat(true, 2), false, true, name);
        }
Esempio n. 6
0
        /// <summary>
        /// Assembles the capsule network (convolution, primary capsules, digit capsules and decoder) and
        /// initialises the network, loss, evaluation metric, Adam learner, trainer and evaluator fields.
        /// </summary>
        void create_network()
        {
            imageVariable = Util.inputVariable(input_shape, "image");

            // Feature extraction: 9x9 ReLU convolution, primary capsules, then the digit capsules.
            var first_conv = Layers.Convolution2D(
                imageVariable, 256, new[] { 9, 9 }, computeDevice,
                use_padding: false, activation: CC.ReLU, name: "conv1");
            var primary_capsules = create_primary_cap(
                first_conv, dim_capsule: 8, n_channels: 32,
                kernel_size: new[] { 9, 9 }, strides: new[] { 2, 2 }, pad: false);
            var digit_capsules = create_capsule_layer(
                primary_capsules, num_capsule: 10, dim_capsule: 16,
                routings: routings, name: "digitcaps");
            var capsule_lengths = get_length_and_remove_last_dimension(digit_capsules, name: "capsnet");

            categoricalLabel = Util.inputVariable(new[] { 10 }, "label");

            // Two masked views of the digit capsules: one driven by the label input, one without a mask argument.
            var label_vector = new C.VariableVector();
            label_vector.Add(categoricalLabel);
            var masked_by_label = get_mask_and_infer_from_last_dimension(digit_capsules, CC.Combine(label_vector));
            var masked_by_infer = get_mask_and_infer_from_last_dimension(digit_capsules, null);

            // The same decoder is invoked on both views; the evaluation result is not referenced again in this method.
            var decoder = create_decoder(masked_by_infer.Output.Shape.Dimensions.ToArray());
            var training_reconstruction   = Model.invoke_model(decoder, new C.Variable[] { masked_by_label });
            var evaluation_reconstruction = Model.invoke_model(decoder, new C.Variable[] { masked_by_infer });

            var network_outputs = new C.VariableVector();
            network_outputs.Add(capsule_lengths);
            network_outputs.Add(training_reconstruction);
            network = CC.Combine(network_outputs, "overall_training_network");
            Logging.log_number_of_parameters(network);

            // First loss component (margin loss):
            //   label * max(0, 0.9 - length)^2 + 0.5 * (1 - label) * max(0, length - 0.1)^2
            var present_margin   = CC.Square(CC.ElementMax(DC(0), CC.Minus(DC(0.9), capsule_lengths), ""));
            var present_term     = CC.ElementTimes(categoricalLabel, present_margin);
            var absent_weight    = DC(0.5);
            var absent_indicator = CC.Minus(DC(1), categoricalLabel);
            var absent_margin    = CC.Square(CC.ElementMax(DC(0), CC.Minus(capsule_lengths, DC(0.1)), ""));
            var absent_term      = CC.ElementTimes(absent_weight, CC.ElementTimes(absent_indicator, absent_margin));

            var digit_loss = CC.Plus(present_term, absent_term);
            digit_loss = CC.ReduceSum(digit_loss, C.Axis.AllStaticAxes());

            // Second loss component: squared reconstruction error divided by the number of decoder outputs.
            var reconstruction_size = Util.np_prod(training_reconstruction.Output.Shape.Dimensions.ToArray());
            var reconstruction_sse  = CC.SquaredError(training_reconstruction, imageVariable);
            var reconstruction_mse  = CC.ElementDivide(reconstruction_sse, DC(reconstruction_size));

            var weighted_reconstruction = CC.ElementTimes(DC(0.35), reconstruction_mse);
            loss_function = CC.Plus(digit_loss, weighted_reconstruction);
            eval_function = CC.ClassificationError(capsule_lengths, categoricalLabel);

            // Adam learner over every parameter of the combined network.
            var trainable_parameters       = new C.ParameterVector(network.Parameters().ToArray());
            var learning_rate_schedule     = new C.TrainingParameterScheduleDouble(0.001 * batch_size, (uint)batch_size);
            var momentum_schedule          = new C.TrainingParameterScheduleDouble(0.9);
            var variance_momentum_schedule = new C.TrainingParameterScheduleDouble(0.99);
            learner = CC.AdamLearner(
                trainable_parameters,
                learning_rate_schedule,
                momentum_schedule,
                true,
                variance_momentum_schedule);

            var learners = new C.LearnerVector(new C.Learner[] { learner });
            trainer   = CC.CreateTrainer(network, loss_function, eval_function, learners);
            evaluator = CC.CreateEvaluator(eval_function);
        }