Example #1
        public void TestCreateTrainingModel()
        {
            ModelBuilder builder = create();

            NetParameter net_param = builder.CreateModel();
            RawProto     proto     = net_param.ToProto("root");
            string       strNet    = proto.ToString();

            RawProto     proto2     = RawProto.Parse(strNet);
            NetParameter net_param2 = NetParameter.FromProto(proto2);

            m_log.CHECK(net_param2.Compare(net_param), "The two net parameters should be the same!");

            // verify creating the model.
            SolverParameter solver      = builder.CreateSolver();
            RawProto        protoSolver = solver.ToProto("root");
            string          strSolver   = protoSolver.ToString();

            SettingsCaffe      settings  = new SettingsCaffe();
            CancelEvent        evtCancel = new CancelEvent();
            MyCaffeControl <T> mycaffe   = new MyCaffeControl <T>(settings, m_log, evtCancel);

            save(strNet, strSolver, false);

            //            mycaffe.LoadLite(Phase.TRAIN, strSolver, strNet, null);
            mycaffe.Dispose();
        }
Example #2
        /// <summary>
        /// The constructor.
        /// </summary>
        /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
        /// <param name="log">Specifies the output log.</param>
        /// <param name="evtCancel">Specifies the cancel event used to abort processing.</param>
        /// <param name="strModelType">Specifies the model type: 'vgg19', 'vgg16'</param>
        /// <param name="strModel">Specifies the network model to use.</param>
        /// <param name="rgWeights">Optionally, specifies the weights to use (or <i>null</i> to ignore).</param>
        /// <param name="bCaffeModel">Specifies whether or not the weights are in the caffe (<i>true</i>) or mycaffe (<i>false</i>) format.</param>
        /// <param name="solverType">Optionally, specifies the solver type to use (default = LBFGS).</param>
        /// <param name="dfLearningRate">Optionally, specifies the solver learning rate (default = 1.0).</param>
        public NeuralStyleTransfer(CudaDnn <T> cuda, Log log, CancelEvent evtCancel, string strModelType, string strModel, byte[] rgWeights, bool bCaffeModel, SolverParameter.SolverType solverType = SolverParameter.SolverType.LBFGS, double dfLearningRate = 1.0)
        {
            m_cuda           = cuda;
            m_log            = log;
            m_evtCancel      = evtCancel;
            m_rgWeights      = rgWeights;
            m_solverType     = solverType;
            m_dfLearningRate = dfLearningRate;

            if (m_evtCancel != null)
            {
                m_evtCancel.Reset();
            }

            RawProto proto = RawProto.Parse(strModel);

            m_param = NetParameter.FromProto(proto);

            add_input_layer(m_param);
            m_rgstrUsedLayers = load_layers(strModelType);
            prune(m_param, m_rgstrUsedLayers);
            add_gram_layers(m_param);

            m_transformationParam             = new TransformationParameter();
            m_transformationParam.color_order = (bCaffeModel) ? TransformationParameter.COLOR_ORDER.BGR : TransformationParameter.COLOR_ORDER.RGB;
            m_transformationParam.scale       = 1.0;
            m_transformationParam.mean_value  = m_rgMeanValues;

            m_persist = new PersistCaffe <T>(m_log, false);
        }
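A minimal usage sketch (not from the source) for the constructor above; the device id, file paths, and the 'vgg19' model type string are illustrative assumptions only, and a using System.IO directive is assumed:

        // Hypothetical setup -- the prototxt and .caffemodel paths are placeholders.
        CudaDnn<float> cuda = new CudaDnn<float>(0);               // assumes GPU device 0
        Log log = new Log("neural style");
        CancelEvent evtCancel = new CancelEvent();
        string strModel = File.ReadAllText("vgg19.prototxt");      // placeholder path
        byte[] rgWeights = File.ReadAllBytes("vgg19.caffemodel");  // placeholder path

        NeuralStyleTransfer<float> nst = new NeuralStyleTransfer<float>(
            cuda, log, evtCancel, "vgg19", strModel, rgWeights, true /* caffe-format weights */);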
Example #3
        private void prune(NetParameter p, List <string> rgUsedLayers)
        {
            int nPruneFrom = -1;

            // We assume that all layers after the used layers are not useful.
            for (int i = 0; i < p.layer.Count; i++)
            {
                for (int j = 0; j < p.layer[i].top.Count; j++)
                {
                    bool bIsUsed = rgUsedLayers.Contains(p.layer[i].top[j]);

                    if (nPruneFrom >= 0 && bIsUsed)
                    {
                        nPruneFrom = -1;
                        break;
                    }
                    else if (nPruneFrom < 0 && !bIsUsed)
                    {
                        nPruneFrom = i;
                    }
                }
            }

            if (nPruneFrom >= 0)
            {
                for (int i = p.layer.Count - 1; i >= nPruneFrom; i--)
                {
                    m_log.WriteLine("Pruning layer '" + p.layer[i].name + "'");
                    p.layer.RemoveAt(i);
                }
            }
        }
Example #4
        ////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>   Function for a non-branching model. </summary>
        ///
        /// <param name="path"> Full pathname of the file. </param>
        ///
        /// <returns>   A List&lt;Function&gt; </returns>
        ////////////////////////////////////////////////////////////////////////////////////////////////////

        public static List <Function> ModelLoad(string path)
        {
            List <Function> result = new List <Function>();

            using (FileStream stream = new FileStream(path, FileMode.Open))
            {
                NetParameter netparam = Serializer.Deserialize <NetParameter>(stream);

                foreach (V1LayerParameter layer in netparam.Layers)
                {
                    Function func = CreateFunction(layer);

                    if (func != null)
                    {
                        result.Add(func);
                    }
                }

                foreach (LayerParameter layer in netparam.Layer)
                {
                    Function func = CreateFunction(layer);

                    if (func != null)
                    {
                        result.Add(func);
                    }
                }
            }

            return(result);
        }
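A hedged usage sketch for the loader above; the .caffemodel path is a placeholder, and the file is assumed to be a protobuf-serialized NetParameter:

        // Convert every recognized layer of a Caffe model into a Function.
        List<Function> functions = ModelLoad("bvlc_alexnet.caffemodel"); // placeholder path
        Console.WriteLine(functions.Count + " layers converted to functions.");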
Example #5
        /// <summary>
        /// The PreProcessInput method allows derived data layers to convert a property set of input
        /// data into the bottom blob collection used as input.
        /// </summary>
        /// <param name="customInput">Specifies the custom input data.</param>
        /// <param name="colBottom">Optionally, specifies the bottom data to fill.</param>
        /// <returns>The bottom data is returned.</returns>
        /// <remarks>The blobs returned should match the blob descriptions returned in the LayerParameter's
        /// overrides for 'PrepareRunModelInputs' and 'PrepareRunModel'.</remarks>
        public override BlobCollection <T> PreProcessInput(PropertySet customInput, BlobCollection <T> colBottom = null)
        {
            if (colBottom == null)
            {
                string   strInput = m_param.PrepareRunModelInputs();
                RawProto proto    = RawProto.Parse(strInput);
                Dictionary <string, BlobShape> rgInput = NetParameter.InputFromProto(proto);
                colBottom = new BlobCollection <T>();

                foreach (KeyValuePair <string, BlobShape> kv in rgInput)
                {
                    Blob <T> blob = new Blob <T>(m_cuda, m_log);
                    blob.Name = kv.Key;
                    blob.Reshape(kv.Value);
                    colBottom.Add(blob);
                }
            }

            string strEncInput = customInput.GetProperty("InputData");

            if (strEncInput == null)
            {
                throw new Exception("Could not find the expected input property 'InputData'!");
            }

            PreProcessInput(strEncInput, null, colBottom);

            return(colBottom);
        }
Example #6
        ////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>   Function for a branching model. </summary>
        ///
        /// <param name="path"> Full pathname of the file. </param>
        ///
        /// <returns>   The network. </returns>
        ////////////////////////////////////////////////////////////////////////////////////////////////////

        public static FunctionDictionary LoadNetWork(string path)
        {
            FunctionDictionary functionDictionary = new FunctionDictionary();

            using (FileStream stream = new FileStream(path, FileMode.Open))
            {
                NetParameter netparam = Serializer.Deserialize <NetParameter>(stream);

                foreach (V1LayerParameter layer in netparam.Layers)
                {
                    Function func = CreateFunction(layer);

                    if (func != null)
                    {
                        functionDictionary.Add(func);
                    }
                }

                foreach (LayerParameter layer in netparam.Layer)
                {
                    Function func = CreateFunction(layer);

                    if (func != null)
                    {
                        functionDictionary.Add(func);
                    }
                }
            }

            return(functionDictionary);
        }
Example #7
        // Function for a non-branching model
        public static List <Function <T> > ModelLoad <T>(string path) where T : unmanaged, IComparable <T>
        {
            List <Function <T> > result = new List <Function <T> >();

            using (FileStream stream = new FileStream(path, FileMode.Open))
            {
                NetParameter netparam = Serializer.Deserialize <NetParameter>(stream);

                foreach (V1LayerParameter layer in netparam.Layers)
                {
                    Function <T> func = CreateFunction <T>(layer);

                    if (func != null)
                    {
                        result.Add(func);
                    }
                }

                foreach (LayerParameter layer in netparam.Layer)
                {
                    Function <T> func = CreateFunction <T>(layer);

                    if (func != null)
                    {
                        result.Add(func);
                    }
                }
            }

            return(result);
        }
Example #8
        // Function for a branching model
        public static FunctionDictionary <T> LoadNetWork <T>(string path) where T : unmanaged, IComparable <T>
        {
            FunctionDictionary <T> functionDictionary = new FunctionDictionary <T>();

            using (FileStream stream = new FileStream(path, FileMode.Open))
            {
                NetParameter netparam = Serializer.Deserialize <NetParameter>(stream);

                foreach (V1LayerParameter layer in netparam.Layers)
                {
                    Function <T> func = CreateFunction <T>(layer);

                    if (func != null)
                    {
                        functionDictionary.Add(func);
                    }
                }

                foreach (LayerParameter layer in netparam.Layer)
                {
                    Function <T> func = CreateFunction <T>(layer);

                    if (func != null)
                    {
                        functionDictionary.Add(func);
                    }
                }
            }

            return(functionDictionary);
        }
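A short hedged sketch for the generic overloads above; the path is a placeholder, and float is chosen only to satisfy the unmanaged, IComparable&lt;T&gt; constraint:

        // Load a branching model into a FunctionDictionary.
        FunctionDictionary<float> nn = LoadNetWork<float>("bvlc_googlenet.caffemodel"); // placeholder path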
Example #9
        /// <summary>
        /// Load the MNIST LeNet model and set its sources to the MNIST dataset (already loaded
        /// in the database using the MyCaffeTestApplication).
        /// </summary>
        /// <param name="ds">Specifies the MNIST dataset descriptor.</param>
        /// <returns>The NetParameter for the LeNet is returned.</returns>
        public NetParameter CreateMnistModel(DatasetDescriptor ds)
        {
            string       str      = System.Text.Encoding.Default.GetString(Properties.Resources.lenet_train_test);
            RawProto     proto    = RawProto.Parse(str);
            NetParameter netParam = NetParameter.FromProto(proto);

            for (int i = 0; i < netParam.layer.Count; i++)
            {
                LayerParameter layer = netParam.layer[i];

                if (layer.type == LayerParameter.LayerType.DATA)
                {
                    if (layer.include[0].phase == Phase.TRAIN)
                    {
                        layer.data_param.source = ds.TrainingSourceName;
                    }
                    else
                    {
                        layer.data_param.source = ds.TestingSourceName;
                    }
                }
            }

            return(netParam);
        }
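A short hedged sketch of how the result above is typically consumed; 'ds' is assumed to be the MNIST DatasetDescriptor loaded elsewhere (e.g., via the MyCaffe database):

        // Build the LeNet model against the MNIST data sources and render it as prototxt-style text.
        NetParameter netParam = CreateMnistModel(ds);
        string strModel = netParam.ToProto("root").ToString();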
Example #10
        public BeamSearchTest2(string strName, int nDeviceID, EngineParameter.Engine engine)
            : base(strName, new List <int>() { 3, 2, 4, 1 }, nDeviceID)
        {
            m_engine = engine;

            NetParameter net_param = new NetParameter();

            LayerParameter input = new LayerParameter(LayerParameter.LayerType.INPUT);

            input.input_param.shape.Add(new BlobShape(new List <int>()
            {
                1, 1, 1
            }));
            input.input_param.shape.Add(new BlobShape(new List <int>()
            {
                80, 1, 1
            }));
            input.input_param.shape.Add(new BlobShape(new List <int>()
            {
                80, 1, 1
            }));
            input.input_param.shape.Add(new BlobShape(new List <int>()
            {
                80, 1
            }));
            input.top.Add("dec");
            input.top.Add("enc");
            input.top.Add("encr");
            input.top.Add("encc");
            net_param.layer.Add(input);

            string strModel = net_param.ToProto("root").ToString();

            m_net = new Net <T>(m_cuda, m_log, net_param, new CancelEvent(), null);
            InputLayerEx <T> layer = new InputLayerEx <T>(m_cuda, m_log, m_net.layers[0]);

            layer.OnGetData += Layer_OnGetData;
            m_net.layers[0]  = layer;

            m_rgTestSequences.Add("rdany but you can call me dany");
            m_rgTestSequences.Add("rdany call me dany");
            m_rgTestSequences.Add("rdany you can call me dany");
            m_rgTestSequences.Add("my name is dany");
            m_rgTestSequences.Add("call me dany");
            m_rgrgTestSequenceIndexes = new List <List <int> >();

            foreach (string strSequence in m_rgTestSequences)
            {
                string[]   rgstrWords = strSequence.Split(' ');
                List <int> rgIdx      = new List <int>();

                foreach (string strWord in rgstrWords)
                {
                    int nIdx = layer.Vocabulary.WordToIndex(strWord);
                    rgIdx.Add(nIdx);
                }

                m_rgrgTestSequenceIndexes.Add(rgIdx);
            }
        }
Example #11
 public virtual TsParameter GetTsParameter(NetParameter netParameter)
 {
     return(new TsParameter
     {
         Name = GetTsName(netParameter.Name),
         FieldType = GetTsType(netParameter.FieldType),
         IsNullable = IsFieldNullable(netParameter.FieldType)
     });
 }
Example #12
        /// <summary>
        /// The ResizeModel method gives the custom trainer the opportunity to resize the model if needed.
        /// </summary>
        /// <param name="strModel">Specifies the model descriptor.</param>
        /// <param name="rgVocabulary">Specifies the vocabulary.</param>
        /// <param name="log">Specifies the output log.</param>
        /// <returns>A new model descriptor is returned (or the same 'strModel' if no changes were made).</returns>
        /// <remarks>Note, this method is called after PreloadData.</remarks>
        string IXMyCaffeCustomTrainerRNN.ResizeModel(Log log, string strModel, BucketCollection rgVocabulary)
        {
            if (rgVocabulary == null || rgVocabulary.Count == 0)
            {
                return(strModel);
            }

            int                   nVocabCount  = rgVocabulary.Count;
            NetParameter          p            = NetParameter.FromProto(RawProto.Parse(strModel));
            string                strEmbedName = "";
            EmbedParameter        embed        = null;
            string                strIpName    = "";
            InnerProductParameter ip           = null;

            foreach (LayerParameter layer in p.layer)
            {
                if (layer.type == LayerParameter.LayerType.EMBED)
                {
                    strEmbedName = layer.name;
                    embed        = layer.embed_param;
                }
                else if (layer.type == LayerParameter.LayerType.INNERPRODUCT)
                {
                    strIpName = layer.name;
                    ip        = layer.inner_product_param;
                }
            }

            if (embed != null)
            {
                if (embed.input_dim != (uint)nVocabCount)
                {
                    log.WriteLine("WARNING: Embed layer '" + strEmbedName + "' input dim changed from " + embed.input_dim.ToString() + " to " + nVocabCount.ToString() + " to accomodate for the vocabulary count.");
                    embed.input_dim = (uint)nVocabCount;
                }
            }

            if (ip != null && ip.num_output != (uint)nVocabCount)
            {
                log.WriteLine("WARNING: InnerProduct layer '" + strIpName + "' num_output changed from " + ip.num_output.ToString() + " to " + nVocabCount.ToString() + " to accommodate the vocabulary count.");
                ip.num_output = (uint)nVocabCount;
            }

            m_rgVocabulary = rgVocabulary;

            RawProto proto = p.ToProto("root");

            return(proto.ToString());
        }
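A hedged sketch of the call pattern; 'log', 'strModel', and 'rgVocabulary' are assumed to be supplied by the trainer host (the vocabulary is normally built during PreloadData, per the remarks above):

        // Resize the embed/output dimensions to the vocabulary, then re-parse to verify.
        string strResized = ((IXMyCaffeCustomTrainerRNN)this).ResizeModel(log, strModel, rgVocabulary);
        NetParameter pResized = NetParameter.FromProto(RawProto.Parse(strResized));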
Example #13
        /// <summary>
        /// The constructor.
        /// </summary>
        /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
        /// <param name="log">Specifies the output log.</param>
        /// <param name="evtCancel">Specifies the cancel event used to abort processing.</param>
        /// <param name="rgLayers">Specifies the layers along with their style and content weights.</param>
        /// <param name="strModelDesc">Specifies the network model descriptor to use.</param>
        /// <param name="rgWeights">Optionally, specifies the weights to use (or <i>null</i> to ignore).</param>
        /// <param name="bCaffeModel">Specifies whether or not the weights are in the caffe (<i>true</i>) or mycaffe (<i>false</i>) format.</param>
        /// <param name="solverType">Optionally, specifies the solver type to use (default = LBFGS).</param>
        /// <param name="dfLearningRate">Optionally, specifies the solver learning rate (default = 1.0).</param>
        /// <param name="nMaxImageSize">Optionally, specifies the default maximum image size (default = 840).</param>
        /// <param name="nLBFGSCorrections">Optionally, specifies the LBFGS Corrections (only used when using the LBFGS solver, default = 100).</param>
        /// <param name="netShare">Optionally, specifies a net to share.</param>
        public NeuralStyleTransfer(CudaDnn <T> cuda, Log log, CancelEvent evtCancel, Dictionary <string, Tuple <double, double> > rgLayers, string strModelDesc, byte[] rgWeights, bool bCaffeModel, SolverParameter.SolverType solverType = SolverParameter.SolverType.LBFGS, double dfLearningRate = 1.0, int nMaxImageSize = 840, int nLBFGSCorrections = 100, Net <T> netShare = null)
        {
            m_log                  = log;
            m_evtCancel            = evtCancel;
            m_rgWeights            = rgWeights;
            m_solverType           = solverType;
            m_dfLearningRate       = dfLearningRate;
            m_nDefaultMaxImageSize = nMaxImageSize;
            m_nLBFGSCorrections    = nLBFGSCorrections;

            setupNetShare(netShare, cuda);

            if (m_evtCancel != null)
            {
                m_evtCancel.Reset();
            }

            RawProto proto = RawProto.Parse(strModelDesc);

            m_param = NetParameter.FromProto(proto);

            Dictionary <string, double> rgStyle   = new Dictionary <string, double>();
            Dictionary <string, double> rgContent = new Dictionary <string, double>();

            foreach (KeyValuePair <string, Tuple <double, double> > kv in rgLayers)
            {
                if (kv.Value.Item1 != 0)
                {
                    rgStyle.Add(kv.Key, kv.Value.Item1);
                }

                if (kv.Value.Item2 != 0)
                {
                    rgContent.Add(kv.Key, kv.Value.Item2);
                }
            }

            add_input_layer(m_param);
            m_rgstrUsedLayers = load_layers(rgStyle, rgContent);
            prune(m_param, m_rgstrUsedLayers);
            add_gram_layers(m_param);

            m_transformationParam             = new TransformationParameter();
            m_transformationParam.color_order = (bCaffeModel) ? TransformationParameter.COLOR_ORDER.BGR : TransformationParameter.COLOR_ORDER.RGB;
            m_transformationParam.scale       = 1.0;
            m_transformationParam.mean_value  = m_rgMeanValues;

            m_persist = new PersistCaffe <T>(m_log, false);
        }
Example #14
        /// <summary>
        /// Train the model.
        /// </summary>
        /// <param name="bNewWts">Specifies whether to use new weights or load existing ones (if they exist).</param>
        public void Train(bool bNewWts)
        {
            if (m_mycaffeTrain == null)
            {
                return;
            }

            byte[] rgWts = null;

            if (!bNewWts)
            {
                rgWts = loadWeights();
            }

            if (rgWts == null)
            {
                Console.WriteLine("Starting with new weights...");
            }

            SolverParameter solver = createSolver();
            NetParameter    model  = createModel();

            string strModel = model.ToProto("root").ToString();

            Console.WriteLine("Using Train Model:");
            Console.WriteLine(strModel);
            Console.WriteLine("Starting training...");

            m_mycaffeTrain.LoadLite(Phase.TRAIN, solver.ToProto("root").ToString(), model.ToProto("root").ToString(), rgWts, false, false);
            m_mycaffeTrain.SetOnTrainingStartOverride(new EventHandler(onTrainingStart));
            m_mycaffeTrain.SetOnTestingStartOverride(new EventHandler(onTestingStart));

            // Set clockwork weights.
            if (m_param.LstmEngine != EngineParameter.Engine.CUDNN)
            {
                Net <float>  net   = m_mycaffeTrain.GetInternalNet(Phase.TRAIN);
                Blob <float> lstm1 = net.parameters[2];
                lstm1.SetData(1, m_param.Hidden, m_param.Hidden);
            }

            m_mycaffeTrain.Train(m_param.Iterations);
            saveLstmState(m_mycaffeTrain);

            Image img = SimpleGraphingControl.QuickRender(m_plots, 1000, 600);

            showImage(img, "training.png");
            saveWeights(m_mycaffeTrain.GetWeights());
        }
Example #15
        private void add_input_layer(NetParameter p)
        {
            List <int>     rgDelIdx   = new List <int>();
            LayerParameter data_param = null;
            LayerParameter input      = null;

            for (int i = 0; i < p.layer.Count; i++)
            {
                if (p.layer[i].type == LayerParameter.LayerType.DATA)
                {
                    if (data_param == null)
                    {
                        data_param        = p.layer[i];
                        m_strDataBlobName = data_param.top[0];
                    }

                    rgDelIdx.Add(i);
                }
                else if (p.layer[i].type == LayerParameter.LayerType.INPUT)
                {
                    input             = p.layer[i];
                    m_strDataBlobName = input.top[0];
                }
            }

            for (int i = rgDelIdx.Count - 1; i >= 0; i--)
            {
                p.layer.RemoveAt(rgDelIdx[i]);
            }

            if (input == null)
            {
                input = new LayerParameter(LayerParameter.LayerType.INPUT);
                int nH = 224;
                int nW = 224;
                input.input_param.shape.Add(new BlobShape(1, 3, nH, nW));
                input.name = "input1";
                input.top.Add(m_strDataBlobName);

                p.layer.Insert(0, input);
            }
            else
            {
                input.name = "input1";
            }
        }
Example #16
        private void add_gram_layers(NetParameter p)
        {
            List <KeyValuePair <string, double> > lstStyle = m_rgLayers["style"].ToList();
            List <KeyValuePair <string, double> > lstGram  = m_rgLayers["gram"].ToList();

            for (int i = 0; i < lstStyle.Count; i++)
            {
                LayerParameter layer    = new LayerParameter(LayerParameter.LayerType.GRAM);
                string         strStyle = lstStyle[i].Key;
                string         strGram  = lstGram[i].Key;

                layer.name = strGram;

                layer.bottom.Add(strStyle);
                layer.top.Add(strGram);
                layer.gram_param.alpha = m_dfStyleDataScale1;
                layer.gram_param.disable_scaling_on_gradient = true;
                layer.gram_param.beta = m_dfStyleDataScale2;

                p.layer.Add(layer);
            }
        }
Example #17
        /// <summary>
        /// Set the parameters needed from the Net, namely the data source used.
        /// </summary>
        /// <param name="np">Specifies the NetParameter used.</param>
        public override void SetNetParameterUsed(NetParameter np)
        {
            base.SetNetParameterUsed(np);

            m_strSource  = null;
            m_nSourceId  = 0;
            m_nProjectID = np.ProjectID;

            foreach (LayerParameter p in np.layer)
            {
                if (p.type == LayerParameter.LayerType.DATA)
                {
                    m_strSource = p.data_param.source;
                    break;
                }
            }

            if (m_strSource != null)
            {
                m_nSourceId = m_db.GetSourceID(m_strSource);
            }
        }
Example #18
        public void TestCreateDeployModel()
        {
            ModelBuilder builder = create();

            NetParameter net_param = builder.CreateDeployModel();
            RawProto     proto     = net_param.ToProto("root");
            string       strNet    = proto.ToString();

            RawProto     proto2     = RawProto.Parse(strNet);
            NetParameter net_param2 = NetParameter.FromProto(proto2);

            m_log.CHECK(net_param2.Compare(net_param), "The two net parameters should be the same!");

            // verify creating the model.
            SettingsCaffe      settings  = new SettingsCaffe();
            CancelEvent        evtCancel = new CancelEvent();
            MyCaffeControl <T> mycaffe   = new MyCaffeControl <T>(settings, m_log, evtCancel);

            save(strNet, null, true);

            //            mycaffe.LoadToRun(strNet, null, new BlobShape(1, 3, 300, 300));
            mycaffe.Dispose();
        }
Example #19
        /// <summary>
        /// Replace the Data input layer with the MemoryData input layer.
        /// </summary>
        /// <param name="strModel">Specifies the model descriptor to change.</param>
        /// <param name="nBatchSize">Specifies the batch size.</param>
        /// <returns>The new model descriptor with the MemoryData layer is returned.</returns>
        private string fixup_model(string strModel, int nBatchSize)
        {
            RawProto     proto     = RawProto.Parse(strModel);
            NetParameter net_param = NetParameter.FromProto(proto);

            for (int i = 0; i < net_param.layer.Count; i++)
            {
                if (net_param.layer[i].type == LayerParameter.LayerType.DATA)
                {
                    LayerParameter layer = new LayerParameter(LayerParameter.LayerType.INPUT);
                    layer.name    = net_param.layer[i].name;
                    layer.top     = net_param.layer[i].top;
                    layer.bottom  = net_param.layer[i].bottom;
                    layer.include = net_param.layer[i].include;

                    layer.input_param.shape.Add(new BlobShape(nBatchSize, 1, 28, 28));
                    layer.input_param.shape.Add(new BlobShape(nBatchSize, 1, 1, 1));
                    net_param.layer[i] = layer;
                }
            }

            return(net_param.ToProto("root").ToString());
        }
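A hedged sketch showing the intended use; it reuses the LeNet resource from Example #9 and an arbitrary batch size of 16:

        // Swap the DATA layers for INPUT layers sized for in-memory feeding.
        string strLeNet = System.Text.Encoding.Default.GetString(Properties.Resources.lenet_train_test);
        string strRunModel = fixup_model(strLeNet, 16);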
Example #20
        /// <summary>
        /// The ResizeModel method gives the custom trainer the opportunity to resize the model if needed.
        /// </summary>
        /// <param name="strModel">Specifies the model descriptor.</param>
        /// <param name="rgVocabulary">Specifies the vocabulary.</param>
        /// <returns>A new model descriptor is returned (or the same 'strModel' if no changes were made).</returns>
        /// <remarks>Note, this method is called after PreloadData.</remarks>
        public string ResizeModel(string strModel, BucketCollection rgVocabulary)
        {
            if (rgVocabulary == null || rgVocabulary.Count == 0)
            {
                return(strModel);
            }

            int                   nVocabCount = rgVocabulary.Count;
            NetParameter          p           = NetParameter.FromProto(RawProto.Parse(strModel));
            EmbedParameter        embed       = null;
            InnerProductParameter ip          = null;

            foreach (LayerParameter layer in p.layer)
            {
                if (layer.type == LayerParameter.LayerType.EMBED)
                {
                    embed = layer.embed_param;
                }
                else if (layer.type == LayerParameter.LayerType.INNERPRODUCT)
                {
                    ip = layer.inner_product_param;
                }
            }

            if (embed != null)
            {
                embed.input_dim = (uint)nVocabCount;
            }

            if (ip != null)
            {
                ip.num_output = (uint)nVocabCount;
            }

            m_rgVocabulary = rgVocabulary;

            RawProto proto = p.ToProto("root");

            return(proto.ToString());
        }
Example #21
 /// <summary>
 /// This function allows other layers to gather needed information from the NetParameter, if any, and is called when initializing the Net.
 /// </summary>
 /// <param name="np">Specifies the NetParameter.</param>
 public virtual void SetNetParameterUsed(NetParameter np)
 {
 }
Example #22
        /// <summary>
        /// The constructor.
        /// </summary>
        /// <param name="strBaseDirectory">Specifies the base directory that contains the data and models.</param>
        /// <param name="strDataset">Specifies the dataset that the model will run on.</param>
        /// <param name="nChannels">Specifies the number of channels in the data set (e.g. color = 3, b/w = 1).</param>
        /// <param name="bSiamese">Specifies whether or not to create a Siamese network."</param>
        /// <param name="rgIpLayers">Specifies a list of inner product layers added to the end of the network where each entry specifies the number of output and whether or not Noise is enabled for the layer.</param>
        /// <param name="bUsePool5">Specifies whether or not to use the Pool layer as the last layer.</param>
        /// <param name="bUseDilationConv5">Specifies whether or not to use dilation on block 5 layers.</param>
        /// <param name="model">Specifies the type of ResNet model to create.</param>
        /// <param name="nBatchSize">Optionally, specifies the batch size (default = 32).</param>
        /// <param name="nAccumBatchSize">Optionally, specifies the accumulation batch size (default = 32).</param>
        /// <param name="rgGpuId">Optionally, specifies a set of GPU ID's to use (when null, GPU=0 is used).</param>
        /// <param name="net">Specifies the 'base' net parameter that is to be altered.</param>
        public ResNetModelBuilder(string strBaseDirectory, string strDataset, int nChannels, bool bSiamese, List <Tuple <int, bool> > rgIpLayers, bool bUsePool5, bool bUseDilationConv5, MODEL model, int nBatchSize = 32, int nAccumBatchSize = 32, List <int> rgGpuId = null, NetParameter net = null)
            : base(strBaseDirectory, net)
        {
            if (rgGpuId == null)
            {
                m_rgGpuID.Add(0);
            }
            else
            {
                m_rgGpuID = new List <int>(rgGpuId);
            }

            m_nChannels       = nChannels;
            m_bSiamese        = bSiamese;
            m_rgIpLayers      = rgIpLayers;
            m_model           = model;
            m_strModel        = model.ToString();
            m_nBatchSize      = nBatchSize;
            m_nAccumBatchSize = nAccumBatchSize;
            m_nIterSize       = m_nAccumBatchSize / m_nBatchSize;

            m_nBatchSizePerDevice = (m_rgGpuID.Count == 1) ? m_nBatchSize : m_nBatchSize / m_rgGpuID.Count;
            m_nIterSize           = (int)Math.Ceiling((float)m_nAccumBatchSize / (m_nBatchSizePerDevice * m_rgGpuID.Count));
            m_nGpuID   = m_rgGpuID[0];
            m_dfBaseLr = 0.001;

            m_bUseDilationConv5 = bUseDilationConv5;
            m_bUsePool5         = bUsePool5;
            m_strDataset        = strDataset;

            //-------------------------------------------------------
            // Create the transformer for Training.
            //-------------------------------------------------------
            m_transformTrain             = new TransformationParameter();
            m_transformTrain.mirror      = true;
            m_transformTrain.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
            m_transformTrain.mean_value  = new List <double>();
            m_transformTrain.mean_value.Add(104);
            m_transformTrain.mean_value.Add(117);
            m_transformTrain.mean_value.Add(123);

            //-------------------------------------------------------
            // Create the transformer for Testing.
            //-------------------------------------------------------
            m_transformTest             = new TransformationParameter();
            m_transformTest.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
            m_transformTest.mean_value  = new List <double>();
            m_transformTest.mean_value.Add(104);
            m_transformTest.mean_value.Add(117);
            m_transformTest.mean_value.Add(123);
        }
Example #23
        /// <summary>
        /// Create the model used to train the Encoder/Decoder
        /// Seq2Seq model using two LSTM layers where the first
        /// acts as the Encoder and the second the Decoder.
        /// </summary>
        /// <param name="nInputData">Specifies the count of the input data.</param>
        /// <param name="nInputLabel">Specifies the count of the label data.</param>
        /// <param name="nBatchOverride">Specifies an override for the batch count.</param>
        /// <param name="nTimeStepOverride">Specifies an override for the time-step count.</param>
        /// <returns>The NetParameter of the model is returned.</returns>
        public NetParameter CreateModel(int nInputData, int nInputLabel, int? nBatchOverride = null, int? nTimeStepOverride = null)
        {
            NetParameter net = new NetParameter();

            int nHidden = m_nHidden;
            int nBatch  = (nBatchOverride.HasValue) ? nBatchOverride.Value : m_nBatch;
            int nSteps  = (nTimeStepOverride.HasValue) ? nTimeStepOverride.Value : m_nTimeSteps;

            m_nInputData  = nInputData;
            m_nInputLabel = nInputLabel;

            // 10,batch,1,1
            LayerParameter data = new LayerParameter(LayerParameter.LayerType.INPUT);

            data.input_param.shape.Add(new BlobShape(new List <int>()
            {
                nSteps, nBatch, nInputData
            }));
            data.top.Add("data");
            net.layer.Add(data);

            // 10,batch,1,1  (pred count)
            LayerParameter label = new LayerParameter(LayerParameter.LayerType.INPUT);

            label.input_param.shape.Add(new BlobShape(new List <int>()
            {
                nSteps, nBatch, nInputLabel
            }));
            label.top.Add("label");
            net.layer.Add(label);

            // 10,batch (0 for first batch, then all 1's)
            LayerParameter clip1 = new LayerParameter(LayerParameter.LayerType.INPUT);

            clip1.input_param.shape.Add(new BlobShape(new List <int>()
            {
                nSteps, nBatch
            }));
            clip1.top.Add("clip1");
            net.layer.Add(clip1);

            // Create the encoder layer that encodes the input 'ip1' image representations,
            // learned from the input model.
            LayerParameter lstm1 = new LayerParameter(LayerParameter.LayerType.LSTM);

            if (lstm1.recurrent_param != null)
            {
                lstm1.recurrent_param.dropout_ratio = m_dfDropout;
                lstm1.recurrent_param.engine        = m_lstmEngine;
                lstm1.recurrent_param.num_layers    = (uint)m_nLayers;
                lstm1.recurrent_param.num_output    = (uint)nHidden;
                lstm1.recurrent_param.weight_filler = new FillerParameter("gaussian", 0, 0, 0.5);
                lstm1.recurrent_param.bias_filler   = new FillerParameter("constant", 0);
            }
            lstm1.name = "encoder";
            lstm1.bottom.Add("data");
            lstm1.bottom.Add("clip1");
            lstm1.top.Add("lstm1");
            net.layer.Add(lstm1);

            // Create the decoder layer used to decode the input encoding to the
            // data representing a section of the Sin curve.
            LayerParameter lstm2 = new LayerParameter(LayerParameter.LayerType.LSTM);

            lstm2.recurrent_param.dropout_ratio = m_dfDropout;
            lstm2.recurrent_param.engine        = m_lstmEngine;
            lstm2.recurrent_param.num_layers    = (uint)m_nLayers;
            lstm2.recurrent_param.num_output    = (uint)nHidden;
            lstm2.recurrent_param.weight_filler = new FillerParameter("gaussian", 0, 0, 0.5);
            lstm2.recurrent_param.bias_filler   = new FillerParameter("constant", 0);
            lstm2.name = "decoder";
            lstm2.bottom.Add("lstm1");
            lstm2.bottom.Add("clip1");
            lstm2.top.Add("lstm2");
            net.layer.Add(lstm2);

            // Combine the decoder output down to the input label count per step,
            // which are the number of items in the Sin curve section.
            LayerParameter ip1 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);

            ip1.name = "ip1";
            ip1.inner_product_param.num_output    = (uint)nInputLabel;
            ip1.inner_product_param.axis          = 2;
            ip1.inner_product_param.bias_term     = true;
            ip1.inner_product_param.weight_filler = new FillerParameter("gaussian", 0, 0, 0.1);
            ip1.bottom.Add("lstm2");
            ip1.top.Add("ip1");
            net.layer.Add(ip1);

            // Calculate the loss.
            LayerParameter loss = new LayerParameter(LayerParameter.LayerType.EUCLIDEAN_LOSS);

            loss.bottom.Add("ip1");
            loss.bottom.Add("label");
            loss.top.Add("loss");
            net.layer.Add(loss);

            return(net);
        }
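A minimal hedged sketch of calling the builder above: one data value and one label value per time step, with the batch and time-step counts left at the instance defaults:

        // Build the encoder/decoder net and serialize it to a prototxt-style string.
        NetParameter netParam = CreateModel(1, 1);
        string strTrainModel = netParam.ToProto("root").ToString();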
Example #24
        /// <summary>
        /// The DoWork thread is the main thread used to train or run the model depending on the operation selected.
        /// </summary>
        /// <param name="sender">Specifies the sender</param>
        /// <param name="e">specifies the arguments.</param>
        private void m_bw_DoWork(object sender, DoWorkEventArgs e)
        {
            BackgroundWorker bw = sender as BackgroundWorker;

            m_input = e.Argument as InputData;
            SettingsCaffe s = new SettingsCaffe();

            s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;

            try
            {
                m_model.Batch = m_input.Batch;
                m_mycaffe     = new MyCaffeControl <float>(s, m_log, m_evtCancel);

                // Train the model.
                if (m_input.Operation == InputData.OPERATION.TRAIN)
                {
                    m_model.Iterations = (int)((m_input.Epochs * 7000) / m_model.Batch);
                    m_log.WriteLine("Training for " + m_input.Epochs.ToString() + " epochs (" + m_model.Iterations.ToString("N0") + " iterations).", true);
                    m_log.WriteLine("INFO: " + m_model.Iterations.ToString("N0") + " iterations.", true);
                    m_log.WriteLine("Using hidden = " + m_input.HiddenSize.ToString() + ", and word size = " + m_input.WordSize.ToString() + ".", true);

                    // Load the Seq2Seq training model.
                    NetParameter    netParam    = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp);
                    string          strModel    = netParam.ToProto("root").ToString();
                    SolverParameter solverParam = m_model.CreateSolver(m_input.LearningRate);
                    string          strSolver   = solverParam.ToProto("root").ToString();
                    byte[]          rgWts       = loadWeights("sequence");

                    m_strModel  = strModel;
                    m_strSolver = strSolver;

                    m_mycaffe.OnTrainingIteration += m_mycaffe_OnTrainingIteration;
                    m_mycaffe.OnTestingIteration  += m_mycaffe_OnTestingIteration;
                    m_mycaffe.LoadLite(Phase.TRAIN, strSolver, strModel, rgWts, false, false);

                    if (!m_input.UseSoftmax)
                    {
                        MemoryLossLayer <float> lossLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer <float>;
                        if (lossLayerTraining != null)
                        {
                            lossLayerTraining.OnGetLoss += LossLayer_OnGetLossTraining;
                        }
                        MemoryLossLayer <float> lossLayerTesting = m_mycaffe.GetInternalNet(Phase.TEST).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer <float>;
                        if (lossLayerTesting != null)
                        {
                            lossLayerTesting.OnGetLoss += LossLayer_OnGetLossTesting;
                        }
                    }

                    m_blobProbs = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);
                    m_blobScale = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);

                    TextDataLayer <float> dataLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.TEXT_DATA, "data") as TextDataLayer <float>;
                    if (dataLayerTraining != null)
                    {
                        dataLayerTraining.OnGetData += DataLayerTraining_OnGetDataTraining;
                    }

                    // Train the Seq2Seq model.
                    m_plotsSequenceLoss          = new PlotCollection("Sequence Loss");
                    m_plotsSequenceAccuracyTest  = new PlotCollection("Sequence Accuracy Test");
                    m_plotsSequenceAccuracyTrain = new PlotCollection("Sequence Accuracy Train");
                    m_mycaffe.Train(m_model.Iterations);
                    saveWeights("sequence", m_mycaffe);
                }

                // Run a trained model.
                else
                {
                    NetParameter netParam = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp, Phase.RUN);
                    string       strModel = netParam.ToProto("root").ToString();
                    byte[]       rgWts    = loadWeights("sequence");

                    strModel = m_model.PrependInput(strModel);

                    m_strModelRun = strModel;

                    int nN = m_model.TimeSteps;
                    m_mycaffe.LoadToRun(strModel, rgWts, new BlobShape(new List <int>()
                    {
                        nN, 1, 1, 1
                    }), null, null, false, false);

                    m_blobProbs = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);
                    m_blobScale = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);

                    runModel(m_mycaffe, bw, m_input.InputText);
                }
            }
            catch (Exception)
            {
                // Re-throw, preserving the original stack trace.
                throw;
            }
            finally
            {
                // Cleanup.
                if (m_mycaffe != null)
                {
                    m_mycaffe.Dispose();
                    m_mycaffe = null;
                }
            }
        }
Example #25
        /// <summary>
        /// Fills the NetParameter with the RNN network architecture.
        /// </summary>
        /// <param name="net_param">Specifies the NetParameter to fill.</param>
        protected override void FillUnrolledNet(NetParameter net_param)
        {
            uint nNumOutput = m_param.recurrent_param.num_output;

            m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");
            FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
            FillerParameter bias_filler   = m_param.recurrent_param.bias_filler;

            // Add generic LayerParameters (without bottoms/tops) of the layer types we'll
            // use, to save redundant code.
            LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);

            hidden_param.inner_product_param.num_output    = nNumOutput;
            hidden_param.inner_product_param.bias_term     = false;
            hidden_param.inner_product_param.axis          = 2;
            hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

            LayerParameter biased_hidden_param = hidden_param.Clone(false);

            biased_hidden_param.inner_product_param.bias_term   = true;
            biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

            LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);

            sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

            LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH);

            LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);

            scale_param.scale_param.axis = 0;

            LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);

            slice_param.slice_param.axis = 0;

            List <BlobShape> rgInputShapes = new List <BlobShape>();

            RecurrentInputShapes(rgInputShapes);
            m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape.");


            //--- Add the layers ---

            LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer_param.top.Add("h_0");
            input_layer_param.input_param.shape.Add(rgInputShapes[0]);
            net_param.layer.Add(input_layer_param);

            LayerParameter cont_slice_param = slice_param.Clone(false);

            cont_slice_param.name = "cont_slice";
            cont_slice_param.bottom.Add("cont");
            cont_slice_param.slice_param.axis = 0;
            net_param.layer.Add(cont_slice_param);

            // Add layer to transform all timesteps of x to the hidden state dimension.
            //  W_xh_x = W_xh * x + b_h
            {
                LayerParameter x_transform_param = biased_hidden_param.Clone(false);
                x_transform_param.name = "x_transform";
                x_transform_param.parameters.Add(new ParamSpec("W_xh"));
                x_transform_param.parameters.Add(new ParamSpec("b_h"));
                x_transform_param.bottom.Add("x");
                x_transform_param.top.Add("W_xh_x");
                x_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_transform_param);
            }

            if (m_bStaticInput)
            {
                // Add layer to transform x_static to the hidden state dimension.
                //  W_xh_x_static = W_xh_static * x_static
                LayerParameter x_static_transform_param = hidden_param.Clone(false);
                x_static_transform_param.inner_product_param.axis = 1;
                x_static_transform_param.name = "W_xh_x_static";
                x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static"));
                x_static_transform_param.bottom.Add("x_static");
                x_static_transform_param.top.Add("W_xh_x_static_preshape");
                x_static_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_static_transform_param);

                LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
                BlobShape      new_shape     = reshape_param.reshape_param.shape;
                new_shape.dim.Add(1);   // One timestep.
                new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
                new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
                reshape_param.name = "W_xh_x_static_reshape";
                reshape_param.bottom.Add("W_xh_x_static_preshape");
                reshape_param.top.Add("W_xh_x_static");
                net_param.layer.Add(reshape_param);
            }

            LayerParameter x_slice_param = slice_param.Clone(false);

            x_slice_param.name = "W_xh_x_slice";
            x_slice_param.bottom.Add("W_xh_x");
            net_param.layer.Add(x_slice_param);

            LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);

            output_concat_layer.name = "o_concat";
            output_concat_layer.top.Add("o");
            output_concat_layer.concat_param.axis = 0;

            for (int t = 1; t <= m_nT; t++)
            {
                string tm1s = (t - 1).ToString();
                string ts   = t.ToString();

                cont_slice_param.top.Add("cont_" + ts);
                x_slice_param.top.Add("W_xh_x_" + ts);


                // Add layer to flush the hidden state when beginning a new sequence,
                //  as indicated by cont_t.
                //      h_conted_{t-1} := cont_t * h_{t-1}
                //
                //  Normally, cont_t is binary (i.e., 0 or 1), so:
                //      h_conted_{t-1} := h_{t-1} if cont_t == 1
                //                        0 otherwise.
                {
                    LayerParameter cont_h_param = scale_param.Clone(false);
                    cont_h_param.name = "h_conted_" + tm1s;
                    cont_h_param.bottom.Add("h_" + tm1s);
                    cont_h_param.bottom.Add("cont_" + ts);
                    cont_h_param.top.Add("h_conted_" + tm1s);
                    net_param.layer.Add(cont_h_param);
                }

                // Add layer to compute
                //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
                {
                    LayerParameter w_param = hidden_param.Clone(false);
                    w_param.name = "W_hh_h_" + tm1s;
                    w_param.parameters.Add(new ParamSpec("W_hh"));
                    w_param.bottom.Add("h_conted_" + tm1s);
                    w_param.top.Add("W_hh_h_" + tm1s);
                    w_param.inner_product_param.axis = 2;
                    net_param.layer.Add(w_param);
                }

                // Add layers to compute
                //      h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
                //           = \tanh( W_hh_h_{t-1} + W_xh_t )
                {
                    LayerParameter h_input_sum_param = sum_param.Clone(false);
                    h_input_sum_param.name = "h_input_sum_" + ts;
                    h_input_sum_param.bottom.Add("W_hh_h_" + tm1s);
                    h_input_sum_param.bottom.Add("W_xh_x_" + ts);

                    if (m_bStaticInput)
                    {
                        h_input_sum_param.bottom.Add("W_xh_x_static");
                    }

                    h_input_sum_param.top.Add("h_neuron_input_" + ts);
                    net_param.layer.Add(h_input_sum_param);
                }
                {
                    LayerParameter h_neuron_param = tanh_param.Clone(false);
                    h_neuron_param.name = "h_neuron_input_" + ts;
                    h_neuron_param.bottom.Add("h_neuron_input_" + ts);
                    h_neuron_param.top.Add("h_" + ts);
                    net_param.layer.Add(h_neuron_param);
                }

                // Add layer to compute
                //      W_ho_h_t := W_ho * h_t + b_o
                {
                    LayerParameter w_param = biased_hidden_param.Clone(false);
                    w_param.name = "W_ho_h_" + ts;
                    w_param.parameters.Add(new ParamSpec("W_ho"));
                    w_param.parameters.Add(new ParamSpec("b_o"));
                    w_param.bottom.Add("h_" + ts);
                    w_param.top.Add("W_ho_h_" + ts);
                    w_param.inner_product_param.axis = 2;
                    net_param.layer.Add(w_param);
                }

                // Add layer to compute
                //      o_t := \tanh( W_ho * h_t + b_o )
                //           = \tanh( W_ho_h_t )
                {
                    LayerParameter o_neuron_param = tanh_param.Clone(false);
                    o_neuron_param.name = "o_neuron_" + ts;
                    o_neuron_param.bottom.Add("W_ho_h_" + ts);
                    o_neuron_param.top.Add("o_" + ts);
                    net_param.layer.Add(o_neuron_param);
                }

                output_concat_layer.bottom.Add("o_" + ts);
            }

            net_param.layer.Add(output_concat_layer.Clone(false));
        }
Example #26
 /// <summary>
 /// Fills net_param with the recurrent network architecture.  Subclasses
 /// should define this -- see RNNLayer and LSTMLayer for examples.
 /// </summary>
 /// <param name="net_param">Specifies the net_param to be filled.</param>
 protected abstract void FillUnrolledNet(NetParameter net_param);
Example #27
        private void layerSetUpCaffe(BlobCollection <T> colBottom, BlobCollection <T> colTop)
        {
            // Get (recurrent) input/output names.
            List <string> rgOutputNames = new List <string>();

            OutputBlobNames(rgOutputNames);

            List <string> rgRecurInputNames = new List <string>();

            RecurrentInputBlobNames(rgRecurInputNames);

            List <string> rgRecurOutputNames = new List <string>();

            RecurrentOutputBlobNames(rgRecurOutputNames);

            int nNumRecurBlobs = rgRecurInputNames.Count;

            m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

            // If provided, bottom[2] is a static input to the recurrent net.
            int nNumHiddenExposed = (m_bExposeHidden) ? nNumRecurBlobs : 0;

            m_bStaticInput = (colBottom.Count > 2 + nNumHiddenExposed) ? true : false;

            if (m_bStaticInput)
            {
                m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
                m_log.CHECK_EQ(m_nN, colBottom[2].shape(0), "When static input is present, the bottom[2].shape(0) must = N which is " + m_nN.ToString());
            }

            // Create a NetParameter; setup the inputs that aren't unique to particular
            // recurrent architectures.
            NetParameter net_param = new NetParameter();

            LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer.top.Add("x");
            BlobShape input_shape1 = new param.BlobShape();

            for (int i = 0; i < colBottom[0].num_axes; i++)
            {
                input_shape1.dim.Add(colBottom[0].shape(i));
            }
            input_layer.input_param.shape.Add(input_shape1);

            input_layer.top.Add("cont");
            BlobShape input_shape2 = new param.BlobShape();

            for (int i = 0; i < colBottom[1].num_axes; i++)
            {
                input_shape2.dim.Add(colBottom[1].shape(i));
            }
            input_layer.input_param.shape.Add(input_shape2);

            if (m_bStaticInput)
            {
                input_layer.top.Add("x_static");
                BlobShape input_shape3 = new BlobShape();
                for (int i = 0; i < colBottom[2].num_axes; i++)
                {
                    input_shape3.dim.Add(colBottom[2].shape(i));
                }
                input_layer.input_param.shape.Add(input_shape3);
            }

            net_param.layer.Add(input_layer);

            // Call the child's FillUnrolledNet implementation to specify the unrolled
            // recurrent architecture.
            FillUnrolledNet(net_param);

            // Prepend this layer's name to the names of each layer in the unrolled net.
            string strLayerName = m_param.name;

            if (strLayerName.Length > 0)
            {
                for (int i = 0; i < net_param.layer.Count; i++)
                {
                    LayerParameter layer = net_param.layer[i];
                    layer.name = strLayerName + "_" + layer.name;
                }
            }

            // Add 'pseudo-losses' to all outputs to force backpropagation.
            // (Setting force_backward is too aggressive as we may not need to backprop to
            // all inputs, e.g., the sequence continuation indicators.)
            List <string> rgPseudoLosses = new List <string>();

            for (int i = 0; i < rgOutputNames.Count; i++)
            {
                rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
                LayerParameter layer = new LayerParameter(LayerParameter.LayerType.REDUCTION, rgPseudoLosses[i]);
                layer.bottom.Add(rgOutputNames[i]);
                layer.top.Add(rgPseudoLosses[i]);
                layer.loss_weight.Add(1.0);
                net_param.layer.Add(layer);
            }

            // Create the unrolled net.
            Net <T> sharedNet = null;

            if (m_param is LayerParameterEx <T> )
            {
                RecurrentLayer <T> sharedLayer = ((LayerParameterEx <T>)m_param).SharedLayer as RecurrentLayer <T>;
                if (sharedLayer != null)
                {
                    sharedNet = sharedLayer.m_unrolledNet;
                }
            }

            m_unrolledNet = new Net <T>(m_cuda, m_log, net_param, m_evtCancel, null, m_phase, null, sharedNet);
            m_unrolledNet.set_debug_info(m_param.recurrent_param.debug_info);

            // Setup pointers to the inputs.
            m_blobXInputBlob    = m_unrolledNet.blob_by_name("x");
            m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");

            if (m_bStaticInput)
            {
                m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");
            }

            // Setup pointers to paired recurrent inputs/outputs.
            m_colRecurInputBlobs  = new common.BlobCollection <T>();
            m_colRecurOutputBlobs = new common.BlobCollection <T>();

            for (int i = 0; i < nNumRecurBlobs; i++)
            {
                m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
                m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
            }

            // Setup pointers to outputs.
            m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");
            m_colOutputBlobs = new common.BlobCollection <T>();
            for (int i = 0; i < rgOutputNames.Count; i++)
            {
                m_colOutputBlobs.Add(m_unrolledNet.blob_by_name(rgOutputNames[i]));
            }

            // We should have 2 inputs (x and cont), plus a number of recurrent inputs,
            // plus maybe a static input.
            int nStaticInput = (m_bStaticInput) ? 1 : 0;

            m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

            // This layer's parameters are any parameters in the layers of the unrolled
            // net.  We only want one copy of each parameter, so check that the parameter
            // is 'owned' by the layer, rather than shared with another.
            blobs.Clear();
            for (int i = 0; i < m_unrolledNet.parameters.Count; i++)
            {
                if (m_unrolledNet.param_owners[i] == -1)
                {
                    m_log.WriteLine("Adding parameter " + i.ToString() + ": " + m_unrolledNet.param_display_names[i]);
                    blobs.Add(m_unrolledNet.parameters[i]);
                }
            }

            // Check that param_propagate_down is set for all of the parameters in the
            // unrolled net; set param_propagate_down to true in this layer.
            for (int i = 0; i < m_unrolledNet.layers.Count; i++)
            {
                for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
                {
                    m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
                }
            }
            m_rgbParamPropagateDown = new DictionaryMap <bool>(blobs.Count, true);

            // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across
            // batches.
            for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
            {
                m_colRecurOutputBlobs[i].SetDiff(0);
            }

            // Check that the last output_names.count layers are the pseudo-losses;
            // set last_layer_index so that we don't actually run these layers.
            List <string> rgLayerNames = m_unrolledNet.layer_names;

            m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;
            for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
            {
                m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);
            }
        }
Example No. 28
        static void Main(string[] args)
        {
            if (!sqlCheck())
            {
                return;
            }

            Log log = new Log("test");

            log.OnWriteLine += Log_OnWriteLine;
            CancelEvent   cancel   = new CancelEvent();
            SettingsCaffe settings = new SettingsCaffe();

            // Load all images into memory before training.
            settings.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;
            // Use GPU ID = 0
            settings.GpuIds = "0";

            // Load the descriptors from their respective files
            string strSolver = load_file("C:\\ProgramData\\MyCaffe\\test_data\\models\\siamese\\mnist\\solver.prototxt");
            string strModel  = load_file("C:\\ProgramData\\MyCaffe\\test_data\\models\\siamese\\mnist\\train_val.prototxt");

            RawProto       proto     = RawProto.Parse(strModel);
            NetParameter   net_param = NetParameter.FromProto(proto);
            LayerParameter layer     = net_param.FindLayer(LayerParameter.LayerType.DECODE);

            layer.decode_param.target = DecodeParameter.TARGET.CENTROID;
            proto    = net_param.ToProto("root");
            strModel = proto.ToString();

            // Load the MNIST data descriptor.
            DatasetFactory    factory = new DatasetFactory();
            DatasetDescriptor ds      = factory.LoadDataset("MNIST");

            // Create a test project with the dataset and descriptors
            ProjectEx project = new ProjectEx("Test");

            project.SetDataset(ds);
            project.ModelDescription  = strModel;
            project.SolverDescription = strSolver;

            // Create the MyCaffeControl (with the 'float' base type)
            string strCudaPath             = "C:\\Program Files\\SignalPop\\MyCaffe\\cuda_11.3\\CudaDnnDll.11.3.dll";
            MyCaffeControl <float> mycaffe = new MyCaffeControl <float>(settings, log, cancel, null, null, null, null, strCudaPath);

            // Load the project, using the TRAIN phase.
            mycaffe.Load(Phase.TRAIN, project);

            // Train the model for 4000 iterations
            // (which uses the internal solver and internal training net)
            int nIterations = 4000;

            mycaffe.Train(nIterations);

            // Test the model for 100 iterations
            // (which uses the internal testing net)
            nIterations = 100;
            double dfAccuracy = mycaffe.Test(nIterations);

            // Report the testing accuracy.
            log.WriteLine("Accuracy = " + dfAccuracy.ToString("P"));

            mycaffe.Dispose();

            Console.Write("Press any key...");
            Console.ReadKey();
        }
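The sqlCheck, Log_OnWriteLine and load_file helpers referenced above are defined elsewhere in the sample. A minimal sketch of the latter two (assumed implementations, shown only so the example reads end-to-end; the sample's real code may differ) could be:

        // Assumed helper sketches -- the sample's actual implementations may differ.
        static string load_file(string strPath)
        {
            // Read the solver/model prototxt descriptor as plain text.
            return System.IO.File.ReadAllText(strPath);
        }

        static void Log_OnWriteLine(object sender, LogArg e)
        {
            // Echo log output to the console.
            Console.WriteLine(e.Message);
        }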
Example No. 29
        /// <summary>
        /// Fills the NetParameter with the LSTM network architecture.
        /// </summary>
        /// <param name="net_param">Specifies the net_param to be filled.</param>
        protected override void FillUnrolledNet(NetParameter net_param)
        {
            uint nNumOutput = m_param.recurrent_param.num_output;

            m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");
            FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
            FillerParameter bias_filler   = m_param.recurrent_param.bias_filler;

            // Add generic LayerParameters (without bottoms/tops) for the layer types we'll
            // use, to save redundant code.
            LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);

            hidden_param.inner_product_param.num_output    = nNumOutput * 4;
            hidden_param.inner_product_param.bias_term     = false;
            hidden_param.inner_product_param.axis          = 2;
            hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

            LayerParameter biased_hidden_param = hidden_param.Clone(false);

            biased_hidden_param.inner_product_param.bias_term   = true;
            biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

            LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);

            sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

            LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);

            scale_param.scale_param.axis = 0;

            LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);

            slice_param.slice_param.axis = 0;

            LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT);

            List <BlobShape> rgInputShapes = new List <BlobShape>();

            RecurrentInputShapes(rgInputShapes);
            m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes.");


            //--- Add the layers ---

            LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer_param.top.Add("c_0");
            input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone());
            input_layer_param.top.Add("h_0");
            input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone());
            net_param.layer.Add(input_layer_param);

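            // Add a slice layer that splits the sequence continuation indicators 'cont'
            // along the time axis into per-timestep cont_t blobs.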
            LayerParameter cont_slice_param = slice_param.Clone(false);

            cont_slice_param.name = "cont_slice";
            cont_slice_param.bottom.Add("cont");
            cont_slice_param.slice_param.axis = 0;
            net_param.layer.Add(cont_slice_param);

            // Add layer to transform all timesteps of x to the hidden state dimension.
            //  W_xc_x = W_xc * x + b_c
            {
                LayerParameter x_transform_param = biased_hidden_param.Clone(false);
                x_transform_param.name = "x_transform";
                x_transform_param.parameters.Add(new ParamSpec("W_xc"));
                x_transform_param.parameters.Add(new ParamSpec("b_c"));
                x_transform_param.bottom.Add("x");
                x_transform_param.top.Add("W_xc_x");
                x_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_transform_param);
            }

            if (m_bStaticInput)
            {
                // Add layer to transform x_static to the hidden state dimension.
                //  W_xc_x_static = W_xc_static * x_static
                LayerParameter x_static_transform_param = hidden_param.Clone(false);
                x_static_transform_param.inner_product_param.axis = 1;
                x_static_transform_param.name = "W_xc_x_static";
                x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static"));
                x_static_transform_param.bottom.Add("x_static");
                x_static_transform_param.top.Add("W_xc_x_static_preshape");
                x_static_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_static_transform_param);

                LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
                BlobShape      new_shape     = reshape_param.reshape_param.shape;
                new_shape.dim.Add(1);   // One timestep.
                new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
                new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
                reshape_param.name = "W_xc_x_static_reshape";
                reshape_param.bottom.Add("W_xc_x_static_preshape");
                reshape_param.top.Add("W_xc_x_static");
                net_param.layer.Add(reshape_param);
            }

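            // Add a slice layer that splits W_xc_x along the time axis into per-timestep
            // W_xc_x_t blobs.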
            LayerParameter x_slice_param = slice_param.Clone(false);

            x_slice_param.name = "W_xc_x_slice";
            x_slice_param.bottom.Add("W_xc_x");
            net_param.layer.Add(x_slice_param);

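            // Add a concat layer that collects the per-timestep hidden states h_t into the
            // output blob 'h' along the time axis.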
            LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);

            output_concat_layer.name = "h_concat";
            output_concat_layer.top.Add("h");
            output_concat_layer.concat_param.axis = 0;

            for (int t = 1; t <= m_nT; t++)
            {
                string tm1s = (t - 1).ToString();
                string ts   = t.ToString();

                cont_slice_param.top.Add("cont_" + ts);
                x_slice_param.top.Add("W_xc_x_" + ts);


                // Add layer to flush the hidden state when beginning a new sequence,
                //  as indicated by cont_t.
                //      h_conted_{t-1} := cont_t * h_{t-1}
                //
                //  Normally, cont_t is binary (i.e., 0 or 1), so:
                //      h_conted_{t-1} := h_{t-1} if cont_t == 1
                //                        0 otherwise.
                {
                    LayerParameter cont_h_param = scale_param.Clone(false);
                    cont_h_param.group_start = true;
                    cont_h_param.name        = "h_conted_" + tm1s;
                    cont_h_param.bottom.Add("h_" + tm1s);
                    cont_h_param.bottom.Add("cont_" + ts);
                    cont_h_param.top.Add("h_conted_" + tm1s);
                    net_param.layer.Add(cont_h_param);
                }

                // Add layer to compute
                //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
                {
                    LayerParameter w_param = hidden_param.Clone(false);
                    w_param.name = "transform_" + ts;
                    w_param.parameters.Add(new ParamSpec("W_hc"));
                    w_param.bottom.Add("h_conted_" + tm1s);
                    w_param.top.Add("W_hc_h_" + tm1s);
                    w_param.inner_product_param.axis = 2;
                    net_param.layer.Add(w_param);
                }

                // Add the outputs of the linear transformations to compute the gate input.
                //  gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
                //               = W_hc_h_{t-1} + W_xc_x_t + b_c
                {
                    LayerParameter input_sum_layer = sum_param.Clone(false);
                    input_sum_layer.name = "gate_input_" + ts;
                    input_sum_layer.bottom.Add("W_hc_h_" + tm1s);
                    input_sum_layer.bottom.Add("W_xc_x_" + ts);
                    if (m_bStaticInput)
                    {
                        input_sum_layer.bottom.Add("W_xc_x_static");
                    }
                    input_sum_layer.top.Add("gate_input_" + ts);
                    net_param.layer.Add(input_sum_layer);
                }

                // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
                //  Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
                //  Outputs: c_t, h_t
                //      [ i_t' ]
                //      [ f_t' ] := gate_input_t
                //      [ o_t' ]
                //      [ g_t' ]
                //          i_t := \sigmoid[i_t']
                //          f_t := \sigmoid[f_t']
                //          o_t := \sigmoid[o_t']
                //          g_t := \tanh[g_t']
                //          c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
                //          h_t := o_t .* \tanh[c_t]
                {
                    LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
                    lstm_unit_param.bottom.Add("c_" + tm1s);
                    lstm_unit_param.bottom.Add("gate_input_" + ts);
                    lstm_unit_param.bottom.Add("cont_" + ts);
                    lstm_unit_param.top.Add("c_" + ts);
                    lstm_unit_param.top.Add("h_" + ts);
                    lstm_unit_param.name = "unit_" + ts;
                    net_param.layer.Add(lstm_unit_param);
                }

                output_concat_layer.bottom.Add("h_" + ts);
            }

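            // Add a split layer that copies the final cell state into the recurrent
            // output blob 'c_T'.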
            {
                LayerParameter c_T_copy_param = split_param.Clone(false);
                c_T_copy_param.bottom.Add("c_" + m_nT.ToString());
                c_T_copy_param.top.Add("c_T");
                net_param.layer.Add(c_T_copy_param);
            }

            net_param.layer.Add(output_concat_layer.Clone(false));
        }
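The two shapes requested from RecurrentInputShapes at the top of this method describe the recurrent inputs c_0 and h_0. A sketch of how such an override might fill them (an assumption for illustration, following the 1 x N x num_output layout used by the unrolled net) is:

        // Assumed sketch of RecurrentInputShapes for the LSTM case -- both c_0 and h_0
        // are given the shape (1 x N x num_output).
        protected override void RecurrentInputShapes(List<BlobShape> rgShapes)
        {
            int nNumBlobs = 2;  // c_0 and h_0.

            rgShapes.Clear();
            for (int i = 0; i < nNumBlobs; i++)
            {
                BlobShape s = new BlobShape();
                s.dim.Add(1);                                        // a single timestep.
                s.dim.Add(m_nN);                                     // the batch size N.
                s.dim.Add((int)m_param.recurrent_param.num_output);  // the hidden state size.
                rgShapes.Add(s);
            }
        }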
Example No. 30
        /// <summary>
        /// Create the model used to train the Encoder/Decoder using the TextData Layer as input.
        /// The Seq2Seq model uses two LSTM layers as the Encoder (one fed the normal input and
        /// one fed the reversed input) and an LSTM attention layer as the Decoder.
        /// </summary>
        /// <param name="strInputFile">Specifies the input data.</param>
        /// <param name="strTargetFile">Specifies the target data.</param>
        /// <param name="nHiddenCount">Specifies the hidden state size.</param>
        /// <param name="nWordSize">Specifies the size of the word embeddings.</param>
        /// <param name="bUseSoftmax">Specifies whether to train with a SoftmaxCrossEntropy loss (true) or a MemoryLoss (false).</param>
        /// <param name="bUseExternalIp">Specifies whether to add an external InnerProduct layer after the decoder (true) or rely on the decoder's internal output projection (false).</param>
        /// <param name="phase">Specifies phase of the model to create.</param>
        /// <returns>The NetParameter of the model is returned.</returns>
        public NetParameter CreateModel(string strInputFile, string strTargetFile, int nHiddenCount, int nWordSize, bool bUseSoftmax, bool bUseExternalIp, Phase phase = Phase.TRAIN)
        {
            m_nHidden = nHiddenCount;
            NetParameter net = new NetParameter();

            // Add data input layer that takes care of loading inputs and feeding the data
            // to the network.
            LayerParameter data = new LayerParameter(LayerParameter.LayerType.TEXT_DATA);

            data.name = "data";
            data.text_data_param.time_steps = (uint)m_nTimeSteps;
            data.text_data_param.batch_size = (uint)m_nBatch;
            data.text_data_param.enable_normal_encoder_output  = true;
            data.text_data_param.enable_reverse_encoder_output = true;
            data.text_data_param.encoder_source = strInputFile;
            data.text_data_param.decoder_source = strTargetFile;
            data.text_data_param.sample_size    = (uint)m_nSampleSize;
            data.text_data_param.shuffle        = true;
            if (phase == Phase.RUN)
            {
                // Loaded with TextDataLayer.PreProcessInput
                data.bottom.Add("idec");    // decoder input
                data.bottom.Add("ienc");    // encoder input
                data.bottom.Add("iencr");   // encoder inputr
                data.bottom.Add("iencc");   // encoder clip
            }
            data.top.Add("dec_input");
            data.top.Add("clipD");
            data.top.Add("data");
            data.top.Add("datar");
            data.top.Add("clipE");
            data.top.Add("vocabcount");
            data.top.Add("label");
            net.layer.Add(data);

            // Create the embedding layer that converts sentence word indexes into an embedding of
            // size nWordSize for each word in the sentence.
            LayerParameter embed1 = new LayerParameter(LayerParameter.LayerType.EMBED);

            embed1.embed_param.input_dim     = 1;               // (uint)nVocabCount + 2; (set via bottom[6])
            embed1.embed_param.num_output    = (uint)nWordSize; // Word size.
            embed1.embed_param.bias_term     = true;
            embed1.embed_param.weight_filler = m_fillerParam;
            embed1.parameters.Add(new ParamSpec("embed_wts"));
            embed1.parameters.Add(new ParamSpec("embed_bias"));
            embed1.bottom.Add("data");
            embed1.bottom.Add("vocabcount");
            embed1.top.Add("embed1");
            net.layer.Add(embed1);

            // Create the first encoder layer that encodes the embedded input
            // sequence (embed1).
            LayerParameter lstm1 = new LayerParameter(LayerParameter.LayerType.LSTM);

            lstm1.recurrent_param.bias_filler   = new FillerParameter("constant", 0);
            lstm1.recurrent_param.weight_filler = m_fillerParam;
            lstm1.recurrent_param.engine        = EngineParameter.Engine.CUDNN;
            lstm1.recurrent_param.num_output    = (uint)m_nHidden;
            lstm1.recurrent_param.num_layers    = 2;
            lstm1.recurrent_param.dropout_ratio = 0.1;
            lstm1.name = "encoder1";
            lstm1.bottom.Add("embed1");
            lstm1.bottom.Add("clipE");
            lstm1.top.Add("lstm1");
            net.layer.Add(lstm1);

            // Create the embedding layer that converts sentence word indexes into an embedding of
            // size nWordSize for each word in the sentence.
            LayerParameter embed2 = new LayerParameter(LayerParameter.LayerType.EMBED);

            embed2.embed_param.input_dim     = 1;               // (uint)nVocabCount + 2; (set via bottom[6])
            embed2.embed_param.num_output    = (uint)nWordSize; // Word size.
            embed2.embed_param.bias_term     = true;
            embed2.embed_param.weight_filler = m_fillerParam;
            embed2.parameters.Add(new ParamSpec("embed_wts"));
            embed2.parameters.Add(new ParamSpec("embed_bias"));
            embed2.bottom.Add("datar");
            embed2.bottom.Add("vocabcount");
            embed2.top.Add("embed2");
            net.layer.Add(embed2);

            // Create the second encoder layer that encodes the reversed embedded input
            // sequence (embed2).
            LayerParameter lstm2 = new LayerParameter(LayerParameter.LayerType.LSTM);

            lstm2.recurrent_param.bias_filler   = new FillerParameter("constant", 0);
            lstm2.recurrent_param.weight_filler = m_fillerParam;
            lstm2.recurrent_param.engine        = EngineParameter.Engine.CUDNN;
            lstm2.recurrent_param.num_output    = (uint)m_nHidden;
            lstm2.recurrent_param.num_layers    = 2;
            lstm2.recurrent_param.dropout_ratio = 0.1;
            lstm2.name = "encoder2";
            lstm2.bottom.Add("embed2");
            lstm2.bottom.Add("clipE");
            lstm2.top.Add("lstm2");
            net.layer.Add(lstm2);

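            // Concatenate the normal and reverse encoder outputs along the channel axis
            // to form the encoding passed to the decoder.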
            LayerParameter concat = new LayerParameter(LayerParameter.LayerType.CONCAT);

            concat.concat_param.axis = 2;
            concat.bottom.Add("lstm1");
            concat.bottom.Add("lstm2");
            concat.top.Add("encoded");
            net.layer.Add(concat);

            // Create embedding for decoder input.
            LayerParameter embed3 = new LayerParameter(LayerParameter.LayerType.EMBED);

            embed3.name = "dec_input_embed";
            embed3.embed_param.input_dim     = 1;               // (uint)nVocabCount + 2; (set via bottom[6])
            embed3.embed_param.num_output    = (uint)nWordSize; // Word size.
            embed3.embed_param.bias_term     = true;
            embed3.embed_param.weight_filler = m_fillerParam;
            embed3.bottom.Add("dec_input");
            embed3.bottom.Add("vocabcount");
            embed3.top.Add("dec_input_embed");
            net.layer.Add(embed3);

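            // Create the attention-based decoder layer that attends over the encoding
            // while generating the output sequence.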
            LayerParameter lstm3 = new LayerParameter(LayerParameter.LayerType.LSTM_ATTENTION);

            lstm3.lstm_attention_param.bias_filler      = new FillerParameter("constant", 0);
            lstm3.lstm_attention_param.weight_filler    = m_fillerParam;
            lstm3.lstm_attention_param.num_output       = (uint)m_nHidden;
            lstm3.lstm_attention_param.num_output_ip    = (uint)((bUseExternalIp) ? 0 : 1); // (uint)nVocabCount + 2; (set via bottom[6])
            lstm3.lstm_attention_param.enable_attention = true;
            lstm3.name = "decoder";
            lstm3.bottom.Add("dec_input_embed");
            lstm3.bottom.Add("clipD");
            lstm3.bottom.Add("encoded");
            lstm3.bottom.Add("clipE");
            if (!bUseExternalIp)
            {
                lstm3.bottom.Add("vocabcount");
                lstm3.top.Add("ip1");
            }
            else
            {
                lstm3.top.Add("lstm3");
            }
            net.layer.Add(lstm3);

            if (bUseExternalIp)
            {
                LayerParameter ip1 = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
                ip1.inner_product_param.axis          = 2;
                ip1.inner_product_param.bias_filler   = new FillerParameter("constant", 0);
                ip1.inner_product_param.weight_filler = m_fillerParam;
                ip1.inner_product_param.bias_term     = true;
                ip1.bottom.Add("lstm3");
                ip1.bottom.Add("vocabcount");
                ip1.top.Add("ip1");
                net.layer.Add(ip1);
            }

            if (phase != Phase.RUN)
            {
                if (bUseSoftmax)
                {
                    LayerParameter loss = new LayerParameter(LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS);
                    loss.name = "loss";
                    loss.softmax_param.axis = 2;
                    loss.bottom.Add("ip1");
                    loss.bottom.Add("label");
                    loss.top.Add("loss");
                    net.layer.Add(loss);
                }
                else
                {
                    LayerParameter loss = new LayerParameter(LayerParameter.LayerType.MEMORY_LOSS);
                    loss.name = "loss";
                    loss.loss_param.normalization = LossParameter.NormalizationMode.NONE;
                    loss.bottom.Add("ip1");
                    loss.bottom.Add("label");
                    loss.top.Add("loss");
                    net.layer.Add(loss);
                }

                LayerParameter accuracy = new LayerParameter(LayerParameter.LayerType.ACCURACY);
                accuracy.accuracy_param.axis         = 2;
                accuracy.accuracy_param.ignore_label = 0;
                accuracy.bottom.Add("ip1");
                accuracy.bottom.Add("label");
                accuracy.top.Add("accuracy");
                accuracy.include.Add(new NetStateRule(Phase.TEST));
                net.layer.Add(accuracy);
            }
            else
            {
                LayerParameter output = new LayerParameter(LayerParameter.LayerType.SOFTMAX);
                output.softmax_param.axis = 2;
                output.bottom.Add("ip1");
                output.top.Add("softmax");
                net.layer.Add(output);
            }

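            // Silence the label blob during the RUN phase, where no loss or accuracy
            // layer consumes it.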
            LayerParameter silence = new LayerParameter(LayerParameter.LayerType.SILENCE);

            silence.bottom.Add("label");
            silence.include.Add(new NetStateRule(Phase.RUN));
            net.layer.Add(silence);

            return(net);
        }
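A typical way to turn the returned NetParameter into a model description string (mirroring the ToProto/ToString pattern used in the earlier examples) is sketched below; 'seq2seq', the file paths, hidden size and word size are placeholders, not values from the sample.

        // Assumed usage sketch -- 'seq2seq' is an instance of the class defining CreateModel.
        NetParameter net_param = seq2seq.CreateModel("input.txt", "target.txt", 256, 128, false, false, Phase.TRAIN);
        RawProto proto = net_param.ToProto("root");
        string strModel = proto.ToString();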