/// <summary>
/// Creates the neural style transfer object, selecting the used layers by model type.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the output log.</param>
/// <param name="evtCancel">Specifies the cancel event used to abort processing; when not <i>null</i> it is reset here.</param>
/// <param name="strModelType">Specifies the model type: 'vgg19', 'vgg16'</param>
/// <param name="strModel">Specifies the network model descriptor text, which is parsed into the net parameter.</param>
/// <param name="rgWeights">Optionally, specifies the weights to use (or <i>null</i> to ignore).</param>
/// <param name="bCaffeModel">Specifies whether the weights are in the caffe (<i>true</i>, BGR color order) or mycaffe (<i>false</i>, RGB color order) format.</param>
/// <param name="solverType">Optionally, specifies the solver type to use (default = LBFGS).</param>
/// <param name="dfLearningRate">Optionally, specifies the solver learning rate (default = 1.0).</param>
public NeuralStyleTransfer(CudaDnn<T> cuda, Log log, CancelEvent evtCancel, string strModelType, string strModel, byte[] rgWeights, bool bCaffeModel, SolverParameter.SolverType solverType = SolverParameter.SolverType.LBFGS, double dfLearningRate = 1.0)
{
    // Capture the constructor arguments.
    m_cuda = cuda;
    m_log = log;
    m_evtCancel = evtCancel;
    m_rgWeights = rgWeights;
    m_solverType = solverType;
    m_dfLearningRate = dfLearningRate;

    if (m_evtCancel != null)
    {
        m_evtCancel.Reset();
    }

    // Parse the model text, then add the input layer, prune to the used layers and add the gram layers.
    m_param = NetParameter.FromProto(RawProto.Parse(strModel));
    add_input_layer(m_param);
    m_rgstrUsedLayers = load_layers(strModelType);
    prune(m_param, m_rgstrUsedLayers);
    add_gram_layers(m_param);

    // Configure the transformation parameter through a local alias of the field.
    TransformationParameter transformParam = new TransformationParameter();
    m_transformationParam = transformParam;

    if (bCaffeModel)
    {
        transformParam.color_order = TransformationParameter.COLOR_ORDER.BGR;
    }
    else
    {
        transformParam.color_order = TransformationParameter.COLOR_ORDER.RGB;
    }

    transformParam.scale = 1.0;
    transformParam.mean_value = m_rgMeanValues;

    m_persist = new PersistCaffe<T>(m_log, false);
}
/// <summary>
/// The DataTransformer constructor.
/// </summary>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the TransformationParameter used to create the DataTransformer.</param>
/// <param name="phase">Specifies the Phase under which the DataTransformer is run.</param>
/// <param name="nC">Specifies the channels.</param>
/// <param name="nH">Specifies the height.</param>
/// <param name="nW">Specifies the width.</param>
/// <param name="imgMean">Optionally, specifies the image mean to use; when given, its dimensions size the transformed data buffer instead of nC, nH and nW.</param>
public DataTransformer(Log log, TransformationParameter p, Phase phase, int nC, int nH, int nW, SimpleDatum imgMean = null)
{
    m_log = log;

    if (p.mean_file != null)
    {
        m_protoMean = loadProtoMean(p.mean_file);
    }

    // The mean image, when supplied, determines the buffer size.
    int nDataSize = (imgMean != null)
        ? imgMean.Channels * imgMean.Height * imgMean.Width
        : nC * nH * nW;

    m_rgTransformedData = new T[nDataSize];
    m_param = p;
    m_phase = phase;

    InitRand();

    if (p.use_imagedb_mean)
    {
        if (m_protoMean != null)
        {
            // A mean loaded from the mean file takes precedence over imgMean.
            if (m_protoMean.data.Count > 0)
            {
                m_rgMeanData = new double[m_protoMean.data.Count];
                Array.Copy(m_protoMean.data.ToArray(), m_rgMeanData, m_rgMeanData.Length);
            }
            else
            {
                m_rgMeanData = m_protoMean.double_data.ToArray();
            }
        }
        else
        {
            m_imgMean = imgMean;

            if (m_imgMean != null)
            {
                m_rgMeanData = m_imgMean.GetData<double>();
            }
        }
    }

    if (p.mean_value.Count > 0)
    {
        m_log.CHECK(!p.use_imagedb_mean, "Cannot specify use_image_mean and mean_value at the same time.");

        foreach (double dfMean in p.mean_value)
        {
            m_rgMeanValues.Add(dfMean);
        }
    }
}
/// <summary>
/// Resync the transformer with changes in its parameter.
/// </summary>
/// <remarks>
/// The mean values are rebuilt from the parameter on each call, so calling Update more than once
/// does not append duplicate entries.
/// </remarks>
/// <param name="nDataSize">Optionally, specifies the item count of the transformed data buffer (default = 0); replaced by the mean image size when <i>imgMean</i> is given.</param>
/// <param name="imgMean">Optionally, specifies the image mean to use (default = <i>null</i>).</param>
public void Update(int nDataSize = 0, SimpleDatum imgMean = null)
{
    TransformationParameter p = m_param;

    if (imgMean != null)
    {
        nDataSize = imgMean.Channels * imgMean.Height * imgMean.Width;
    }

    // NOTE(review): the size test reads m_rgfTransformedData while the allocation targets
    // m_rgTransformedData - confirm that both fields are intended here.
    if (nDataSize > 0 || (m_rgfTransformedData != null && nDataSize != m_rgfTransformedData.Length))
    {
        m_rgTransformedData = new T[nDataSize];
    }

    if (p.mean_file != null)
    {
        m_protoMean = loadProtoMean(p.mean_file);
    }

    if (p.use_imagedb_mean)
    {
        if (m_protoMean == null)
        {
            m_imgMean = imgMean;

            if (m_imgMean != null)
            {
                m_rgMeanData = m_imgMean.GetData<double>();
            }
        }
        else
        {
            // A mean loaded from the mean file takes precedence over imgMean.
            if (m_protoMean.data.Count > 0)
            {
                m_rgMeanData = new double[m_protoMean.data.Count];
                Array.Copy(m_protoMean.data.ToArray(), m_rgMeanData, m_rgMeanData.Length);
            }
            else
            {
                m_rgMeanData = m_protoMean.double_data.ToArray();
            }
        }
    }

    if (p.mean_value.Count > 0)
    {
        m_log.CHECK(p.use_imagedb_mean == false, "Cannot specify use_image_mean and mean_value at the same time.");

        // Rebuild rather than append: entries left over from an earlier Update (or from
        // construction) would otherwise be duplicated on every resync.
        m_rgMeanValues.Clear();

        for (int c = 0; c < p.mean_value.Count; c++)
        {
            m_rgMeanValues.Add(p.mean_value[c]);
        }
    }
}
/// <summary>
/// Creates the ResNet model builder and prepares its training and testing transformation parameters.
/// </summary>
/// <param name="strBaseDirectory">Specifies the base directory that contains the data and models.</param>
/// <param name="strDataset">Specifies the dataset that the model will run on.</param>
/// <param name="nChannels">Specifies the number of channels in the data set (e.g. color = 3, b/w = 1).</param>
/// <param name="bSiamese">Specifies whether or not to create a Siamese network.</param>
/// <param name="rgIpLayers">Specifies a list of inner product layers added to the end of the network where each entry specifies the number of output and whether or not Noise is enabled for the layer.</param>
/// <param name="bUsePool5">Specifies whether or not to use the Pool layer as the last layer.</param>
/// <param name="bUseDilationConv5">Specifies whether or not to use dilation on block 5 layers.</param>
/// <param name="model">Specifies the type of ResNet model to create.</param>
/// <param name="nBatchSize">Optionally, specifies the batch size (default = 32).</param>
/// <param name="nAccumBatchSize">Optionally, specifies the accumulation batch size (default = 32).</param>
/// <param name="rgGpuId">Optionally, specifies a set of GPU ID's to use (when null, GPU=0 is used).</param>
/// <param name="net">Specifies the 'base' net parameter that is to be altered.</param>
public ResNetModelBuilder(string strBaseDirectory, string strDataset, int nChannels, bool bSiamese, List<Tuple<int, bool>> rgIpLayers, bool bUsePool5, bool bUseDilationConv5, MODEL model, int nBatchSize = 32, int nAccumBatchSize = 32, List<int> rgGpuId = null, NetParameter net = null)
    : base(strBaseDirectory, net)
{
    if (rgGpuId != null)
    {
        m_rgGpuID = new List<int>(rgGpuId);
    }
    else
    {
        m_rgGpuID.Add(0);
    }

    m_nChannels = nChannels;
    m_bSiamese = bSiamese;
    m_rgIpLayers = rgIpLayers;
    m_model = model;
    m_strModel = model.ToString();

    m_nBatchSize = nBatchSize;
    m_nAccumBatchSize = nAccumBatchSize;

    // This first value is replaced by the ceiling-based computation below.
    m_nIterSize = m_nAccumBatchSize / m_nBatchSize;

    if (m_rgGpuID.Count == 1)
    {
        m_nBatchSizePerDevice = m_nBatchSize;
    }
    else
    {
        m_nBatchSizePerDevice = m_nBatchSize / m_rgGpuID.Count;
    }

    m_nIterSize = (int)Math.Ceiling((float)m_nAccumBatchSize / (m_nBatchSizePerDevice * m_rgGpuID.Count));
    m_nGpuID = m_rgGpuID[0];
    m_dfBaseLr = 0.001;
    m_bUseDilationConv5 = bUseDilationConv5;
    m_bUsePool5 = bUsePool5;
    m_strDataset = strDataset;

    // Mean values added to both the training and testing transformation parameters.
    double[] rgdfMeanValues = new double[] { 104, 117, 123 };

    //-------------------------------------------------------
    //  Create the transformer for Training.
    //-------------------------------------------------------
    TransformationParameter trainParam = new TransformationParameter();
    m_transformTrain = trainParam;
    trainParam.mirror = true;
    trainParam.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    trainParam.mean_value = new List<double>();

    foreach (double dfMean in rgdfMeanValues)
    {
        trainParam.mean_value.Add(dfMean);
    }

    //-------------------------------------------------------
    //  Create the transformer for Testing.
    //-------------------------------------------------------
    TransformationParameter testParam = new TransformationParameter();
    m_transformTest = testParam;
    testParam.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    testParam.mean_value = new List<double>();

    foreach (double dfMean in rgdfMeanValues)
    {
        testParam.mean_value.Add(dfMean);
    }
}
/// <summary>
/// Creates the neural style transfer object, selecting the used layers from per-layer style and content weights.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the output log.</param>
/// <param name="evtCancel">Specifies the cancel event used to abort processing; when not <i>null</i> it is reset here.</param>
/// <param name="rgLayers">Specifies the layers along with their style (Item1) and content (Item2) weights; a weight of 0 leaves the layer out of the corresponding set.</param>
/// <param name="strModelDesc">Specifies the network model descriptor to use.</param>
/// <param name="rgWeights">Optionally, specifies the weights to use (or <i>null</i> to ignore).</param>
/// <param name="bCaffeModel">Specifies whether the weights are in the caffe (<i>true</i>, BGR color order) or mycaffe (<i>false</i>, RGB color order) format.</param>
/// <param name="solverType">Optionally, specifies the solver type to use (default = LBFGS).</param>
/// <param name="dfLearningRate">Optionally, specifies the solver learning rate (default = 1.0).</param>
/// <param name="nMaxImageSize">Optionally, specifies the default maximum image size (default = 840).</param>
/// <param name="nLBFGSCorrections">Optionally, specifies the LBFGS Corrections (only used when using the LBFGS solver, default = 100).</param>
/// <param name="netShare">Optionally, specifies a net to share.</param>
public NeuralStyleTransfer(CudaDnn<T> cuda, Log log, CancelEvent evtCancel, Dictionary<string, Tuple<double, double>> rgLayers, string strModelDesc, byte[] rgWeights, bool bCaffeModel, SolverParameter.SolverType solverType = SolverParameter.SolverType.LBFGS, double dfLearningRate = 1.0, int nMaxImageSize = 840, int nLBFGSCorrections = 100, Net<T> netShare = null)
{
    // Capture the constructor arguments.
    m_log = log;
    m_evtCancel = evtCancel;
    m_rgWeights = rgWeights;
    m_solverType = solverType;
    m_dfLearningRate = dfLearningRate;
    m_nDefaultMaxImageSize = nMaxImageSize;
    m_nLBFGSCorrections = nLBFGSCorrections;

    setupNetShare(netShare, cuda);

    if (m_evtCancel != null)
    {
        m_evtCancel.Reset();
    }

    m_param = NetParameter.FromProto(RawProto.Parse(strModelDesc));

    // Split the combined weights into separate style and content maps, skipping zero weights.
    Dictionary<string, double> rgStyleWeights = new Dictionary<string, double>();
    Dictionary<string, double> rgContentWeights = new Dictionary<string, double>();

    foreach (KeyValuePair<string, Tuple<double, double>> layer in rgLayers)
    {
        double dfStyleWeight = layer.Value.Item1;
        double dfContentWeight = layer.Value.Item2;

        if (dfStyleWeight != 0)
        {
            rgStyleWeights.Add(layer.Key, dfStyleWeight);
        }

        if (dfContentWeight != 0)
        {
            rgContentWeights.Add(layer.Key, dfContentWeight);
        }
    }

    // Add the input layer, prune to the used layers and add the gram layers.
    add_input_layer(m_param);
    m_rgstrUsedLayers = load_layers(rgStyleWeights, rgContentWeights);
    prune(m_param, m_rgstrUsedLayers);
    add_gram_layers(m_param);

    // Configure the transformation parameter through a local alias of the field.
    TransformationParameter transformParam = new TransformationParameter();
    m_transformationParam = transformParam;

    if (bCaffeModel)
    {
        transformParam.color_order = TransformationParameter.COLOR_ORDER.BGR;
    }
    else
    {
        transformParam.color_order = TransformationParameter.COLOR_ORDER.RGB;
    }

    transformParam.scale = 1.0;
    transformParam.mean_value = m_rgMeanValues;

    m_persist = new PersistCaffe<T>(m_log, false);
}
/// <summary>
/// The DataTransformer constructor.
/// </summary>
/// <param name="cuda">Specifies the connection to the CudaDnn dll which is only needed when using the bbox or image transformation functionality.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the TransformationParameter used to create the DataTransformer.</param>
/// <param name="phase">Specifies the Phase under which the DataTransformer is run.</param>
/// <param name="nC">Specifies the channels.</param>
/// <param name="nH">Specifies the height.</param>
/// <param name="nW">Specifies the width.</param>
/// <param name="imgMean">Optionally, specifies the image mean to use.</param>
public DataTransformer(CudaDnn<T> cuda, Log log, TransformationParameter p, Phase phase, int nC, int nH, int nW, SimpleDatum imgMean = null)
{
    m_log = log;
    m_param = p;

    // InitRand runs before the image transforms are created, which receive m_random.
    InitRand();
    m_phase = phase;

    m_bbox = new BBoxUtility<T>(cuda, log);
    m_imgTransforms = new ImageTransforms<T>(cuda, log, m_random);

    // The remaining setup is delegated to Update, using the channel * height * width item count.
    Update(nC * nH * nW, imgMean);
}
/// <summary>
/// Creates the ResNet OctConv model builder and prepares its training and testing transformation parameters.
/// </summary>
/// <param name="strBaseDirectory">Specifies the base directory that contains the data and models.</param>
/// <param name="strDataset">Specifies the dataset that the model will run on.</param>
/// <param name="rgIpLayers">Specifies a list of inner product layers added to the end of the network where each entry specifies the number of output and whether or not Noise is enabled for the layer.</param>
/// <param name="model">Specifies the type of ResNet model to create.</param>
/// <param name="nBatchSize">Optionally, specifies the batch size (default = 32).</param>
/// <param name="rgGpuId">Optionally, specifies a set of GPU ID's to use (when null, GPU=0 is used).</param>
/// <param name="net">Specifies the 'base' net parameter that is to be altered.</param>
public ResNetOctConvModelBuilder(string strBaseDirectory, string strDataset, List<Tuple<int, bool>> rgIpLayers, MODEL model, int nBatchSize = 32, List<int> rgGpuId = null, NetParameter net = null)
    : base(strBaseDirectory, net)
{
    if (rgGpuId != null)
    {
        m_rgGpuID = new List<int>(rgGpuId);
    }
    else
    {
        m_rgGpuID.Add(0);
    }

    m_rgIpLayers = rgIpLayers;
    m_model = model;
    m_strModel = model.ToString();
    m_nBatchSize = nBatchSize;
    m_nIterSize = 1;
    m_nGpuID = m_rgGpuID[0];
    m_dfBaseLr = 0.001;
    m_strDataset = strDataset;

    // Mean values added to both the training and testing transformation parameters.
    double[] rgdfMeanValues = new double[] { 104, 117, 123 };

    //-------------------------------------------------------
    //  Create the transformer for Training.
    //-------------------------------------------------------
    TransformationParameter trainParam = new TransformationParameter();
    m_transformTrain = trainParam;
    trainParam.mirror = true;
    trainParam.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    trainParam.mean_value = new List<double>();

    foreach (double dfMean in rgdfMeanValues)
    {
        trainParam.mean_value.Add(dfMean);
    }

    //-------------------------------------------------------
    //  Create the transformer for Testing.
    //-------------------------------------------------------
    TransformationParameter testParam = new TransformationParameter();
    m_transformTest = testParam;
    testParam.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    testParam.mean_value = new List<double>();

    foreach (double dfMean in rgdfMeanValues)
    {
        testParam.mean_value.Add(dfMean);
    }
}
/// <summary>
/// Adds a "transformation_id = ANY(...)" condition to the query, where the argument of ANY is
/// the value returned by registering <paramref name="param"/>'s Values with the parameter collection.
/// </summary>
/// <param name="param">Specifies the transformation parameter whose Values are added to the parameter collection.</param>
public void Apply(TransformationParameter param)
{
    var registered = _parameters.Add(param.Values);

    _query.Where($"transformation_id = ANY({registered})");
}
/// <summary>
/// Creates the Brain: wires up the output and target nets, the data transformer, the working blobs and the training settings.
/// </summary>
/// <param name="mycaffe">Specifies the instance of MyCaffe associated with the open project - when using more than one Brain, this is the master project.</param>
/// <param name="properties">Specifies the properties passed into the trainer; 'Gamma', 'MiniBatch' and 'UseAcceleratedTraining' are read here.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_solver = mycaffe.GetInternalSolver();
    m_netOutput = mycaffe.GetInternalNet(phase);
    m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
    m_properties = properties;
    m_random = random;

    // The 'data' blob supplies the frame count (channels) and the batch size (num).
    Blob<T> blobData = m_netOutput.blob_by_name("data");
    if (blobData == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");
    }

    m_nFramesPerX = blobData.channels;
    m_nBatchSize = blobData.num;

    // The 'logits' blob supplies the action count (channels).
    Blob<T> blobLogits = m_netOutput.blob_by_name("logits");
    if (blobLogits == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");
    }

    m_nActionCount = blobLogits.channels;

    // Use the control's transformer when it has one; otherwise create one sized from the training source.
    m_transformer = m_mycaffe.DataTransformer;
    if (m_transformer == null)
    {
        TransformationParameter transformParam = new TransformationParameter();
        int nChannels = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageChannels;
        int nHeight = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageHeight;
        int nWidth = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageWidth;

        m_transformer = new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, transformParam, phase, nChannels, nHeight, nWidth);
    }

    // One mean value per frame; 255 / 2 is integer division, so 127 is added.
    for (int nFrame = 0; nFrame < m_nFramesPerX; nFrame++)
    {
        m_transformer.param.mean_value.Add(255 / 2); // center each frame
    }

    m_transformer.param.scale = 1.0 / 255; // normalize
    m_transformer.Update();

    // Working blobs; those created with the extra 'false' argument are Actions, Done and Weights.
    m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

    m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
    if (m_memLoss == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");
    }

    // The project's 'base_lr' solver setting, when present, replaces the current learning rate.
    double? dfBaseLr = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
    if (dfBaseLr != null)
    {
        m_dfLearningRate = dfBaseLr.Value;
    }

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
    m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

    // With a mini-batch above 1, clone the learnable parameters as the gradient accumulator and zero its diff.
    if (m_nMiniBatch > 1)
    {
        m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
        m_colAccumulatedGradients.SetDiff(0);
    }
}
/// <summary>
/// Creates the SSD Pascal model builder and prepares the transformation, sampler, multi-box loss,
/// detection output, detection evaluation and multi-box head settings.
/// </summary>
/// <param name="strBaseDirectory">Specifies the base directory that contains the data and models.</param>
/// <param name="nBatchSize">Optionally, specifies the batch size (default = 32).</param>
/// <param name="nAccumBatchSize">Optionally, specifies the accumulation batch size (default = 32).</param>
/// <param name="rgGpuId">Optionally, specifies a set of GPU ID's to use (when null, GPU=0 is used).</param>
/// <param name="bUseBatchNorm">Optionally, specifies to use batch normalization (default = false).</param>
/// <param name="normMode">Optionally, specifies the normalization mode (default = VALID).</param>
/// <param name="net">Specifies the 'base' net parameter that is to be altered.</param>
public SsdPascalModelBuilder(string strBaseDirectory, int nBatchSize = 32, int nAccumBatchSize = 32, List<int> rgGpuId = null, bool bUseBatchNorm = false, LossParameter.NormalizationMode normMode = LossParameter.NormalizationMode.VALID, NetParameter net = null)
    : base(strBaseDirectory, net)
{
    if (rgGpuId != null)
    {
        m_rgGpuID = new List<int>(rgGpuId);
    }
    else
    {
        m_rgGpuID.Add(0);
    }

    m_strJob = "SSD_" + m_nResizeWidth.ToString() + "x" + m_nResizeHeight.ToString();
    // The model name is used when initially creating the NetParameter.
    m_strModel = "VGG_VOC0712_" + m_strJob;

    m_bUseBatchNorm = bUseBatchNorm;
    m_normalizationMode = normMode;

    m_nBatchSize = nBatchSize;
    m_nAccumBatchSize = nAccumBatchSize;

    // This first value is replaced by the ceiling-based computation below.
    m_nIterSize = m_nAccumBatchSize / m_nBatchSize;

    if (m_rgGpuID.Count == 1)
    {
        m_nBatchSizePerDevice = m_nBatchSize;
    }
    else
    {
        m_nBatchSizePerDevice = m_nBatchSize / m_rgGpuID.Count;
    }

    m_nIterSize = (int)Math.Ceiling((float)m_nAccumBatchSize / (m_nBatchSizePerDevice * m_rgGpuID.Count));
    m_nGpuID = m_rgGpuID[0];

    // Set the base learning rate.
    m_dfLocWeight = (m_dfNegPosRatio + 1.0) / 4.0;

    if (m_bUseBatchNorm)
    {
        m_dfBaseLr = 0.0004;
    }
    else
    {
        m_dfBaseLr = 0.00004;
    }

    // Adjust the base learning rate for the normalization mode; other modes leave it unchanged.
    if (m_normalizationMode == LossParameter.NormalizationMode.NONE)
    {
        m_dfBaseLr /= m_nBatchSizePerDevice;
    }
    else if (m_normalizationMode == LossParameter.NormalizationMode.VALID)
    {
        m_dfBaseLr *= 25.0 / m_dfLocWeight;
    }
    else if (m_normalizationMode == LossParameter.NormalizationMode.FULL)
    {
        // Roughly there are 2000 prior bboxes per images (TODO: calculate and use exact number).
        m_dfBaseLr *= 2000;
    }

    // Ideally the test_batch_size should be divisible by the num_test_image,
    // otherwise mAP will be slightly off the true value.
    m_nTestIter = (int)Math.Ceiling((float)m_nNumTestImage / (float)m_nTestBatchSize);

    //-------------------------------------------------------
    //  Create the transformer for Training.
    //-------------------------------------------------------
    TransformationParameter trainParam = new TransformationParameter();
    m_transformTrain = trainParam;
    trainParam.mirror = true;
    trainParam.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    trainParam.mean_value.Add(104);
    trainParam.mean_value.Add(117);
    trainParam.mean_value.Add(123);

    trainParam.resize_param = new ResizeParameter(true);
    trainParam.resize_param.prob = 1;
    trainParam.resize_param.resize_mode = ResizeParameter.ResizeMode.WARP;
    trainParam.resize_param.height = (uint)m_nResizeHeight;
    trainParam.resize_param.width = (uint)m_nResizeWidth;

    ResizeParameter.InterpMode[] rgTrainInterpModes = new ResizeParameter.InterpMode[]
    {
        ResizeParameter.InterpMode.LINEAR,
        ResizeParameter.InterpMode.AREA,
        ResizeParameter.InterpMode.NEAREST,
        ResizeParameter.InterpMode.CUBIC,
        ResizeParameter.InterpMode.LANCZOS4
    };

    foreach (ResizeParameter.InterpMode interpMode in rgTrainInterpModes)
    {
        trainParam.resize_param.interp_mode.Add(interpMode);
    }

    trainParam.distortion_param = new DistortionParameter(true);
    trainParam.distortion_param.brightness_prob = 0.5f;
    trainParam.distortion_param.brightness_delta = 32;
    trainParam.distortion_param.contrast_prob = 0.5f;
    trainParam.distortion_param.contrast_lower = 0.5f;
    trainParam.distortion_param.contrast_upper = 1.5f;
    trainParam.distortion_param.saturation_prob = 0.5f;
    trainParam.distortion_param.saturation_lower = 0.5f;
    trainParam.distortion_param.saturation_upper = 1.5f;
    trainParam.distortion_param.random_order_prob = 0.0f;

    trainParam.expansion_param = new ExpansionParameter(true);
    trainParam.expansion_param.prob = 0.5f;
    trainParam.expansion_param.max_expand_ratio = 4.0f;

    trainParam.emit_constraint = new EmitConstraint(true);
    trainParam.emit_constraint.emit_type = EmitConstraint.EmitType.CENTER;

    //-------------------------------------------------------
    //  Create the transformer for Testing.
    //-------------------------------------------------------
    TransformationParameter testParam = new TransformationParameter();
    m_transformTest = testParam;
    testParam.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    testParam.mean_value.Add(104);
    testParam.mean_value.Add(117);
    testParam.mean_value.Add(123);

    testParam.resize_param = new ResizeParameter(true);
    testParam.resize_param.prob = 1;
    testParam.resize_param.resize_mode = ResizeParameter.ResizeMode.WARP;
    testParam.resize_param.height = (uint)m_nResizeHeight;
    testParam.resize_param.width = (uint)m_nResizeWidth;
    testParam.resize_param.interp_mode.Add(ResizeParameter.InterpMode.LINEAR);

    //-------------------------------------------------------
    //  Create the batch samplers.
    //-------------------------------------------------------
    m_rgBatchSampler.Add(createSampler(1, 1));

    // Five samplers that differ only in their seventh argument.
    float[] rgfSeventhArg = new float[] { 0.1f, 0.3f, 0.5f, 0.7f, 0.9f };
    foreach (float fArg in rgfSeventhArg)
    {
        m_rgBatchSampler.Add(createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, fArg));
    }

    m_rgBatchSampler.Add(createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, null, 1.0f));

    //-------------------------------------------------------
    //  Create the Multi-box parameters.
    //-------------------------------------------------------
    m_multiBoxLossLayer = new LayerParameter(LayerParameter.LayerType.MULTIBOX_LOSS);
    m_multiBoxLossLayer.multiboxloss_param.loc_loss_type = MultiBoxLossParameter.LocLossType.SMOOTH_L1;
    m_multiBoxLossLayer.multiboxloss_param.conf_loss_type = MultiBoxLossParameter.ConfLossType.SOFTMAX;
    m_multiBoxLossLayer.multiboxloss_param.neg_pos_ratio = (float)m_dfNegPosRatio;
    m_multiBoxLossLayer.multiboxloss_param.num_classes = (uint)m_nNumClasses;
    m_multiBoxLossLayer.multiboxloss_param.loc_weight = (float)m_dfLocWeight;
    m_multiBoxLossLayer.multiboxloss_param.share_location = m_bShareLocation;
    m_multiBoxLossLayer.multiboxloss_param.match_type = MultiBoxLossParameter.MatchType.PER_PREDICTION;
    m_multiBoxLossLayer.multiboxloss_param.overlap_threshold = 0.5f;
    m_multiBoxLossLayer.multiboxloss_param.use_prior_for_matching = true;
    m_multiBoxLossLayer.multiboxloss_param.background_label_id = (uint)m_nBackgroundLabelId;
    m_multiBoxLossLayer.multiboxloss_param.use_difficult_gt = true;
    m_multiBoxLossLayer.multiboxloss_param.mining_type = MultiBoxLossParameter.MiningType.MAX_NEGATIVE;
    m_multiBoxLossLayer.multiboxloss_param.neg_overlap = 0.5f;
    m_multiBoxLossLayer.multiboxloss_param.code_type = PriorBoxParameter.CodeType.CENTER_SIZE;
    m_multiBoxLossLayer.multiboxloss_param.ignore_cross_boundary_bbox = false;
    m_multiBoxLossLayer.loss_param.normalization = m_normalizationMode;

    // The prior variance list depends on the code type that was just set.
    if (m_multiBoxLossLayer.multiboxloss_param.code_type == PriorBoxParameter.CodeType.CENTER_SIZE)
    {
        m_rgPriorVariance = new List<float>() { 0.1f, 0.1f, 0.2f, 0.2f };
    }
    else
    {
        m_rgPriorVariance = new List<float>() { 0.1f };
    }

    //-------------------------------------------------------
    //  Create the Detection Output parameters.
    //-------------------------------------------------------
    m_detectionOut = new DetectionOutputParameter();
    m_detectionOut.num_classes = (uint)m_nNumClasses;
    m_detectionOut.share_location = m_bShareLocation;
    m_detectionOut.background_label_id = m_nBackgroundLabelId;

    m_detectionOut.nms_param = new NonMaximumSuppressionParameter(true);
    m_detectionOut.nms_param.nms_threshold = 0.45f;
    m_detectionOut.nms_param.top_k = 400;

    m_detectionOut.save_output_param = new SaveOutputParameter(true);
    m_detectionOut.save_output_param.output_directory = m_strBaseDir + "\\results";
    m_detectionOut.save_output_param.output_name_prefix = "comp4_det_test_";
    m_detectionOut.save_output_param.label_map_file = getFileName(m_strLabelMapFile, null);
    m_detectionOut.save_output_param.name_size_file = getFileName(m_strNameSizeFile, null);
    m_detectionOut.save_output_param.num_test_image = (uint)m_nNumTestImage;

    m_detectionOut.keep_top_k = 200;
    m_detectionOut.confidence_threshold = 0.01f;
    m_detectionOut.code_type = m_multiBoxLossLayer.multiboxloss_param.code_type;

    //-------------------------------------------------------
    //  Create the Detection Evaluation parameters.
    //-------------------------------------------------------
    m_detectionEval = new DetectionEvaluateParameter();
    m_detectionEval.num_classes = (uint)m_nNumClasses;
    m_detectionEval.background_label_id = (uint)m_nBackgroundLabelId;
    m_detectionEval.overlap_threshold = 0.5f;
    m_detectionEval.evaulte_difficult_gt = false;
    m_detectionEval.name_size_file = getFileName(m_strNameSizeFile, null);

    //-------------------------------------------------------
    //  Setup the MultiBox head layer info.
    //-------------------------------------------------------
    // conv4_3 ==> 38 x 38
    // fc7     ==> 19 x 19
    // conv6_2 ==> 10 x 10
    // conv7_2 ==>  5 x 5
    // conv8_2 ==>  3 x 3
    // conv9_2 ==>  1 x 1
    List<string> rgstrSourceLayers = new List<string>() { "conv4_3", "fc7", "conv6_2", "conv7_2", "conv8_2", "conv9_2" };
    List<double> rgAspectW = new List<double>() { 2, 2, 2, 2, 2, 2 };
    List<double> rgAspectH = new List<double>() { 2, 3, 3, 3, 2, 2 };
    // L2 normalize conv4_3
    List<double> rgNorm = new List<double>() { 20, -1, -1, -1, -1, -1 };
    List<double> rgStepW = new List<double>() { 8, 16, 32, 64, 100, 300 };
    List<double> rgStepH = new List<double>() { 8, 16, 32, 64, 100, 300 };
    int nMinDim = 300;

    // in percent %
    double dfMinRatio = 20;
    double dfMaxRatio = 90;
    double dfRatioStep = (int)Math.Floor((dfMaxRatio - dfMinRatio) / (rgstrSourceLayers.Count - 2));

    List<double> rgMinSize = new List<double>();
    List<double> rgMaxSize = new List<double>();

    for (double dfRatio = dfMinRatio; dfRatio < dfMaxRatio + 1; dfRatio += dfRatioStep)
    {
        rgMinSize.Add(nMinDim * dfRatio / 100.0);
        rgMaxSize.Add(nMinDim * (dfRatio + dfRatioStep) / 100.0);
    }

    // The sizes for the first source layer are inserted ahead of the computed ones.
    rgMinSize.Insert(0, nMinDim * 10 / 100.0);
    rgMaxSize.Insert(0, nMinDim * 20 / 100.0);

    m_rgMultiBoxInfo = new List<MultiBoxHeadInfo>();

    for (int nIdx = 0; nIdx < rgstrSourceLayers.Count; nIdx++)
    {
        m_rgMultiBoxInfo.Add(new MultiBoxHeadInfo(
            rgstrSourceLayers[nIdx],
            rgMinSize[nIdx],
            rgMaxSize[nIdx],
            rgStepW[nIdx],
            rgStepH[nIdx],
            rgAspectW[nIdx],
            rgAspectH[nIdx],
            rgNorm[nIdx],
            null));
    }
}