/// <summary>
/// Read the normalization mode parameter and compute the normalizer based on the blob size.
/// If the <i>normalization_mode</i> is VALID, the count of valid outputs will be read from
/// <i>valid_count</i>, unless it is -1 in which case all outputs are assumed to be valid.
/// </summary>
/// <param name="normalization_mode">Specifies the normalization mode to use.</param>
/// <param name="nValidCount">Specifies the valid count.</param>
/// <returns>The normalization value is returned.</returns>
protected virtual double get_normalizer(LossParameter.NormalizationMode normalization_mode, int nValidCount)
{
    double dfNorm = 0;

    if (normalization_mode == LossParameter.NormalizationMode.FULL)
    {
        // Normalize over every output element.
        dfNorm = m_nOuterNum * m_nInnerNum;
    }
    else if (normalization_mode == LossParameter.NormalizationMode.VALID)
    {
        // A valid count of -1 means all outputs are treated as valid.
        dfNorm = (nValidCount == -1) ? (m_nOuterNum * m_nInnerNum) : nValidCount;
    }
    else if (normalization_mode == LossParameter.NormalizationMode.BATCH_SIZE)
    {
        dfNorm = m_nOuterNum;
    }
    else if (normalization_mode == LossParameter.NormalizationMode.NONE)
    {
        dfNorm = 1;
    }
    else
    {
        m_log.FAIL("Unknown normalization mode: " + normalization_mode.ToString());
    }

    // Some users will have no labels for some examples in order to 'turn off' a
    // particular loss in a multi-task setup. The max prevents NaNs in that case.
    return Math.Max(1.0, dfNorm);
}
/// <summary>
/// Setup the layer, creating the internal softmax layer used to compute the
/// probabilities and resolving the normalization mode.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    base.LayerSetUp(colBottom, colTop);

    // Build the internal softmax layer from a clone of this layer's parameter.
    LayerParameter softmaxParam = m_param.Clone(false);
    softmaxParam.type = LayerParameter.LayerType.SOFTMAX;
    m_softmaxLayer = new SoftmaxLayer<T>(m_cuda, m_log, softmaxParam);

    // Wire the softmax layer: bottom = this layer's first input, top = the probability blob.
    m_colSoftmaxBottom = new BlobCollection<T>();
    m_colSoftmaxBottom.Add(colBottom[0]);
    m_colSoftmaxTop = new BlobCollection<T>();
    m_colSoftmaxTop.Add(m_blobProb);
    m_softmaxLayer.Setup(m_colSoftmaxBottom, m_colSoftmaxTop);

    m_nIgnoreLabel = m_param.loss_param.ignore_label;

    // Use the explicit normalization mode when given; otherwise fall back to the
    // legacy 'normalize' flag (true => VALID, false => BATCH_SIZE).
    if (m_param.loss_param.normalization != LossParameter.NormalizationMode.NONE)
        m_normalization = m_param.loss_param.normalization;
    else
        m_normalization = (m_param.loss_param.normalize) ? LossParameter.NormalizationMode.VALID : LossParameter.NormalizationMode.BATCH_SIZE;
}
/// <summary>
/// Setup the layer: creates the internal softmax layer, reads the ignore label
/// and normalization mode, and loads the infogain matrix H from the parameter
/// source when it is not supplied as a third bottom blob.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    base.LayerSetUp(colBottom, colTop);

    // Internal softmax layer.
    LayerParameter softmax_param = new LayerParameter(LayerParameter.LayerType.SOFTMAX);
    softmax_param.softmax_param.axis = m_param.infogain_loss_param.axis;
    softmax_param.loss_weight.Clear();
    softmax_param.loss_weight.Add(1);
    m_softmaxLayer = new SoftmaxLayer<T>(m_cuda, m_log, softmax_param);
    m_colSoftmaxBottomVec.Clear();
    m_colSoftmaxBottomVec.Add(colBottom[0]);
    m_colSoftmaxTopVec.Clear();
    m_colSoftmaxTopVec.Add(m_blobProb);
    m_softmaxLayer.Setup(m_colSoftmaxBottomVec, m_colSoftmaxTopVec);

    // ignore label.
    m_nIgnoreLabel = m_param.loss_param.ignore_label;

    // normalization - the legacy 'normalize' flag is not supported here; the
    // 'normalization' mode must be used instead.
    // (fixed typo in the error message: 'drepreciated' -> 'deprecated')
    m_log.CHECK(!m_param.loss_param.normalize, "normalize is deprecated, use 'normalization'.");
    m_normalization = m_param.loss_param.normalization;

    // matrix H - when not provided as a third bottom blob, load it from the
    // source file specified in the infogain parameter.
    if (colBottom.Count < 3)
    {
        m_log.CHECK(m_param.infogain_loss_param.source != null, "Infogain matrix source must be specified.");
        PersistCaffe<T> persist = new PersistCaffe<T>(m_log, true);
        BlobProto blobProto = persist.LoadBlobProto(m_param.infogain_loss_param.source, 1);
        m_blobInfoGain.FromProto(blobProto);
    }
}
/// <summary>
/// Setup the layer: ensures a non-zero default loss weight and resolves the
/// normalization mode from the loss parameter.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    // LossLayers have non-zero (1) loss by default.
    if (m_param.loss_weight.Count == 0)
        m_param.loss_weight.Add(1.0);

    // The legacy 'normalize' flag is not supported; 'normalization' must be used.
    // (fixed typo in the error message: 'drepreciated' -> 'deprecated')
    m_log.CHECK(!m_param.loss_param.normalize, "normalize is deprecated, use 'normalization'.");

    // When no explicit mode is set, fall back to the legacy flag
    // (true => VALID, false => BATCH_SIZE).
    if (!m_param.loss_param.normalization.HasValue)
        m_normalization = (m_param.loss_param.normalize) ? LossParameter.NormalizationMode.VALID : LossParameter.NormalizationMode.BATCH_SIZE;
    else
        m_normalization = m_param.loss_param.normalization.Value;
}
/// <summary>
/// Returns the normalizer used to normalize the loss.
/// </summary>
/// <param name="normalization_mode">Specifies the normalization mode to use.</param>
/// <param name="nValidCount">Specifies the number of valid.</param>
/// <returns>The normalization value is returned.</returns>
protected virtual double get_normalizer(LossParameter.NormalizationMode normalization_mode, int nValidCount)
{
    double dfVal = 0.0;

    if (normalization_mode == LossParameter.NormalizationMode.FULL)
    {
        m_log.CHECK_GT(m_nInnerNum, 0, "The inner number must be set.");
        m_log.CHECK_GT(m_nOuterNum, 0, "The outer number must be set.");
        dfVal = m_nOuterNum * m_nInnerNum;
    }
    else if (normalization_mode == LossParameter.NormalizationMode.VALID)
    {
        if (nValidCount != -1)
        {
            dfVal = nValidCount;
        }
        else
        {
            // A valid count of -1 means all outputs are valid, so normalize
            // over the full blob size instead.
            m_log.CHECK_GT(m_nInnerNum, 0, "The inner number must be set.");
            m_log.CHECK_GT(m_nOuterNum, 0, "The outer number must be set.");
            dfVal = m_nOuterNum * m_nInnerNum;
        }
    }
    else if (normalization_mode == LossParameter.NormalizationMode.BATCH_SIZE)
    {
        m_log.CHECK_GT(m_nOuterNum, 0, "The outer number must be set.");
        dfVal = m_nOuterNum;
    }
    else if (normalization_mode == LossParameter.NormalizationMode.NONE)
    {
        dfVal = 1.0;
    }
    else
    {
        m_log.FAIL("Unknown normalization mode " + normalization_mode.ToString());
    }

    // Some users will have no labels for some examples in order to 'turn off' a
    // particular loss in a multi-task setup. The max prevents NaNs in that case.
    return Math.Max(dfVal, 1.0);
}
/// <summary>
/// Setup the layer, wiring the internal sigmoid layer and resolving the
/// normalization mode.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    base.LayerSetUp(colBottom, colTop);

    // Wire the sigmoid layer: bottom = this layer's first input,
    // top = the sigmoid output blob.
    m_colSigmoidBottomVec = new BlobCollection<T>();
    m_colSigmoidBottomVec.Add(colBottom[0]);
    m_colSigmoidTopVec = new BlobCollection<T>();
    m_colSigmoidTopVec.Add(m_blobSigmoidOutput);
    m_sigmoidLayer.Setup(m_colSigmoidBottomVec, m_colSigmoidTopVec);

    m_nIgnoreLabel = m_param.loss_param.ignore_label;

    // Prefer the explicit normalization mode; NONE means it was not specified,
    // so fall back to the legacy 'normalize' flag (true => VALID, false => BATCH_SIZE).
    if (m_param.loss_param.normalization == LossParameter.NormalizationMode.NONE)
        m_normalization = (m_param.loss_param.normalize) ? LossParameter.NormalizationMode.VALID : LossParameter.NormalizationMode.BATCH_SIZE;
    else
        m_normalization = m_param.loss_param.normalization;
}
/// <summary>
/// Returns the normalizer used to normalize the loss.
/// </summary>
/// <param name="normalization_mode">Specifies the normalization mode to use.</param>
/// <param name="nValidCount">Specifies the number of valid.</param>
/// <returns>The normalization value is returned.</returns>
protected virtual T get_normalizer(LossParameter.NormalizationMode normalization_mode, T nValidCount)
{
    T fResult = convert(0.0);

    if (normalization_mode == LossParameter.NormalizationMode.FULL)
    {
        fResult = convert(m_nOuterNum * m_nInnerNum);
    }
    else if (normalization_mode == LossParameter.NormalizationMode.VALID)
    {
        // A valid count of -1 means all outputs are valid, so normalize over
        // the full blob size instead.
        if (convertD(nValidCount) == -1)
            fResult = convert(m_nOuterNum * m_nInnerNum);
        else
            fResult = nValidCount;
    }
    else if (normalization_mode == LossParameter.NormalizationMode.BATCH_SIZE)
    {
        fResult = convert(m_nOuterNum);
    }
    else if (normalization_mode == LossParameter.NormalizationMode.NONE)
    {
        fResult = convert(1.0);
    }
    else
    {
        m_log.FAIL("Unknown normalization mode " + normalization_mode.ToString());
    }

    // Some users will have no labels for some examples in order to 'turn off' a
    // particular loss in a multi-task setup. The max prevents NaNs in that case.
    return convert(Math.Max(convertD(fResult), 1.0));
}
/// <summary>
/// Returns the normalizer used to normalize the loss.
/// </summary>
/// <remarks>
/// Delegates to the shared GetNormalizer helper using this layer's outer and
/// inner dimensions.
/// </remarks>
/// <param name="normalization_mode">Specifies the normalization mode to use.</param>
/// <param name="nValidCount">Specifies the number of valid.</param>
/// <returns>The normalization value is returned.</returns>
protected virtual double get_normalizer(LossParameter.NormalizationMode normalization_mode, int nValidCount)
{
    return GetNormalizer(normalization_mode, m_nOuterNum, m_nInnerNum, nValidCount);
}
/// <summary>
/// The constructor.
/// </summary>
/// <param name="strBaseDirectory">Specifies the base directory that contains the data and models.</param>
/// <param name="nBatchSize">Optionally, specifies the batch size (default = 32).</param>
/// <param name="nAccumBatchSize">Optionally, specifies the accumulation batch size (default = 32).</param>
/// <param name="rgGpuId">Optionally, specifies a set of GPU ID's to use (when null, GPU=0 is used).</param>
/// <param name="bUseBatchNorm">Optionally, specifies to use batch normalization (default = false).</param>
/// <param name="normMode">Optionally, specifies the normalization mode (default = VALID).</param>
/// <param name="net">Specifies the 'base' net parameter that is to be altered.</param>
public SsdPascalModelBuilder(string strBaseDirectory, int nBatchSize = 32, int nAccumBatchSize = 32, List<int> rgGpuId = null, bool bUseBatchNorm = false, LossParameter.NormalizationMode normMode = LossParameter.NormalizationMode.VALID, NetParameter net = null)
    : base(strBaseDirectory, net)
{
    if (rgGpuId == null)
        m_rgGpuID.Add(0);
    else
        m_rgGpuID = new List<int>(rgGpuId);

    m_strJob = "SSD_" + m_nResizeWidth.ToString() + "x" + m_nResizeHeight.ToString();
    // The model name is used when initially creating the NetParameter.
    m_strModel = "VGG_VOC0712_" + m_strJob;

    m_bUseBatchNorm = bUseBatchNorm;
    m_normalizationMode = normMode;
    m_nBatchSize = nBatchSize;
    m_nAccumBatchSize = nAccumBatchSize;
    // NOTE: the original code first assigned m_nIterSize = m_nAccumBatchSize / m_nBatchSize,
    // but that value was immediately overwritten by the ceiling computation below,
    // so the dead store has been removed.
    m_nBatchSizePerDevice = (m_rgGpuID.Count == 1) ? m_nBatchSize : m_nBatchSize / m_rgGpuID.Count;
    m_nIterSize = (int)Math.Ceiling((float)m_nAccumBatchSize / (m_nBatchSizePerDevice * m_rgGpuID.Count));
    m_nGpuID = m_rgGpuID[0];

    // Set the base learning rate.
    m_dfLocWeight = (m_dfNegPosRatio + 1.0) / 4.0;
    m_dfBaseLr = (m_bUseBatchNorm) ? 0.0004 : 0.00004;

    // Scale the base learning rate based on how the loss is normalized.
    switch (m_normalizationMode)
    {
        case LossParameter.NormalizationMode.NONE:
            m_dfBaseLr /= m_nBatchSizePerDevice;
            break;

        case LossParameter.NormalizationMode.VALID:
            m_dfBaseLr *= 25.0 / m_dfLocWeight;
            break;

        case LossParameter.NormalizationMode.FULL:
            // Roughly there are 2000 prior bboxes per images (TODO: calculate and use exact number).
            m_dfBaseLr *= 2000;
            break;
    }

    // Ideally the test_batch_size should be divisible by the num_test_image,
    // otherwise mAP will be slightly off the true value.
    m_nTestIter = (int)Math.Ceiling((float)m_nNumTestImage / (float)m_nTestBatchSize);

    //-------------------------------------------------------
    // Create the transformer for Training.
    //-------------------------------------------------------
    m_transformTrain = new TransformationParameter();
    m_transformTrain.mirror = true;
    m_transformTrain.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    m_transformTrain.mean_value.Add(104);
    m_transformTrain.mean_value.Add(117);
    m_transformTrain.mean_value.Add(123);
    m_transformTrain.resize_param = new ResizeParameter(true);
    m_transformTrain.resize_param.prob = 1;
    m_transformTrain.resize_param.resize_mode = ResizeParameter.ResizeMode.WARP;
    m_transformTrain.resize_param.height = (uint)m_nResizeHeight;
    m_transformTrain.resize_param.width = (uint)m_nResizeWidth;
    m_transformTrain.resize_param.interp_mode.Add(ResizeParameter.InterpMode.LINEAR);
    m_transformTrain.resize_param.interp_mode.Add(ResizeParameter.InterpMode.AREA);
    m_transformTrain.resize_param.interp_mode.Add(ResizeParameter.InterpMode.NEAREST);
    m_transformTrain.resize_param.interp_mode.Add(ResizeParameter.InterpMode.CUBIC);
    m_transformTrain.resize_param.interp_mode.Add(ResizeParameter.InterpMode.LANCZOS4);
    m_transformTrain.distortion_param = new DistortionParameter(true);
    m_transformTrain.distortion_param.brightness_prob = 0.5f;
    m_transformTrain.distortion_param.brightness_delta = 32;
    m_transformTrain.distortion_param.contrast_prob = 0.5f;
    m_transformTrain.distortion_param.contrast_lower = 0.5f;
    m_transformTrain.distortion_param.contrast_upper = 1.5f;
    m_transformTrain.distortion_param.saturation_prob = 0.5f;
    m_transformTrain.distortion_param.saturation_lower = 0.5f;
    m_transformTrain.distortion_param.saturation_upper = 1.5f;
    m_transformTrain.distortion_param.random_order_prob = 0.0f;
    m_transformTrain.expansion_param = new ExpansionParameter(true);
    m_transformTrain.expansion_param.prob = 0.5f;
    m_transformTrain.expansion_param.max_expand_ratio = 4.0f;
    m_transformTrain.emit_constraint = new EmitConstraint(true);
    m_transformTrain.emit_constraint.emit_type = EmitConstraint.EmitType.CENTER;

    //-------------------------------------------------------
    // Create the transformer for Testing.
    //-------------------------------------------------------
    m_transformTest = new TransformationParameter();
    m_transformTest.color_order = TransformationParameter.COLOR_ORDER.BGR; // to support caffe models.
    m_transformTest.mean_value.Add(104);
    m_transformTest.mean_value.Add(117);
    m_transformTest.mean_value.Add(123);
    m_transformTest.resize_param = new ResizeParameter(true);
    m_transformTest.resize_param.prob = 1;
    m_transformTest.resize_param.resize_mode = ResizeParameter.ResizeMode.WARP;
    m_transformTest.resize_param.height = (uint)m_nResizeHeight;
    m_transformTest.resize_param.width = (uint)m_nResizeWidth;
    m_transformTest.resize_param.interp_mode.Add(ResizeParameter.InterpMode.LINEAR);

    //-------------------------------------------------------
    // Create the batch samplers.
    //-------------------------------------------------------
    BatchSampler sampler = createSampler(1, 1);
    m_rgBatchSampler.Add(sampler);

    sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.1f);
    m_rgBatchSampler.Add(sampler);

    sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.3f);
    m_rgBatchSampler.Add(sampler);

    sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.5f);
    m_rgBatchSampler.Add(sampler);

    sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.7f);
    m_rgBatchSampler.Add(sampler);

    sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.9f);
    m_rgBatchSampler.Add(sampler);

    sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, null, 1.0f);
    m_rgBatchSampler.Add(sampler);

    //-------------------------------------------------------
    // Create the Multi-box parameters.
    //-------------------------------------------------------
    m_multiBoxLossLayer = new LayerParameter(LayerParameter.LayerType.MULTIBOX_LOSS);
    m_multiBoxLossLayer.multiboxloss_param.loc_loss_type = MultiBoxLossParameter.LocLossType.SMOOTH_L1;
    m_multiBoxLossLayer.multiboxloss_param.conf_loss_type = MultiBoxLossParameter.ConfLossType.SOFTMAX;
    m_multiBoxLossLayer.multiboxloss_param.neg_pos_ratio = (float)m_dfNegPosRatio;
    m_multiBoxLossLayer.multiboxloss_param.num_classes = (uint)m_nNumClasses;
    m_multiBoxLossLayer.multiboxloss_param.loc_weight = (float)m_dfLocWeight;
    m_multiBoxLossLayer.multiboxloss_param.share_location = m_bShareLocation;
    m_multiBoxLossLayer.multiboxloss_param.match_type = MultiBoxLossParameter.MatchType.PER_PREDICTION;
    m_multiBoxLossLayer.multiboxloss_param.overlap_threshold = 0.5f;
    m_multiBoxLossLayer.multiboxloss_param.use_prior_for_matching = true;
    m_multiBoxLossLayer.multiboxloss_param.background_label_id = (uint)m_nBackgroundLabelId;
    m_multiBoxLossLayer.multiboxloss_param.use_difficult_gt = true;
    m_multiBoxLossLayer.multiboxloss_param.mining_type = MultiBoxLossParameter.MiningType.MAX_NEGATIVE;
    m_multiBoxLossLayer.multiboxloss_param.neg_overlap = 0.5f;
    m_multiBoxLossLayer.multiboxloss_param.code_type = PriorBoxParameter.CodeType.CENTER_SIZE;
    m_multiBoxLossLayer.multiboxloss_param.ignore_cross_boundary_bbox = false;
    m_multiBoxLossLayer.loss_param.normalization = m_normalizationMode;

    // Fixed a misplaced semicolon in the original ('} } ; else') which left the
    // if/else ill-formed; CENTER_SIZE encoding uses 4 prior variances, other
    // encodings use a single variance.
    if (m_multiBoxLossLayer.multiboxloss_param.code_type == PriorBoxParameter.CodeType.CENTER_SIZE)
    {
        m_rgPriorVariance = new List<float>() { 0.1f, 0.1f, 0.2f, 0.2f };
    }
    else
    {
        m_rgPriorVariance = new List<float>() { 0.1f };
    }

    //-------------------------------------------------------
    // Create the Detection Output parameters.
    //-------------------------------------------------------
    m_detectionOut = new DetectionOutputParameter();
    m_detectionOut.num_classes = (uint)m_nNumClasses;
    m_detectionOut.share_location = m_bShareLocation;
    m_detectionOut.background_label_id = m_nBackgroundLabelId;
    m_detectionOut.nms_param = new NonMaximumSuppressionParameter(true);
    m_detectionOut.nms_param.nms_threshold = 0.45f;
    m_detectionOut.nms_param.top_k = 400;
    m_detectionOut.save_output_param = new SaveOutputParameter(true);
    m_detectionOut.save_output_param.output_directory = m_strBaseDir + "\\results";
    m_detectionOut.save_output_param.output_name_prefix = "comp4_det_test_";
    m_detectionOut.save_output_param.label_map_file = getFileName(m_strLabelMapFile, null);
    m_detectionOut.save_output_param.name_size_file = getFileName(m_strNameSizeFile, null);
    m_detectionOut.save_output_param.num_test_image = (uint)m_nNumTestImage;
    m_detectionOut.keep_top_k = 200;
    m_detectionOut.confidence_threshold = 0.01f;
    m_detectionOut.code_type = m_multiBoxLossLayer.multiboxloss_param.code_type;

    //-------------------------------------------------------
    // Create the Detection Evaluation parameters.
    //-------------------------------------------------------
    m_detectionEval = new DetectionEvaluateParameter();
    m_detectionEval.num_classes = (uint)m_nNumClasses;
    m_detectionEval.background_label_id = (uint)m_nBackgroundLabelId;
    m_detectionEval.overlap_threshold = 0.5f;
    m_detectionEval.evaulte_difficult_gt = false;
    m_detectionEval.name_size_file = getFileName(m_strNameSizeFile, null);

    //-------------------------------------------------------
    // Setup the MultiBox head layer info.
    //-------------------------------------------------------
    // conv4_3 ==> 38 x 38
    // fc7     ==> 19 x 19
    // conv6_2 ==> 10 x 10
    // conv7_2 ==>  5 x 5
    // conv8_2 ==>  3 x 3
    // conv9_2 ==>  1 x 1
    List<string> rgstrMboxSourceLayers = new List<string>() { "conv4_3", "fc7", "conv6_2", "conv7_2", "conv8_2", "conv9_2" };
    List<double> rgAspectWid = new List<double>() { 2, 2, 2, 2, 2, 2 };
    List<double> rgAspectHt = new List<double>() { 2, 3, 3, 3, 2, 2 };
    // L2 normalize conv4_3
    List<double> rgNormalization = new List<double>() { 20, -1, -1, -1, -1, -1 };
    List<double> rgStepsW = new List<double>() { 8, 16, 32, 64, 100, 300 };
    List<double> rgStepsH = new List<double>() { 8, 16, 32, 64, 100, 300 };
    int nMinDim = 300;

    // Compute the min/max prior box sizes per source layer as a ratio (in percent %)
    // of the minimum input dimension.
    double dfMinRatio = 20;
    double dfMaxRatio = 90;
    double dfRatioStep = (int)Math.Floor((dfMaxRatio - dfMinRatio) / (rgstrMboxSourceLayers.Count - 2));
    List<double> rgMinSizes = new List<double>();
    List<double> rgMaxSizes = new List<double>();

    for (double dfRatio = dfMinRatio; dfRatio < dfMaxRatio + 1; dfRatio += dfRatioStep)
    {
        rgMinSizes.Add(nMinDim * dfRatio / 100.0);
        rgMaxSizes.Add(nMinDim * (dfRatio + dfRatioStep) / 100.0);
    }

    // The first (highest resolution) source layer uses smaller 10%/20% sizes.
    rgMinSizes.Insert(0, nMinDim * 10 / 100.0);
    rgMaxSizes.Insert(0, nMinDim * 20 / 100.0);

    m_rgMultiBoxInfo = new List<MultiBoxHeadInfo>();

    for (int i = 0; i < rgstrMboxSourceLayers.Count; i++)
    {
        string strSrc = rgstrMboxSourceLayers[i];
        double dfMinSize = rgMinSizes[i];
        double dfMaxSize = rgMaxSizes[i];
        double dfStepW = rgStepsW[i];
        double dfStepH = rgStepsH[i];
        double dfAspectW = rgAspectWid[i];
        double dfAspectH = rgAspectHt[i];
        double dfNorm = rgNormalization[i];

        m_rgMultiBoxInfo.Add(new MultiBoxHeadInfo(strSrc, dfMinSize, dfMaxSize, dfStepW, dfStepH, dfAspectW, dfAspectH, dfNorm, null));
    }
}