Example #1
0
        /// <summary>
        /// Wraps <paramref name="model"/> into a model whose single output is the YOLO loss,
        /// then trains it with <c>fit_generator</c> in two stages: an optional first stage
        /// with <c>SetFreeze(…, true)</c> applied, followed by a stage with it cleared.
        /// </summary>
        /// <param name="model">Model whose <c>outputs</c> feed the loss endpoint and whose input becomes the first input of the wrapper.</param>
        /// <param name="optimizer">Optimizer the wrapper model is compiled with for both stages.</param>
        /// <param name="dataset">Source of training batches; batches are loaded with random augmentations.</param>
        /// <param name="firstStageEpochs">Number of epochs in the frozen stage; the stage is skipped when not positive.</param>
        /// <param name="secondStageEpochs">Number of epochs in the stage that follows the frozen one.</param>
        /// <param name="callbacks">Optional callbacks forwarded to both <c>fit_generator</c> calls.</param>
        /// <param name="batchSize">Batch size requested from <paramref name="dataset"/>.</param>
        public static void TrainGenerator(Model model, IOptimizer optimizer, ObjectDetectionDataset dataset,
                                          int firstStageEpochs = 20, int secondStageEpochs = 30,
                                          IEnumerable<ICallback>? callbacks = null, int batchSize = 2)
        {
            callbacks = callbacks?.ToPyList();

            const int inputSize = 416;
            int[] scaleStrides = YOLOv4.Strides.ToArray();
            int classCount = MS_COCO.ClassCount;

            // One label placeholder per stride; the grid side is the input size divided by the stride.
            // NOTE(review): the trailing 3 and 85 presumably mean anchors-per-scale and
            // 5 + class count - confirm before changing classCount or the anchors.
            var labelInputs = new Tensor<float>[scaleStrides.Length];
            for (int scale = 0; scale < scaleStrides.Length; scale++)
            {
                int gridSize = inputSize / scaleStrides[scale];
                labelInputs[scale] = (Tensor<float>)tf.keras.Input(
                    new TensorShape(gridSize, gridSize, 3, 85),
                    name: $"label{scale}");
            }

            // Three box placeholders of shape (150, 4).
            var boxInputs = new Tensor<float>[3];
            for (int scale = 0; scale < boxInputs.Length; scale++)
            {
                boxInputs[scale] = (Tensor<float>)tf.keras.Input(new TensorShape(150, 4), name: $"box{scale}");
            }

            var lossEndpoint = new YoloLossEndpoint(trueLabels: labelInputs, trueBoxes: boxInputs,
                                                    strides: scaleStrides, classCount: classCount);
            Tensor loss = lossEndpoint.__call__(model.outputs);

            // The wrapper takes the image input first, then the labels, then the boxes,
            // and produces the loss tensor as its only output.
            var wrapperInputs = labelInputs.Concat(boxInputs).Prepend((Tensor<float>)model.input_dyn);
            var trainingModel = new Model(new {
                inputs  = wrapperInputs,
                outputs = loss
            }.AsKwArgs());

            var batches = dataset.Batch(batchSize: batchSize,
                                        onloadAugmentation: ObjectDetectionDataset.RandomlyApplyAugmentations);
            var generator = ListLinq.Select(batches, batch => batch.ToGeneratorOutput())
                                    .ToSequence();

            // The model is (re)compiled before every fit call, i.e. after each freeze change.
            void CompileForTraining() =>
                trainingModel.compile(new ImplicitContainer<object>(optimizer), new ZeroLoss());

            if (firstStageEpochs > 0)
            {
                SetFreeze(trainingModel, true);
                CompileForTraining();
                trainingModel.fit_generator(generator, callbacks: callbacks,
                                            verbose: 1,
                                            shuffle: false,
                                            epochs: firstStageEpochs);
                SetFreeze(trainingModel, false);
            }

            // The second stage continues the epoch numbering where the first stage stopped.
            CompileForTraining();
            trainingModel.fit_generator(generator, callbacks: callbacks,
                                        shuffle: false,
                                        verbose: 1,
                                        epochs: firstStageEpochs + secondStageEpochs,
                                        initial_epoch: firstStageEpochs);
        }
Example #2
0
        /// <summary>
        /// Builds the dataset, model, optimizer and callbacks from this command's options,
        /// runs <c>YOLO.TrainGenerator</c>, and saves the trained weights unless this is a
        /// benchmark or test run.
        /// </summary>
        /// <param name="remainingArguments">Not read by this method.</param>
        /// <returns>Always <c>0</c>.</returns>
        public override int Run(string[] remainingArguments)
        {
            // Route trace output to the console's error stream.
            Trace.Listeners.Add(new ConsoleTraceListener(useErrorStream: true));
            tf.debugging.set_log_device_placement(this.LogDevicePlacement);

            if (this.GpuAllowGrowth)
            {
                dynamic sessionConfig = config_pb2.ConfigProto.CreateInstance();
                sessionConfig.gpu_options.allow_growth = true;
                tf.keras.backend.set_session(Session.NewDyn(config: sessionConfig));
            }

            if (this.TestRun)
            {
                // A test run keeps only enough annotations for three batches.
                this.Annotations = this.Annotations.Take(this.BatchSize * 3).ToArray();
            }

            var trainingSet = new ObjectDetectionDataset(
                this.Annotations,
                classNames: this.ClassNames,
                strides: this.Strides,
                inputSize: this.InputSize,
                anchors: this.Anchors,
                anchorsPerScale: this.AnchorsPerScale,
                maxBBoxPerScale: this.MaxBBoxPerScale);
            var detector = YOLO.CreateV4Trainable(trainingSet.InputSize,
                                                  trainingSet.ClassNames.Length,
                                                  trainingSet.Strides);

            // The schedule is expressed in optimizer steps: epochs times batches per epoch.
            var totalSteps = (long)(this.FirstStageEpochs + this.SecondStageEpochs)
                             * trainingSet.BatchCount(this.BatchSize);
            var warmupSteps = this.WarmupEpochs * trainingSet.BatchCount(this.BatchSize);
            var schedule = new YOLO.LearningRateSchedule(totalSteps: totalSteps,
                                                         warmupSteps: warmupSteps);
            // https://github.com/AlexeyAB/darknet/issues/1845
            var adam = new Adam(learning_rate: schedule, epsilon: 1e-6);

            if (this.ModelSummary)
            {
                detector.summary();
            }
            if (this.WeightsPath != null)
            {
                detector.load_weights(this.WeightsPath);
            }

            var trainingCallbacks = new List<ICallback>();
            trainingCallbacks.Add(new LearningRateLogger());
            trainingCallbacks.Add(new TensorBoard(log_dir: this.LogDir, batch_size: this.BatchSize, profile_batch: 4));

            // Checkpoints are written only for real training runs.
            if (!(this.Benchmark || this.TestRun))
            {
                trainingCallbacks.Add(new ModelCheckpoint("yoloV4.weights.{epoch:02d}", save_weights_only: true));
            }

            YOLO.TrainGenerator(detector, adam, trainingSet,
                                batchSize: this.BatchSize,
                                firstStageEpochs: this.FirstStageEpochs,
                                secondStageEpochs: this.SecondStageEpochs,
                                callbacks: trainingCallbacks);

            // Final weights are likewise saved only for real training runs.
            if (!(this.Benchmark || this.TestRun))
            {
                detector.save_weights("yoloV4.weights-trained");
            }

            // the following does not work due to the need to name layers properly
            // https://stackoverflow.com/questions/61402903/unable-to-create-group-name-already-exists
            // model.save("yoloV4-trained");
            return 0;
        }
Example #3
0
        /// <summary>
        /// Trains <paramref name="model"/> with a hand-written epoch/batch loop: every batch goes
        /// through <c>TrainStep</c>, the learning rate is updated from the schedule after each step,
        /// losses are written as summaries, and the callbacks are notified at epoch boundaries.
        /// </summary>
        /// <param name="model">Model passed to <c>TrainStep</c>, <c>TestStep</c> and <c>SetFreeze</c>.</param>
        /// <param name="optimizer">Optimizer passed to <c>TrainStep</c>; its learning rate is updated after every step.</param>
        /// <param name="dataset">Training data; batches are loaded with random augmentations.</param>
        /// <param name="testSet">Optional evaluation data, run through <c>TestStep</c> without augmentation after each epoch.</param>
        /// <param name="callbacks">Optional callbacks. The sequence is enumerated exactly once.</param>
        /// <param name="batchSize">Batch size used for both training and evaluation.</param>
        /// <param name="warmupEpochs">Number of epochs converted into warm-up steps for the learning rate schedule.</param>
        /// <param name="firstStageEpochs">Epochs during which the model is frozen (once more than 32 batches have been counted).</param>
        /// <param name="secondStageEpochs">Epochs that follow the first stage, with the model unfrozen.</param>
        /// <param name="initialLearningRate">Forwarded to the learning rate schedule.</param>
        /// <param name="finalLearningRate">Forwarded to the learning rate schedule.</param>
        /// <param name="testRun">When set, only one batch is processed per epoch and it is counted as a whole epoch of steps.</param>
        /// <param name="benchmark">Forwarded to <c>TrainStep</c>; when set, the method returns after the epoch with index 1.</param>
        /// <exception cref="ResourceExhaustedError">Rethrown after more than 10 consecutive allocation failures.</exception>
        public static void Train(Model model, IOptimizer optimizer, ObjectDetectionDataset dataset,
                                 ObjectDetectionDataset? testSet = null,
                                 IEnumerable<ICallback>? callbacks = null,
                                 int batchSize = 2,
                                 int warmupEpochs = 2, int firstStageEpochs = 20,
                                 int secondStageEpochs = 30,
                                 float initialLearningRate = 1e-3f,
                                 float finalLearningRate = 1e-6f,
                                 bool testRun = false,
                                 bool benchmark = false)
        {
            const int maxConsecutiveAllocFailures = 10;
            const int unfrozenWarmupBatches = 32;

            var globalSteps = new Variable(1, dtype: tf.int64);

            int totalEpochs = firstStageEpochs + secondStageEpochs;
            int batchesPerEpoch = dataset.BatchCount(batchSize);

            var learningRateSchedule = new YOLO.LearningRateSchedule(
                totalSteps: (long)totalEpochs * batchesPerEpoch,
                warmupSteps: warmupEpochs * batchesPerEpoch,
                initialLearningRate: initialLearningRate,
                finalLearningRate: finalLearningRate);

            // Materialize once: the callbacks are walked several times per epoch, and a lazily
            // evaluated sequence could hand out different instances on every pass.
            ICallback[] callbackList = callbacks?.ToArray() ?? Array.Empty<ICallback>();

            foreach (var callback in callbackList)
            {
                callback.DynamicInvoke<object>("set_model", model);
                callback.DynamicInvoke<object>("set_params", new Dictionary<string, object> {
                    ["metrics"] = new[] { "loss", "testLoss" }.SelectMany(prefix => new[] {
                        prefix + nameof(Loss.GIUO),
                        prefix + nameof(Loss.Conf),
                        prefix + nameof(Loss.Prob),
                    }).ToArray(),
                });
            }

            // Logs the failure, forces a collection, and tells the caller whether the
            // number of consecutive failures has exceeded the retry budget.
            bool AllocRetriesExhausted(ResourceExhaustedError error, ref int consecutiveFailures)
            {
                consecutiveFailures++;
                Trace.TraceError(error.ToString());
                GC.Collect();
                GC.WaitForPendingFinalizers();
                return consecutiveFailures > maxConsecutiveAllocFailures;
            }

            bool isFreeze     = false;
            int  totalBatches = 0;

            foreach (int epoch in Enumerable.Range(0, totalEpochs))
            {
                // Train the first batches (more than 32 of them) with unfrozen layers to
                // initialize them; after that, freeze during the first stage and unfreeze
                // once the second stage starts.
                if (totalBatches > unfrozenWarmupBatches)
                {
                    bool shouldFreeze = epoch < firstStageEpochs;
                    if (shouldFreeze != isFreeze)
                    {
                        isFreeze = shouldFreeze;
                        SetFreeze(model, shouldFreeze);
                    }
                }

                foreach (var callback in callbackList)
                {
                    callback.on_epoch_begin(epoch);
                }

                var trainLoss   = new FinalLoss();
                int allocIssues = 0;
                foreach (var batch in dataset.Batch(batchSize: batchSize,
                                                    onloadAugmentation: ObjectDetectionDataset.RandomlyApplyAugmentations)
                         .BufferedEnumerate(bufferSize: 6))
                {
                    // TODO: https://github.com/hunglc007/tensorflow-yolov4-tflite/commit/9ab36aaa90c46aa063e3356d8e7f0e5bb27d919b
                    try {
                        var stepLoss = TrainStep(batch, model, optimizer, dataset.ClassNames.Length, dataset.Strides, bench: benchmark);
                        trainLoss += stepLoss.AsFinal();

                        // A test run processes a single batch but advances the step
                        // counters as if the whole epoch had been trained.
                        int reportSteps = testRun ? batchesPerEpoch : 1;
                        globalSteps.assign_add_dyn(reportSteps);
                        totalBatches += reportSteps;

                        UpdateLearningRate(optimizer, globalSteps, learningRateSchedule);

                        WriteLosses(optimizer, globalSteps, stepLoss);
                        summary_ops_v2.scalar("epoch", epoch, step: globalSteps);

                        // Drop the reference to the step's loss before the next batch.
                        stepLoss = default;

                        // A successful step resets the consecutive-failure counter.
                        allocIssues = 0;

                        if (testRun)
                        {
                            break;
                        }
                    } catch (ResourceExhaustedError e) {
                        // The failed batch is skipped; training continues with the next one.
                        if (AllocRetriesExhausted(e, ref allocIssues))
                        {
                            throw;
                        }
                    }
                }

                var testLoss = new FinalLoss();
                if (testSet != null)
                {
                    foreach (var batch in testSet.Batch(batchSize: batchSize, onloadAugmentation: null))
                    {
                        try {
                            testLoss += TestStep(batch, model, dataset.ClassNames.Length, dataset.Strides).AsFinal();

                            allocIssues = 0;
                            if (testRun)
                            {
                                break;
                            }
                        } catch (ResourceExhaustedError e) {
                            if (AllocRetriesExhausted(e, ref allocIssues))
                            {
                                throw;
                            }
                        }
                    }
                }

                foreach (var callback in callbackList)
                {
                    // Each callback receives its own dictionary instance.
                    var logs = new Dictionary<string, object?>();
                    (trainLoss / batchesPerEpoch).Write(logs, "loss");
                    if (testSet != null)
                    {
                        // testLoss is accumulated once per batch, so it is averaged over the
                        // number of batches, the same way trainLoss is.
                        (testLoss / testSet.BatchCount(batchSize)).Write(logs, "testLoss");
                    }
                    callback.on_epoch_end(epoch, logs: logs);
                }

                if (benchmark && epoch == 1)
                {
                    return;
                }
            }
        }