/// <summary>
/// Trains <paramref name="model"/> through Keras <c>fit_generator</c>.
/// The model is wrapped into a new <c>Model</c> whose inputs are the original input
/// followed by ground-truth label and box inputs, and whose output is the tensor
/// returned by <c>YoloLossEndpoint</c>. The wrapper is compiled with <c>ZeroLoss</c>.
/// </summary>
/// <param name="model">Model whose <c>outputs</c> are fed to the loss endpoint.</param>
/// <param name="optimizer">Optimizer passed to <c>compile</c> for both stages.</param>
/// <param name="dataset">Source of training batches (augmented on load).</param>
/// <param name="firstStageEpochs">
/// Number of epochs run between <c>SetFreeze(…, true)</c> and <c>SetFreeze(…, false)</c>.
/// The first stage is skipped entirely when this is not positive.
/// </param>
/// <param name="secondStageEpochs">Number of epochs run after the first stage.</param>
/// <param name="callbacks">Optional Keras callbacks; converted with <c>ToPyList</c>.</param>
/// <param name="batchSize">Batch size requested from <paramref name="dataset"/>.</param>
public static void TrainGenerator(Model model, IOptimizer optimizer, ObjectDetectionDataset dataset,
                                  int firstStageEpochs = 20, int secondStageEpochs = 30,
                                  IEnumerable<ICallback>? callbacks = null,
                                  int batchSize = 2) {
    IEnumerable<ICallback>? pyCallbacks = callbacks?.ToPyList();

    // NOTE(review): the shapes below are hard-coded (416, 3, 85, 150, MS_COCO class count,
    // YOLOv4 strides) rather than derived from `dataset` - confirm they match the
    // dataset configuration used by callers.
    int[] strides = YOLOv4.Strides.ToArray();
    int inputSize = 416;
    int classCount = MS_COCO.ClassCount;

    // one ground-truth label input per stride
    var trueLabels = new Tensor<float>[strides.Length];
    for (int scale = 0; scale < strides.Length; scale++) {
        int gridSize = inputSize / strides[scale];
        trueLabels[scale] = (Tensor<float>)tf.keras.Input(
            new TensorShape(gridSize, gridSize, 3, 85),
            name: $"label{scale}");
    }

    // three ground-truth box inputs
    var trueBoxes = new Tensor<float>[3];
    for (int scale = 0; scale < trueBoxes.Length; scale++) {
        trueBoxes[scale] = (Tensor<float>)tf.keras.Input(
            new TensorShape(150, 4),
            name: $"box{scale}");
    }

    var lossEndpoint = new YoloLossEndpoint(trueLabels: trueLabels,
                                            trueBoxes: trueBoxes,
                                            strides: strides,
                                            classCount: classCount);
    Tensor loss = lossEndpoint.__call__(model.outputs);

    // input order: original model input, then labels, then boxes
    var wrapperInputs = trueLabels
        .Concat(trueBoxes)
        .Prepend((Tensor<float>)model.input_dyn);
    var trainable = new Model(new { inputs = wrapperInputs, outputs = loss }.AsKwArgs());

    var batches = dataset.Batch(
        batchSize: batchSize,
        onloadAugmentation: ObjectDetectionDataset.RandomlyApplyAugmentations);
    var generator = ListLinq.Select(batches, batch => batch.ToGeneratorOutput())
                            .ToSequence();

    if (firstStageEpochs > 0) {
        SetFreeze(trainable, true);
        trainable.compile(new ImplicitContainer<object>(optimizer), new ZeroLoss());
        trainable.fit_generator(generator,
                                callbacks: pyCallbacks,
                                verbose: 1,
                                shuffle: false,
                                epochs: firstStageEpochs);
        SetFreeze(trainable, false);
    }

    // second stage continues the epoch numbering where the first stage stopped
    int lastEpoch = firstStageEpochs + secondStageEpochs;
    trainable.compile(new ImplicitContainer<object>(optimizer), new ZeroLoss());
    trainable.fit_generator(generator,
                            callbacks: pyCallbacks,
                            shuffle: false,
                            verbose: 1,
                            epochs: lastEpoch,
                            initial_epoch: firstStageEpochs);
}
/// <summary>
/// Configures TensorFlow from this instance's options, builds the dataset, the trainable
/// YOLOv4 model and an Adam optimizer, then trains with <c>YOLO.TrainGenerator</c>.
/// Unless <c>Benchmark</c> or <c>TestRun</c> is set, a checkpoint callback is registered
/// and the final weights are saved.
/// </summary>
/// <param name="remainingArguments">Not read by this method.</param>
/// <returns>Always <c>0</c>.</returns>
public override int Run(string[] remainingArguments) {
    // checkpoints and final weights are only written for real training runs
    bool ShouldSaveWeights() => !(this.Benchmark || this.TestRun);

    Trace.Listeners.Add(new ConsoleTraceListener(useErrorStream: true));

    tf.debugging.set_log_device_placement(this.LogDevicePlacement);

    if (this.GpuAllowGrowth) {
        dynamic sessionConfig = config_pb2.ConfigProto.CreateInstance();
        sessionConfig.gpu_options.allow_growth = true;
        tf.keras.backend.set_session(Session.NewDyn(config: sessionConfig));
    }

    if (this.TestRun) {
        // keep at most BatchSize * 3 annotations for a test run
        this.Annotations = this.Annotations.Take(this.BatchSize * 3).ToArray();
    }

    var dataset = new ObjectDetectionDataset(
        this.Annotations,
        classNames: this.ClassNames,
        strides: this.Strides,
        inputSize: this.InputSize,
        anchors: this.Anchors,
        anchorsPerScale: this.AnchorsPerScale,
        maxBBoxPerScale: this.MaxBBoxPerScale);

    var model = YOLO.CreateV4Trainable(dataset.InputSize,
                                       dataset.ClassNames.Length,
                                       dataset.Strides);

    long totalSteps = (long)(this.FirstStageEpochs + this.SecondStageEpochs)
                      * dataset.BatchCount(this.BatchSize);
    var warmupSteps = this.WarmupEpochs * dataset.BatchCount(this.BatchSize);
    var learningRateSchedule = new YOLO.LearningRateSchedule(
        totalSteps: totalSteps,
        warmupSteps: warmupSteps);

    // https://github.com/AlexeyAB/darknet/issues/1845
    var optimizer = new Adam(learning_rate: learningRateSchedule, epsilon: 0.000001);

    if (this.ModelSummary) {
        model.summary();
    }

    if (this.WeightsPath != null) {
        model.load_weights(this.WeightsPath);
    }

    var callbacks = new List<ICallback>();
    callbacks.Add(new LearningRateLogger());
    callbacks.Add(new TensorBoard(log_dir: this.LogDir,
                                  batch_size: this.BatchSize,
                                  profile_batch: 4));
    if (ShouldSaveWeights()) {
        callbacks.Add(new ModelCheckpoint("yoloV4.weights.{epoch:02d}", save_weights_only: true));
    }

    YOLO.TrainGenerator(model, optimizer, dataset,
                        batchSize: this.BatchSize,
                        firstStageEpochs: this.FirstStageEpochs,
                        secondStageEpochs: this.SecondStageEpochs,
                        callbacks: callbacks);

    if (ShouldSaveWeights()) {
        model.save_weights("yoloV4.weights-trained");
    }

    // the following does not work due to the need to name layers properly
    // https://stackoverflow.com/questions/61402903/unable-to-create-group-name-already-exists
    // model.save("yoloV4-trained");
    return 0;
}
/// <summary>
/// Trains <paramref name="model"/> with an explicit epoch/batch loop built on
/// <c>TrainStep</c>, optionally evaluating <paramref name="testSet"/> with <c>TestStep</c>
/// after every epoch and notifying <paramref name="callbacks"/> at epoch boundaries.
/// </summary>
/// <param name="model">Model to train; also handed to every callback via <c>set_model</c>.</param>
/// <param name="optimizer">Optimizer used by <c>TrainStep</c>; its learning rate is updated after every step.</param>
/// <param name="dataset">Training data; also supplies the class count and strides for both train and test steps.</param>
/// <param name="testSet">Optional evaluation data, batched without augmentation.</param>
/// <param name="callbacks">
/// Optional callbacks. The sequence is enumerated exactly once, so the same instances that
/// receive <c>set_model</c>/<c>set_params</c> also receive the epoch notifications.
/// </param>
/// <param name="batchSize">Batch size for both training and evaluation.</param>
/// <param name="warmupEpochs">Number of epochs converted to warm-up steps for the learning rate schedule.</param>
/// <param name="firstStageEpochs">Epochs during which the model may be frozen via <c>SetFreeze</c> (see the note in the loop).</param>
/// <param name="secondStageEpochs">Epochs trained after the first stage.</param>
/// <param name="initialLearningRate">Forwarded to <c>YOLO.LearningRateSchedule</c>.</param>
/// <param name="finalLearningRate">Forwarded to <c>YOLO.LearningRateSchedule</c>.</param>
/// <param name="testRun">
/// When <c>true</c>, each epoch stops after the first batch that completes successfully,
/// and the step counters advance by a whole epoch's batch count for that batch.
/// </param>
/// <param name="benchmark">
/// Forwarded to <c>TrainStep</c> as <c>bench</c>; when <c>true</c> the method returns
/// after the epoch with index 1 has completed.
/// </param>
/// <exception cref="ResourceExhaustedError">
/// Rethrown once more than 10 consecutive steps have failed with it; before that,
/// the failing batch is logged and skipped.
/// </exception>
public static void Train(Model model, IOptimizer optimizer, ObjectDetectionDataset dataset,
                         ObjectDetectionDataset? testSet = null,
                         IEnumerable<ICallback>? callbacks = null,
                         int batchSize = 2, int warmupEpochs = 2,
                         int firstStageEpochs = 20, int secondStageEpochs = 30,
                         float initialLearningRate = 1e-3f, float finalLearningRate = 1e-6f,
                         bool testRun = false, bool benchmark = false) {
    // Materialize once: the callbacks are visited before training and twice per epoch.
    // Re-enumerating a lazy sequence could produce fresh instances that never
    // received set_model/set_params.
    ICallback[] callbackList = callbacks?.ToArray() ?? Array.Empty<ICallback>();

    var globalSteps = new Variable(1, dtype: tf.int64);

    var learningRateSchedule = new YOLO.LearningRateSchedule(
        totalSteps: (long)(firstStageEpochs + secondStageEpochs) * dataset.BatchCount(batchSize),
        warmupSteps: warmupEpochs * dataset.BatchCount(batchSize),
        initialLearningRate: initialLearningRate,
        finalLearningRate: finalLearningRate);

    foreach (var callback in callbackList) {
        callback.DynamicInvoke<object>("set_model", model);
        callback.DynamicInvoke<object>("set_params", new Dictionary<string, object> {
            // metric names: every loss component, prefixed with "loss" and "testLoss"
            ["metrics"] = new[] { "loss", "testLoss" }.SelectMany(prefix => new[] {
                prefix + nameof(Loss.GIUO),
                prefix + nameof(Loss.Conf),
                prefix + nameof(Loss.Prob),
            }).ToArray(),
        });
    }

    bool isFreeze = false;
    int totalBatches = 0;
    foreach (int epoch in Enumerable.Range(0, firstStageEpochs + secondStageEpochs)) {
        // let 1st batch train with unfrozen layers to initialize them
        // NOTE(review): the check runs at epoch start with a threshold of 32 batches,
        // so the whole first epoch always trains unfrozen - confirm this is intended.
        if (totalBatches > 32) {
            bool shouldFreeze = epoch < firstStageEpochs;
            if (shouldFreeze != isFreeze) {
                isFreeze = shouldFreeze;
                SetFreeze(model, shouldFreeze);
            }
        }

        foreach (var callback in callbackList) {
            callback.on_epoch_begin(epoch);
        }

        var trainLoss = new FinalLoss();
        // consecutive ResourceExhaustedError count; reset by every successful step
        int allocIssues = 0;
        foreach (var batch in dataset.Batch(batchSize: batchSize,
                                            onloadAugmentation: ObjectDetectionDataset.RandomlyApplyAugmentations)
                                     .BufferedEnumerate(bufferSize: 6)) {
            // TODO: https://github.com/hunglc007/tensorflow-yolov4-tflite/commit/9ab36aaa90c46aa063e3356d8e7f0e5bb27d919b
            try {
                var stepLoss = TrainStep(batch, model, optimizer,
                                         dataset.ClassNames.Length, dataset.Strides,
                                         bench: benchmark);
                trainLoss += stepLoss.AsFinal();

                // in a test run a single batch stands in for the whole epoch
                int reportSteps = testRun ? dataset.BatchCount(batchSize) : 1;
                globalSteps.assign_add_dyn(reportSteps);
                totalBatches += reportSteps;

                UpdateLearningRate(optimizer, globalSteps, learningRateSchedule);

                WriteLosses(optimizer, globalSteps, stepLoss);
                summary_ops_v2.scalar("epoch", epoch, step: globalSteps);

                // drop the reference to the step loss (presumably so it can be collected early)
                stepLoss = default;

                allocIssues = 0;

                if (testRun) {
                    break;
                }
            } catch (ResourceExhaustedError e) {
                // best effort: log, force a collection, and skip this batch
                allocIssues++;
                Trace.TraceError(e.ToString());
                GC.Collect();
                GC.WaitForPendingFinalizers();
                if (allocIssues > 10) {
                    throw;
                }
            }
        }

        var testLoss = new FinalLoss();
        if (testSet != null) {
            // allocIssues intentionally carries over from the training loop above
            foreach (var batch in testSet.Batch(batchSize: batchSize, onloadAugmentation: null)) {
                try {
                    testLoss += TestStep(batch, model, dataset.ClassNames.Length, dataset.Strides).AsFinal();

                    allocIssues = 0;

                    if (testRun) {
                        break;
                    }
                } catch (ResourceExhaustedError e) {
                    allocIssues++;
                    Trace.TraceError(e.ToString());
                    GC.Collect();
                    GC.WaitForPendingFinalizers();
                    if (allocIssues > 10) {
                        throw;
                    }
                }
            }
        }

        foreach (var callback in callbackList) {
            // each callback gets its own logs dictionary
            var logs = new Dictionary<string, object?>();
            (trainLoss / dataset.BatchCount(batchSize)).Write(logs, "loss");
            if (testSet != null) {
                // NOTE(review): train loss is divided by the batch count, test loss by
                // testSet.Count - verify both normalizations are intended.
                (testLoss / testSet.Count).Write(logs, "testLoss");
            }
            callback.on_epoch_end(epoch, logs: logs);
        }

        if (benchmark && epoch == 1) {
            return;
        }
    }
}