Exemplo n.º 1
0
        RunSweep(
            CrowdDataWithText[] trainingDataSets,
            CrowdDataWithText validationDataSet,
            ModelBase model,
            CorpusInformation corpusInformation,
            Func<CrowdDataMapping, ModelBase, ModelRunnerBase, ModelRunnerBase> runnerCreator,
            Func<ModelRunnerBase, Dictionary<string, object>> trainingResultGetter,
            Func<ModelRunnerBase, Dictionary<string, object>, Dictionary<string, object>> validationResultGetter,
            ExperimentParameters experimentParameters,
            CrowdDataWithText validationDataSetNoLabels = null,
            List<ModelRunnerBase> previousTrainingRunners = null,
            int numIterations = 200,
            Dictionary<string, Dictionary<string, object>> resultStorage = null)
        {
            // Runs one train-then-validate cycle per entry of trainingDataSets and collects the results.
            //
            // For each training set (index i) this method:
            //   1. builds a mapping and a runner via runnerCreator, passing previousTrainingRunners[i]
            //      (or null when no previous runners were supplied) as the third argument;
            //   2. runs the model for numIterations and records training results, errors and worker metrics;
            //   3. builds a validation runner from the training runner, runs it for a fixed, small number
            //      of iterations, prints the true-label posteriors and records the validation results;
            //   4. optionally repeats the validation on validationDataSetNoLabels.
            //
            // trainingResultGetter is optional (null yields an empty result dictionary).
            // validationResultGetter is required whenever there is at least one training set.
            // When previousTrainingRunners is non-null it is indexed by i, so it must hold at least
            // trainingDataSets.Length entries or the list indexer will throw.
            //
            // Returns a tuple of (results keyed by "TrainingPercent_<n>", the training runners in sweep order).
            // Results are written into resultStorage when it is supplied, otherwise into a new dictionary.
            if (trainingDataSets == null)
            {
                throw new ArgumentNullException(nameof(trainingDataSets));
            }

            // Validate the remaining required arguments up front so that a missing one is reported
            // before the first (potentially long) training run rather than part-way through the sweep.
            // An empty sweep never touched these arguments, so it is still allowed to pass them as null.
            if (trainingDataSets.Length > 0)
            {
                if (model == null)
                {
                    throw new ArgumentNullException(nameof(model));
                }

                if (runnerCreator == null)
                {
                    throw new ArgumentNullException(nameof(runnerCreator));
                }

                if (validationResultGetter == null)
                {
                    throw new ArgumentNullException(nameof(validationResultGetter));
                }

                if (experimentParameters == null)
                {
                    throw new ArgumentNullException(nameof(experimentParameters));
                }
            }

            var allResultsForThisModel = resultStorage ?? new Dictionary<string, Dictionary<string, object>>();
            var trainingRunners = new List<ModelRunnerBase>();
            const int NumIterationsForValidation = 5;

            for (var i = 0; i < trainingDataSets.Length; i++)
            {
                // The name ends in "_<index>". An earlier note here said the chart code expects a dash
                // before the number, but the separator actually emitted is an underscore.
                // NOTE(review): confirm which separator the chart code relies on.
                var currentModelName = $"{model.Name}_{i}";

                var trainingData = trainingDataSets[i];
                var trainingMapping = new CrowdDataWithTextMapping(
                    trainingData,
                    LabelValuesToString,
                    corpusInformation);

                var trainingRunner = runnerCreator.Invoke(trainingMapping, model, previousTrainingRunners?[i]);
                trainingRunners.Add(trainingRunner);

                // Restart the random number generator from the configured seed before every run.
                Rand.Restart(experimentParameters.RandomSeed);
                RunModel(
                    trainingRunner,
                    currentModelName + "_Training",
                    numIterations,
                    experimentParameters.UseGoldLabelsInTraining);

                var trainingResults = trainingResultGetter?.Invoke(trainingRunner) ?? new Dictionary<string, object>();
                trainingResults[ErrorsKey] = trainingRunner.GetErrors();
                trainingResults[WorkerMetricsKey] = GetWorkerMetrics(
                    trainingRunner.DataMapping.Data,
                    trainingRunner,
                    experimentParameters.MaximumNumberWorkers);

                var currentResults = new Dictionary<string, object>
                {
                    [TrainingKey] = trainingResults
                };

                // A fresh validation mapping is built for every training set.
                var validationMapping = new CrowdDataWithTextMapping(
                    validationDataSet,
                    LabelValuesToString,
                    corpusInformation);

                // The freshly trained runner is handed to the creator as the "previous" runner.
                var validationRunner = runnerCreator.Invoke(validationMapping, model, trainingRunner);
                Rand.Restart(experimentParameters.RandomSeed);
                var validationMetrics = RunModel(
                    validationRunner,
                    currentModelName + "_Validation",
                    NumIterationsForValidation,
                    false);

                // Writes every true-label posterior to standard output.
                foreach (var prediction in validationRunner.Posteriors.TrueLabel)
                {
                    Console.WriteLine(prediction);
                }

                var validationResults = validationResultGetter.Invoke(validationRunner, validationMetrics);
                validationResults[ErrorsKey] = validationRunner.GetErrors();
                validationResults[WorkerMetricsKey] = GetWorkerMetrics(
                    validationRunner.DataMapping.Data,
                    validationRunner,
                    experimentParameters.MaximumNumberWorkers);
                currentResults[ValidationKey] = validationResults;

                if (validationDataSetNoLabels != null)
                {
                    var validationMappingNoLabels = new CrowdDataWithTextMapping(
                        validationDataSetNoLabels,
                        LabelValuesToString,
                        corpusInformation);

                    var validationNoLabelsRunner =
                        runnerCreator.Invoke(validationMappingNoLabels, model, trainingRunner);
                    Rand.Restart(experimentParameters.RandomSeed);
                    var validationMetricsNoLabels = RunModel(
                        validationNoLabelsRunner,
                        currentModelName + "_ValidationNoLabels",
                        NumIterationsForValidation,
                        false);

                    // Unlike the labelled run, no errors or worker metrics are attached here.
                    currentResults[ValidationNoLabelsKey] =
                        validationResultGetter.Invoke(validationNoLabelsRunner, validationMetricsNoLabels);
                }

                // Integer percentage of the sweep completed after this training set.
                // NOTE(review): with more than 100 training sets the integer division maps several
                // indices to the same key, so later results overwrite earlier ones.
                var trainingPercent = Math.Min((i + 1) * 100 / trainingDataSets.Length, 100);
                allResultsForThisModel[$"TrainingPercent_{trainingPercent}"] = currentResults;
            }

            return (allResultsForThisModel, trainingRunners);
        }