/// <summary>
/// Runs the already-trained model against the supplied testing data set.
/// </summary>
/// <param name="testingSet">Data set to predict on; validated by <c>Guard.NotNull</c>.</param>
/// <returns>Whatever <c>Prediction.Predict</c> produces for the constructed problem.</returns>
public PredictionResult Test(IArffDataSet testingSet)
        {
            Guard.NotNull(() => testingSet, testingSet);

            // Turn the data set into the problem representation the predictor consumes.
            var testingProblem = problemFactory.Construct(testingSet).GetProblem();

            // NOTE(review): the trailing 'false' presumably disables probability
            // estimates - confirm against the Prediction.Predict signature.
            return Prediction.Predict(testingProblem, trainedModel, false);
        }
Exemple #2
0
        /// <summary>
        /// Chooses the parameter-selection strategy for a training run.
        /// </summary>
        /// <remarks>
        /// When <c>header.GridSelection</c> is off, a <c>NullParameterSelection</c> wrapping the
        /// base parameters is returned. Otherwise a <c>GridParameterSelection</c> is returned whose
        /// search ranges depend on whether the kernel is linear. For the linear kernel the class
        /// weights are also computed from the data set's labels and stored on the base parameters.
        /// </remarks>
        /// <param name="header">Training configuration (kernel, SVM type, grid-selection flag).</param>
        /// <param name="dataset">Data set used to size the problem and derive class weights.</param>
        /// <returns>The selection strategy to use for training.</returns>
        public IParameterSelection Create(TrainingHeader header, IArffDataSet dataset)
        {
            Guard.NotNull(() => header, header);
            Guard.NotNull(() => dataset, dataset);

            // Base parameters shared by every strategy returned below.
            var baseParameter = new Parameter
            {
                KernelType = header.Kernel,
                CacheSize = 200,
                SvmType = header.SvmType
            };
            var trainingModel = new TrainingModel(header);

            if (!header.GridSelection)
            {
                return new NullParameterSelection(baseParameter, trainingModel);
            }

            logger.Info("Investigate LibLinear");
            if (header.Kernel == KernelType.Linear)
            {
                // The second search range collapses to whatever GetList(1, 1, 1) yields.
                var linearGamma = GetList(1, 1, 1);

                bool featuresOutnumberInstances = dataset.Header.Total > (dataset.TotalDocuments * 10);
                if (featuresOutnumberInstances)
                {
                    logger.Info("Selecting Linear features >> instances");
                    baseParameter.Shrinking = false;
                }
                else
                {
                    logger.Warn("Investigate LibLinear");
                }

                // Derive per-class weights from the labels of the constructed problem.
                var trainingProblem = problemFactory.Construct(dataset).GetProblem();
                baseParameter.Weights = WeightCalculation.GetWeights(trainingProblem.Y);
                foreach (var weightEntry in baseParameter.Weights)
                {
                    logger.Info($"Using class [{weightEntry.Key}] with weight [{weightEntry.Value}]");
                }

                var linearSearch = new GridSearchParameters(3, GetList(-1, 2, 1), linearGamma, baseParameter);
                return new GridParameterSelection(taskFactory, trainingModel, linearSearch);
            }

            // Non-linear kernels search the wider pair of ranges.
            var firstRange = GetList(-5, 15, 2);
            var secondRange = GetList(-15, 3, 2);
            var fullSearch = new GridSearchParameters(3, firstRange, secondRange, baseParameter);
            return new GridParameterSelection(taskFactory, trainingModel, fullSearch);
        }
        /// <summary>
        /// Finds parameters with the supplied selection strategy and trains on the current data set.
        /// </summary>
        /// <param name="selection">Strategy that finds the parameters and exposes the trainer.</param>
        /// <returns>
        /// A <c>TrainingResults</c> built from the trained result, the trainer's header and the data set.
        /// </returns>
        public async Task<TrainingResults> Train(IParameterSelection selection)
        {
            Guard.NotNull(() => selection, selection);

            Problem svmProblem = problemFactory.Construct(dataSet).GetProblem();

            // The search is not cancellable from here: no token is threaded in by callers.
            var parameterSearch = selection.Find(svmProblem, CancellationToken.None);
            var chosenParameters = await parameterSearch.ConfigureAwait(false);

            // Original author's note: it is reasonable to choose values between 1 and 10^15.
            // Background reading:
            //   http://stackoverflow.com/questions/19089913/data-imbalance-in-svm-using-libsvm
            //   http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf
            //   http://stats.stackexchange.com/questions/31066/what-is-the-influence-of-c-in-svms-with-linear-kernel
            log.Info("Training...");
            var trained = selection.Training.Train(svmProblem, chosenParameters);
            log.Info("Training Done.");

            return new TrainingResults(trained, selection.Training.Header, dataSet);
        }