예제 #1
0
        public void Test_GetSubset_ByRows_UsingRowIndices()
        {
            // Arrange: assemble, step by step, the table the returned subset is compared against.
            var expectedTable = new DataTable();
            expectedTable.Columns.Add(new DataColumn("Col1", typeof(string)));
            expectedTable.Columns.Add(new DataColumn("Col2", typeof(int)));
            expectedTable.Columns.Add(new DataColumn("Col3", typeof(string)));
            expectedTable.Columns.Add(new DataColumn("Col4", typeof(int)));
            expectedTable.Rows.Add(new object[] { "a1.1", 1, "b1.2", 2 });
            expectedTable.Rows.Add(new object[] { "a3.1", 5, "b3.2", 6 });

            // NOTE(review): 100 and 102 are presumably the row indices the fixture assigns to the
            // rows at positions 0 and 2 - confirm against the setup of _subject.
            var expectedRowIndices = new[] { 100, 102 };
            var expectedFrame      = new DataFrame(expectedTable, expectedRowIndices);

            // Act: request the rows at positions 0 and 2 from the frame under test.
            var requestedRows = new[] { 0, 2 };
            var actualFrame   = _subject.GetSubsetByRows(requestedRows);

            // Assert: the comparison goes through DataFrame.Equals.
            var framesMatch = expectedFrame.Equals(actualFrame);
            Assert.IsTrue(framesMatch);
        }
예제 #2
0
        /// <summary>
        /// Runs <paramref name="folds"/> train/test rounds over <paramref name="dataFrame"/> and
        /// returns one quality report per round, in the order the rounds were executed.
        /// </summary>
        /// <remarks>
        /// The row indices are shuffled with the validator's randomizer. Each round picks a
        /// training window of fixed size from the shuffled order, starting one test-set width
        /// further along than the previous round and wrapping around the end of the order, so the
        /// training set always has the requested size. Every row outside the training window is
        /// used for testing. After <c>RowCount / testDataCount</c> rounds the window position is
        /// reset and the indices are reshuffled with the same randomizer.
        /// </remarks>
        /// <param name="modelBuilder">Builds a model from the training rows of each round.</param>
        /// <param name="modelBuilderParams">Parameters handed unchanged to <paramref name="modelBuilder"/>.</param>
        /// <param name="predictor">Produces predictions for the test rows using the built model.</param>
        /// <param name="qualityMeasure">Turns expected values and predictions into a report.</param>
        /// <param name="dataFrame">The data that is split into training and test rows.</param>
        /// <param name="dependentFeatureName">
        /// Name passed to the model builder and the predictor, and the column from which the
        /// expected values of the test rows are read.
        /// </param>
        /// <param name="percetnagOfTrainData">
        /// Fraction of the rows used for training; it is multiplied by the row count and rounded.
        /// </param>
        /// <param name="folds">Number of train/test rounds to run.</param>
        /// <returns>The quality reports of all rounds.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The rounded training row count leaves no row for testing.
        /// </exception>
        public IList <IDataQualityReport <TPredictionResult> > CrossValidate(
            IPredictionModelBuilder modelBuilder,
            IModelBuilderParams modelBuilderParams,
            IPredictor <TPredictionResult> predictor,
            IDataQualityMeasure <TPredictionResult> qualityMeasure,
            IDataFrame dataFrame,
            string dependentFeatureName,
            double percetnagOfTrainData,
            int folds)
        {
            var trainingDataCount = (int)Math.Round(percetnagOfTrainData * dataFrame.RowCount);
            var testDataCount     = dataFrame.RowCount - trainingDataCount;

            // Without at least one test row the window count below would divide by zero (or go
            // negative), so fail early with a message that names the offending argument.
            if (testDataCount <= 0)
            {
                throw new ArgumentOutOfRangeException(
                    "percetnagOfTrainData",
                    percetnagOfTrainData,
                    "The training fraction must leave at least one row of the data frame for testing.");
            }

            var shuffledAllIndices = dataFrame.RowIndices.Shuffle(_randomizer);
            var maxWindowsCount    = dataFrame.RowCount / testDataCount;

            var iterationAccuracies = new List <IDataQualityReport <TPredictionResult> >();
            var currentWindowNo     = 0;

            for (var i = 0; i < folds; i++)
            {
                if (currentWindowNo == maxWindowsCount)
                {
                    // All windows of the current order are used up: start over on a new order.
                    // The randomizer is passed again so every pass draws from the same source.
                    currentWindowNo    = 0;
                    shuffledAllIndices = shuffledAllIndices.Shuffle(_randomizer);
                }

                // Snapshot the order once so all queries of this round see the same sequence,
                // even if the shuffled sequence is lazily evaluated.
                var orderedIndices = shuffledAllIndices.ToList();
                var offset         = currentWindowNo * testDataCount;

                // Wrap around the end of the order; a plain Skip/Take would return fewer than
                // trainingDataCount rows once offset + trainingDataCount exceeds the row count.
                var trainingIndices = orderedIndices
                    .Skip(offset)
                    .Concat(orderedIndices.Take(offset))
                    .Take(trainingDataCount)
                    .ToList();
                var trainingData = dataFrame.GetSubsetByRows(trainingIndices);

                // Everything that is not used for training is used for testing.
                var testIndices = orderedIndices.Except(trainingIndices).ToList();
                var testData    = dataFrame.GetSubsetByRows(testIndices);

                IPredictionModel          model       = modelBuilder.BuildModel(trainingData, dependentFeatureName, modelBuilderParams);
                IList <TPredictionResult> predictions = predictor.Predict(testData, model, dependentFeatureName);
                IList <TPredictionResult> expected    = testData.GetColumnVector <TPredictionResult>(dependentFeatureName);
                IDataQualityReport <TPredictionResult> qualityReport = qualityMeasure.GetReport(expected, predictions);
                iterationAccuracies.Add(qualityReport);
                currentWindowNo++;
            }
            return(iterationAccuracies);
        }