/// <summary>
/// Requests rows 0 and 2 from the frame under test via <c>GetSubsetByRows</c> and checks that
/// the result equals a frame built from the expected two-row table with row indices 100 and 102.
/// </summary>
public void Test_GetSubset_ByRows_UsingRowIndices()
{
    // Given: the table the subset is expected to contain.
    var expectedTable = new DataTable();
    expectedTable.Columns.Add(new DataColumn("Col1", typeof(string)));
    expectedTable.Columns.Add(new DataColumn("Col2", typeof(int)));
    expectedTable.Columns.Add(new DataColumn("Col3", typeof(string)));
    expectedTable.Columns.Add(new DataColumn("Col4", typeof(int)));
    expectedTable.Rows.Add(new object[] { "a1.1", 1, "b1.2", 2 });
    expectedTable.Rows.Add(new object[] { "a3.1", 5, "b3.2", 6 });

    var expectedRowIndices = new[] { 100, 102 };
    var expectedDataFrame = new DataFrame(expectedTable, expectedRowIndices);

    // When: the subset is requested for rows 0 and 2.
    var requestedRows = new[] { 0, 2 };
    var actualDataFrame = _subject.GetSubsetByRows(requestedRows);

    // Then: both frames compare as equal.
    bool framesMatch = expectedDataFrame.Equals(actualDataFrame);
    Assert.IsTrue(framesMatch);
}
/// <summary>
/// Runs <paramref name="folds"/> train/test rounds over <paramref name="dataFrame"/>. Each round
/// builds a model on a window of the shuffled row indices, predicts the remaining rows and
/// records the quality report produced by <paramref name="qualityMeasure"/>.
/// </summary>
/// <param name="modelBuilder">Builds a model from the training subset.</param>
/// <param name="modelBuilderParams">Parameters forwarded to <paramref name="modelBuilder"/>.</param>
/// <param name="predictor">Produces predictions for the test subset using the built model.</param>
/// <param name="qualityMeasure">Compares expected values with predictions and yields a report.</param>
/// <param name="dataFrame">Source of all rows; split into training and test subsets per round.</param>
/// <param name="dependentFeatureName">Name of the column holding the value to predict.</param>
/// <param name="percetnagOfTrainData">
/// Fraction of the rows used for training; the training row count is this value multiplied by
/// the row count, rounded. The remaining rows form the test subset.
/// </param>
/// <param name="folds">Number of rounds to run; one report is produced per round.</param>
/// <returns>The quality reports, one per round, in the order the rounds were executed.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when the computed training row count leaves no rows for testing.
/// </exception>
public IList<IDataQualityReport<TPredictionResult>> CrossValidate(
    IPredictionModelBuilder modelBuilder,
    IModelBuilderParams modelBuilderParams,
    IPredictor<TPredictionResult> predictor,
    IDataQualityMeasure<TPredictionResult> qualityMeasure,
    IDataFrame dataFrame,
    string dependentFeatureName,
    double percetnagOfTrainData,
    int folds)
{
    var trainingDataCount = (int)Math.Round(percetnagOfTrainData * dataFrame.RowCount);
    var testDataCount = dataFrame.RowCount - trainingDataCount;

    // Without test rows the window count below would divide by zero (or go negative),
    // so fail early with an explicit argument error instead.
    if (testDataCount <= 0)
    {
        throw new ArgumentOutOfRangeException(
            "percetnagOfTrainData",
            percetnagOfTrainData,
            "The training share must leave at least one row for testing.");
    }

    var shuffledAllIndices = dataFrame.RowIndices.Shuffle(_randomizer);

    // Number of windows that can be visited before the offsets would repeat.
    var maxWindowsCount = dataFrame.RowCount / testDataCount;
    var iterationAccuracies = new List<IDataQualityReport<TPredictionResult>>();
    var currentWindowNo = 0;

    for (var i = 0; i < folds; i++)
    {
        if (currentWindowNo == maxWindowsCount)
        {
            // All windows of the current ordering were used: start over on a new ordering.
            // The same randomizer as for the initial shuffle is used so both shuffles
            // draw from one source.
            currentWindowNo = 0;
            shuffledAllIndices = shuffledAllIndices.Shuffle(_randomizer);
        }

        var offset = currentWindowNo * testDataCount;

        // The training window starts at 'offset' and wraps around to the beginning of the
        // list, so it always holds 'trainingDataCount' rows; a plain Skip/Take would run off
        // the end on later windows and silently shrink the training set.
        var trainingIndices = shuffledAllIndices
            .Skip(offset)
            .Concat(shuffledAllIndices.Take(offset))
            .Take(trainingDataCount)
            .ToList();
        var trainingData = dataFrame.GetSubsetByRows(trainingIndices);

        // Everything that is not used for training is used for testing.
        var testIndices = shuffledAllIndices.Except(trainingIndices).ToList();
        var testData = dataFrame.GetSubsetByRows(testIndices);

        IPredictionModel model = modelBuilder.BuildModel(trainingData, dependentFeatureName, modelBuilderParams);
        IList<TPredictionResult> predictions = predictor.Predict(testData, model, dependentFeatureName);
        IList<TPredictionResult> expected = testData.GetColumnVector<TPredictionResult>(dependentFeatureName);
        IDataQualityReport<TPredictionResult> qualityReport = qualityMeasure.GetReport(expected, predictions);
        iterationAccuracies.Add(qualityReport);

        currentWindowNo++;
    }

    return iterationAccuracies;
}