/// <summary>
/// Partitions the table's row indices into <paramref name="k"/> equally sized folds and
/// yields one (training, validation) pair of tables per fold.
/// </summary>
/// <param name="k">Number of folds to produce; must be at least 1.</param>
/// <param name="randomSeed">Seed passed to the shuffle; only used when <paramref name="shuffle"/> is true.</param>
/// <param name="shuffle">True to shuffle the row indices before they are partitioned.</param>
/// <returns>
/// A lazily evaluated sequence of <paramref name="k"/> pairs. The rows are read (and, if
/// requested, shuffled) again every time the sequence is enumerated.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is less than 1.</exception>
/// <remarks>
/// The fold size is <c>RowCount / k</c> using integer division. Rows beyond
/// <c>k * foldSize</c> are therefore never part of a validation table and appear in every
/// training table. When <paramref name="k"/> exceeds the row count the fold size is zero,
/// so every validation table is empty.
/// </remarks>
public IEnumerable<(IDataTable Training, IDataTable Validation)> Fold(int k, int? randomSeed = null, bool shuffle = true) {
    // Validate here rather than inside the iterator: an iterator body does not run until the
    // first MoveNext, which would defer the failure (previously a DivideByZeroException for
    // k == 0, or a silently empty sequence for k < 0) far away from the faulty call.
    if (k < 1) {
        throw new ArgumentOutOfRangeException(nameof(k), k, "The number of folds must be at least 1.");
    }

    return Iterate();

    IEnumerable<(IDataTable Training, IDataTable Validation)> Iterate() {
        var input = Enumerable.Range(0, RowCount);
        if (shuffle) {
            input = input.Shuffle(randomSeed);
        }

        // Materialise once so that every fold slices the same ordering.
        var final = input.ToList();
        var foldSize = final.Count / k;

        for (var i = 0; i < k; i++) {
            var foldStart = i * foldSize;
            var foldEnd = (i + 1) * foldSize;

            // Training rows are everything before and after the current validation window.
            var trainingRows = final.Take(foldStart).Concat(final.Skip(foldEnd));
            var validationRows = final.Skip(foldStart).Take(foldSize);

            // Fold tables are built without an output stream.
            var trainingWriter = new DataTableWriter(Columns, null);
            WriteRows(trainingWriter, trainingRows);

            var validationWriter = new DataTableWriter(Columns, null);
            WriteRows(validationWriter, validationRows);

            yield return (trainingWriter.GetDataTable(), validationWriter.GetDataTable());
        }
    }

    // Feeds the rows at the given indices into the writer.
    void WriteRows(DataTableWriter writer, IEnumerable<int> rowIndices) {
        foreach (var row in GetRows(rowIndices)) {
            writer.Process(row);
        }
    }
}
/// <summary>
/// Divides the table's rows into two tables: a training table holding the leading portion
/// of the (optionally shuffled) row ordering and a test table holding the rest.
/// </summary>
/// <param name="randomSeed">Seed passed to the shuffle; only used when <paramref name="shuffle"/> is true.</param>
/// <param name="trainPercentage">
/// Fraction of rows assigned to the training table. The training row count is
/// <c>Convert.ToInt32(RowCount * trainPercentage)</c>.
/// </param>
/// <param name="shuffle">True to shuffle the row indices before splitting.</param>
/// <param name="output1">Stream handed to the writer of the training table (may be null).</param>
/// <param name="output2">Stream handed to the writer of the test table (may be null).</param>
/// <returns>The training table and the test table, in that order.</returns>
public (IDataTable Training, IDataTable Test) Split(int? randomSeed = null, double trainPercentage = 0.8, bool shuffle = true, Stream output1 = null, Stream output2 = null) {
    IEnumerable<int> rowIndices = Enumerable.Range(0, RowCount);
    if (shuffle) {
        rowIndices = rowIndices.Shuffle(randomSeed);
    }

    // Freeze the ordering so both halves are cut from the same sequence.
    List<int> ordered = rowIndices.ToList();
    int trainingCount = Convert.ToInt32(RowCount * trainPercentage);

    var trainingWriter = new DataTableWriter(Columns, output1);
    CopyRows(trainingWriter, ordered.Take(trainingCount));

    var testWriter = new DataTableWriter(Columns, output2);
    CopyRows(testWriter, ordered.Skip(trainingCount));

    return (trainingWriter.GetDataTable(), testWriter.GetDataTable());

    // Feeds the rows at the given indices into the writer.
    void CopyRows(DataTableWriter target, IEnumerable<int> indices) {
        foreach (var row in GetRows(indices)) {
            target.Process(row);
        }
    }
}
/// <summary>
/// Builds a new table from row indices chosen by the <c>Bag</c> extension method applied to
/// the full list of row indices.
/// </summary>
/// <param name="count">Number of rows requested from the sampler; defaults to the table's row count when null.</param>
/// <param name="output">Stream handed to the table writer (may be null).</param>
/// <param name="randomSeed">Seed passed through to the sampler.</param>
/// <returns>The table produced by the writer after processing the selected rows.</returns>
/// <remarks>
/// NOTE(review): the sampling semantics (presumably sampling with replacement) are defined
/// by the <c>Bag</c> extension, which is not visible here - confirm against its implementation.
/// </remarks>
public IDataTable Bag(int? count = null, Stream output = null, int? randomSeed = null) {
    var allIndices = Enumerable.Range(0, RowCount).ToList();
    var sampleSize = count ?? RowCount;
    var selected = allIndices.Bag(sampleSize, randomSeed);

    var tableWriter = new DataTableWriter(Columns, output);
    foreach (var selectedRow in GetRows(selected)) {
        tableWriter.Process(selectedRow);
    }

    return tableWriter.GetDataTable();
}