Esempio n. 1
0
        /// <summary>
        /// Produces <paramref name="k"/> training/validation pairs of tables built from this table's rows,
        /// for use in k-fold cross validation.
        /// </summary>
        /// <param name="k">Number of folds to produce; must be greater than zero.</param>
        /// <param name="randomSeed">Seed forwarded to the Shuffle extension; only used when <paramref name="shuffle"/> is true.</param>
        /// <param name="shuffle">When true, the row indices are passed through Shuffle before being partitioned.</param>
        /// <returns>
        /// A lazily evaluated sequence of <paramref name="k"/> (Training, Validation) pairs. The tables are
        /// built while the sequence is enumerated, so enumerating twice repeats all of the work
        /// (including the shuffle).
        /// </returns>
        /// <exception cref="System.ArgumentOutOfRangeException">Thrown immediately when <paramref name="k"/> is less than one.</exception>
        /// <remarks>
        /// Every validation fold holds RowCount / k rows (integer division). When RowCount is not evenly
        /// divisible by k, the trailing RowCount % k indices (after any shuffling) are part of every
        /// training table and never part of a validation table. When k exceeds RowCount the fold size is
        /// zero, so every validation table is empty.
        /// </remarks>
        public IEnumerable<(IDataTable Training, IDataTable Validation)> Fold(int k, int? randomSeed = null, bool shuffle = true)
        {
            // Validate up front. Inside an iterator body this check would be deferred until the first
            // MoveNext, and k == 0 would surface as a DivideByZeroException far from the call site.
            if (k <= 0)
            {
                throw new System.ArgumentOutOfRangeException(nameof(k), k, "The number of folds must be greater than zero.");
            }

            return BuildFolds();

            // Local iterator: keeps the original deferred-execution behaviour for the actual fold building.
            IEnumerable<(IDataTable Training, IDataTable Validation)> BuildFolds()
            {
                var input = Enumerable.Range(0, RowCount);

                if (shuffle)
                {
                    input = input.Shuffle(randomSeed);
                }

                // Materialise once so that every fold slices the same ordering.
                var final    = input.ToList();
                var foldSize = final.Count / k;

                for (var i = 0; i < k; i++)
                {
                    var foldStart = i * foldSize;
                    var foldEnd   = foldStart + foldSize;

                    // Training = everything before the fold plus everything after it.
                    var trainingRows   = final.Take(foldStart).Concat(final.Skip(foldEnd));
                    var validationRows = final.Skip(foldStart).Take(foldSize);

                    var trainingWriter = new DataTableWriter(Columns, null);
                    foreach (var row in GetRows(trainingRows))
                    {
                        trainingWriter.Process(row);
                    }

                    var validationWriter = new DataTableWriter(Columns, null);
                    foreach (var row in GetRows(validationRows))
                    {
                        validationWriter.Process(row);
                    }

                    yield return (trainingWriter.GetDataTable(), validationWriter.GetDataTable());
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Divides this table's rows into a training table and a test table.
        /// </summary>
        /// <param name="randomSeed">Seed forwarded to the Shuffle extension; only used when <paramref name="shuffle"/> is true.</param>
        /// <param name="trainPercentage">
        /// Fraction of RowCount assigned to the training table. The row count is obtained with
        /// Convert.ToInt32, which rounds to the nearest integer rather than truncating.
        /// </param>
        /// <param name="shuffle">When true, the row indices are passed through Shuffle before being divided.</param>
        /// <param name="output1">Stream handed to the DataTableWriter that builds the training table (may be null).</param>
        /// <param name="output2">Stream handed to the DataTableWriter that builds the test table (may be null).</param>
        /// <returns>The training table followed by the test table.</returns>
        public (IDataTable Training, IDataTable Test) Split(int? randomSeed = null, double trainPercentage = 0.8, bool shuffle = true, Stream output1 = null, Stream output2 = null)
        {
            var rowIndices = Enumerable.Range(0, RowCount);
            var ordering   = (shuffle ? rowIndices.Shuffle(randomSeed) : rowIndices).ToList();

            // The first trainingSize indices feed the training table; whatever remains feeds the test table.
            int trainingSize    = Convert.ToInt32(RowCount * trainPercentage);
            var trainingIndices = ordering.Take(trainingSize);
            var testIndices     = ordering.Skip(trainingSize);

            var trainingWriter = new DataTableWriter(Columns, output1);
            foreach (var trainingRow in GetRows(trainingIndices))
            {
                trainingWriter.Process(trainingRow);
            }

            var testWriter = new DataTableWriter(Columns, output2);
            foreach (var testRow in GetRows(testIndices))
            {
                testWriter.Process(testRow);
            }

            var trainingTable = trainingWriter.GetDataTable();
            var testTable     = testWriter.GetDataTable();
            return (trainingTable, testTable);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a new table from row indices chosen by the Bag extension method
        /// (presumably sampling with replacement - confirm against that extension).
        /// </summary>
        /// <param name="count">Number of rows requested from Bag; defaults to RowCount when null.</param>
        /// <param name="output">Stream handed to the DataTableWriter that builds the result (may be null).</param>
        /// <param name="randomSeed">Seed forwarded to the Bag extension.</param>
        /// <returns>The table produced by the writer after every selected row has been processed.</returns>
        public IDataTable Bag(int? count = null, Stream output = null, int? randomSeed = null)
        {
            var allIndices = Enumerable.Range(0, RowCount).ToList();
            var sampleSize = count.HasValue ? count.Value : RowCount;
            var selected   = allIndices.Bag(sampleSize, randomSeed);

            var tableWriter = new DataTableWriter(Columns, output);
            foreach (var selectedRow in GetRows(selected))
            {
                tableWriter.Process(selectedRow);
            }

            return tableWriter.GetDataTable();
        }