/// <summary> /// Split the dataset into the train set and test set according to the given fraction. /// Respects the <paramref name="stratificationColumn"/> if provided. /// </summary> /// <typeparam name="T">The tuple describing the data schema.</typeparam> /// <param name="catalog">The training catalog.</param> /// <param name="data">The dataset to split.</param> /// <param name="testFraction">The fraction of data to go into the test set.</param> /// <param name="stratificationColumn">Optional selector for the column to use as a stratification column. If two examples share the same value of the <paramref name="stratificationColumn"/> /// (if provided), they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from train to the test set. /// If this optional parameter is not provided, a stratification column will be generated, and its values will be random numbers.</param> /// <param name="seed">Optional parameter used in combination with the <paramref name="stratificationColumn"/>. /// If the <paramref name="stratificationColumn"/> is not provided, the random numbers generated to create it will use this seed. /// If the seed is not provided either, the default value will be used.</param> /// <returns>A pair of datasets, for the train and test set.</returns> public static (DataView <T> trainSet, DataView <T> testSet) TrainTestSplit <T>(this TrainCatalogBase catalog, DataView <T> data, double testFraction = 0.1, Func <T, PipelineColumn> stratificationColumn = null, uint?seed = null) {
/// <summary>
/// Returns the host environment exposed by the given training catalog.
/// </summary>
/// <param name="catalog">The training catalog to read the environment from. It is passed through
/// <c>Contracts.CheckRef</c> before its <c>Environment</c> member is accessed.</param>
/// <returns>The value of the <c>Environment</c> member of the checked <paramref name="catalog"/>.</returns>
public static IHostEnvironment GetEnvironment(TrainCatalogBase catalog)
{
    // Validate the reference first; the check hands back the instance to dereference.
    var checkedCatalog = Contracts.CheckRef(catalog, nameof(catalog));
    return checkedCatalog.Environment;
}