/// <summary>
/// Verifies that a training/test split of a dataset satisfies the invariants expected of it.
/// </summary>
/// <remarks>
/// The training and test sets are always required to be disjoint. The remaining checks
/// (completeness of the split, absence of cold items, absence of cold users) are applied
/// only when the supplied flags indicate that the corresponding invariant is expected to hold.
/// </remarks>
/// <param name="wholeDataset">The original dataset, before splitting.</param>
/// <param name="trainingDataset">The portion of <paramref name="wholeDataset"/> assigned to training.</param>
/// <param name="testDataset">The portion of <paramref name="wholeDataset"/> assigned to testing.</param>
/// <param name="hasColdUsers">Whether the test set is expected to contain cold users.</param>
/// <param name="hasColdItems">Whether the test set is expected to contain cold items.</param>
/// <param name="hasIgnoredUsers">Whether some users were ignored during the split.</param>
/// <param name="hasIgnoredItems">Whether some items were ignored during the split.</param>
/// <param name="areOccasionalColdItemsRemoved">Whether occasionally produced cold items were removed from the test set.</param>
private static void CheckDatasetSplitCorrectness(
    Dataset wholeDataset,
    Dataset trainingDataset,
    Dataset testDataset,
    bool hasColdUsers,
    bool hasColdItems,
    bool hasIgnoredUsers,
    bool hasIgnoredItems,
    bool areOccasionalColdItemsRemoved)
{
    // Invariant 1: no observation may appear in both the training and the test set.
    var sharedObservations = trainingDataset.Intersect(testDataset);
    Assert.Empty(sharedObservations);

    // Invariant 2: when nothing was ignored or removed, training and test together
    // must account for every observation of the whole dataset.
    bool someDataWasDropped = hasIgnoredUsers || hasIgnoredItems || areOccasionalColdItemsRemoved;
    if (!someDataWasDropped)
    {
        var notInTraining = wholeDataset.Except(trainingDataset);
        var unaccountedObservations = notInTraining.Except(testDataset);
        Assert.Empty(unaccountedObservations);
    }

    // Invariant 3: when cold items are neither requested nor left in by accident,
    // every item in the test set must also occur in the training set.
    bool coldItemsForbidden = areOccasionalColdItemsRemoved && !hasColdItems;
    if (coldItemsForbidden)
    {
        var testItems = testDataset.Select(observation => observation.Item2);
        var trainingItems = trainingDataset.Select(observation => observation.Item2);
        var testOnlyItems = testItems.Except(trainingItems);
        Assert.Empty(testOnlyItems);
    }

    // Invariant 4: when cold users are not requested, every user in the test set
    // must also occur in the training set.
    if (hasColdUsers == false)
    {
        var testUsers = testDataset.Select(observation => observation.Item1);
        var trainingUsers = trainingDataset.Select(observation => observation.Item1);
        var testOnlyUsers = testUsers.Except(trainingUsers);
        Assert.Empty(testOnlyUsers);
    }
}