/// <summary>
/// Splits <paramref name="dataToSplit"/> into one subset per distinct value found in the
/// column named by <c>splttingParams.SplitOnFeature</c>.
/// </summary>
/// <param name="dataToSplit">Data frame whose rows are partitioned.</param>
/// <param name="splttingParams">Supplies the feature (column) to split on.</param>
/// <returns>
/// A list holding, for every distinct value of the split feature, the subset returned by the
/// query built for that value together with a <c>DecisionLink</c> carrying the instance
/// percentage, the subset row count and the value itself.
/// </returns>
public IList<ISplittedData> SplitData(IDataFrame dataToSplit, ISplittingParams splttingParams)
{
    var featureName = splttingParams.SplitOnFeature;
    var allRowsCount = dataToSplit.RowCount;
    var distinctFeatureValues = dataToSplit.GetColumnVector(featureName).Distinct();
    var partitions = new List<ISplittedData>();

    // TODO: AAA embarrassingly parallel - test it for performance
    foreach (var featureValue in distinctFeatureValues)
    {
        // NOTE(review): BuildQuery presumably selects the rows where the feature equals
        // featureValue - confirm against its definition.
        var subset = dataToSplit.GetSubsetByQuery(BuildQuery(featureName, featureValue));
        var subsetRows = subset.RowCount;
        var instancesPercentage = CalcInstancesPercentage(allRowsCount, subsetRows);

        var decisionLink = new DecisionLink(instancesPercentage, subsetRows, featureValue);
        partitions.Add(new SplittedData(decisionLink, subset));
    }

    return partitions;
}
/// <summary>
/// Produces one <c>SplittedData</c> entry for each distinct value in the column identified by
/// <c>splttingParams.SplitOnFeature</c>.
/// </summary>
/// <param name="dataToSplit">Source data frame; its total row count is passed to the percentage calculation.</param>
/// <param name="splttingParams">Parameters object from which the split feature is read.</param>
/// <returns>
/// The collected entries, in the order the distinct values are enumerated. Each entry pairs a
/// <c>DecisionLink</c> (instance percentage, subset row count, feature value) with the subset
/// obtained from <c>GetSubsetByQuery</c>.
/// </returns>
public IList<ISplittedData> SplitData(IDataFrame dataToSplit, ISplittingParams splttingParams)
{
    var feature = splttingParams.SplitOnFeature;
    var rowsInFrame = dataToSplit.RowCount;
    var valuesToSplitOn = dataToSplit.GetColumnVector(feature).Distinct();

    var result = new List<ISplittedData>();

    // TODO: AAA embarrassingly parallel - test it for performance
    foreach (var value in valuesToSplitOn)
    {
        var valueQuery = BuildQuery(feature, value);
        var matchingRows = dataToSplit.GetSubsetByQuery(valueQuery);

        // Read the row count once so the percentage and the link use the same number.
        var matchingCount = matchingRows.RowCount;

        result.Add(
            new SplittedData(
                new DecisionLink(
                    CalcInstancesPercentage(rowsInFrame, matchingCount),
                    matchingCount,
                    value),
                matchingRows));
    }

    return result;
}