/// <summary>
/// Splits <paramref name="dataToSplit"/> into one subset per query produced by
/// <c>BuildQueries</c> for the requested split feature and split value.
/// </summary>
/// <param name="dataToSplit">Data frame to partition.</param>
/// <param name="splttingParams">Supplies the feature and the value to split on.</param>
/// <returns>
/// One entry per query, in enumeration order; each pairs the subset with the link
/// returned by <c>GetSubsetLink</c> for that subset.
/// </returns>
public IList<ISplittedData> SplitData(IDataFrame dataToSplit, IBinarySplittingParams splttingParams)
{
    var subsetQueries = BuildQueries(splttingParams.SplitOnFeature, splttingParams.SplitOnValue);
    IList<ISplittedData> partitions = new List<ISplittedData>();

    // Row count of the unsplit frame, read once and handed to GetSubsetLink as a double.
    var rowTotal = (double)dataToSplit.RowCount;

    foreach (var entry in subsetQueries)
    {
        // entry.Value is the query selecting the rows; entry.Key is forwarded to the link.
        var subset = dataToSplit.GetSubsetByQuery(entry.Value);
        var link = GetSubsetLink(subset, rowTotal, entry.Key);
        partitions.Add(new SplittedData(link, subset));
    }

    return partitions;
}
/// <summary>
/// Partitions a data frame using the queries that <c>BuildQueries</c> derives from the
/// split feature and split value in <paramref name="splttingParams"/>.
/// </summary>
/// <param name="dataToSplit">The data frame whose rows are distributed across the subsets.</param>
/// <param name="splttingParams">Carries <c>SplitOnFeature</c> and <c>SplitOnValue</c>.</param>
/// <returns>A list holding, for every query, the selected subset together with its link.</returns>
public IList<ISplittedData> SplitData(IDataFrame dataToSplit, IBinarySplittingParams splttingParams)
{
    var featureToSplitOn = splttingParams.SplitOnFeature;
    var valueToSplitOn = splttingParams.SplitOnValue;
    var queriesByKey = BuildQueries(featureToSplitOn, valueToSplitOn);

    var output = new List<ISplittedData>();
    var fullRowCount = (double)dataToSplit.RowCount;

    foreach (var keyedQuery in queriesByKey)
    {
        var matchingRows = dataToSplit.GetSubsetByQuery(keyedQuery.Value);

        // The link is computed from the subset, the size of the full frame and the query's key.
        var subsetLink = GetSubsetLink(matchingRows, fullRowCount, keyedQuery.Key);
        var item = new SplittedData(subsetLink, matchingRows);
        output.Add(item);
    }

    return output;
}
/// <summary>
/// Splits <paramref name="dataToSplit"/> into one subset per distinct value found in the
/// column of the split feature.
/// </summary>
/// <param name="dataToSplit">Data frame to partition.</param>
/// <param name="splttingParams">Supplies the feature (<c>SplitOnFeature</c>) to split on.</param>
/// <returns>
/// One entry per distinct value, in enumeration order. Each entry pairs the subset with a
/// <c>DecisionLink</c> built from the result of <c>CalcInstancesPercentage</c>, the subset's
/// row count and the distinct value itself.
/// </returns>
public IList<ISplittedData> SplitData(IDataFrame dataToSplit, ISplittingParams splttingParams)
{
    var splitFeature = splttingParams.SplitOnFeature;
    var totalRowsCount = dataToSplit.RowCount;
    var uniqueValues = dataToSplit.GetColumnVector(splitFeature).Distinct();
    var splittedData = new List<ISplittedData>();

    // The TODO sits on its own line: a trailing "//" comment on the same physical line as
    // the loop would comment out the loop, the return and the closing brace.
    //TODO: AAA embarrassingly parallel - test it for performance
    foreach (var uniqueValue in uniqueValues)
    {
        var query = BuildQuery(splitFeature, uniqueValue);
        var splitResult = dataToSplit.GetSubsetByQuery(query);
        var subsetCount = splitResult.RowCount;

        var link = new DecisionLink(
            CalcInstancesPercentage(totalRowsCount, subsetCount),
            subsetCount,
            uniqueValue);

        splittedData.Add(new SplittedData(link, splitResult));
    }

    return splittedData;
}
/// <summary>
/// Produces one subset of <paramref name="dataToSplit"/> for every distinct value in the
/// column named by <c>splttingParams.SplitOnFeature</c>.
/// </summary>
/// <param name="dataToSplit">The data frame to split.</param>
/// <param name="splttingParams">Splitting parameters; only <c>SplitOnFeature</c> is read here.</param>
/// <returns>
/// A list with one element per distinct value: the rows selected by the query from
/// <c>BuildQuery</c>, plus a <c>DecisionLink</c> carrying the value from
/// <c>CalcInstancesPercentage</c>, the subset row count and the distinct value.
/// </returns>
public IList<ISplittedData> SplitData(IDataFrame dataToSplit, ISplittingParams splttingParams)
{
    var splitFeature = splttingParams.SplitOnFeature;
    var totalRowsCount = dataToSplit.RowCount;
    var uniqueValues = dataToSplit.GetColumnVector(splitFeature).Distinct();
    var splittedData = new List<ISplittedData>();

    // Kept on a separate line so the line comment cannot swallow the loop that follows it.
    //TODO: AAA embarrassingly parallel - test it for performance
    foreach (var uniqueValue in uniqueValues)
    {
        var query = BuildQuery(splitFeature, uniqueValue);
        var splitResult = dataToSplit.GetSubsetByQuery(query);
        var subsetCount = splitResult.RowCount;

        // Percentage is computed from the full frame's row count and this subset's row count.
        var instancesPercentage = CalcInstancesPercentage(totalRowsCount, subsetCount);
        var link = new DecisionLink(instancesPercentage, subsetCount, uniqueValue);

        splittedData.Add(new SplittedData(link, splitResult));
    }

    return splittedData;
}