/// <summary>
        /// Produces one subset of <paramref name="dataToSplit"/> for every query returned by
        /// <c>BuildQueries</c> for the feature and value carried by <paramref name="splttingParams"/>.
        /// </summary>
        /// <param name="dataToSplit">Data frame the subsets are taken from.</param>
        /// <param name="splttingParams">Supplies <c>SplitOnFeature</c> and <c>SplitOnValue</c> for the queries.</param>
        /// <returns>
        /// One <c>SplittedData</c> per query, in the order the queries are enumerated. Each entry
        /// pairs the subset with the link built by <c>GetSubsetLink</c>.
        /// </returns>
        public IList<ISplittedData> SplitData(IDataFrame dataToSplit, IBinarySplittingParams splttingParams)
        {
            var branchQueries = BuildQueries(splttingParams.SplitOnFeature, splttingParams.SplitOnValue);
            var branches = new List<ISplittedData>();

            // Row count of the unsplit frame, taken as double before being handed to GetSubsetLink.
            var rowsBeforeSplit = (double)dataToSplit.RowCount;

            foreach (var branchQuery in branchQueries)
            {
                var branchRows = dataToSplit.GetSubsetByQuery(branchQuery.Value);
                var branchLink = GetSubsetLink(branchRows, rowsBeforeSplit, branchQuery.Key);
                branches.Add(new SplittedData(branchLink, branchRows));
            }

            return branches;
        }
        /// <summary>
        /// Runs every query built by <c>BuildQueries</c> (from the split feature and split value in
        /// <paramref name="splttingParams"/>) against <paramref name="dataToSplit"/> and collects the
        /// resulting subsets together with their links.
        /// </summary>
        /// <param name="dataToSplit">Data frame that is queried for each subset.</param>
        /// <param name="splttingParams">Provides <c>SplitOnFeature</c> and <c>SplitOnValue</c>.</param>
        /// <returns>A list holding one <c>SplittedData</c> for each query, in enumeration order.</returns>
        public IList<ISplittedData> SplitData(IDataFrame dataToSplit, IBinarySplittingParams splttingParams)
        {
            var keyedQueries = BuildQueries(
                splttingParams.SplitOnFeature,
                splttingParams.SplitOnValue);
            var parts = new List<ISplittedData>();
            double fullRowCount = (double)dataToSplit.RowCount;

            foreach (var keyedQuery in keyedQueries)
            {
                // The entry's Value is the query to run; its Key is passed on to GetSubsetLink.
                var part = dataToSplit.GetSubsetByQuery(keyedQuery.Value);
                parts.Add(
                    new SplittedData(
                        GetSubsetLink(part, fullRowCount, keyedQuery.Key),
                        part));
            }

            return parts;
        }
 /// <summary>
 /// Splits <paramref name="dataToSplit"/> into one subset per value yielded by <c>Distinct()</c>
 /// over the column vector of the feature named in <paramref name="splttingParams"/>.
 /// </summary>
 /// <param name="dataToSplit">Data frame the subsets are taken from.</param>
 /// <param name="splttingParams">Supplies <c>SplitOnFeature</c>, the feature to split on.</param>
 /// <returns>
 /// One <c>SplittedData</c> per distinct value, each carrying a <c>DecisionLink</c> built from
 /// the result of <c>CalcInstancesPercentage</c>, the subset row count and the value itself.
 /// </returns>
 public IList<ISplittedData> SplitData(IDataFrame dataToSplit, ISplittingParams splttingParams)
 {
     var feature = splttingParams.SplitOnFeature;
     var parentRows = dataToSplit.RowCount;
     var distinctValues = dataToSplit.GetColumnVector(feature).Distinct();
     var branches = new List<ISplittedData>();

     //TODO: AAA embarrassingly parallel - test it for performance
     foreach (var featureValue in distinctValues)
     {
         var subset = dataToSplit.GetSubsetByQuery(BuildQuery(feature, featureValue));
         var subsetRows = subset.RowCount;
         var share = CalcInstancesPercentage(parentRows, subsetRows);
         branches.Add(new SplittedData(new DecisionLink(share, subsetRows, featureValue), subset));
     }

     return branches;
 }
Esempio n. 4
0
        /// <summary>
        /// Builds one subset of <paramref name="dataToSplit"/> for each value returned by
        /// <c>Distinct()</c> on the split feature's column vector.
        /// </summary>
        /// <param name="dataToSplit">Data frame that is queried once per distinct value.</param>
        /// <param name="splttingParams">Provides <c>SplitOnFeature</c>, the feature whose column is read.</param>
        /// <returns>
        /// A list with one <c>SplittedData</c> per distinct value, pairing the subset with its
        /// <c>DecisionLink</c>.
        /// </returns>
        public IList<ISplittedData> SplitData(IDataFrame dataToSplit, ISplittingParams splttingParams)
        {
            var onFeature = splttingParams.SplitOnFeature;
            var rowsInWhole = dataToSplit.RowCount;
            var columnValues = dataToSplit.GetColumnVector(onFeature).Distinct();
            var pieces = new List<ISplittedData>();

            //TODO: AAA embarrassingly parallel - test it for performance
            foreach (var columnValue in columnValues)
            {
                var valueQuery = BuildQuery(onFeature, columnValue);
                var piece = dataToSplit.GetSubsetByQuery(valueQuery);
                var rowsInPiece = piece.RowCount;

                // The link records the percentage result, the subset's row count and the value.
                var percentage = CalcInstancesPercentage(rowsInWhole, rowsInPiece);
                var pieceLink = new DecisionLink(percentage, rowsInPiece, columnValue);

                pieces.Add(new SplittedData(pieceLink, piece));
            }

            return pieces;
        }