public void TestXGBoostEvaluateTiming()
        {
            // Timing smoke test: loads the model with shrink preparation enabled, builds one
            // dense feature vector per sample from the test-case file, and passes both to
            // DoXGBoostEvaluateTimingFlatShrink, which reports the elapsed time.
            var filename    = @"../../../../datasets/xgboost/model_xbg_trees.txt";
            var treesString = File.ReadAllText(filename);
            var model       = XGBoost.Create(treesString, prepareShrink: true);

            var filename2     = @"../../../../datasets/xgboost/xgboost_test_cases_no_feature_names.txt";
            var samplesString = File.ReadLines(filename2);
            var samples       = new Dictionary <string, float[]>();

            // The data file is machine-written, so parse numbers with the invariant culture.
            // With the current culture, "0.5" would be misread on machines whose decimal
            // separator is a comma.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // The first line is skipped (presumably a header row -- confirm against the file).
            // Every remaining line is read as "sample,featureIndex,value".
            foreach (var line in samplesString.Skip(1))
            {
                var parts        = line.Split(',');
                var sample       = parts[0];
                var featureIndex = int.Parse(parts[1], invariant);
                var value        = float.Parse(parts[2], invariant);

                // Single lookup: create the sample's vector the first time its key is seen.
                float[] features;
                if (!samples.TryGetValue(sample, out features))
                {
                    features = new float[1000];
                    samples.Add(sample, features);
                }
                features[featureIndex] = value;
            }

            DoXGBoostEvaluateTimingFlatShrink(model, samples);
        }
 // Smoke test: reads the tree dump from disk and builds a model from it.
 // It makes no assertions, so it only fails if reading or XGBoost.Create throws.
 public void TestXGBoostCreate()
 {
     var treeDump = File.ReadAllText(@"../../../../datasets/xgboost/model_xbg_trees.txt");
     var model    = XGBoost.Create(treeDump);
     // TODO: ensure correctness of each tree
 }
Beispiel #3
0
        /// <summary>
        /// Loads the model from "model_xbg_trees.txt" inside <paramref name="dataPath"/> and,
        /// when a mapping is supplied, returns the model produced by
        /// <c>FeatureReorderer.ReorderXGBoost</c> instead.
        /// </summary>
        /// <param name="dataPath">Directory that contains the tree dump file.</param>
        /// <param name="reorderMapping">Feature reorder mapping, or null to skip reordering.</param>
        /// <returns>The loaded model, reordered if a mapping was given.</returns>
        private static XGBoost GetModel(string dataPath, short[] reorderMapping)
        {
            var treeDump = File.ReadAllText(Path.Combine(dataPath, @"model_xbg_trees.txt"));
            var loaded   = XGBoost.Create(treeDump);

            // No mapping supplied: hand back the model exactly as loaded.
            if (reorderMapping == null)
            {
                return loaded;
            }

            return FeatureReorderer.ReorderXGBoost(loaded, reorderMapping);
        }
        /// <summary>
        /// Times a single batched <c>EvaluateProbabilityFlatShrink</c> call over every sample
        /// vector taken twice, in a fixed-seed shuffled order, and writes the elapsed time to
        /// the test output.
        /// </summary>
        /// <param name="model">Model to evaluate.</param>
        /// <param name="samples">Feature vectors keyed by sample name.</param>
        private void DoXGBoostEvaluateTimingFlatShrink(XGBoost model, Dictionary <string, float[]> samples)
        {
            // The first 639 entries of a vector are passed as user features, the rest as job features.
            const int userFeatureCount = 639;

            // Fixed seed keeps the shuffled order reproducible between runs.
            var rng     = new Random(20190524);
            var vectors = samples.Values
                          .Concat(samples.Values)
                          .OrderBy(vector => rng.Next())
                          .ToArray();

            // Only the first shuffled vector supplies the user features for the whole batch.
            var userFeatures = vectors[0].Take(userFeatureCount).ToArray();

            var jobFeatures = new float[vectors.Length][];
            for (var index = 0; index < vectors.Length; index++)
            {
                jobFeatures[index] = vectors[index].Skip(userFeatureCount).ToArray();
            }

            var scores    = new float[vectors.Length];
            var stopwatch = Stopwatch.StartNew();

            // Only the batched evaluation call is inside the timed region.
            model.EvaluateProbabilityFlatShrink(userFeatures, jobFeatures, scores);
            stopwatch.Stop();

            output.WriteLine($"Time taken for {vectors.Length} evaluations: {stopwatch.Elapsed.TotalMilliseconds} ms");
        }
        /// <summary>
        /// Times one <c>EvaluateProbabilityFlat</c> call per vector over every sample vector
        /// taken twice, in a fixed-seed shuffled order, and writes the elapsed time to the
        /// test output.
        /// </summary>
        /// <param name="model">Model to evaluate.</param>
        /// <param name="samples">Feature vectors keyed by sample name.</param>
        private void DoXGBoostEvaluateTimingFlat(XGBoost model, Dictionary <string, float[]> samples)
        {
            // Fixed seed keeps the shuffled order reproducible between runs.
            var rng     = new Random(20190524);
            var vectors = samples.Values
                          .Concat(samples.Values)
                          .OrderBy(vector => rng.Next())
                          .ToArray();

            var scores    = new double[vectors.Length];
            var stopwatch = Stopwatch.StartNew();

            // Results are stored so that each evaluation's return value is actually consumed.
            var slot = 0;
            foreach (var vector in vectors)
            {
                scores[slot] = model.EvaluateProbabilityFlat(vector);
                slot++;
            }
            stopwatch.Stop();

            output.WriteLine($"Time taken for {vectors.Length} evaluations: {stopwatch.Elapsed.TotalMilliseconds} ms");
        }
Beispiel #6
0
        /// <summary>
        /// Prints path statistics for the model to the console: the total number of paths
        /// returned by <c>FeatureReorderer.GetAllPaths</c> across all trees, their average
        /// length, a histogram of <c>FeatureReorderer.NumMemoryPages</c> values, and the
        /// average of those values.
        /// </summary>
        /// <param name="model">Model whose trees are summarised.</param>
        private static void ModelSummary(XGBoost model)
        {
            var paths = model.Trees.SelectMany(FeatureReorderer.GetAllPaths).ToArray();

            Console.WriteLine($"total paths: {paths.Length}");
            Console.WriteLine($"average path length: {paths.Average(path => path.Length)}");

            var pagesPerPath = paths.Select(FeatureReorderer.NumMemoryPages).ToArray();

            // One histogram line per distinct page count, in ascending order of the count.
            var histogram = pagesPerPath.GroupBy(count => count)
                            .OrderBy(bucket => bucket.Key)
                            .ToArray();

            foreach (var bucket in histogram)
            {
                Console.WriteLine($"  {bucket.Key}: {bucket.Count()}");
            }

            Console.WriteLine($"average page count: {pagesPerPath.Average(count => count)}");
        }
        // Correctness test for EvaluateProbabilityFlatShrink: for every sample in the test-case
        // file, the model's output must be within 1e-06 of the value stored at feature index
        // 840 of that sample's vector. All samples are evaluated twice and the total elapsed
        // time is written to the test output.
        public void TestXGBoostEvaluate()
        {
            var filename    = @"../../../../datasets/xgboost/model_xbg_trees.txt";
            var treesString = File.ReadAllText(filename);
            var model       = XGBoost.Create(treesString, prepareShrink: true);

            var filename2     = @"../../../../datasets/xgboost/xgboost_test_cases_no_feature_names.txt";
            var samplesString = File.ReadLines(filename2);
            var samples       = new Dictionary <string, float[]>();

            // The data file is machine-written, so parse numbers with the invariant culture.
            // With the current culture, "0.5" would be misread on machines whose decimal
            // separator is a comma.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // The first line is skipped (presumably a header row -- confirm against the file).
            // Every remaining line is read as "sample,featureIndex,value".
            foreach (var line in samplesString.Skip(1))
            {
                var parts        = line.Split(',');
                var sample       = parts[0];
                var featureIndex = int.Parse(parts[1], invariant);
                var value        = float.Parse(parts[2], invariant);

                // Single lookup: create the sample's vector the first time its key is seen.
                float[] features;
                if (!samples.TryGetValue(sample, out features))
                {
                    features = new float[1000];
                    samples.Add(sample, features);
                }
                features[featureIndex] = value;
            }

            // Index of the vector slot that holds the expected probability for the sample.
            const int probabilityFeatureIndex = 840;
            // Number of times the whole sample set is evaluated.
            const int passes = 2;

            var timer = Stopwatch.StartNew();

            for (int pass = 0; pass < passes; pass++)
            {
                foreach (var sample in samples)
                {
                    // The first 639 entries are passed as user features, the rest as job features.
                    var userFeatures = sample.Value.Take(639).ToArray();
                    var jobFeatures  = sample.Value.Skip(639).ToArray();
                    var actual       = model.EvaluateProbabilityFlatShrink(userFeatures, jobFeatures);
                    var expected     = sample.Value[probabilityFeatureIndex];
                    Assert.InRange(actual, expected - 1e-06, expected + 1e-06);
                }
            }
            timer.Stop();
            output.WriteLine($"Time taken for {passes*samples.Count} evaluations: {timer.Elapsed.TotalMilliseconds} ms");
        }
        // Timing smoke test for a feature-reordered model: reads the reorder mapping, reorders
        // the loaded model with it, remaps each sample's feature indices through the same
        // mapping, and passes both to DoXGBoostEvaluateTimingFlat, which reports the elapsed time.
        public void TestXGBoostEvaluateTimingReordered()
        {
            // The data files are machine-written, so parse numbers with the invariant culture.
            // With the current culture, "0.5" would be misread on machines whose decimal
            // separator is a comma.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // One mapping entry per line of reorder.csv.
            var filename1      = @"../../../../datasets/xgboost/reorder.csv";
            var reorderMapping = File.ReadAllLines(filename1)
                                 .Select(m => short.Parse(m, invariant))
                                 .ToArray();

            var filename    = @"../../../../datasets/xgboost/model_xbg_trees.txt";
            var treesString = File.ReadAllText(filename);
            var model       = XGBoost.Create(treesString);

            model = FeatureReorderer.ReorderXGBoost(model, reorderMapping);

            var filename2     = @"../../../../datasets/xgboost/xgboost_test_cases_no_feature_names.txt";
            var samplesString = File.ReadLines(filename2);
            var samples       = new Dictionary <string, float[]>();

            // The first line is skipped (presumably a header row -- confirm against the file).
            // Every remaining line is read as "sample,featureIndex,value".
            foreach (var line in samplesString.Skip(1))
            {
                var parts        = line.Split(',');
                var sample       = parts[0];
                var featureIndex = int.Parse(parts[1], invariant);

                // Indices covered by the mapping are translated; any other index is kept as-is.
                if (featureIndex >= 0 && featureIndex < reorderMapping.Length)
                {
                    featureIndex = reorderMapping[featureIndex];
                }
                var value = float.Parse(parts[2], invariant);

                // Single lookup: create the sample's vector the first time its key is seen.
                float[] features;
                if (!samples.TryGetValue(sample, out features))
                {
                    features = new float[1000];
                    samples.Add(sample, features);
                }
                features[featureIndex] = value;
            }

            DoXGBoostEvaluateTimingFlat(model, samples);
        }