Esempio n. 1
0
        /// <summary>
        /// Scores <paramref name="data"/> after filtering it with the negated
        /// <c>data.HasNaN(this.TargetFeatures)</c> mask.
        /// </summary>
        /// <param name="data">Record set to score.</param>
        /// <param name="parallel">Kept for signature compatibility; this method does not read it.</param>
        /// <returns>
        /// A <c>Score</c> wrapping two arrays of length <c>data.NRows</c>. Both start out filled
        /// with <c>double.NaN</c> and are passed to <c>_Score</c>, which is expected to populate them.
        /// </returns>
        public Score Score(RecordSet data, bool parallel = true)
        {
            // NOTE: the parallel parameter is deliberately ignored here.

            // Output buffers are sized to the *unfiltered* row count and pre-filled with NaN.
            var totalRows = data.NRows;
            double[] sResult = Yarr.Repeat(double.NaN, totalRows);
            double[] bResult = Yarr.Repeat(double.NaN, totalRows);

            // Negated HasNaN mask; presumably true for rows without NaN in the
            // target features (confirm against HasNaN / InlineNot).
            bool[] keepMask  = Yarr.InlineNot(data.HasNaN(this.TargetFeatures));
            var    cleanRows = data.Filter(keepMask);

            // Drop our reference to the unfiltered set. Other references to the same
            // object probably keep it alive, so this is unlikely to free anything, but it is harmless.
            data = null;

            // Slice covering every row of the filtered set.
            var everyCleanRow = Yarr.Range(cleanRows.NRows).MakeSlice();

            this._Score(cleanRows, everyCleanRow, sResult, bResult);

            return new Score(sResult, bResult);
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the full pipeline: loads the training data, builds <c>NUM_MODELS</c> trees,
        /// tunes the classifier cutoff on the training data, and, if the tuned score is
        /// high enough, classifies the test data and writes the predictions.
        /// </summary>
        /// <remarks>
        /// Candidate cutoffs are <c>Math.Exp(exponent)</c> for each exponent yielded by
        /// <c>Yarr.XRange(-0.5, 0.6, 0.1)</c>; the one with the highest <c>AMS</c> on the
        /// training data wins. When the best score is below 3.5 the method plays the
        /// fail sound and returns without touching the test data.
        /// </remarks>
        private static void MainMain()
        {
            Write("Running Random Forest ({0} trees)", NUM_MODELS);

            // --- training data -------------------------------------------------
            Write("loading training data");
            var   trainSet       = Parser.LoadTrainData();
            int[] featureIndices = Yarr.Range(CsvRecord.FEATURE_COLS.Count);
            WriteDone();

            // --- forest --------------------------------------------------------
            Write("creating random forest");
            var forestBuilder = new TreeCreator(trainSet, featureIndices, COLS_PER_MODEL);
            var builtTrees    = forestBuilder.MakeTreesParallel(NUM_MODELS);
            var forest        = new ScoreAverager <Tree>(builtTrees);
            WriteDone();

            string treeCountLine = string.Format("\t\tcreated {0} trees", builtTrees.Count);
            Console.WriteLine(treeCountLine);

            // --- cutoff tuning -------------------------------------------------
            Write("creating and tuning classifier (parallel2)");
            PlayDingSound();

            double topScore    = double.NegativeInfinity;
            double topCutoff   = double.NaN;
            double topExponent = double.NaN;

            // The tuning classifier wraps the forest in a ScoreCacher; it is replaced
            // by an unwrapped classifier once the sweep is finished.
            var cachingForest = new ScoreCacher(forest);
            var classifier    = new Classifier(cachingForest);

            foreach (double exponent in Yarr.XRange(-0.5, 0.6, 0.1))
            {
                double candidateCutoff = Math.Exp(exponent);
                classifier.Cutoff = candidateCutoff;

                var    trainPredictions = classifier.Classify(trainSet, parallel: PARALLEL);
                double candidateScore   = AMS(trainPredictions, trainSet);

                // Negated '>' (not '<=') so a NaN score is skipped, exactly as a
                // plain "if (score > best)" update would skip it.
                if (!(candidateScore > topScore))
                {
                    continue;
                }

                topScore    = candidateScore;
                topCutoff   = candidateCutoff;
                topExponent = exponent;
            }

            classifier = new Classifier(forest) { Cutoff = topCutoff };
            WriteDone();

            string amsLine    = string.Format("\t\tpredicted ams: {0}", topScore);
            string cutoffLine = string.Format("\t\tcutoff: {0} (e^{1})", topCutoff, topExponent);
            Console.WriteLine(amsLine);
            Console.WriteLine(cutoffLine);

            // Not worth producing a submission below this training score.
            if (topScore < 3.5)
            {
                WriteDone();         // whole-method timer
                PlayFailSound();
                return;
            }

            // --- test data -----------------------------------------------------
            Write("loading test data");
            var testSet = Parser.LoadTestData();
            WriteDone();

            Write("scoring test data");
            var testPredictions = classifier.Classify(testSet, parallel: PARALLEL);
            var rankValues      = Yarr.Range(1, testSet.NRows + 1);
            WriteDone();

            Write("writing output");
            Parser.WritePredictions(testSet.EventIds, testPredictions, rankValues);
            WriteDone();

            WriteDone();             // whole-method timer
            PlayWinSound();
        }