// Configures a Bucketizer with two input columns, two output columns and one
// splits array per column, transforms a 100-row frame, and checks that both
// output columns appear in the result schema and that each getter returns the
// value that was set.
public void TestBucketizer_MultipleColumns()
        {
            string handleInvalid = "keep";

            var inputColumns = new List<string> { "input_col_a", "input_col_b" };
            var outputColumns = new List<string> { "output_col_a", "output_col_b" };

            double[][] splitsPerColumn =
            {
                new[] { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue },
                new[] { double.MinValue, 0.0, 10000.0, double.MaxValue }
            };

            var bucketizer = new Bucketizer();
            bucketizer
                .SetInputCols(inputColumns)
                .SetOutputCols(outputColumns)
                .SetHandleInvalid(handleInvalid)
                .SetSplitsArray(splitsPerColumn);

            Assert.Equal(handleInvalid, bucketizer.GetHandleInvalid());

            DataFrame source =
                _spark.Sql("SELECT ID as input_col_a, ID as input_col_b from range(100)");
            DataFrame transformed = bucketizer.Transform(source);

            // Checked in list order: "output_col_a" first, then "output_col_b".
            foreach (string outputColumn in outputColumns)
            {
                Assert.Contains(transformed.Schema().Fields, f => f.Name == outputColumn);
            }

            Assert.Equal(inputColumns, bucketizer.GetInputCols());
            Assert.Equal(outputColumns, bucketizer.GetOutputCols());
            Assert.Equal(splitsPerColumn, bucketizer.GetSplitsArray());
        }
Example #2
0
        // Configures a single-column Bucketizer with an explicit uid, transforms a
        // 100-row frame, and checks the uid, the handle-invalid setting, the presence
        // of the output column in the result schema, and the column/splits getters.
        public void TestBucketizer()
        {
            string uid = "uid";
            string handleInvalid = "skip";
            string inputColumn = "input_col";
            string outputColumn = "output_col";
            double[] splits = { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue };

            var bucketizer = new Bucketizer(uid);
            bucketizer
                .SetInputCol(inputColumn)
                .SetOutputCol(outputColumn)
                .SetHandleInvalid(handleInvalid)
                .SetSplits(splits);

            Assert.Equal(handleInvalid, bucketizer.GetHandleInvalid());
            Assert.Equal(uid, bucketizer.Uid());

            DataFrame source = _spark.Sql("SELECT ID as input_col from range(100)");
            DataFrame transformed = bucketizer.Transform(source);

            Assert.Contains(transformed.Schema().Fields, f => f.Name == outputColumn);

            Assert.Equal(inputColumn, bucketizer.GetInputCol());
            Assert.Equal(outputColumn, bucketizer.GetOutputCol());
            Assert.Equal(splits, bucketizer.GetSplits());
        }
Example #3
0
        // Builds a Bucketizer over the supplied games, registers every entry of
        // TrendRules under its label, and returns the result of Bucketify().
        public IEnumerable<Bucket<Game>> Compute(IEnumerable<Game> games)
        {
            var gameBucketizer = new Bucketizer<Game>(games);

            // Iterate a snapshot of the labels, as the original ForEach call did.
            foreach (var label in TrendRules.Keys.ToList())
            {
                gameBucketizer.AddRule(label, TrendRules[label]);
            }

            return gameBucketizer.Bucketify();
        }
Example #4
0
        // Exercises the single-column Bucketizer end to end:
        //   1. setters/getters and uid,
        //   2. Transform producing the configured output column,
        //   3. Save/Load round trip preserving the uid,
        //   4. the generic param API (ExplainParams, GetParam, ExplainParam, Set, Clear).
        public void TestBucketizer()
        {
            var expectedSplits = new double[] { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue };

            string expectedHandle    = "skip";
            string expectedUid       = "uid";
            string expectedInputCol  = "input_col";
            string expectedOutputCol = "output_col";

            var bucketizer = new Bucketizer(expectedUid);

            bucketizer.SetInputCol(expectedInputCol)
                .SetOutputCol(expectedOutputCol)
                .SetHandleInvalid(expectedHandle)
                .SetSplits(expectedSplits);

            Assert.Equal(expectedHandle, bucketizer.GetHandleInvalid());
            Assert.Equal(expectedUid, bucketizer.Uid());

            DataFrame input = _spark.Sql("SELECT ID as input_col from range(100)");
            DataFrame output = bucketizer.Transform(input);

            Assert.Contains(output.Schema().Fields, f => f.Name == expectedOutputCol);

            Assert.Equal(expectedInputCol, bucketizer.GetInputCol());
            Assert.Equal(expectedOutputCol, bucketizer.GetOutputCol());
            Assert.Equal(expectedSplits, bucketizer.GetSplits());

            // Round trip through a temporary directory; the loaded instance must
            // report the same uid as the one that was saved.
            using (var tempDirectory = new TemporaryDirectory())
            {
                string savePath = Path.Join(tempDirectory.Path, "bucket");
                bucketizer.Save(savePath);

                Bucketizer loadedBucketizer = Bucketizer.Load(savePath);
                Assert.Equal(bucketizer.Uid(), loadedBucketizer.Uid());
            }

            Assert.NotEmpty(bucketizer.ExplainParams());

            Param handleInvalidParam = bucketizer.GetParam("handleInvalid");

            Assert.NotEmpty(handleInvalidParam.Doc);
            Assert.NotEmpty(handleInvalidParam.Name);
            // Expected value first: the param's Parent is the value under test and
            // the bucketizer's uid is the reference it is compared against.
            Assert.Equal(bucketizer.Uid(), handleInvalidParam.Parent);

            Assert.NotEmpty(bucketizer.ExplainParam(handleInvalidParam));
            bucketizer.Set(handleInvalidParam, "keep");
            Assert.Equal("keep", bucketizer.GetHandleInvalid());

            // After Clear the getter is expected to report "error".
            Assert.Equal("error", bucketizer.Clear(handleInvalidParam).GetHandleInvalid());
        }
Example #5
0
        // Wraps a configured Bucketizer in a PipelineModel, transforms a 100-row
        // frame through the pipeline, and checks the output schema, the bucketizer
        // getters, the result types, and that the model's uid survives both
        // Save/Load and Write().Save/Read().Load round trips.
        public void TestPipelineModelTransform()
        {
            string handleInvalid = "skip";
            string bucketizerUid = "uid";
            string inputColumn = "input_col";
            string outputColumn = "output_col";
            double[] splits = { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue };

            var bucketizer = new Bucketizer(bucketizerUid);
            bucketizer
                .SetInputCol(inputColumn)
                .SetOutputCol(outputColumn)
                .SetHandleInvalid(handleInvalid)
                .SetSplits(splits);

            // The bucketizer is the only stage in the pipeline.
            JavaTransformer[] stages = { bucketizer };
            var pipelineModel = new PipelineModel("randomUID", stages);

            DataFrame source = _spark.Sql("SELECT ID as input_col from range(100)");
            DataFrame transformed = pipelineModel.Transform(source);

            Assert.Contains(transformed.Schema().Fields, f => f.Name == outputColumn);

            Assert.Equal(inputColumn, bucketizer.GetInputCol());
            Assert.Equal(outputColumn, bucketizer.GetOutputCol());
            Assert.Equal(splits, bucketizer.GetSplits());

            Assert.IsType<StructType>(pipelineModel.TransformSchema(source.Schema()));
            Assert.IsType<DataFrame>(transformed);

            using (var tempDirectory = new TemporaryDirectory())
            {
                // Round trip 1: Save / static Load.
                string savePath = Path.Join(tempDirectory.Path, "pipelineModel");
                pipelineModel.Save(savePath);

                PipelineModel reloaded = PipelineModel.Load(savePath);
                Assert.Equal(pipelineModel.Uid(), reloaded.Uid());

                // Round trip 2: Write().Save / Read().Load.
                string writePath = Path.Join(tempDirectory.Path, "pipelineModelWithWrite");
                pipelineModel.Write().Save(writePath);

                PipelineModel reloadedViaReader = pipelineModel.Read().Load(writePath);
                Assert.Equal(pipelineModel.Uid(), reloadedViaReader.Uid());
            }
        }
        // Saves a Bucketizer after setting its input column, changes the input
        // column on the in-memory instance, loads the saved copy back, and prints
        // the input column of each instance (in-memory first, loaded second).
        static void Main(string[] args)
        {
            const string savePath = "/tmp/bucketizer";

            var inMemory = new Bucketizer();
            inMemory.SetInputCol("input_column");
            inMemory.Save(savePath);

            // Changed after the save, before the load.
            inMemory.SetInputCol("something_else");

            var fromDisk = Bucketizer.Load(savePath);

            Console.WriteLine(inMemory.GetInputCol());
            Console.WriteLine(fromDisk.GetInputCol());
        }
Example #7
0
        // Builds the shared fixture: six SampleClassForTest items whose
        // PredicateProperty runs from 0 to 5, and a Bucketizer over those items.
        public void SetUp()
        {
            var samples = new SampleClassForTest[]
            {
                new SampleClassForTest { PredicateProperty = 0 },
                new SampleClassForTest { PredicateProperty = 1 },
                new SampleClassForTest { PredicateProperty = 2 },
                new SampleClassForTest { PredicateProperty = 3 },
                new SampleClassForTest { PredicateProperty = 4 },
                new SampleClassForTest { PredicateProperty = 5 }
            };

            bucketContents = samples;
            bucketizer = new Bucketizer<SampleClassForTest>(bucketContents);
        }
Example #8
0
        // Entry point. Parses the command line into _cmdLine, loads whichever
        // optional inputs were supplied (bucketizer, game definition, opponent
        // action tree, Neytiri strategy), then runs every action whose flag is set.
        // Returns without doing anything if argument parsing fails.
        static void Main(string[] args)
        {
            if (!Parser.ParseArgumentsWithUsage(args, _cmdLine))
            {
                return;
            }

            // The pockets argument is consumed in 4-character groups. Each group is
            // split into two 2-character halves, joined with a space, and parsed into
            // a CardSet. Trailing characters that do not fill a whole group are ignored.
            _pockets = new List<CardSet>();
            for (int i = 0; i < _cmdLine.pockets.Length / 4; ++i)
            {
                String pocket = _cmdLine.pockets.Substring(i * 4, 2) + " " +
                                _cmdLine.pockets.Substring(i * 4 + 2, 2);
                _pockets.Add(StdDeck.Descriptor.GetCardSet(pocket));
            }

            // Null and empty both mean "not supplied", matching the gameDef check
            // below; a null value is no longer passed on to the deserializer.
            if (!String.IsNullOrEmpty(_cmdLine.bucketizer))
            {
                _bucketizer = XmlSerializerExt.Deserialize<Bucketizer>(_cmdLine.bucketizer);
            }

            if (!String.IsNullOrEmpty(_cmdLine.gameDef))
            {
                XmlSerializerExt.Deserialize(out _gameDef, _cmdLine.gameDef);

                // Board size is the sum of the shared-card counts over all rounds.
                _boardSize = 0;
                for (int r = 0; r < _gameDef.RoundsCount; ++r)
                {
                    _boardSize += _gameDef.SharedCardsCount[r];
                }
            }

            // The opponent action tree is read only when the file actually exists.
            if (!String.IsNullOrEmpty(_cmdLine.oppActionTreeFile) && File.Exists(_cmdLine.oppActionTreeFile))
            {
                Console.WriteLine("Reading opponent action tree from: {0} ...", _cmdLine.oppActionTreeFile);
                XmlSerializerExt.Deserialize(out _oppActionTree, _cmdLine.oppActionTreeFile);
            }

            // Same "not supplied" rule as for the bucketizer argument.
            if (!String.IsNullOrEmpty(_cmdLine.neytiri))
            {
                Console.WriteLine("Reading Neytiri strategy from {0} ...", _cmdLine.neytiri);
                XmlSerializerExt.Deserialize(out _neytiri, _cmdLine.neytiri);
            }

            // The actions are independent; several may run in a single invocation,
            // in the order listed here.
            if (_cmdLine.processLogs)
            {
                UpdateActionTree();
            }

            if (_cmdLine.monteCarlo)
            {
                MonteCarlo();
            }

            if (_cmdLine.showOppActionTree)
            {
                showOppActionTree();
            }

            if (_cmdLine.printNeytiryPf)
            {
                PrintNeytiryPreflop();
            }

            if (_cmdLine.dumpNode)
            {
                DumpNode();
            }
        }