/// <summary>
/// Configures a <c>Bucketizer</c> with two input columns, two output columns and one
/// splits array per column, then checks that the getters return the configured values
/// and that <c>Transform</c> produces a schema containing both output columns.
/// </summary>
public void TestBucketizer_MultipleColumns()
{
    // One splits array per input column.
    double[][] splitsPerColumn =
    {
        new[] { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue },
        new[] { double.MinValue, 0.0, 10000.0, double.MaxValue }
    };
    string handleInvalid = "keep";
    List<string> inputColumns = new List<string> { "input_col_a", "input_col_b" };
    List<string> outputColumns = new List<string> { "output_col_a", "output_col_b" };

    var bucketizer = new Bucketizer();
    bucketizer
        .SetInputCols(inputColumns)
        .SetOutputCols(outputColumns)
        .SetHandleInvalid(handleInvalid)
        .SetSplitsArray(splitsPerColumn);

    Assert.Equal(handleInvalid, bucketizer.GetHandleInvalid());

    DataFrame source = _spark.Sql("SELECT ID as input_col_a, ID as input_col_b from range(100)");
    DataFrame transformed = bucketizer.Transform(source);

    // Each configured output column must show up in the transformed schema.
    foreach (string outputColumn in outputColumns)
    {
        Assert.Contains(transformed.Schema().Fields, field => field.Name == outputColumn);
    }

    Assert.Equal(inputColumns, bucketizer.GetInputCols());
    Assert.Equal(outputColumns, bucketizer.GetOutputCols());
    Assert.Equal(splitsPerColumn, bucketizer.GetSplitsArray());
}
/// <summary>
/// Configures a single-column <c>Bucketizer</c> created with an explicit uid, then checks
/// that the getters return the configured values and that <c>Transform</c> produces a
/// schema containing the output column.
/// </summary>
public void TestBucketizer()
{
    double[] splits = { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue };
    string handleInvalid = "skip";
    string uid = "uid";
    string inputColumn = "input_col";
    string outputColumn = "output_col";

    var bucketizer = new Bucketizer(uid);
    bucketizer
        .SetInputCol(inputColumn)
        .SetOutputCol(outputColumn)
        .SetHandleInvalid(handleInvalid)
        .SetSplits(splits);

    Assert.Equal(handleInvalid, bucketizer.GetHandleInvalid());
    Assert.Equal(uid, bucketizer.Uid());

    DataFrame source = _spark.Sql("SELECT ID as input_col from range(100)");
    DataFrame transformed = bucketizer.Transform(source);

    // The transformed schema must expose the configured output column.
    Assert.Contains(transformed.Schema().Fields, field => field.Name == outputColumn);

    Assert.Equal(inputColumn, bucketizer.GetInputCol());
    Assert.Equal(outputColumn, bucketizer.GetOutputCol());
    Assert.Equal(splits, bucketizer.GetSplits());
}
/// <summary>
/// Creates a <c>Bucketizer&lt;Game&gt;</c> over <paramref name="games"/>, registers every
/// entry of <c>TrendRules</c> as a rule, and returns the result of <c>Bucketify()</c>.
/// </summary>
/// <param name="games">The games handed to the bucketizer.</param>
/// <returns>The buckets produced by <c>Bucketify()</c>.</returns>
public IEnumerable<Bucket<Game>> Compute(IEnumerable<Game> games)
{
    var gameBucketizer = new Bucketizer<Game>(games);

    // Iterate over a snapshot of the keys, looking each rule up by its label.
    foreach (var label in TrendRules.Keys.ToList())
    {
        gameBucketizer.AddRule(label, TrendRules[label]);
    }

    return gameBucketizer.Bucketify();
}
/// <summary>
/// Exercises a single-column <c>Bucketizer</c> in three stages: configuration and
/// transform, a save/load round trip, and the generic param accessors
/// (<c>ExplainParams</c>, <c>GetParam</c>, <c>ExplainParam</c>, <c>Set</c>, <c>Clear</c>).
/// </summary>
public void TestBucketizer()
{
    double[] splits = { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue };
    string handleInvalid = "skip";
    string uid = "uid";
    string inputColumn = "input_col";
    string outputColumn = "output_col";

    var bucketizer = new Bucketizer(uid);
    bucketizer
        .SetInputCol(inputColumn)
        .SetOutputCol(outputColumn)
        .SetHandleInvalid(handleInvalid)
        .SetSplits(splits);

    Assert.Equal(handleInvalid, bucketizer.GetHandleInvalid());
    Assert.Equal(uid, bucketizer.Uid());

    DataFrame source = _spark.Sql("SELECT ID as input_col from range(100)");
    DataFrame transformed = bucketizer.Transform(source);

    // The transformed schema must expose the configured output column.
    Assert.Contains(transformed.Schema().Fields, field => field.Name == outputColumn);

    Assert.Equal(inputColumn, bucketizer.GetInputCol());
    Assert.Equal(outputColumn, bucketizer.GetOutputCol());
    Assert.Equal(splits, bucketizer.GetSplits());

    // Save/load round trip: the loaded instance must carry the same uid.
    using (var scratchDirectory = new TemporaryDirectory())
    {
        string bucketizerPath = Path.Join(scratchDirectory.Path, "bucket");
        bucketizer.Save(bucketizerPath);

        Bucketizer reloaded = Bucketizer.Load(bucketizerPath);
        Assert.Equal(bucketizer.Uid(), reloaded.Uid());
    }

    Assert.NotEmpty(bucketizer.ExplainParams());

    // Inspect the "handleInvalid" param through the generic param API.
    Param handleInvalidParam = bucketizer.GetParam("handleInvalid");
    Assert.NotEmpty(handleInvalidParam.Doc);
    Assert.NotEmpty(handleInvalidParam.Name);
    Assert.Equal(handleInvalidParam.Parent, bucketizer.Uid());

    Assert.NotEmpty(bucketizer.ExplainParam(handleInvalidParam));

    bucketizer.Set(handleInvalidParam, "keep");
    Assert.Equal("keep", bucketizer.GetHandleInvalid());

    // After clearing the param the getter is expected to report "error"
    // (presumably the param's default value).
    var cleared = bucketizer.Clear(handleInvalidParam);
    Assert.Equal("error", cleared.GetHandleInvalid());
}
/// <summary>
/// Wraps a configured <c>Bucketizer</c> in a <c>PipelineModel</c>, transforms a data frame
/// through the pipeline, and checks the resulting schema, the bucketizer's getters, and
/// two persistence round trips (<c>Save</c>/<c>Load</c> and <c>Write</c>/<c>Read</c>).
/// </summary>
public void TestPipelineModelTransform()
{
    double[] splits = { double.MinValue, 0.0, 10.0, 50.0, double.MaxValue };
    string handleInvalid = "skip";
    string uid = "uid";
    string inputColumn = "input_col";
    string outputColumn = "output_col";

    var bucketizer = new Bucketizer(uid);
    bucketizer
        .SetInputCol(inputColumn)
        .SetOutputCol(outputColumn)
        .SetHandleInvalid(handleInvalid)
        .SetSplits(splits);

    // The pipeline consists of the bucketizer as its only stage.
    JavaTransformer[] pipelineStages = { bucketizer };
    PipelineModel pipelineModel = new PipelineModel("randomUID", pipelineStages);

    DataFrame source = _spark.Sql("SELECT ID as input_col from range(100)");
    DataFrame transformed = pipelineModel.Transform(source);

    Assert.Contains(transformed.Schema().Fields, field => field.Name == outputColumn);

    Assert.Equal(inputColumn, bucketizer.GetInputCol());
    Assert.Equal(outputColumn, bucketizer.GetOutputCol());
    Assert.Equal(splits, bucketizer.GetSplits());

    Assert.IsType<StructType>(pipelineModel.TransformSchema(source.Schema()));
    Assert.IsType<DataFrame>(transformed);

    using (var scratchDirectory = new TemporaryDirectory())
    {
        // Round trip 1: Save / static Load.
        string modelPath = Path.Join(scratchDirectory.Path, "pipelineModel");
        pipelineModel.Save(modelPath);

        PipelineModel reloaded = PipelineModel.Load(modelPath);
        Assert.Equal(pipelineModel.Uid(), reloaded.Uid());

        // Round trip 2: Write().Save / Read().Load.
        string writerPath = Path.Join(scratchDirectory.Path, "pipelineModelWithWrite");
        pipelineModel.Write().Save(writerPath);

        PipelineModel reloadedViaReader = pipelineModel.Read().Load(writerPath);
        Assert.Equal(pipelineModel.Uid(), reloadedViaReader.Uid());
    }
}
/// <summary>
/// Saves a <c>Bucketizer</c>, changes its input column afterwards, loads the saved copy,
/// and prints the input column of both the in-memory and the loaded instance.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const string savedModelPath = "/tmp/bucketizer";

    var original = new Bucketizer();
    original.SetInputCol("input_column");
    original.Save(savedModelPath);

    // Changed only after saving, so the file on disk was written with "input_column".
    original.SetInputCol("something_else");

    var restored = Bucketizer.Load(savedModelPath);

    Console.WriteLine(original.GetInputCol());
    Console.WriteLine(restored.GetInputCol());
}
/// <summary>
/// Populates <c>bucketContents</c> with six <c>SampleClassForTest</c> instances whose
/// <c>PredicateProperty</c> values run from 0 to 5, and creates the <c>bucketizer</c>
/// under test over that content.
/// </summary>
public void SetUp()
{
    bucketContents = new SampleClassForTest[]
    {
        new SampleClassForTest { PredicateProperty = 0 },
        new SampleClassForTest { PredicateProperty = 1 },
        new SampleClassForTest { PredicateProperty = 2 },
        new SampleClassForTest { PredicateProperty = 3 },
        new SampleClassForTest { PredicateProperty = 4 },
        new SampleClassForTest { PredicateProperty = 5 },
    };

    bucketizer = new Bucketizer<SampleClassForTest>(bucketContents);
}
/// <summary>
/// Program entry point: parses the command line, loads the optional inputs it names
/// (bucketizer, game definition, opponent action tree, Neytiri strategy), and then runs
/// every action that was requested through command-line flags.
/// </summary>
/// <param name="args">Raw command-line arguments, parsed into <c>_cmdLine</c>.</param>
static void Main(string[] args)
{
    if (!Parser.ParseArgumentsWithUsage(args, _cmdLine))
    {
        return;
    }

    // The pockets option is consumed in 4-character groups; each group is split into two
    // 2-character tokens joined by a space. Trailing characters that do not fill a whole
    // group are ignored by the integer division.
    _pockets = new List<CardSet>();
    for (int i = 0; i < _cmdLine.pockets.Length / 4; ++i)
    {
        String pocket = _cmdLine.pockets.Substring(i * 4, 2) + " " +
                        _cmdLine.pockets.Substring(i * 4 + 2, 2);
        _pockets.Add(StdDeck.Descriptor.GetCardSet(pocket));
    }

    // Use the same null-or-empty check as the other optional file options below, so an
    // unset (null) value is skipped instead of being passed on to the deserializer.
    if (!String.IsNullOrEmpty(_cmdLine.bucketizer))
    {
        _bucketizer = XmlSerializerExt.Deserialize<Bucketizer>(_cmdLine.bucketizer);
    }

    if (!String.IsNullOrEmpty(_cmdLine.gameDef))
    {
        XmlSerializerExt.Deserialize(out _gameDef, _cmdLine.gameDef);

        // Board size is the sum of the shared-card counts over all rounds.
        _boardSize = 0;
        for (int r = 0; r < _gameDef.RoundsCount; ++r)
        {
            _boardSize += _gameDef.SharedCardsCount[r];
        }
    }

    if (!String.IsNullOrEmpty(_cmdLine.oppActionTreeFile) && File.Exists(_cmdLine.oppActionTreeFile))
    {
        Console.WriteLine("Reading opponent action tree from: {0} ...", _cmdLine.oppActionTreeFile);
        XmlSerializerExt.Deserialize(out _oppActionTree, _cmdLine.oppActionTreeFile);
    }

    // Same null-or-empty check as above, for consistency with the sibling options.
    if (!String.IsNullOrEmpty(_cmdLine.neytiri))
    {
        Console.WriteLine("Reading Neytiri strategy from {0} ...", _cmdLine.neytiri);
        XmlSerializerExt.Deserialize(out _neytiri, _cmdLine.neytiri);
    }

    // Run the requested actions in a fixed order.
    if (_cmdLine.processLogs)
    {
        UpdateActionTree();
    }

    if (_cmdLine.monteCarlo)
    {
        MonteCarlo();
    }

    if (_cmdLine.showOppActionTree)
    {
        showOppActionTree();
    }

    if (_cmdLine.printNeytiryPf)
    {
        PrintNeytiryPreflop();
    }

    if (_cmdLine.dumpNode)
    {
        DumpNode();
    }
}