private KMeansDriverHandlers(
            [Parameter(typeof(NumPartitions))] int numPartitions,
            GroupCommDriver groupCommDriver,
            IEvaluatorRequestor evaluatorRequestor,
            CommandLineArguments arguments)
        {
            // Sets up the K-Means driver: keeps the supplied collaborators, prepares the
            // shuffled input data in a per-run directory, builds the codec configurations
            // and registers the group communication operators.
            _groupCommDriver = groupCommDriver;
            _evaluatorRequestor = evaluatorRequestor;

            // Each run gets its own directory below the current working directory; the
            // suffix is the first four hex characters of a freshly generated GUID.
            string workingDirectory = Directory.GetCurrentDirectory();
            string runSuffix = Guid.NewGuid().ToString("N").Substring(0, 4);
            _executionDirectory = Path.Combine(workingDirectory, Constants.KMeansExecutionBaseDirectory, runSuffix);

            // The first command line argument is used as the data file.
            string inputFile = arguments.Arguments.First();
            DataVector.ShuffleDataAndGetInitialCentriods(inputFile, numPartitions, _clustersNumber, _executionDirectory);

            // One evaluator more than there are partitions
            // (presumably the extra one hosts the master task - confirm against the task submission code).
            _totalEvaluators = 1 + numPartitions;

            // --- Centroids: codec + pipeline data converter ---
            _centroidCodecConf = CodecToStreamingCodecConfiguration<Centroids>.Conf
                .Set(CodecToStreamingCodecConfiguration<Centroids>.Codec, GenericType<CentroidsCodec>.Class)
                .Build();

            IConfiguration centroidsConverterConf = PipelineDataConverterConfiguration<Centroids>.Conf
                .Set(PipelineDataConverterConfiguration<Centroids>.DataConverter, GenericType<DefaultPipelineDataConverter<Centroids>>.Class)
                .Build();

            // --- ControlMessage: codec + pipeline data converter ---
            _controlMessageCodecConf = CodecToStreamingCodecConfiguration<ControlMessage>.Conf
                .Set(CodecToStreamingCodecConfiguration<ControlMessage>.Codec, GenericType<ControlMessageCodec>.Class)
                .Build();

            IConfiguration controlMessageConverterConf = PipelineDataConverterConfiguration<ControlMessage>.Conf
                .Set(PipelineDataConverterConfiguration<ControlMessage>.DataConverter, GenericType<DefaultPipelineDataConverter<ControlMessage>>.Class)
                .Build();

            // --- ProcessedResults: codec + reduce function + pipeline data converter ---
            _processedResultsCodecConf = CodecToStreamingCodecConfiguration<ProcessedResults>.Conf
                .Set(CodecToStreamingCodecConfiguration<ProcessedResults>.Codec, GenericType<ProcessedResultsCodec>.Class)
                .Build();

            IConfiguration meansReduceFunctionConf = ReduceFunctionConfiguration<ProcessedResults>.Conf
                .Set(ReduceFunctionConfiguration<ProcessedResults>.ReduceFunction, GenericType<KMeansMasterTask.AggregateMeans>.Class)
                .Build();

            IConfiguration processedResultsConverterConf = PipelineDataConverterConfiguration<ProcessedResults>.Conf
                .Set(PipelineDataConverterConfiguration<ProcessedResults>.DataConverter, GenericType<DefaultPipelineDataConverter<ProcessedResults>>.Class)
                .Build();

            // Register two broadcast operators and one reduce operator on the default
            // group. All three use Constants.MasterTaskId and a flat topology.
            var withCentroidsBroadcast = groupCommDriver.DefaultGroup.AddBroadcast<Centroids>(
                Constants.CentroidsBroadcastOperatorName,
                Constants.MasterTaskId,
                TopologyTypes.Flat,
                centroidsConverterConf);

            var withControlBroadcast = withCentroidsBroadcast.AddBroadcast<ControlMessage>(
                Constants.ControlMessageBroadcastOperatorName,
                Constants.MasterTaskId,
                TopologyTypes.Flat,
                controlMessageConverterConf);

            var withMeansReduce = withControlBroadcast.AddReduce<ProcessedResults>(
                Constants.MeansReduceOperatorName,
                Constants.MasterTaskId,
                TopologyTypes.Flat,
                meansReduceFunctionConf,
                processedResultsConverterConf);

            _commGroup = withMeansReduce.Build();

            // The task starter is created for the total evaluator count computed above.
            _groupCommTaskStarter = new TaskStarter(groupCommDriver, _totalEvaluators);
        }
Beispiel #2
0
        public KMeansDriverHandlers([Parameter(typeof(NumPartitions))] int numPartitions, GroupCommDriver groupCommDriver)
        {
            // Sets up the K-Means driver: locates the data file named on the command line,
            // prepares the shuffled input data in a per-run directory, builds the codec
            // configurations, registers the group communication operators and finally
            // calls CreateClassHierarchy().
            Identifier = "KMeansDriverId";
            _groupCommDriver = groupCommDriver;

            // Each run gets its own directory below the current working directory; the
            // suffix is the first four hex characters of a freshly generated GUID.
            string workingDirectory = Directory.GetCurrentDirectory();
            string runSuffix = Guid.NewGuid().ToString("N").Substring(0, 4);
            _executionDirectory = Path.Combine(workingDirectory, Constants.KMeansExecutionBaseDirectory, runSuffix);

            // Exactly one command line argument must start with "DataFile"; the file
            // name is the second ':'-separated token of that argument.
            ISet<string> commandLine = ClrHandlerHelper.GetCommandLineArguments();
            string dataFileArgument = commandLine.Single(arg => arg.StartsWith("DataFile", StringComparison.Ordinal));
            string[] dataFileTokens = dataFileArgument.Split(':');
            string dataFileName = dataFileTokens[1];

            // The data file is looked up under <current directory>/reef/global.
            string dataFilePath = Path.Combine(Directory.GetCurrentDirectory(), "reef", "global", dataFileName);
            DataVector.ShuffleDataAndGetInitialCentriods(dataFilePath, numPartitions, _clustersNumber, _executionDirectory);

            // One evaluator more than there are partitions
            // (presumably the extra one hosts the master task - confirm against the task submission code).
            _totalEvaluators = 1 + numPartitions;

            // NOTE(review): the three codec configurations below are built on
            // CodecToStreamingCodecConfiguration<T>.Conf but set the parameter
            // CodecConfiguration<T>.Codec, which is declared on a different type.
            // Confirm that this combination is intended.

            // --- Centroids: codec + pipeline data converter ---
            _centroidCodecConf = CodecToStreamingCodecConfiguration<Centroids>.Conf
                .Set(CodecConfiguration<Centroids>.Codec, GenericType<CentroidsCodec>.Class)
                .Build();

            IConfiguration centroidsConverterConf = PipelineDataConverterConfiguration<Centroids>.Conf
                .Set(PipelineDataConverterConfiguration<Centroids>.DataConverter, GenericType<DefaultPipelineDataConverter<Centroids>>.Class)
                .Build();

            // --- ControlMessage: codec + pipeline data converter ---
            _controlMessageCodecConf = CodecToStreamingCodecConfiguration<ControlMessage>.Conf
                .Set(CodecConfiguration<ControlMessage>.Codec, GenericType<ControlMessageCodec>.Class)
                .Build();

            IConfiguration controlMessageConverterConf = PipelineDataConverterConfiguration<ControlMessage>.Conf
                .Set(PipelineDataConverterConfiguration<ControlMessage>.DataConverter, GenericType<DefaultPipelineDataConverter<ControlMessage>>.Class)
                .Build();

            // --- ProcessedResults: codec + reduce function + pipeline data converter ---
            _processedResultsCodecConf = CodecToStreamingCodecConfiguration<ProcessedResults>.Conf
                .Set(CodecConfiguration<ProcessedResults>.Codec, GenericType<ProcessedResultsCodec>.Class)
                .Build();

            IConfiguration meansReduceFunctionConf = ReduceFunctionConfiguration<ProcessedResults>.Conf
                .Set(ReduceFunctionConfiguration<ProcessedResults>.ReduceFunction, GenericType<KMeansMasterTask.AggregateMeans>.Class)
                .Build();

            IConfiguration processedResultsConverterConf = PipelineDataConverterConfiguration<ProcessedResults>.Conf
                .Set(PipelineDataConverterConfiguration<ProcessedResults>.DataConverter, GenericType<DefaultPipelineDataConverter<ProcessedResults>>.Class)
                .Build();

            // Register two broadcast operators and one reduce operator on the default
            // group. All three use Constants.MasterTaskId and a flat topology.
            var withCentroidsBroadcast = groupCommDriver.DefaultGroup.AddBroadcast<Centroids>(
                Constants.CentroidsBroadcastOperatorName,
                Constants.MasterTaskId,
                TopologyTypes.Flat,
                centroidsConverterConf);

            var withControlBroadcast = withCentroidsBroadcast.AddBroadcast<ControlMessage>(
                Constants.ControlMessageBroadcastOperatorName,
                Constants.MasterTaskId,
                TopologyTypes.Flat,
                controlMessageConverterConf);

            var withMeansReduce = withControlBroadcast.AddReduce<ProcessedResults>(
                Constants.MeansReduceOperatorName,
                Constants.MasterTaskId,
                TopologyTypes.Flat,
                meansReduceFunctionConf,
                processedResultsConverterConf);

            _commGroup = withMeansReduce.Build();

            // The task starter is created for the total evaluator count computed above.
            _groupCommTaskStarter = new TaskStarter(groupCommDriver, _totalEvaluators);

            CreateClassHierarchy();
        }