/// <summary>
/// Merges the per-input split datasets into one dataset, applies every transform configured
/// on the task, and then runs <c>ApplyProducer</c> through <c>Task.Run</c>.
/// </summary>
/// <remarks>
/// The merger and the transformers are generic methods looked up by name and closed over the
/// types parsed from the task's serialized type descriptors. Every non-null transform result is
/// stored in <c>_cachedDatasets</c> under the id of the transform's primary input, and the
/// <c>_output*</c> fields are overwritten at each step, so they end up describing the last step.
/// The method is <c>async void</c>, so callers cannot await it.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// A transform's primary input id is "$" while its dataset is null, or the transform operation
/// is not one of UnaryTransform, BinaryTransform or MoveSplit.
/// </exception>
private async void StartMergeAndTransform()
{
    var shuffleTask = (ShuffleTask)_task;

    // Entry 1 of the shuffle transform types supplies the merger's type arguments.
    var mergeTypes = new BinaryOperatorTypes();
    mergeTypes.FromString(shuffleTask.ShuffleTransformsTypes[1]);

    MethodInfo method = typeof(MoveUtils).GetMethod("ApplyMerger");
    MethodInfo generic = method.MakeGenericMethod(
        mergeTypes.SecondaryKeyType,
        mergeTypes.SecondaryPayloadType,
        mergeTypes.SecondaryDatasetType,
        mergeTypes.OutputKeyType,
        mergeTypes.OutputPayloadType,
        mergeTypes.OutputDatasetType);

    // One slot per input; the slot number is parsed from the last character of the endpoint name.
    var splitDatasets = new object[_inputs.Length];
    for (int inputIndex = 0; inputIndex < _inputs.Length; inputIndex++)
    {
        string endpoint = _inputEndpointOperatorIndex[inputIndex];
        int slot = Convert.ToInt32(endpoint.Substring(endpoint.Length - 1));
        splitDatasets[slot] = CreateDatasetFromInput(
            endpoint,
            mergeTypes.SecondaryKeyType,
            mergeTypes.SecondaryPayloadType,
            mergeTypes.SecondaryDatasetType);
    }

    object[] arguments = new object[]
    {
        splitDatasets,
        shuffleTask.ShuffleDescriptor,
        shuffleTask.ShuffleTransforms[1]
    };
    _cachedDatasets[shuffleTask.OutputId] = generic.Invoke(this, arguments);

    _outputKeyType = mergeTypes.OutputKeyType;
    _outputPayloadType = mergeTypes.OutputPayloadType;
    _outputDatasetType = mergeTypes.OutputDatasetType;
    _outputId = shuffleTask.OutputId;

    if (_task.Transforms != null)
    {
        for (int step = 0; step < _task.Transforms.Length; step++)
        {
            object primary = null;
            string primaryId = null;
            object secondary = null;
            string secondaryId = null;
            TransformUtils.PrepareTransformInputs(
                _task.TransformsInputs[step],
                ref primary, ref primaryId,
                ref secondary, ref secondaryId,
                _cachedDatasets);

            string operation = _task.TransformsOperations[step];
            MethodInfo transformer;
            object[] transformerArguments;

            if (operation == OperatorType.UnaryTransform.ToString())
            {
                var unaryTypes = new UnaryOperatorTypes();
                unaryTypes.FromString(_task.TransformsTypes[step]);

                if (primaryId == "$" && primary == null)
                {
                    throw new InvalidOperationException();
                }

                method = typeof(TransformUtils).GetMethod("ApplyUnaryTransformer");
                transformer = method.MakeGenericMethod(
                    unaryTypes.InputKeyType,
                    unaryTypes.InputPayloadType,
                    unaryTypes.InputDatasetType,
                    unaryTypes.OutputKeyType,
                    unaryTypes.OutputPayloadType,
                    unaryTypes.OutputDatasetType);
                transformerArguments = new object[] { primary, _task.Transforms[step] };

                _outputKeyType = unaryTypes.OutputKeyType;
                _outputPayloadType = unaryTypes.OutputPayloadType;
                _outputDatasetType = unaryTypes.OutputDatasetType;
            }
            else if (operation == OperatorType.BinaryTransform.ToString())
            {
                var binaryTypes = new BinaryOperatorTypes();
                binaryTypes.FromString(_task.TransformsTypes[step]);

                if (primaryId == "$" && primary == null)
                {
                    throw new InvalidOperationException();
                }

                if (secondaryId == "$" && secondary == null)
                {
                    // The second operand was not resolved from the cache: build it from the
                    // secondary input named by the transform and cache it under that id.
                    secondaryId = _task.TransformsInputs[step].InputId2;
                    secondary = CreateDatasetFromSecondaryInput(
                        secondaryId,
                        binaryTypes.SecondaryKeyType,
                        binaryTypes.SecondaryPayloadType,
                        binaryTypes.SecondaryDatasetType);
                    _cachedDatasets[secondaryId] = secondary;
                }

                method = typeof(TransformUtils).GetMethod("ApplyBinaryTransformer");
                transformer = method.MakeGenericMethod(
                    binaryTypes.InputKeyType,
                    binaryTypes.InputPayloadType,
                    binaryTypes.InputDatasetType,
                    binaryTypes.SecondaryKeyType,
                    binaryTypes.SecondaryPayloadType,
                    binaryTypes.SecondaryDatasetType,
                    binaryTypes.OutputKeyType,
                    binaryTypes.OutputPayloadType,
                    binaryTypes.OutputDatasetType);
                transformerArguments = new object[] { primary, secondary, _task.Transforms[step] };

                _outputKeyType = binaryTypes.OutputKeyType;
                _outputPayloadType = binaryTypes.OutputPayloadType;
                _outputDatasetType = binaryTypes.OutputDatasetType;
            }
            else if (operation == OperatorType.MoveSplit.ToString())
            {
                var splitTypes = new BinaryOperatorTypes();
                splitTypes.FromString(_task.TransformsTypes[step]);

                if (primaryId == "$" && primary == null)
                {
                    throw new InvalidOperationException();
                }

                method = typeof(MoveUtils).GetMethod("ApplySplitter");
                transformer = method.MakeGenericMethod(
                    splitTypes.InputKeyType,
                    splitTypes.InputPayloadType,
                    splitTypes.InputDatasetType,
                    splitTypes.SecondaryKeyType,
                    splitTypes.SecondaryPayloadType,
                    splitTypes.SecondaryDatasetType);
                transformerArguments = new object[]
                {
                    primary,
                    _task.SecondaryShuffleDescriptor,
                    _task.Transforms[step]
                };

                // A split reports the "secondary" types of its descriptor as its output types.
                _outputKeyType = splitTypes.SecondaryKeyType;
                _outputPayloadType = splitTypes.SecondaryPayloadType;
                _outputDatasetType = splitTypes.SecondaryDatasetType;
            }
            else
            {
                throw new InvalidOperationException("Error: Unsupported transformation type");
            }

            object produced = transformer.Invoke(this, transformerArguments);
            if (produced != null)
            {
                // The result replaces (or creates) the cache entry of the primary input.
                _cachedDatasets[primaryId] = produced;
            }

            _outputId = primaryId;
        }
    }

    await Task.Run(() => ApplyProducer());

    _isProduceAllowedToApply = false;
    _firstProduceTrigger.Set();
}
/// <summary>
/// Creates the dataset of one shard through a reflective call to
/// <c>CreateDatasetFromExpression</c> and then applies every transform configured on the task.
/// </summary>
/// <remarks>
/// All datasets are read from and written to the per-shard cache <c>_cachedDatasets[shardId]</c>.
/// Every non-null transform result is stored under the id of the transform's primary input, and
/// the <c>_output*</c> fields are overwritten at each step, so they end up describing the last step.
/// </remarks>
/// <param name="shardId">Index of the shard; used as the key into <c>_cachedDatasets</c> and
/// passed on to <c>CreateDatasetFromExpression</c>.</param>
/// <exception cref="InvalidOperationException">
/// A transform's primary input id is "$" while its dataset is null, or the transform operation
/// is not one of UnaryTransform, BinaryTransform or MoveSplit.
/// </exception>
public void CreateAndTransformDataset(int shardId)
{
    var produceTask = (ProduceTask)_task;

    MethodInfo method = typeof(ShardedProducerOperator).GetMethod("CreateDatasetFromExpression");
    MethodInfo generic = method.MakeGenericMethod(
        produceTask.OperationTypes.OutputKeyType,
        produceTask.OperationTypes.OutputPayloadType,
        produceTask.OperationTypes.OutputDatasetType);

    object[] arguments = new object[] { shardId, produceTask.DataProducer };
    _cachedDatasets[shardId][produceTask.OutputId] = generic.Invoke(this, arguments);

    _outputKeyType = produceTask.OperationTypes.OutputKeyType;
    _outputPayloadType = produceTask.OperationTypes.OutputPayloadType;
    _outputDatasetType = produceTask.OperationTypes.OutputDatasetType;
    _outputId = produceTask.OutputId;

    if (_task.Transforms == null)
    {
        return;
    }

    for (int step = 0; step < _task.Transforms.Length; step++)
    {
        object primary = null;
        string primaryId = null;
        object secondary = null;
        string secondaryId = null;
        TransformUtils.PrepareTransformInputs(
            _task.TransformsInputs[step],
            ref primary, ref primaryId,
            ref secondary, ref secondaryId,
            _cachedDatasets[shardId]);

        string operation = _task.TransformsOperations[step];
        MethodInfo transformer;
        object[] transformerArguments;

        if (operation == OperatorType.UnaryTransform.ToString())
        {
            var unaryTypes = new UnaryOperatorTypes();
            unaryTypes.FromString(_task.TransformsTypes[step]);

            if (primaryId == "$" && primary == null)
            {
                throw new InvalidOperationException();
            }

            method = typeof(TransformUtils).GetMethod("ApplyUnaryTransformer");
            transformer = method.MakeGenericMethod(
                unaryTypes.InputKeyType,
                unaryTypes.InputPayloadType,
                unaryTypes.InputDatasetType,
                unaryTypes.OutputKeyType,
                unaryTypes.OutputPayloadType,
                unaryTypes.OutputDatasetType);
            transformerArguments = new object[] { primary, _task.Transforms[step] };

            _outputKeyType = unaryTypes.OutputKeyType;
            _outputPayloadType = unaryTypes.OutputPayloadType;
            _outputDatasetType = unaryTypes.OutputDatasetType;
        }
        else if (operation == OperatorType.BinaryTransform.ToString())
        {
            var binaryTypes = new BinaryOperatorTypes();
            binaryTypes.FromString(_task.TransformsTypes[step]);

            if (primaryId == "$" && primary == null)
            {
                throw new InvalidOperationException();
            }

            if (secondaryId == "$" && secondary == null)
            {
                // Publish the type descriptor, signal the "start" handle, block on the "finish"
                // handle, and only then read the secondary dataset from the shard cache.
                // NOTE(review): presumably another thread fills the cache entry in between - confirm.
                secondaryId = _task.TransformsInputs[step].InputId2;
                _binaryOperatorTypes[secondaryId] = binaryTypes;
                _startCreatingSecondaryDatasets[secondaryId].Signal();
                _finishCreatingSecondaryDatasets[secondaryId].Wait();
                secondary = _cachedDatasets[shardId][secondaryId];
            }

            method = typeof(TransformUtils).GetMethod("ApplyBinaryTransformer");
            transformer = method.MakeGenericMethod(
                binaryTypes.InputKeyType,
                binaryTypes.InputPayloadType,
                binaryTypes.InputDatasetType,
                binaryTypes.SecondaryKeyType,
                binaryTypes.SecondaryPayloadType,
                binaryTypes.SecondaryDatasetType,
                binaryTypes.OutputKeyType,
                binaryTypes.OutputPayloadType,
                binaryTypes.OutputDatasetType);
            transformerArguments = new object[] { primary, secondary, _task.Transforms[step] };

            _outputKeyType = binaryTypes.OutputKeyType;
            _outputPayloadType = binaryTypes.OutputPayloadType;
            _outputDatasetType = binaryTypes.OutputDatasetType;
        }
        else if (operation == OperatorType.MoveSplit.ToString())
        {
            var splitTypes = new BinaryOperatorTypes();
            splitTypes.FromString(_task.TransformsTypes[step]);

            if (primaryId == "$" && primary == null)
            {
                throw new InvalidOperationException();
            }

            method = typeof(MoveUtils).GetMethod("ApplySplitter");
            transformer = method.MakeGenericMethod(
                splitTypes.InputKeyType,
                splitTypes.InputPayloadType,
                splitTypes.InputDatasetType,
                splitTypes.SecondaryKeyType,
                splitTypes.SecondaryPayloadType,
                splitTypes.SecondaryDatasetType);
            transformerArguments = new object[]
            {
                primary,
                _task.SecondaryShuffleDescriptor,
                _task.Transforms[step]
            };

            // A split reports the "secondary" types of its descriptor as its output types.
            _outputKeyType = splitTypes.SecondaryKeyType;
            _outputPayloadType = splitTypes.SecondaryPayloadType;
            _outputDatasetType = splitTypes.SecondaryDatasetType;
        }
        else
        {
            throw new InvalidOperationException("Error: Unsupported transformation type");
        }

        object produced = transformer.Invoke(this, transformerArguments);
        if (produced != null)
        {
            // The result replaces (or creates) the shard-cache entry of the primary input.
            _cachedDatasets[shardId][primaryId] = produced;
        }

        _outputId = primaryId;
    }
}
/// <summary>
/// Deploys this move as a shuffle stage: builds a <c>ShuffleTask</c>, deploys the first input
/// into it, registers the splitter and merger as its shuffle transforms, wires the mapper
/// vertex into the topology, and points <paramref name="task"/> at the shuffle output.
/// </summary>
/// <param name="task">The downstream task. Its first input id and its input key/payload/dataset
/// types are overwritten, and its <c>NextInputIds</c> is replaced with a fresh instance whose
/// left or right id (depending on <c>task.IsRightOperandInput</c>) is the shuffle output id.</param>
/// <param name="topology">The topology that receives the shuffle operator and its edges.</param>
/// <exception cref="InvalidOperationException">
/// <c>_input1</c> is null or does not implement <c>IDeployable</c>, or the task handed back by
/// the input's <c>Deploy</c> call is no longer a <c>ShuffleTask</c>.
/// </exception>
private void DeployMove(ref TaskBase task, ref OperatorsToplogy topology)
{
    var isRightOperandInput = task.IsRightOperandInput;

    TaskBase shuffleTask = new ShuffleTask(_moveDescriptor);
    shuffleTask.OperationTypes = TransformUtils.FillBinaryTransformTypes(
        typeof(TKeyI1), typeof(TPayloadI1), typeof(TDataSetI1),
        typeof(TKeyI2), typeof(TPayloadI2), typeof(TDataSetI2),
        typeof(TKeyO), typeof(TPayloadO), typeof(TDataSetO));
    shuffleTask.IsRightOperandInput = false;

    // Fail with a descriptive error instead of a NullReferenceException on the call below.
    var deployableInput = _input1 as IDeployable;
    if (deployableInput == null)
    {
        throw new InvalidOperationException("Error: The move input is null or does not implement IDeployable");
    }

    OperatorTransforms shuffleInputTransforms = new OperatorTransforms();
    deployableInput.Deploy(ref shuffleTask, ref topology, ref shuffleInputTransforms);
    shuffleTask.PrepareTaskTransformations(shuffleInputTransforms);

    // The task is passed by ref above, so it is re-checked once here rather than being
    // re-cast (unchecked) at every use.
    var shuffle = shuffleTask as ShuffleTask;
    if (shuffle == null)
    {
        throw new InvalidOperationException("Error: The deployed shuffle task is not a ShuffleTask");
    }

    shuffle.MapperVertexName = "shufflemapper" + Guid.NewGuid().ToString();
    shuffle.ReducerVertexName = typeof(ShuffleOperator).Name.ToLower() + Guid.NewGuid().ToString();
    shuffleTask.InputIds.SetInputId1(shuffleTask.NextInputIds.InputId1);
    shuffleTask.InputIds.SetInputId2(shuffleTask.NextInputIds.InputId2);
    shuffleTask.OutputId = shuffle.ReducerVertexName;

    // The splitter and the merger are registered with the same serialized type descriptor,
    // so it is built once from a fresh descriptor object.
    string moveTypes = TransformUtils.FillBinaryTransformTypes(
        typeof(TKeyI1), typeof(TPayloadI1), typeof(TDataSetI1),
        typeof(TKeyI2), typeof(TPayloadI2), typeof(TDataSetI2),
        typeof(TKeyO), typeof(TPayloadO), typeof(TDataSetO)).ToString();

    OperatorTransforms shuffleTransforms = new OperatorTransforms();
    shuffleTransforms.AddTransform(
        SerializationHelper.Serialize(_splitter),
        OperatorType.MoveSplit.ToString(),
        moveTypes,
        shuffleTask.InputIds);
    shuffleTransforms.AddTransform(
        SerializationHelper.Serialize(_merger),
        OperatorType.MoveMerge.ToString(),
        moveTypes,
        shuffleTask.InputIds);
    shuffle.PrepareShuffleTransformations(shuffleTransforms);

    // Register the shuffle and connect the mapper vertex to both of the task's inputs.
    topology.AddShuffleOperator(shuffle.MapperVertexName, shuffle.ReducerVertexName, shuffle);
    topology.AddOperatorInput(shuffle.MapperVertexName, shuffleTask.InputIds.InputId1);
    topology.AddOperatorSecondaryInput(shuffle.MapperVertexName, shuffleTask.InputIds.InputId2);
    topology.AddOperatorOutput(shuffleTask.InputIds.InputId1, shuffle.MapperVertexName);
    topology.AddOperatorOutput(shuffleTask.InputIds.InputId2, shuffle.MapperVertexName);

    if (shuffleTask.Transforms != null)
    {
        // Every input of the task's own transforms is wired to the mapper vertex as well.
        foreach (OperatorInputs inputs in shuffleTask.TransformsInputs)
        {
            topology.AddOperatorInput(shuffle.MapperVertexName, inputs.InputId1);
            topology.AddOperatorSecondaryInput(shuffle.MapperVertexName, inputs.InputId2);
            topology.AddOperatorOutput(inputs.InputId1, shuffle.MapperVertexName);
            topology.AddOperatorOutput(inputs.InputId2, shuffle.MapperVertexName);
        }
    }

    // Update the inputs and types for the next operation
    task.InputIds.SetInputId1(shuffleTask.OutputId);
    task.OperationTypes.SetInputKeyType(typeof(TKeyO));
    task.OperationTypes.SetInputPayloadType(typeof(TPayloadO));
    task.OperationTypes.SetInputDatasetType(typeof(TDataSetO));

    OperatorInputs temporaryInputs = new OperatorInputs();
    if (isRightOperandInput)
    {
        temporaryInputs.InputId2 = shuffleTask.OutputId;
    }
    else
    {
        temporaryInputs.InputId1 = shuffleTask.OutputId;
    }
    task.NextInputIds = temporaryInputs;
}
/// <summary>
/// Deploys this sharded dataset: builds a subscribe task and a client terminal task, registers
/// both in the shared <c>OperatorsToplogy</c> instance, deploys the operators, and then starts a
/// local CRA worker that hosts the client terminal vertex.
/// </summary>
/// <remarks>
/// When <c>_isDeployed</c> is already true the method returns immediately. The result of
/// <c>DeploymentUtils.DeployClientTerminal</c> is stored in <c>_isDeployed</c>, but this instance
/// is returned regardless of that result.
/// NOTE(review): confirm that returning the instance after a failed terminal deployment is intended.
/// </remarks>
/// <returns>
/// This instance, or <c>null</c> when <c>DeploymentUtils.DeployOperators</c> reports failure.
/// </returns>
public override async Task<IShardedDataset<TKey, TPayload, TDataset>> Deploy()
{
    if (_isDeployed)
    {
        return this;
    }

    OperatorsToplogy topology = OperatorsToplogy.GetInstance();

    TaskBase subscribeTask = new SubscribeTask();
    subscribeTask.OperationTypes = TransformUtils.FillBinaryTransformTypes(
        typeof(TKey), typeof(TPayload), typeof(TDataset),
        typeof(TKey), typeof(TPayload), typeof(TDataset),
        typeof(TKey), typeof(TPayload), typeof(TDataset));
    subscribeTask.IsRightOperandInput = false;

    OperatorTransforms subscribeInputTransforms = new OperatorTransforms();
    Deploy(ref subscribeTask, ref topology, ref subscribeInputTransforms);
    subscribeTask.InputIds.SetInputId1(subscribeTask.NextInputIds.InputId1);
    subscribeTask.InputIds.SetInputId2(subscribeTask.NextInputIds.InputId2);
    subscribeTask.OutputId = typeof(ShardedSubscribeOperator).Name.ToLower() + Guid.NewGuid().ToString();
    subscribeTask.PrepareTaskTransformations(subscribeInputTransforms);

    topology.AddOperatorBase(subscribeTask.OutputId, subscribeTask);
    topology.AddOperatorInput(subscribeTask.OutputId, subscribeTask.InputIds.InputId1);
    topology.AddOperatorSecondaryInput(subscribeTask.OutputId, subscribeTask.InputIds.InputId2);
    topology.AddOperatorOutput(subscribeTask.InputIds.InputId1, subscribeTask.OutputId);
    topology.AddOperatorOutput(subscribeTask.InputIds.InputId2, subscribeTask.OutputId);

    if (subscribeTask.Transforms != null)
    {
        // All secondary inputs are registered first, because the second pass skips any
        // primary input that is already known as a secondary input of the subscribe vertex.
        foreach (OperatorInputs inputs in subscribeTask.TransformsInputs)
        {
            topology.AddOperatorSecondaryInput(subscribeTask.OutputId, inputs.InputId2);
            topology.AddOperatorOutput(inputs.InputId2, subscribeTask.OutputId);
        }

        foreach (OperatorInputs inputs in subscribeTask.TransformsInputs)
        {
            if (!topology.ContainsSecondaryOperatorInput(subscribeTask.OutputId, inputs.InputId1))
            {
                topology.AddOperatorInput(subscribeTask.OutputId, inputs.InputId1);
                topology.AddOperatorOutput(inputs.InputId1, subscribeTask.OutputId);
            }
        }
    }

    // The client terminal consumes the output of the subscribe vertex.
    _clientTerminalTask = new ClientTerminalTask();
    _clientTerminalTask.InputIds.SetInputId1(subscribeTask.OutputId);
    _clientTerminalTask.OutputId = typeof(ShardedSubscribeClientOperator).Name.ToLower() + Guid.NewGuid().ToString();
    _clientTerminalTask.OperationTypes = TransformUtils.FillBinaryTransformTypes(
        typeof(TKey), typeof(TPayload), typeof(TDataset),
        typeof(TKey), typeof(TPayload), typeof(TDataset),
        typeof(TKey), typeof(TPayload), typeof(TDataset));

    topology.AddOperatorBase(_clientTerminalTask.OutputId, _clientTerminalTask);
    topology.AddOperatorInput(_clientTerminalTask.OutputId, _clientTerminalTask.InputIds.InputId1);
    topology.AddOperatorInput(_clientTerminalTask.OutputId, _clientTerminalTask.InputIds.InputId2);
    topology.AddOperatorOutput(_clientTerminalTask.InputIds.InputId1, _clientTerminalTask.OutputId);
    topology.AddOperatorOutput(_clientTerminalTask.InputIds.InputId2, _clientTerminalTask.OutputId);

    _isDeployed = await DeploymentUtils.DeployOperators(_craClient, topology);
    if (!_isDeployed)
    {
        return null;
    }

    string craWorkerName = typeof(ShardedSubscribeClientOperator).Name.ToLower() + "worker" + Guid.NewGuid().ToString();
    _craWorker = new CRAWorker(craWorkerName, "127.0.0.1", NetworkUtils.GetAvailablePort(), _craClient.DataProvider, null, 1000);
    _craWorker.DisableDynamicLoading();
    _craWorker.SideloadVertex(new ShardedSubscribeClientOperator(), typeof(ShardedSubscribeClientOperator).Name.ToLower());
    new Thread(() => _craWorker.Start()).Start();

    // Fixed one-second pause after starting the worker thread (presumably to let the worker
    // come up before the terminal is deployed). It is awaited so that the calling thread is
    // released instead of being blocked inside an async method.
    await Task.Delay(1000);

    _isDeployed = await DeploymentUtils.DeployClientTerminal(_craClient, craWorkerName, _clientTerminalTask, topology);
    return this;
}
/// <summary>
/// Deploys this dataset: builds a subscribe task and a client terminal task, registers both in
/// the shared <c>OperatorsToplogy</c> instance, deploys the operators, and then deploys the
/// client terminal through a newly created <c>CRAClientLibrary</c>.
/// </summary>
/// <remarks>
/// When <c>_isDeployed</c> is already true the method returns immediately. The result of
/// <c>DeploymentUtils.DeployClientTerminal</c> is stored in <c>_isDeployed</c>, but this instance
/// is returned regardless of that result.
/// </remarks>
/// <returns>
/// This instance, or <c>null</c> when <c>DeploymentUtils.DeployOperators</c> reports failure.
/// </returns>
public override IShardedDataset<TKeyO, TPayloadO, TDataSetO> Deploy()
{
    if (_isDeployed)
    {
        return this;
    }

    OperatorsToplogy topology = OperatorsToplogy.GetInstance();

    TaskBase subscribeTask = new SubscribeTask();
    subscribeTask.OperationTypes = TransformUtils.FillBinaryTransformTypes(
        typeof(TKeyO), typeof(TPayloadO), typeof(TDataSetO),
        typeof(TKeyO), typeof(TPayloadO), typeof(TDataSetO),
        typeof(TKeyO), typeof(TPayloadO), typeof(TDataSetO));
    subscribeTask.IsRightOperandInput = false;

    OperatorTransforms subscribeInputTransforms = new OperatorTransforms();
    Deploy(ref subscribeTask, ref topology, ref subscribeInputTransforms);
    subscribeTask.InputIds.SetInputId1(subscribeTask.NextInputIds.InputId1);
    subscribeTask.InputIds.SetInputId2(subscribeTask.NextInputIds.InputId2);
    subscribeTask.OutputId = typeof(SubscribeOperator).Name.ToLower() + Guid.NewGuid().ToString();
    subscribeTask.PrepareTaskTransformations(subscribeInputTransforms);

    topology.AddOperatorBase(subscribeTask.OutputId, subscribeTask);
    topology.AddOperatorInput(subscribeTask.OutputId, subscribeTask.InputIds.InputId1);
    topology.AddOperatorSecondaryInput(subscribeTask.OutputId, subscribeTask.InputIds.InputId2);
    topology.AddOperatorOutput(subscribeTask.InputIds.InputId1, subscribeTask.OutputId);
    topology.AddOperatorOutput(subscribeTask.InputIds.InputId2, subscribeTask.OutputId);

    if (subscribeTask.Transforms != null)
    {
        // Every input of the task's transforms is wired to the subscribe vertex as well.
        foreach (OperatorInputs inputs in subscribeTask.TransformsInputs)
        {
            topology.AddOperatorInput(subscribeTask.OutputId, inputs.InputId1);
            topology.AddOperatorSecondaryInput(subscribeTask.OutputId, inputs.InputId2);
            topology.AddOperatorOutput(inputs.InputId1, subscribeTask.OutputId);
            topology.AddOperatorOutput(inputs.InputId2, subscribeTask.OutputId);
        }
    }

    // The client terminal consumes the output of the subscribe vertex and is registered
    // as a vertex on a new CRA client under its own output id.
    var terminalTask = new ClientTerminalTask();
    terminalTask.InputIds.SetInputId1(subscribeTask.OutputId);
    terminalTask.OutputId = typeof(DetachedVertex).Name.ToLower() + Guid.NewGuid().ToString();

    _craClient = new CRAClientLibrary();
    _clientTerminal = _craClient.RegisterAsVertex(terminalTask.OutputId);

    topology.AddOperatorBase(terminalTask.OutputId, terminalTask);
    topology.AddOperatorInput(terminalTask.OutputId, terminalTask.InputIds.InputId1);
    topology.AddOperatorInput(terminalTask.OutputId, terminalTask.InputIds.InputId2);
    topology.AddOperatorOutput(terminalTask.InputIds.InputId1, terminalTask.OutputId);
    topology.AddOperatorOutput(terminalTask.InputIds.InputId2, terminalTask.OutputId);

    _isDeployed = DeploymentUtils.DeployOperators(_craClient, topology);
    if (!_isDeployed)
    {
        return null;
    }

    _isDeployed = DeploymentUtils.DeployClientTerminal(_craClient, terminalTask, ref _clientTerminal, topology);
    return this;
}