/// <summary>
/// Creates a new stage fed by this collection's <c>Output</c> stream and wraps the stream
/// returned by <c>Foundry.NewStage</c> in a new <c>ShardedCollection</c>.
/// </summary>
/// <typeparam name="R2">Record type of the resulting collection.</typeparam>
/// <param name="factory">Vertex factory handed to <c>Foundry.NewStage</c>.</param>
/// <param name="inputPartitionedBy">Input partitioning expression handed to <c>Foundry.NewStage</c>.</param>
/// <param name="outputPartitionedBy">Output partitioning expression handed to <c>Foundry.NewStage</c>.</param>
/// <param name="name">Name handed to <c>Foundry.NewStage</c>.</param>
/// <returns>A collection over the new stage's output, constructed with this collection's <c>Immutable</c> value.</returns>
private ShardedCollection<R2, T> Manufacture<R2>(
    Func<int, Stage<T>, UnaryVertex<Weighted<R>, Weighted<R2>, T>> factory,
    Expression<Func<Weighted<R>, int>> inputPartitionedBy,
    Expression<Func<Weighted<R2>, int>> outputPartitionedBy,
    string name)
    where R2 : IEquatable<R2>
{
    return new ShardedCollection<R2, T>(
        Foundry.NewStage(this.Output, factory, inputPartitionedBy, outputPartitionedBy, name),
        this.Immutable);
}
/// <summary>
/// Builds the input stage named <paramref name="inputName"/>, materializes it, registers the
/// vertices placed on this process with <paramref name="source"/>, and broadcasts an initial
/// progress update for the stage.
/// </summary>
/// <param name="source">Data source that receives the locally placed input vertices.</param>
/// <param name="placement">Locations of the stage's vertices; its <c>Count</c> is sent with the initial progress update.</param>
/// <param name="internalComputation">Computation supplying the root context, the local process id and the progress tracker.</param>
/// <param name="inputName">Name used for the stage.</param>
internal StreamingInputStage(DataSource<R> source, Placement placement, InternalComputation internalComputation, string inputName)
{
    this.inputName = inputName;

    var rootTime = new TimeContext<Epoch>(internalComputation.ContextManager.RootContext);
    this.stage = Foundry.NewStage(
        rootTime,
        (index, owner) => new StreamingInputVertex<R>(index, owner),
        this.inputName);

    this.output = this.stage.NewOutput(vertex => vertex.output);

    this.stage.Materialize();

    // Keep only the vertices whose placement entry names this process.
    this.localVertices = placement
        .Where(location => location.ProcessId == internalComputation.Controller.Configuration.ProcessID)
        .Select(location => this.stage.GetVertex(location.VertexId) as StreamingInputVertex<R>)
        .ToArray();

    source.RegisterInputs(this.localVertices);

    this.completedCalled = false;
    this.hasActivatedProgressTracker = false;

    // Without this reachability update, pointstamp comparisons are performed which assert.
    this.InternalComputation.Reachability.UpdateReachabilityPartialOrder(internalComputation);
    this.InternalComputation.Reachability.DoNotImpersonate(this.stage.StageId);

    var initialPointstamp = new Runtime.Progress.Pointstamp(this.stage.StageId, new int[] { 0 });
    internalComputation.ProgressTracker.BroadcastProgressUpdate(initialPointstamp, placement.Count);
}
/// <summary>
/// Creates a "Consumer" stage with one vertex per (process, worker) pair, for processes
/// <paramref name="startProcess"/> (inclusive) up to <paramref name="endProcess"/> (exclusive),
/// and attaches <paramref name="stream"/> as its input.
/// </summary>
/// <param name="numberToConsume">Value passed to each <c>ConsumerVertex</c> constructor.</param>
/// <param name="startProcess">First process id to place vertices on.</param>
/// <param name="endProcess">Process id at which placement stops (not included).</param>
/// <param name="numberOfWorkers">Number of vertices placed on each process, one per worker index.</param>
/// <param name="exchange">When true the input is keyed by <c>x.Second</c>; otherwise by <c>x.First</c>.</param>
/// <param name="stream">Stream connected to the stage's input.</param>
/// <returns>The newly created stage.</returns>
public static Stage<ConsumerVertex, Epoch> MakeStage(
    int numberToConsume,
    int startProcess,
    int endProcess,
    int numberOfWorkers,
    bool exchange,
    Stream<Pair<int, int>, Epoch> stream)
{
    var locations = new List<VertexLocation>();
    int processCount = endProcess - startProcess;
    int nextVertexId = 0;

    // Vertex ids are handed out sequentially in (process, worker) order.
    for (int offset = 0; offset < processCount; offset++)
    {
        for (int worker = 0; worker < numberOfWorkers; worker++)
        {
            locations.Add(new VertexLocation(nextVertexId, offset + startProcess, worker));
            nextVertexId++;
        }
    }

    Placement placement = new Placement.Explicit(locations);

    Stage<ConsumerVertex, Epoch> consumers = Foundry.NewStage(
        placement,
        stream.Context,
        (index, owner) => new ConsumerVertex(index, owner, numberToConsume),
        "Consumer");

    if (exchange)
    {
        consumers.NewInput(stream, (message, vertex) => vertex.OnRecv(message), x => x.Second);
    }
    else
    {
        consumers.NewInput(stream, (message, vertex) => vertex.OnRecv(message), x => x.First);
    }

    return consumers;
}
/// <summary>
/// Creates an "Integrator" stage of <c>IntegratorShard</c> vertices over <paramref name="source"/>
/// and returns the stage's output stream.
/// </summary>
/// <param name="source">Input stream; its context and <c>PartitionedBy</c> value are reused for the new stage.</param>
/// <returns>The output stream of the new stage, declared with the same <c>PartitionedBy</c> value as <paramref name="source"/>.</returns>
public static Stream<Weighted<R>, Epoch> NewStage(Stream<Weighted<R>, Epoch> source)
{
    var integrator = Foundry.NewStage(
        source.Context,
        (index, owner) => new IntegratorShard<R>(index, owner),
        "Integrator");

    // Input and output both reuse the source's partitioning value.
    integrator.NewInput(source, shard => shard.input, source.PartitionedBy);

    return integrator.NewOutput(shard => shard.output, source.PartitionedBy);
}
/// <summary>
/// Creates a "Consumer" stage with <paramref name="numberOfPartitions"/> vertices and attaches
/// <paramref name="stream"/> as its input.
/// </summary>
/// <param name="numberToConsume">Value passed to each <c>ConsumerVertex</c> constructor.</param>
/// <param name="numberOfPartitions">Number of vertex locations to create.</param>
/// <param name="stream">Stream connected to the stage's input.</param>
/// <returns>The newly created stage.</returns>
public static Stage<ConsumerVertex, Epoch> MakeStage(int numberToConsume, int numberOfPartitions, Stream<int, Epoch> stream)
{
    // Location k is built as VertexLocation(k, 1, k): the middle argument is fixed at 1.
    var locations = Enumerable
        .Range(0, numberOfPartitions)
        .Select(partition => new VertexLocation(partition, 1, partition));

    Placement placement = new Placement.Explicit(locations);

    Stage<ConsumerVertex, Epoch> consumers = Foundry.NewStage(
        placement,
        stream.Context,
        (index, owner) => new ConsumerVertex(index, owner, numberToConsume),
        "Consumer");

    consumers.NewInput(stream, (message, vertex) => vertex.OnRecv(message), x => x);

    return consumers;
}
/// <summary>
/// Returns a stream partitioned by <paramref name="partitionBy"/>, inserting a "PartitionBy"
/// stage only when one is needed.
/// </summary>
/// <typeparam name="R">Record type</typeparam>
/// <typeparam name="T">Time type</typeparam>
/// <param name="stream">Input stream</param>
/// <param name="partitionBy">Partitioning expression; when null the input stream is returned unchanged.</param>
/// <returns>
/// <paramref name="stream"/> itself when <paramref name="partitionBy"/> is null or compares equal
/// to the stream's <c>PartitionedBy</c> expression; otherwise the output of a new stage.
/// </returns>
public static Stream<R, T> PartitionBy<R, T>(this Stream<R, T> stream, Expression<Func<R, int>> partitionBy)
    where T : Time<T>
{
    bool needsRepartitioning =
        partitionBy != null
        && !Naiad.CodeGeneration.ExpressionComparer.Instance.Equals(stream.PartitionedBy, partitionBy);

    if (needsRepartitioning)
    {
        return Foundry.NewStage(
            stream,
            (index, owner) => new PartitionByShard<R, T>(index, owner, null),
            partitionBy,
            partitionBy,
            "PartitionBy");
    }

    // The data are already partitioned this way (or claim to be): hand the stream back as-is.
    return stream;
}
/// <summary>
/// Creates a "Producer" stage with <paramref name="numberOfPartitions"/> vertices, connects
/// <paramref name="input"/> to it, and returns the stage's output stream.
/// </summary>
/// <param name="numberToSend">Value passed to each <c>ProducerVertex</c> constructor.</param>
/// <param name="numberOfPartitions">Number of vertex locations to create.</param>
/// <param name="input">Stream connected to the stage; the handler registered for it does nothing.</param>
/// <returns>The stream exposed through each vertex's <c>output</c> member.</returns>
public static Stream<int, Epoch> MakeStage(int numberToSend, int numberOfPartitions, Stream<int, Epoch> input)
{
    // Location k is built as VertexLocation(k, 0, k): the middle argument is fixed at 0.
    var locations = Enumerable
        .Range(0, numberOfPartitions)
        .Select(partition => new VertexLocation(partition, 0, partition));

    Placement placement = new Placement.Explicit(locations);

    Stage<ProducerVertex, Epoch> producers = Foundry.NewStage(
        placement,
        input.Context,
        (index, owner) => new ProducerVertex(index, owner, numberToSend),
        "Producer");

    // The input is attached with an empty handler and a null third argument.
    producers.NewInput(input, (v, m) => { }, null);

    return producers.NewOutput(vertex => vertex.output);
}
/// <summary>
/// Creates a "Producer" stage with one vertex per (process, worker) pair, for processes
/// <paramref name="startProcess"/> (inclusive) up to <paramref name="endProcess"/> (exclusive),
/// connects <paramref name="input"/> to it, and returns the stage's output stream.
/// </summary>
/// <param name="numberToSend">Value passed to each <c>ProducerVertex</c> constructor.</param>
/// <param name="startProcess">First process id to place vertices on.</param>
/// <param name="endProcess">Process id at which placement stops (not included).</param>
/// <param name="numberOfWorkers">Number of vertices placed on each process, one per worker index.</param>
/// <param name="input">Stream connected to the stage; the handler registered for it does nothing.</param>
/// <returns>The stream exposed through each vertex's <c>output</c> member.</returns>
public static Stream<Pair<int, int>, Epoch> MakeStage(
    int numberToSend,
    int startProcess,
    int endProcess,
    int numberOfWorkers,
    Stream<Pair<int, int>, Epoch> input)
{
    var locations = new List<VertexLocation>();
    int processCount = endProcess - startProcess;
    int nextVertexId = 0;

    // Vertex ids are handed out sequentially in (process, worker) order.
    for (int offset = 0; offset < processCount; offset++)
    {
        for (int worker = 0; worker < numberOfWorkers; worker++)
        {
            locations.Add(new VertexLocation(nextVertexId, offset + startProcess, worker));
            nextVertexId++;
        }
    }

    Placement placement = new Placement.Explicit(locations);

    Stage<ProducerVertex, Epoch> producers = Foundry.NewStage(
        placement,
        input.Context,
        (index, owner) => new ProducerVertex(index, owner, numberToSend),
        "Producer");

    // The input is attached with an empty handler and a null third argument.
    producers.NewInput(input, (v, m) => { }, null);

    return producers.NewOutput(vertex => vertex.output);
}
/// <summary>
/// Routes <paramref name="stream"/> through a stage of <c>PartitionByShard</c> vertices that are
/// constructed with <paramref name="partitionBy"/>, and returns the stage's output stream.
/// </summary>
/// <typeparam name="R">Record type</typeparam>
/// <typeparam name="T">Time type</typeparam>
/// <param name="stream">Input stream</param>
/// <param name="partitionBy">Expression given to each vertex and used as the stage's output partitioning argument.</param>
/// <returns>The output stream of the new stage.</returns>
public static Stream<R, T> AssertPartitionedBy<R, T>(Stream<R, T> stream, Expression<Func<R, int>> partitionBy)
    where T : Time<T>
{
    // The input partitioning argument is null; only the output is declared with partitionBy.
    return Foundry.NewStage(
        stream,
        (index, owner) => new PartitionByShard<R, T>(index, owner, partitionBy),
        null,
        partitionBy,
        "PartitionBy");
}
/// <summary>
/// Writes the records of a stream to files by attaching a "Writer" stage to it.
/// </summary>
/// <typeparam name="S">Record type</typeparam>
/// <param name="input">Source of records</param>
/// <param name="format">Format string for the filename; {0} is replaced with the shard id</param>
/// <param name="action">
/// Operation applied to each record together with the output writer.
/// Often <c>(r, s) =&gt; s.Write(r)</c>.
/// </param>
public static void WriteToFiles<S>(this Stream<S, Epoch> input, string format, Action<S, System.IO.BinaryWriter> action)
{
    // The stage has no output to hand back, so the result of NewStage is not used.
    Foundry.NewStage(
        input,
        (index, owner) => new Writer<S>(index, owner, action, format),
        null,
        "Writer");
}
/// <summary>
/// Returns the elements of the first stream that are not in the second stream.
/// </summary>
/// <typeparam name="TRecord">Record type</typeparam>
/// <typeparam name="TTime">Time type</typeparam>
/// <param name="stream1">First input stream</param>
/// <param name="stream2">Second input stream</param>
/// <returns>The output stream of the "Except" stage built over both inputs.</returns>
public static Stream<TRecord, TTime> Except<TRecord, TTime>(this Stream<TRecord, TTime> stream1, Stream<TRecord, TTime> stream2)
    where TTime : Time<TTime>
{
    // All three partitioning arguments use the record's own hash code.
    return Foundry.NewStage(
        stream1,
        stream2,
        (index, owner) => new ExceptVertex<TRecord, TTime>(index, owner),
        x => x.GetHashCode(),
        x => x.GetHashCode(),
        x => x.GetHashCode(),
        "Except");
}
/// <summary>
/// Joins two input streams on a key.
/// </summary>
/// <typeparam name="TInput1">First input type</typeparam>
/// <typeparam name="TInput2">Second input type</typeparam>
/// <typeparam name="TKey">Key type</typeparam>
/// <typeparam name="TResult">Result type</typeparam>
/// <typeparam name="TTime">Time type</typeparam>
/// <param name="stream1">First input stream</param>
/// <param name="stream2">Second input stream</param>
/// <param name="key1">Key selector for records of the first stream</param>
/// <param name="key2">Key selector for records of the second stream</param>
/// <param name="reducer">Result selector applied to matching records</param>
/// <returns>Each pair of matching records, subjected to the reducer function</returns>
public static Stream<TResult, TTime> Join<TInput1, TInput2, TKey, TResult, TTime>(
    this Stream<TInput1, TTime> stream1,
    Stream<TInput2, TTime> stream2,
    Func<TInput1, TKey> key1,
    Func<TInput2, TKey> key2,
    Func<TInput1, TInput2, TResult> reducer)
    where TTime : Time<TTime>
{
    // Each input is keyed by the hash code of its selected key; the third
    // partitioning argument is null.
    return Foundry.NewStage(
        stream1,
        stream2,
        (index, owner) => new JoinVertex<TInput1, TInput2, TKey, TResult, TTime>(index, owner, key1, key2, reducer),
        x => key1(x).GetHashCode(),
        x => key2(x).GetHashCode(),
        null,
        "Join");
}