Beispiel #1
0
            /// <summary>
            /// Serializes a batch of records to the sequence writer associated with the batch's time. The
            /// first batch seen for a time creates that writer and requests a notification for the time.
            /// </summary>
            /// <param name="message">batch of records to serialize, together with the time it belongs to</param>
            public override void OnReceive(Message<TOutput, TTime> message)
            {
                // copy the time into a local: the filename generator below outlives this call, and capturing
                // 'message' itself would keep the whole message (including its payload reference) reachable
                // for as long as the writer holds the generator
                var time = message.time;

                WriterStreamSequence<TWriter> writer;

                if (!writers.TryGetValue(time, out writer))
                {
                    // make a filename generator for the specified process, worker and time
                    Func<int, Uri> format = segment =>
                        this.pathFunction(
                            this.Stage.Computation.Controller.Configuration.ProcessID,
                            this.workerId,
                            time,
                            segment);

                    // make a sequence writer for the specified process, worker and time
                    writer = new WriterStreamSequence<TWriter>(
                        format, u => streamFunction(this.client, u), writerFunction, fileLengthThreshold);
                    writers.Add(time, writer);

                    // ensure that we are called later to close the sequence writer when the time completes
                    this.NotifyAt(time);
                }

                // before serializing a batch of records, check to see if the current file has gone over
                // its length threshold; if so the current file will be closed, and the next one will be
                // opened
                writer.CheckForFileBoundary();

                // serialize only the valid prefix of the payload (message.length records) to the current file
                this.serialize(writer.Writer, new ArraySegment<TOutput>(message.payload, 0, message.length));
            }
Beispiel #2
0
        /// <summary>
        /// Serializes a sequence of records to a collection of files partitioned by process and worker. For
        /// each process/worker this writes a sequence of files; each time a file reaches a threshold number
        /// of bytes, it is closed and another is opened. This keeps individual files of bounded length,
        /// allowing for more parallelism when reading them later.
        /// </summary>
        /// <remarks>
        /// The per-worker writers live in a dictionary shared by the callbacks; only lookups, insertions and
        /// removals are performed under the lock. Serialization and closing of a writer run outside the lock.
        /// </remarks>
        /// <typeparam name="TOutput">type of record to serialize</typeparam>
        /// <typeparam name="TWriter">type of the serializer</typeparam>
        /// <param name="source">stream of records to serialize</param>
        /// <param name="pathFunction">function from process id, worker id and file sequence number to filename</param>
        /// <param name="streamFunction">function to create an output stream given a filename</param>
        /// <param name="writerFunction">function to create a serializer from a stream</param>
        /// <param name="serialize">action to serialize a batch of records</param>
        /// <param name="fileLengthThreshold">length in bytes of a file after which it is closed and a new one is opened</param>
        /// <returns>a handle that can be waited on for the computation to complete</returns>
        public static Subscription WriteBySubscription<TOutput, TWriter>(
            this Stream<TOutput, Epoch> source,
            Func<int, int, int, Uri> pathFunction,
            Func<Uri, Stream> streamFunction,
            Func<Stream, TWriter> writerFunction,
            Action<TWriter, ArraySegment<TOutput>> serialize,
            long fileLengthThreshold) where TWriter : class, IDisposable, IFlushable
        {
            // dictionary of sequence writers, indexed by worker id
            var writers = new Dictionary<int, WriterStreamSequence<TWriter>>();

            return source.Subscribe(
                // OnRecv callback
                (message, workerid) =>
                {
                    WriterStreamSequence<TWriter> writer;

                    lock (writers)
                    {
                        if (!writers.TryGetValue(workerid, out writer))
                        {
                            // make a filename generator for the specified worker and process
                            Func<int, Uri> format = segment =>
                                pathFunction(
                                    source.ForStage.Computation.Controller.Configuration.ProcessID,
                                    workerid,
                                    segment);

                            // make the sequence writer for the specified worker and process
                            writer = new WriterStreamSequence<TWriter>(
                                format, streamFunction, writerFunction, fileLengthThreshold);
                            writers.Add(workerid, writer);
                        }
                    }

                    // before serializing a batch of records, check to see if the current file has gone over
                    // its length threshold; if so the current file will be closed, and the next one will be
                    // opened
                    writer.CheckForFileBoundary();

                    // serialize the batch of records to the current file
                    serialize(writer.Writer, new ArraySegment<TOutput>(message.payload, 0, message.length));
                },
                // OnNotify callback
                (epoch, workerid) => { },
                // OnCompleted callback
                workerid =>
                {
                    WriterStreamSequence<TWriter> writer;

                    lock (writers)
                    {
                        // single lookup; a worker that never received a batch has no writer to close
                        if (!writers.TryGetValue(workerid, out writer))
                        {
                            return;
                        }

                        writers.Remove(workerid);
                    }

                    // close outside the lock so that the file I/O does not block other workers' lookups;
                    // the writer is no longer reachable through the dictionary at this point
                    writer.Close();
                });
        }
Beispiel #3
0
            /// <summary>
            /// Closes the sequence writer registered for <paramref name="time"/> and sends the name of each
            /// file it wrote to the output buffer for that time.
            /// </summary>
            /// <param name="time">time whose sequence writer should be closed</param>
            public override void OnNotify(TTime time)
            {
                // look the writer up and drop it from the dictionary before closing it;
                // this assumes a writer was registered for the time being notified
                var finished = writers[time];
                writers.Remove(time);

                // close the sequence writer
                finished.Close();

                // emit the filename of each file written by this writer
                var buffer = this.Output.GetBufferForTime(time);
                foreach (Uri written in finished.Filenames)
                {
                    buffer.Send(written);
                }
            }