Example #1
0
File: Dfs.cs Project: xyuan/Naiad
        /// <summary>
        /// Find the start of the first record that begins after the end of the block we have been instructed to read. This indicates
        /// the end of the range of data that this block corresponds to
        /// </summary>
        /// <param name="workItem">the block we are reading</param>
        /// <returns></returns>
        /// <summary>
        /// Locate the first record boundary that falls after the end of the block we have been
        /// instructed to read; the returned offset marks the true end of this block's data range.
        /// </summary>
        /// <param name="workItem">the block we are reading</param>
        /// <returns>offset, relative to the start of the block, of the first record starting beyond it</returns>
        private long FindSpillExtent(TItem workItem)
        {
            // bytes left in the file once our block has been fully consumed
            long trailingBytes = workItem.fileLength - (workItem.offset + workItem.length);

            // nothing follows this block, so the range ends exactly at the block boundary
            if (trailingBytes <= 0)
            {
                return workItem.length;
            }

            // open a stream spanning everything from the end of the block to the end of the file
            using (Stream spillReader = client.GetDfsStreamReader(
                       workItem.path,
                       workItem.offset + workItem.length,
                       trailingBytes,
                       // keep requests small so we don't prefetch and buffer much of the next block
                       this.syncRequestLength))
            {
                // delegate to the format-specific scanner to find the start of the next record;
                // potentially this consumes the stream all the way to its end
                this.syncToNextRecord(workItem, spillReader);

                // translate the stream position back into an offset relative to the block start
                return workItem.length + spillReader.Position;
            }
        }
Example #2
0
File: Dfs.cs Project: xyuan/Naiad
 /// <summary>
 /// Execute a work item, reading an HDFS file and generating a sequence of output records
 /// </summary>
 /// <param name="workItem">The work item to be executed, corresponding to an entire Hdfs file</param>
 /// <returns>A sequence of array segments, each containing a sequence of records to be output</returns>
 public IEnumerable <ArraySegment <TOutput> > DoWork(DfsBlock workItem)
 {
     // open the file via the Hdfs client; a 1k buffer suffices for byte-at-a-time access
     using (Stream fileStream = client.GetDfsStreamReader(workItem.path, workItem.offset, workItem.length, 1024, workItem.dataNodeAddress))
     {
         // hand each deserialized batch to the caller as it is produced
         foreach (ArraySegment <TOutput> batch in this.deserialize(fileStream))
         {
             yield return batch;
         }
     }
 }