Example #1
        /// <summary>
        /// Write a sequence of strings as HDFS text files, partitioned by time as well as key.
        /// Within a given time and part, records are written in an undefined order.
        /// </summary>
        /// <typeparam name="TTime">type of the record time</typeparam>
        /// <param name="source">stream of records to write</param>
        /// <param name="prefix">webhdfs directory to write the partitioned data into</param>
        /// <param name="bufferSize">buffer size to use for the text serializer</param>
        /// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
        /// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
        /// <returns>stream of filenames written</returns>
        public static Stream<Uri, TTime> ToHdfsText<TTime>(
            this Stream<string, TTime> source,
            Uri prefix,
            int bufferSize = 1024 * 1024,
            long blockSize = -1,
            long segmentThreshold = 254 * 1024 * 1024) where TTime : Time<TTime>
        {
            // make sure we'll be able to write the partitioned data
            HdfsClient client = new HdfsClient();

            client.EnsureDirectory(prefix, false);

            // don't write byte order marks at the start of the files
            Encoding utf8 = new UTF8Encoding(false, true);

            return source.ToHdfsBinary(
                (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
                stream => new Utils.FStreamWriter(stream, utf8, 1024 * 1024),
                (writer, arraySegment) =>
                {
                    for (int i = 0; i < arraySegment.Count; i++)
                    {
                        writer.WriteLine(arraySegment.Array[i]);
                    }
                },
                bufferSize, blockSize, segmentThreshold);
        }
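
A minimal usage sketch for ToHdfsText, assuming the extension above is in scope (the Microsoft.Research.Naiad.Frameworks.Hdfs namespace below is an assumption) and using the standard Naiad entry points NewComputation.FromArgs, BatchedDataSource<T>, NewInput, Activate and Join; the namenode URI and record contents are illustrative only:

using System;
using Microsoft.Research.Naiad;
using Microsoft.Research.Naiad.Input;
using Microsoft.Research.Naiad.Frameworks.Hdfs; // assumed namespace for the ToHdfsText extension above

class WriteTextExample
{
    static void Main(string[] args)
    {
        using (var computation = NewComputation.FromArgs(ref args))
        {
            // Feed a handful of text records into the dataflow.
            var source = new BatchedDataSource<string>();
            var lines = computation.NewInput(source);

            // Write the records under the given WebHDFS directory; the returned
            // stream carries one Uri per part file that gets written.
            var written = lines.ToHdfsText(new Uri("hdfs://namenode:9000/output/text"));

            computation.Activate();

            source.OnNext(new[] { "first record", "second record" });
            source.OnCompleted();

            computation.Join();
        }
    }
}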
Example #2
        /// <summary>
        /// Write a stream of records to a collection of HDFS files using the default Naiad binary serializer,
        /// partitioned by time as well as key. Within a given time and part, records are written in an undefined order.
        /// </summary>
        /// <typeparam name="TOutput">type of the records to write</typeparam>
        /// <typeparam name="TTime">type of the record time</typeparam>
        /// <param name="source">stream of records to write</param>
        /// <param name="prefix">webhdfs directory to write the partitioned data into</param>
        /// <param name="bufferSize">buffer size to use for the serializer</param>
        /// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
        /// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
        /// <returns>stream of filenames written</returns>
        public static Stream<Uri, TTime> ToHdfsBinary<TOutput, TTime>(
            this Stream<TOutput, TTime> source,
            Uri prefix,
            int bufferSize = 1024 * 1024,
            long blockSize = -1,
            long segmentThreshold = 254 * 1024 * 1024) where TTime : Time<TTime>
        {
            // make sure we'll be able to write the partitioned data
            HdfsClient client = new HdfsClient();

            client.EnsureDirectory(prefix, false);

            return source.ToHdfsBinary(
                (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
                stream => new NaiadWriter<TOutput>(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize),
                (writer, arraySegment) =>
                {
                    for (int i = 0; i < arraySegment.Count; i++)
                    {
                        writer.Write(arraySegment.Array[i]);
                    }
                },
                bufferSize, blockSize, segmentThreshold);
        }
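
A minimal usage sketch for ToHdfsBinary, under the same assumptions as above; the LogEntry record type, the namenode URI and the tuned buffer/segment sizes are illustrative:

using System;
using Microsoft.Research.Naiad;
using Microsoft.Research.Naiad.Input;
using Microsoft.Research.Naiad.Frameworks.Hdfs; // assumed namespace for the ToHdfsBinary extension above

struct LogEntry
{
    public int UserId;
    public double Value;
}

class WriteBinaryExample
{
    static void Main(string[] args)
    {
        using (var computation = NewComputation.FromArgs(ref args))
        {
            var source = new BatchedDataSource<LogEntry>();
            var records = computation.NewInput(source);

            // Serialize each record with the default Naiad binary serializer and
            // write the parts under the given WebHDFS directory, here with an
            // 8 MB serializer buffer and 128 MB segments instead of the defaults.
            var written = records.ToHdfsBinary(
                new Uri("hdfs://namenode:9000/output/binary"),
                bufferSize: 8 * 1024 * 1024,
                segmentThreshold: 128L * 1024 * 1024);

            computation.Activate();

            source.OnNext(new[] { new LogEntry { UserId = 1, Value = 3.5 } });
            source.OnCompleted();

            computation.Join();
        }
    }
}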