Пример #1
0
        /// <summary>
        /// write a sequence of strings as hdfs text files, partitioned by time as well as key.
        /// Within a given time and part, records are written in an undefined order
        /// </summary>
        /// <typeparam name="TTime">type of the record time</typeparam>
        /// <param name="source">stream of records to write</param>
        /// <param name="prefix">webhdfs directory to write the partitioned data into</param>
        /// <param name="bufferSize">buffer size to use for the text serializer</param>
        /// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
        /// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
        /// <returns>stream of filenames written</returns>
        public static Stream <Uri, TTime> ToHdfsText <TTime>(
            this Stream <string, TTime> source,
            Uri prefix,
            int bufferSize        = 1024 * 1024,
            long blockSize        = -1,
            long segmentThreshold = 254 * 1024 * 1024) where TTime : Time <TTime>
        {
            // make sure we'll be able to write the partitioned data
            HdfsClient client = new HdfsClient();

            client.EnsureDirectory(prefix, false);

            // don't write byte order marks at the start of the files
            Encoding utf8 = new UTF8Encoding(false, true);

            return(source.ToHdfsBinary(
                       (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
                       // BUG FIX: honor the caller-supplied bufferSize; previously this was
                       // hard-coded to 1024 * 1024, silently ignoring the parameter
                       stream => new Utils.FStreamWriter(stream, utf8, bufferSize),
                       (writer, arraySegment) =>
            {
                // BUG FIX: respect the segment's Offset — the valid records are
                // Array[Offset .. Offset + Count - 1], not necessarily Array[0 ..]
                for (int i = 0; i < arraySegment.Count; i++)
                {
                    writer.WriteLine(arraySegment.Array[arraySegment.Offset + i]);
                }
            },
                       bufferSize, blockSize, segmentThreshold));
        }
Пример #2
0
        /// <summary>
        /// Creates the home directory under the root path when it does not already exist.
        /// </summary>
        /// <exception cref="Exception">thrown when the directory cannot be created</exception>
        private void AddHomeDirectory()
        {
            // true when an entry named GetHomeDirectoryName is already present under the root
            bool homeDirectoryExists = HdfsClient.ListStatus(path: GetRootPath).Result.SingleOrDefault(a => a.Name == GetHomeDirectoryName) != null;

            // BUG FIX: the directory must be created when it is MISSING; the original
            // condition was inverted (it only attempted creation when it already existed)
            if (!homeDirectoryExists)
            {
                bool result = HdfsClient.MakeDirectory(GetHomeDirectoryPath).Result;
                if (!result)
                {
                    throw new Exception($"Problem add home directory {GetHomeDirectoryPath}");
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Creates the Dewey root directory under "/" when it does not already exist.
        /// </summary>
        /// <exception cref="Exception">thrown when the directory cannot be created</exception>
        private void AddDeweyRoot()
        {
            // true when an entry named GetRootName is already present at the filesystem root
            bool rootExists = HdfsClient.ListStatus("/").Result.SingleOrDefault(a => a.Name == GetRootName) != null;

            // BUG FIX: create the root only when it is MISSING; the original condition
            // was inverted (it only attempted creation when the root already existed)
            if (!rootExists)
            {
                bool result = HdfsClient.MakeDirectory(GetRootPath).Result;
                if (!result)
                {
                    throw new Exception($"Problem add  root {GetRootPath}");
                }
            }
        }
Пример #4
0
        /// <summary>
        /// method to write a stream of records to a collection of HDFS files using the default Naiad binary serializer,
        /// partitioned by time as well as key. Within a given time and part, records are written in an undefined order
        /// </summary>
        /// <typeparam name="TOutput">type of the records to write</typeparam>
        /// <typeparam name="TTime">type of the record time</typeparam>
        /// <param name="source">stream of records to write</param>
        /// <param name="prefix">webhdfs directory to write the partitioned data into</param>
        /// <param name="bufferSize">buffer size to use for the serializer</param>
        /// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
        /// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
        /// <returns>stream of filenames written</returns>
        public static Stream <Uri, TTime> ToHdfsBinary <TOutput, TTime>(
            this Stream <TOutput, TTime> source,
            Uri prefix,
            int bufferSize        = 1024 * 1024,
            long blockSize        = -1,
            long segmentThreshold = 254 * 1024 * 1024) where TTime : Time <TTime>
        {
            // make sure we'll be able to write the partitioned data
            HdfsClient client = new HdfsClient();

            client.EnsureDirectory(prefix, false);

            return(source.ToHdfsBinary(
                       (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
                       stream => new NaiadWriter <TOutput>(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize),
                       (writer, arraySegment) =>
            {
                // BUG FIX: respect the segment's Offset — the valid records are
                // Array[Offset .. Offset + Count - 1], not necessarily Array[0 ..]
                for (int i = 0; i < arraySegment.Count; i++)
                {
                    writer.Write(arraySegment.Array[arraySegment.Offset + i]);
                }
            },
                       bufferSize, blockSize, segmentThreshold));
        }
Пример #5
0
 /// <summary>
 /// Reads the file identified by <paramref name="fileGuid"/> into the supplied stream.
 /// </summary>
 /// <param name="fileGuid">identifier of the file to read</param>
 /// <param name="stream">destination stream the file contents are written to</param>
 /// <returns>the result reported by the underlying HDFS read</returns>
 public bool ReadFile(Guid fileGuid, Stream stream)
 {
     // ensure the home directory exists before touching anything under it
     MakeHomeDirectory();

     // NOTE(review): blocks on .Result — presumably callers are synchronous; confirm no deadlock risk
     var readTask = HdfsClient.ReadStream(path: GetFilePath(fileGuid), stream: stream);

     return readTask.Result;
 }
Пример #6
0
        /// <summary>
        /// Submits a Naiad program to a native Yarn cluster and waits for it to complete.
        /// </summary>
        /// <param name="args">command line for the job; args[0] is the Naiad program,
        /// either a local path or an hdfs:// uri</param>
        /// <returns>1 on a usage error (missing RM host or missing program), otherwise
        /// the exit code reported by the submission's Join()</returns>
        private static int RunNativeYarn(string[] args)
        {
            // the resource-manager hostname is mandatory
            if (!RMHostAndPort.IsSet)
            {
                Console.Error.WriteLine("Error: Yarn cluster rm node hostname not set.");
                Console.Error.WriteLine(Usage);
                return(1);
            }

            string rmHost;
            int    wsPort;

            // resource-manager web-service endpoint; port defaults to 8088
            GetHostAndPort(RMHostAndPort.StringValue, null, 8088, out rmHost, out wsPort);

            string nameNode;
            int    hdfsPort;

            // namenode defaults to the RM host; -1 means the filesystem default port
            GetHostAndPort(NameNodeAndPort.IsSet ? NameNodeAndPort.StringValue : null, rmHost, -1, out nameNode, out hdfsPort);

            string queueName = null;

            if (YarnJobQueue.IsSet)
            {
                queueName = YarnJobQueue.StringValue;
            }

            // -1 leaves the application-master / worker memory at the cluster default
            int amMemoryMB = -1;

            if (YarnAMMemory.IsSet)
            {
                amMemoryMB = YarnAMMemory.IntValue;
            }

            int workerMemoryMB = -1;

            if (YarnWorkerMemory.IsSet)
            {
                workerMemoryMB = YarnWorkerMemory.IntValue;
            }

            string launcherNode;
            int    launcherPort;

            // optional external launcher; launcherNode stays null when not configured
            GetHostAndPort(
                LauncherHostAndPort.IsSet ? LauncherHostAndPort.StringValue : null, null, -1,
                out launcherNode, out launcherPort);

            DfsClient dfsClient;

            if (WebHdfsPort.IsSet)
            {
                dfsClient = new WebHdfsClient(Environment.UserName, WebHdfsPort.IntValue);
            }
            else
            {
                dfsClient = new HdfsClient();
            }

            // FIX: culture-insensitive scheme check instead of ToLower().StartsWith()
            // (avoids culture-dependent casing pitfalls, CA1304/CA1310)
            bool isHdfsPath = args[0].StartsWith("hdfs://", StringComparison.OrdinalIgnoreCase);

            // the program must exist either on hdfs or on the local filesystem;
            // the two previously-duplicated error branches are unified here
            bool programExists = isHdfsPath
                ? dfsClient.IsFileExists(new Uri(args[0]))
                : File.Exists(args[0]);

            if (!programExists)
            {
                Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]);
                Console.Error.WriteLine(Usage);
                return(1);
            }

            UriBuilder builder = new UriBuilder();

            builder.Scheme = "hdfs";
            builder.Host   = nameNode;
            builder.Port   = hdfsPort;
            Uri jobRoot     = dfsClient.Combine(builder.Uri, "user", Environment.UserName);
            Uri stagingRoot = dfsClient.Combine(builder.Uri, "tmp", "staging");

            NativeYarnSubmission submission;

            if (launcherNode == null)
            {
                submission = new NativeYarnSubmission(rmHost, wsPort, dfsClient, queueName, stagingRoot, jobRoot, PeloponneseHome, amMemoryMB, NumHosts, workerMemoryMB, args);
            }
            else
            {
                submission = new NativeYarnSubmission(rmHost, wsPort, dfsClient, queueName, stagingRoot, jobRoot, launcherNode, launcherPort, amMemoryMB, NumHosts, workerMemoryMB, args);
            }

            submission.Submit();

            Console.WriteLine("Waiting for application to complete");

            int ret = submission.Join();

            if (LogsDumpFile.IsSet)
            {
                FetchLogs(LogsDumpFile.StringValue, submission.ClusterJob.Id);
            }

            // NOTE(review): Dispose is skipped when Submit/Join throws — consider a
            // using/try-finally if NativeYarnSubmission disposal is safe mid-submission
            submission.Dispose();

            return(ret);
        }