/// <summary>
/// Writes a sequence of strings as HDFS text files, partitioned by time as well as key.
/// Within a given time and part, records are written in an undefined order.
/// </summary>
/// <typeparam name="TTime">type of the record time</typeparam>
/// <param name="source">stream of records to write</param>
/// <param name="prefix">webhdfs directory to write the partitioned data into</param>
/// <param name="bufferSize">buffer size to use for the text serializer</param>
/// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
/// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
/// <returns>stream of filenames written</returns>
public static Stream<Uri, TTime> ToHdfsText<TTime>(
    this Stream<string, TTime> source,
    Uri prefix,
    int bufferSize = 1024 * 1024,
    long blockSize = -1,
    long segmentThreshold = 254 * 1024 * 1024) where TTime : Time<TTime>
{
    // fail fast if the destination directory cannot be created or used
    HdfsClient hdfs = new HdfsClient();
    hdfs.EnsureDirectory(prefix, false);

    // UTF-8 with no byte order mark at the start of the files
    Encoding encoding = new UTF8Encoding(false, true);

    return source.ToHdfsBinary(
        (processId, threadId, time, segment) =>
            Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
        stream => new Utils.FStreamWriter(stream, encoding, 1024 * 1024),
        (writer, records) =>
        {
            // NOTE(review): indexes records.Array from 0 rather than records.Offset;
            // presumably ToHdfsBinary always supplies segments with Offset == 0 — confirm
            int count = records.Count;
            for (int index = 0; index < count; ++index)
            {
                writer.WriteLine(records.Array[index]);
            }
        },
        bufferSize,
        blockSize,
        segmentThreshold);
}
/// <summary>
/// Creates the user's home directory under the root if it does not already exist.
/// </summary>
/// <exception cref="Exception">thrown when the directory needs to be created but the create fails</exception>
private void AddHomeDirectory()
{
    // true when an entry with the home directory's name already exists under the root
    bool homeDirectoryExists =
        HdfsClient.ListStatus(path: GetRootPath).Result
            .SingleOrDefault(a => a.Name == GetHomeDirectoryName) != null;

    // FIX: the original tested the condition the wrong way round — it called
    // MakeDirectory only when the directory already existed, and never created
    // a missing one. Create it only when it is absent.
    if (!homeDirectoryExists)
    {
        bool created = HdfsClient.MakeDirectory(GetHomeDirectoryPath).Result;
        if (!created)
        {
            throw new Exception($"Problem adding home directory {GetHomeDirectoryPath}");
        }
    }
}
/// <summary>
/// Creates the dewey root directory under "/" if it does not already exist.
/// </summary>
/// <exception cref="Exception">thrown when the directory needs to be created but the create fails</exception>
private void AddDeweyRoot()
{
    // true when an entry with the root's name already exists directly under "/"
    bool rootExists =
        HdfsClient.ListStatus("/").Result
            .SingleOrDefault(a => a.Name == GetRootName) != null;

    // FIX: the original tested the condition the wrong way round — it called
    // MakeDirectory only when the root already existed, and never created
    // a missing one. Create it only when it is absent.
    if (!rootExists)
    {
        bool created = HdfsClient.MakeDirectory(GetRootPath).Result;
        if (!created)
        {
            throw new Exception($"Problem adding root {GetRootPath}");
        }
    }
}
/// <summary>
/// Writes a stream of records to a collection of HDFS files using the default Naiad
/// binary serializer, partitioned by time as well as key. Within a given time and part,
/// records are written in an undefined order.
/// </summary>
/// <typeparam name="TOutput">type of the records to write</typeparam>
/// <typeparam name="TTime">type of the record time</typeparam>
/// <param name="source">stream of records to write</param>
/// <param name="prefix">webhdfs directory to write the partitioned data into</param>
/// <param name="bufferSize">buffer size to use for the serializer</param>
/// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
/// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
/// <returns>stream of filenames written</returns>
public static Stream<Uri, TTime> ToHdfsBinary<TOutput, TTime>(
    this Stream<TOutput, TTime> source,
    Uri prefix,
    int bufferSize = 1024 * 1024,
    long blockSize = -1,
    long segmentThreshold = 254 * 1024 * 1024) where TTime : Time<TTime>
{
    // fail fast if the destination directory cannot be created or used
    HdfsClient hdfs = new HdfsClient();
    hdfs.EnsureDirectory(prefix, false);

    return source.ToHdfsBinary(
        (processId, threadId, time, segment) =>
            Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
        stream => new NaiadWriter<TOutput>(
            stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize),
        (writer, records) =>
        {
            // NOTE(review): indexes records.Array from 0 rather than records.Offset;
            // presumably segments always arrive with Offset == 0 — confirm
            int count = records.Count;
            for (int index = 0; index < count; ++index)
            {
                writer.Write(records.Array[index]);
            }
        },
        bufferSize,
        blockSize,
        segmentThreshold);
}
/// <summary>
/// Reads the HDFS file identified by <paramref name="fileGuid"/> into the supplied stream.
/// </summary>
/// <param name="fileGuid">identifier of the file to read</param>
/// <param name="stream">destination stream the file contents are copied into</param>
/// <returns>true when the read succeeds</returns>
public bool ReadFile(Guid fileGuid, Stream stream)
{
    // make sure the home directory exists before resolving paths beneath it
    MakeHomeDirectory();

    bool succeeded = HdfsClient.ReadStream(path: GetFilePath(fileGuid), stream: stream).Result;
    return succeeded;
}
/// <summary>
/// Submits a Naiad program to a native YARN cluster and waits for it to complete.
/// </summary>
/// <param name="args">command-line arguments; args[0] is the program to run,
/// either a local path or an hdfs:// uri</param>
/// <returns>the job's exit code from Join, or 1 when the arguments are invalid</returns>
private static int RunNativeYarn(string[] args)
{
    if (!RMHostAndPort.IsSet)
    {
        Console.Error.WriteLine("Error: Yarn cluster rm node hostname not set.");
        Console.Error.WriteLine(Usage);
        return 1;
    }

    // resource manager web-service endpoint; default web-service port is 8088
    string rmHost;
    int wsPort;
    GetHostAndPort(RMHostAndPort.StringValue, null, 8088, out rmHost, out wsPort);

    // name node defaults to the RM host; -1 lets the client pick the default port
    string nameNode;
    int hdfsPort;
    GetHostAndPort(
        NameNodeAndPort.IsSet ? NameNodeAndPort.StringValue : null,
        rmHost, -1, out nameNode, out hdfsPort);

    string queueName = null;
    if (YarnJobQueue.IsSet)
    {
        queueName = YarnJobQueue.StringValue;
    }

    int amMemoryMB = -1;
    if (YarnAMMemory.IsSet)
    {
        amMemoryMB = YarnAMMemory.IntValue;
    }

    int workerMemoryMB = -1;
    if (YarnWorkerMemory.IsSet)
    {
        workerMemoryMB = YarnWorkerMemory.IntValue;
    }

    string launcherNode;
    int launcherPort;
    GetHostAndPort(
        LauncherHostAndPort.IsSet ? LauncherHostAndPort.StringValue : null,
        null, -1, out launcherNode, out launcherPort);

    // choose between the WebHDFS REST client and the native HDFS client
    DfsClient dfsClient;
    if (WebHdfsPort.IsSet)
    {
        dfsClient = new WebHdfsClient(Environment.UserName, WebHdfsPort.IntValue);
    }
    else
    {
        dfsClient = new HdfsClient();
    }

    // Verify the program exists, on HDFS or on the local file system.
    // FIX: the original used args[0].ToLower().StartsWith("hdfs://"), which is
    // culture-sensitive (e.g. breaks under the Turkish locale, CA1310); use an
    // ordinal case-insensitive comparison instead. The two identical error blocks
    // are also consolidated into one.
    bool programExists = args[0].StartsWith("hdfs://", StringComparison.OrdinalIgnoreCase)
        ? dfsClient.IsFileExists(new Uri(args[0]))
        : File.Exists(args[0]);
    if (!programExists)
    {
        Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]);
        Console.Error.WriteLine(Usage);
        return 1;
    }

    UriBuilder builder = new UriBuilder();
    builder.Scheme = "hdfs";
    builder.Host = nameNode;
    builder.Port = hdfsPort;

    Uri jobRoot = dfsClient.Combine(builder.Uri, "user", Environment.UserName);
    Uri stagingRoot = dfsClient.Combine(builder.Uri, "tmp", "staging");

    NativeYarnSubmission submission;
    if (launcherNode == null)
    {
        submission = new NativeYarnSubmission(
            rmHost, wsPort, dfsClient, queueName, stagingRoot, jobRoot,
            PeloponneseHome, amMemoryMB, NumHosts, workerMemoryMB, args);
    }
    else
    {
        submission = new NativeYarnSubmission(
            rmHost, wsPort, dfsClient, queueName, stagingRoot, jobRoot,
            launcherNode, launcherPort, amMemoryMB, NumHosts, workerMemoryMB, args);
    }

    // FIX: the original called Dispose() only on the success path; a throw from
    // Submit/Join/FetchLogs leaked the submission. The using block guarantees disposal.
    using (submission)
    {
        submission.Submit();

        Console.WriteLine("Waiting for application to complete");
        int ret = submission.Join();

        if (LogsDumpFile.IsSet)
        {
            FetchLogs(LogsDumpFile.StringValue, submission.ClusterJob.Id);
        }

        return ret;
    }
}