/// <summary>
/// Write a sequence of strings as hdfs text files, partitioned by time as well as key.
/// Within a given time and part, records are written in an undefined order.
/// </summary>
/// <typeparam name="TTime">type of the record time</typeparam>
/// <param name="source">stream of records to write</param>
/// <param name="user">hdfs user</param>
/// <param name="webPort">webhdfs protocol port</param>
/// <param name="prefix">webhdfs directory to write the partitioned data into</param>
/// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
/// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
/// <returns>stream of filenames written</returns>
public static Stream<Uri, TTime> ToWebHdfsText<TTime>(
    this Stream<string, TTime> source,
    string user,
    int webPort,
    Uri prefix,
    long blockSize = -1,
    long segmentThreshold = 254 * 1024 * 1024) where TTime : Time<TTime>
{
    // make sure we'll be able to write the partitioned data
    WebHdfsClient client = new WebHdfsClient(user, webPort);
    client.EnsureDirectory(prefix, false);

    // don't write byte order marks at the start of the files
    Encoding utf8 = new UTF8Encoding(false, true);

    return source.ToWebHdfsBinary(
        user, webPort,
        (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
        stream => new Utils.FStreamWriter(stream, utf8, 1024 * 1024),
        (writer, arraySegment) =>
        {
            // BUGFIX: honor the segment's Offset. Indexing the backing array from 0
            // writes the wrong records whenever Offset != 0; when Offset == 0 this is
            // byte-for-byte identical to the old behavior.
            for (int i = 0; i < arraySegment.Count; i++)
            {
                writer.WriteLine(arraySegment.Array[arraySegment.Offset + i]);
            }
        },
        blockSize, segmentThreshold);
}
/// <summary>
/// Method to write a stream of records to a collection of HDFS files using the default Naiad
/// binary serializer, partitioned by time as well as key. Within a given time and part,
/// records are written in an undefined order.
/// </summary>
/// <typeparam name="TOutput">type of the records to write</typeparam>
/// <typeparam name="TTime">type of the record time</typeparam>
/// <param name="source">stream of records to write</param>
/// <param name="user">hdfs user</param>
/// <param name="webPort">webhdfs protocol port</param>
/// <param name="prefix">webhdfs directory to write the partitioned data into</param>
/// <param name="bufferSize">buffer size to use in the serializer</param>
/// <param name="blockSize">hdfs block size to use, or -1 for the file system default value</param>
/// <param name="segmentThreshold">file size to write before closing the file and opening another one</param>
/// <returns>stream of filenames written</returns>
public static Stream<Uri, TTime> ToWebHdfsBinary<TOutput, TTime>(
    this Stream<TOutput, TTime> source,
    string user,
    int webPort,
    Uri prefix,
    int bufferSize = 1024 * 1024,
    long blockSize = -1,
    long segmentThreshold = 254 * 1024 * 1024) where TTime : Time<TTime>
{
    // make sure we'll be able to write the partitioned data
    WebHdfsClient client = new WebHdfsClient(user, webPort);
    client.EnsureDirectory(prefix, false);

    return source.ToWebHdfsBinary(
        user, webPort,
        (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment),
        stream => new NaiadWriter<TOutput>(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize),
        (writer, arraySegment) =>
        {
            // BUGFIX: honor the segment's Offset. Indexing the backing array from 0
            // serializes the wrong records whenever Offset != 0; when Offset == 0 this
            // is identical to the old behavior.
            for (int i = 0; i < arraySegment.Count; i++)
            {
                writer.Write(arraySegment.Array[arraySegment.Offset + i]);
            }
        },
        blockSize, segmentThreshold);
}
/// <summary>
/// Ad-hoc smoke test: creates a directory on the test cluster and uploads a local file.
/// NOTE(review): namenode address, user, and local path are hard-coded — acceptable for a
/// scratch test, but move them to configuration before reusing this anywhere else.
/// </summary>
private static async Task testHdfs()
{
    var client = new WebHdfsClient("http://172.17.7.211:50070", "root");

    // The directory-creation result was previously captured into an unused local;
    // just await the call and let any failure surface as an exception.
    await client.CreateDirectory("khamzat_test");
    await client.CreateFile(@"c:\!temp\build2.txt", "khamzat_test/build2.txt");
}
/// <summary>
/// WriteStream should report success when writing a fresh random file with random content.
/// </summary>
public void WriteStream_AddRandomFileWithRandomContent_ResultOk()
{
    //Arrange
    WebHdfsClient client = GetWebHdfsClient();
    string fileNamePath = $"/dewey/tests/{Guid.NewGuid()}";
    string text = $"{Guid.NewGuid()}";

    // Dispose the source stream once the upload completes.
    using (Stream textStream = GenerateStreamFromString(text))
    {
        //Act
        // GetAwaiter().GetResult() instead of .Result so a failure surfaces as the
        // original exception rather than being wrapped in an AggregateException.
        bool result = client.WriteStream(textStream, fileNamePath).GetAwaiter().GetResult();

        //Assert
        Assert.IsTrue(result);
    }
}
/// <summary>
/// GetFileStatus on a freshly written file should report FileType == FILE.
/// </summary>
public void GetFileStatus_AddRandomFileWithRandomContent_ResultFileStatus()
{
    //Arrange
    WebHdfsClient client = GetWebHdfsClient();
    string fileName = Guid.NewGuid().ToString();
    string fileNamePath = $"/dewey/tests/{fileName}";
    string text = $"{Guid.NewGuid()}";

    bool resultWrite;
    // GetAwaiter().GetResult() instead of .Result so failures surface as the original
    // exception rather than an AggregateException; dispose the upload stream.
    using (Stream textStream = GenerateStreamFromString(text))
    {
        resultWrite = client.WriteStream(textStream, fileNamePath).GetAwaiter().GetResult();
    }

    //Act
    HdfsFileStatus fileStatus = client.GetFileStatus(fileNamePath).GetAwaiter().GetResult();

    //Assert
    Assert.IsTrue(resultWrite);
    Assert.AreEqual(HdfsFileType.FILE, fileStatus.FileType);
}
/// <summary>
/// Delete on a freshly written file should succeed.
/// </summary>
public void Delete_AddRandomFile_ResultDeleteFile()
{
    //Arrange
    WebHdfsClient client = GetWebHdfsClient();
    string fileName = Guid.NewGuid().ToString();
    string rootPath = "/dewey/tests";
    string fileNamePath = $"{rootPath}/{fileName}";
    string text = $"{Guid.NewGuid()}";

    bool resultWrite;
    // GetAwaiter().GetResult() instead of .Result so failures surface as the original
    // exception rather than an AggregateException; dispose the upload stream.
    using (Stream textStream = GenerateStreamFromString(text))
    {
        resultWrite = client.WriteStream(textStream, fileNamePath).GetAwaiter().GetResult();
    }

    //Act
    bool resultDelete = client.Delete(fileNamePath).GetAwaiter().GetResult();

    //Assert
    Assert.IsTrue(resultWrite);
    Assert.IsTrue(resultDelete);
}
/// <summary>
/// MakeDirectory should succeed and the new directory should appear in the parent's listing.
/// </summary>
public void MakeDirectory_MakerRandomDirectory_ResultListStatusDirectory()
{
    //Arrange
    WebHdfsClient client = GetWebHdfsClient();
    string directoryName = Guid.NewGuid().ToString();
    string rootPath = "/dewey/tests";
    string directoryNamePath = $"{rootPath}/{directoryName}";

    //Act
    // GetAwaiter().GetResult() instead of .Result so failures surface as the original
    // exception rather than being wrapped in an AggregateException.
    bool resultMakeDirectory = client.MakeDirectory(directoryNamePath).GetAwaiter().GetResult();

    //Assert
    Assert.IsTrue(resultMakeDirectory);
    IEnumerable<HdfsFileStatus> fileStatus = client.ListStatus(rootPath).GetAwaiter().GetResult();
    HdfsFileStatus findStatus = fileStatus.SingleOrDefault(
        a => a.FileType == HdfsFileType.DIRECTORY && a.Name == directoryName);
    Assert.IsNotNull(findStatus);
}
/// <summary>
/// ListStatus on the parent directory should contain a freshly written file.
/// </summary>
public void ListStatus_AddRandomFileWithRandomContent_ResultListStatus()
{
    //Arrange
    WebHdfsClient client = GetWebHdfsClient();
    string fileName = Guid.NewGuid().ToString();
    string rootPath = "/dewey/tests";
    string fileNamePath = $"{rootPath}/{fileName}";
    string text = $"{Guid.NewGuid()}";

    bool resultWrite;
    // GetAwaiter().GetResult() instead of .Result so failures surface as the original
    // exception rather than an AggregateException; dispose the upload stream.
    using (Stream textStream = GenerateStreamFromString(text))
    {
        resultWrite = client.WriteStream(textStream, fileNamePath).GetAwaiter().GetResult();
    }

    //Act
    IEnumerable<HdfsFileStatus> fileStatus = client.ListStatus(rootPath).GetAwaiter().GetResult();

    //Assert
    Assert.IsTrue(resultWrite);
    HdfsFileStatus findStatus = fileStatus.SingleOrDefault(
        a => a.FileType == HdfsFileType.FILE && a.Name == fileName);
    Assert.IsNotNull(findStatus);
}
/// <summary>
/// Shared driver for the client tests: wires a mocked HTTP transport, runs
/// <paramref name="caller"/> against a client built on that transport, and verifies
/// that every stubbed request was actually issued.
/// </summary>
/// <typeparam name="TResult">result type produced by the client call under test</typeparam>
/// <param name="caller">the client operation to exercise</param>
/// <param name="method">HTTP method the operation is expected to use</param>
/// <param name="url">path (relative to the webhdfs prefix) the operation should hit</param>
/// <param name="operation">webhdfs op name expected in the query string</param>
/// <param name="result">JSON body the stubbed server returns (defaults to empty object)</param>
/// <param name="status">HTTP status the stubbed server returns</param>
/// <returns>the value produced by <paramref name="caller"/></returns>
private static async Task <TResult> CallClient <TResult>(Func <WebHdfsClient, Task <TResult> > caller, HttpMethod method, string url, string operation, string result = "{}", HttpStatusCode status = HttpStatusCode.OK)
{
    // Partial mock: CallBase keeps FakeHttpMessageHandler's plumbing while Send(...)
    // is intercepted for the requests stubbed below.
    var handler = new Mock <FakeHttpMessageHandler> { CallBase = true };

    // Every client call first resolves the user's home directory, so the
    // GETHOMEDIRECTORY round-trip is always stubbed.
    Expression <Func <FakeHttpMessageHandler, HttpResponseMessage> > homeCall =
        t => t.Send(It.Is <HttpRequestMessage>(
            msg => msg.Method == HttpMethod.Get &&
                   msg.RequestUri.ToString() == "http://test.me/plz/webhdfs/v1/?user.name=hdfs&op=GETHOMEDIRECTORY"));
    handler.Setup(homeCall)
        .Returns(new HttpResponseMessage(HttpStatusCode.OK)
        {
            Content = new StringContent("{\"Path\":\"/user/hdfs\"}", System.Text.Encoding.UTF8, "application/json")
        })
        .Verifiable();

    // For any operation other than GETHOMEDIRECTORY itself, also stub the
    // operation-specific request. StartsWith (not equality) is used on the URL
    // because operations may append extra query parameters.
    if (!operation.StartsWith("GETHOMEDIRECTORY", StringComparison.OrdinalIgnoreCase))
    {
        Expression <Func <FakeHttpMessageHandler, HttpResponseMessage> > innerCall =
            t => t.Send(It.Is <HttpRequestMessage>(
                msg => msg.Method == method &&
                       msg.RequestUri.ToString().StartsWith(BASE_URL + WebHdfsClient.PREFIX + url + "?user.name=" + USER + "&op=" + operation, StringComparison.OrdinalIgnoreCase)));
        handler.Setup(innerCall)
            .Returns(new HttpResponseMessage(status)
            {
                Content = new StringContent(result, System.Text.Encoding.UTF8, "application/json")
            })
            .Verifiable();
    }

    var client = new WebHdfsClient(handler.Object, BASE_URL, USER);
    var response = await caller(client);

    // Fails the test if any .Verifiable() setup above was never invoked.
    handler.Verify();
    return(response);
}
/// <summary>
/// ReadStream should round-trip exactly the content that WriteStream wrote.
/// </summary>
public void ReadStream_AddRandomFileWithRandomContent_ResultTheSameContent()
{
    //Arrange
    WebHdfsClient client = GetWebHdfsClient();
    string fileNamePath = $"/dewey/tests/{Guid.NewGuid()}";
    string text = $"{Guid.NewGuid()}";

    // GetAwaiter().GetResult() instead of .Result so failures surface as the original
    // exception rather than an AggregateException; both streams are disposed.
    using (Stream textStream = GenerateStreamFromString(text))
    using (MemoryStream streamToRead = new MemoryStream(256))
    {
        bool resultWrite = client.WriteStream(textStream, fileNamePath).GetAwaiter().GetResult();
        // (A freshly constructed MemoryStream already has Position == 0, so the old
        // explicit reset was redundant and has been dropped.)

        //Act
        bool resultRead = client.ReadStream(streamToRead, fileNamePath).GetAwaiter().GetResult();

        //Assert
        string textRead = ReadStream(streamToRead);
        Assert.IsTrue(resultWrite);
        Assert.IsTrue(resultRead);
        Assert.AreEqual(text, textRead);
    }
}
/// <summary>
/// Submits a Naiad program (args[0]) to a native Yarn cluster and blocks until the
/// application completes. Reads cluster endpoints, queue, and memory settings from the
/// module-level command-line option objects (RMHostAndPort, NameNodeAndPort, etc.).
/// </summary>
/// <param name="args">args[0] is the program (local path or hdfs:// URI); the rest are
/// forwarded to the submission</param>
/// <returns>0-style process exit code: 1 on argument/validation failure, otherwise the
/// result of the submission's Join()</returns>
private static int RunNativeYarn(string[] args)
{
    // The resource-manager host is the only mandatory option.
    if (!RMHostAndPort.IsSet)
    {
        Console.Error.WriteLine("Error: Yarn cluster rm node hostname not set.");
        Console.Error.WriteLine(Usage);
        return(1);
    }

    // Resolve the RM webservice endpoint (default port 8088).
    string rmHost; int wsPort;
    GetHostAndPort(RMHostAndPort.StringValue, null, 8088, out rmHost, out wsPort);

    // Namenode defaults to the RM host when not given; -1 lets the dfs client pick the port.
    string nameNode; int hdfsPort;
    GetHostAndPort(NameNodeAndPort.IsSet ? NameNodeAndPort.StringValue : null, rmHost, -1, out nameNode, out hdfsPort);

    // Optional job queue; null means the cluster default.
    string queueName = null;
    if (YarnJobQueue.IsSet)
    {
        queueName = YarnJobQueue.StringValue;
    }

    // Optional memory limits; -1 means "use the submission's defaults".
    int amMemoryMB = -1;
    if (YarnAMMemory.IsSet)
    {
        amMemoryMB = YarnAMMemory.IntValue;
    }
    int workerMemoryMB = -1;
    if (YarnWorkerMemory.IsSet)
    {
        workerMemoryMB = YarnWorkerMemory.IntValue;
    }

    // Optional external launcher; launcherNode stays null when not configured, which
    // selects the direct-submission constructor below.
    string launcherNode; int launcherPort;
    GetHostAndPort(
        LauncherHostAndPort.IsSet ? LauncherHostAndPort.StringValue : null,
        null, -1, out launcherNode, out launcherPort);

    // Talk to HDFS over webhdfs when a port was supplied, otherwise the native client.
    DfsClient dfsClient;
    if (WebHdfsPort.IsSet)
    {
        dfsClient = new WebHdfsClient(Environment.UserName, WebHdfsPort.IntValue);
    }
    else
    {
        dfsClient = new HdfsClient();
    }

    // Validate that the program binary exists, either on HDFS or on the local disk.
    if (args[0].ToLower().StartsWith("hdfs://"))
    {
        if (!dfsClient.IsFileExists(new Uri(args[0])))
        {
            Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]);
            Console.Error.WriteLine(Usage);
            return(1);
        }
    }
    else
    {
        if (!File.Exists(args[0]))
        {
            Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]);
            Console.Error.WriteLine(Usage);
            return(1);
        }
    }

    // Build hdfs://nameNode:hdfsPort/ and derive the per-user job root and the shared
    // staging area from it.
    UriBuilder builder = new UriBuilder();
    builder.Scheme = "hdfs";
    builder.Host = nameNode;
    builder.Port = hdfsPort;
    Uri jobRoot = dfsClient.Combine(builder.Uri, "user", Environment.UserName);
    Uri stagingRoot = dfsClient.Combine(builder.Uri, "tmp", "staging");

    // Two constructor shapes: direct submission vs. submission through a launcher node.
    NativeYarnSubmission submission;
    if (launcherNode == null)
    {
        submission = new NativeYarnSubmission(rmHost, wsPort, dfsClient,
                                              queueName, stagingRoot, jobRoot, PeloponneseHome,
                                              amMemoryMB, NumHosts, workerMemoryMB, args);
    }
    else
    {
        submission = new NativeYarnSubmission(rmHost, wsPort, dfsClient,
                                              queueName, stagingRoot, jobRoot, launcherNode, launcherPort,
                                              amMemoryMB, NumHosts, workerMemoryMB, args);
    }

    submission.Submit();
    Console.WriteLine("Waiting for application to complete");
    // Join() blocks until the Yarn application finishes and yields its exit status.
    int ret = submission.Join();

    // Optionally pull the aggregated container logs down to a local file.
    if (LogsDumpFile.IsSet)
    {
        FetchLogs(LogsDumpFile.StringValue, submission.ClusterJob.Id);
    }
    submission.Dispose();
    return(ret);
}