/// <summary>
/// Writes all records from <paramref name="source"/> to the Azure blob named by
/// <paramref name="dataSetUri"/>, synchronously.
/// </summary>
/// <param name="context">DryadLINQ context used to obtain the record serializer.</param>
/// <param name="source">Records to write; fully enumerated by this call.</param>
/// <param name="dataSetUri">Azure URI carrying account, key, container and blob name.</param>
/// <param name="metaData">Metadata for the data set (not consulted by this implementation).</param>
/// <param name="compressionScheme">Must be CompressionScheme.None; compression is not implemented.</param>
/// <param name="isTemp">Unused by this implementation.</param>
/// <exception cref="DryadLinqException">If a compression scheme other than None is requested.</exception>
public override void Ingress<T>(DryadLinqContext context, IEnumerable<T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme compressionScheme, bool isTemp = false)
{
    string account, key, container, blob;
    AzureUtils.FromAzureUri(dataSetUri, out account, out key, out container, out blob);
    if (compressionScheme != CompressionScheme.None)
    {
        throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
    }

    AzureDfsClient client = new AzureDfsClient(account, key, container);
    DryadLinqFactory<T> factory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

    // GetAwaiter().GetResult() instead of .Result so a failure surfaces as the
    // original exception rather than being wrapped in an AggregateException.
    using (Stream stream = client.GetFileStreamWriterAsync(blob).GetAwaiter().GetResult())
    {
        DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream);
        DryadLinqRecordWriter<T> writer = factory.MakeWriter(nativeStream);
        try
        {
            foreach (T rec in source)
            {
                writer.WriteRecordSync(rec);
            }
        }
        finally
        {
            // Previously Close() was skipped when enumeration threw, leaving
            // buffered records unflushed; always close the writer.
            writer.Close();
        }
    }
}
/// <summary>
/// A file with the specified path.
/// </summary>
/// <param name="path">Path to the file.</param>
/// <param name="client">Azure client.</param>
/// <param name="config">Cluster configuration.</param>
/// <param name="job">Job accessing this file.</param>
/// <param name="isFolder">If true this must be a folder.</param>
/// <param name="canCache">True if the file can be cached (it is immutable for sure).</param>
public AzureDfsFile(ClusterConfiguration config, DryadLinqJobSummary job, AzureDfsClient client, string path, bool canCache, bool isFolder)
    : base(config, job)
{
    this.client = client;
    this.path = path;
    // -1 marks the size as not yet known; it is computed lazily elsewhere.
    this.size = -1;
    this.RepresentsAFolder = isFolder;
    this.ShouldCacheLocally = canCache;

    // Only set up a local cache location when a cache directory is configured.
    string cacheRoot = CachedClusterResidentObject.CacheDirectory;
    if (!string.IsNullOrEmpty(cacheRoot))
    {
        this.LocalCachePath = Path.Combine(cacheRoot, this.path);
    }
}
/// <summary>
/// Connects to an HDInsight YARN cluster and prepares a job-submission client.
/// </summary>
/// <param name="subscriptions">Azure subscriptions to search for clusters.</param>
/// <param name="dfsClient">DFS client used for staging storage.</param>
/// <param name="baseUri">Base URI for the cluster's storage.</param>
/// <param name="ppmHome">Peloponnese home directory.</param>
/// <param name="clusterName">Optional cluster name; may be omitted only when the
/// subscriptions contain exactly one configured HDInsight cluster.</param>
/// <exception cref="ArgumentException">If the cluster cannot be resolved.</exception>
public AzureYarnClient(AzureSubscriptions subscriptions, AzureDfsClient dfsClient, Uri baseUri, string ppmHome, string clusterName = null)
{
    this.dfsClient = dfsClient;
    this.baseUri = baseUri;
    this.peloponneseHome = ppmHome;

    // GetAwaiter().GetResult() rather than .Result so failures propagate as the
    // original exception instead of an AggregateException.
    IEnumerable<AzureCluster> clusters = subscriptions.GetClustersAsync().GetAwaiter().GetResult();

    AzureCluster cluster;
    if (clusterName == null)
    {
        if (clusters.Count() != 1)
        {
            throw new ArgumentException("A cluster name must be provided if there is not exactly one configured HDInsight cluster.", "clusterName");
        }
        cluster = clusters.Single();
    }
    else
    {
        IEnumerable<AzureCluster> matching = clusters.Where(c => c.Name == clusterName);
        if (!matching.Any())
        {
            // Fixed typo in the user-facing message ("manuall" -> "manually").
            throw new ArgumentException("Cluster " + clusterName + " not attached to a Powershell subscription or specified manually", "clusterName");
        }
        cluster = matching.First();
    }

    ClusterName = cluster.Name;
    SubscriptionId = cluster.SubscriptionId;
    Guid subscriptionGuid = new Guid(SubscriptionId);
    HDInsightCertificateCredential sCred = new HDInsightCertificateCredential(subscriptionGuid, cluster.Certificate);
    // NOTE(review): sClient is never used afterwards; the Connect call is kept
    // because it may validate the credential as a side effect -- confirm before removing.
    IHDInsightClient sClient = HDInsightClient.Connect(sCred);
    credentials = new JobSubmissionCertificateCredential(sCred, ClusterName);
    JobClient = JobSubmissionClientFactory.Connect(credentials);
}
/// <summary>
/// Opens the Azure source for reading: parses the source URI, builds the DFS
/// client, and opens the first blob.
/// </summary>
protected override async Task Open()
{
    Log.LogInformation("Opening read for " + source.AbsoluteUri);

    string account, key, container, blobName;
    Utils.FromAzureUri(source, out account, out key, out container, out blobName);

    client = new AzureDfsClient(account, key, container, false, new PeloponneseLogger(Log.Logger));
    client.SetParallelThreadCount(4);

    // When the URI query string names explicit blobs, start at index 0;
    // otherwise -1 indicates there is no explicit blob list.
    NameValueCollection query = System.Web.HttpUtility.ParseQueryString(source.Query);
    blobIndex = (query["blobs"] == null) ? -1 : 0;

    await OpenBlob();
}
/// <summary>
/// Blocks (asynchronously) until the submitted job completes, tracking progress
/// through heartbeat blobs written under the job's error location.
/// </summary>
/// <returns>false if the job never produced a heartbeat or its status could not
/// be tracked; true otherwise -- including failure, which is recorded in
/// this.status and this.errorMessage rather than the return value.</returns>
private async Task <bool> WaitForCompletion()
{
    // at this point we know the job directory, so will be able to send a kill command if we need to
    this.started.SetResult(true);
    string account, key, container, blob;
    Azure.Utils.FromAzureUri(errorLocation, out account, out key, out container, out blob);
    using (AzureDfsClient dfs = new AzureDfsClient(account, key, container))
    {
        // The running job signals liveness by writing a "heartbeat" blob next to
        // its error location.
        string heartBeat = blob + "heartbeat";
        bool started = await WaitForHeartBeat(dfs.Container, heartBeat);
        // NOTE(review): lock (this) is used for all status transitions in this
        // method; a private lock object would be the usual idiom, but other
        // members of this class may lock on the same instance -- confirm before
        // changing.
        lock (this)
        {
            if (started)
            {
                this.status = JobStatus.Running;
            }
            else
            {
                // No heartbeat ever appeared: the job did not start.
                return(false);
            }
        }
        if (!await WaitForStatus(dfs.Container, heartBeat))
        {
            return(false);
        }
        JobStatus finished = await WaitForExit(dfs.Container, heartBeat);
        lock (this)
        {
            if (finished == JobStatus.Success)
            {
                this.status = JobStatus.Success;
                return(true);
            }
            else if (finished == JobStatus.Cancelled)
            {
                this.status = JobStatus.Cancelled;
                return(true);
            }
        }
        // there was a failure; try to read an error description
        string error = null;
        try
        {
            using (Stream s = dfs.GetDfsStreamReader(dfs.Combine(errorLocation, "error.txt")))
            {
                using (StreamReader sr = new StreamReader(s))
                {
                    error = await sr.ReadToEndAsync();
                }
            }
        }
        catch (Exception)
        {
            // Best effort: if error.txt is missing or unreadable the failure is
            // reported without a message (error stays null).
        }
        lock (this)
        {
            this.status = JobStatus.Failure;
            this.errorMessage = error;
        }
    }
    return(true);
}
/// <summary>
/// A file with the specified path.
/// </summary>
/// <param name="path">Path to the file.</param>
/// <param name="client">Azure client.</param>
/// <param name="config">Cluster configuration.</param>
/// <param name="job">Job accessing this file.</param>
/// <param name="isFolder">If true this must be a folder.</param>
/// <param name="canCache">True if the file can be cached (it is immutable for sure).</param>
public AzureDfsFile(ClusterConfiguration config, DryadLinqJobSummary job, AzureDfsClient client, string path, bool canCache, bool isFolder)
    : base(config, job)
{
    this.client = client;
    this.path = path;
    this.ShouldCacheLocally = canCache;
    this.RepresentsAFolder = isFolder;
    // -1 marks the size as not yet known.
    this.size = -1;
    // Only compute a local cache path when a cache directory is configured.
    if (!string.IsNullOrEmpty(CachedClusterResidentObject.CacheDirectory))
        this.LocalCachePath = Path.Combine(CachedClusterResidentObject.CacheDirectory, this.path);
}
/// <summary>
/// Command-line driver: resolves the target HDInsight cluster and submits the
/// Naiad program named in args[0] to it.
/// </summary>
/// <param name="args">Command-line arguments; args[0] is the Naiad program path,
/// the rest are forwarded to the submission after flag parsing.</param>
/// <returns>0 on help display, -1 on argument/cluster errors, otherwise the
/// exit status of the submitted job.</returns>
static int Run(string[] args)
{
    // App settings first, then command-line flags (which strip themselves from args).
    Flags.Parse(ConfigurationManager.AppSettings);
    args = Flags.Parse(args);
    if (ShowHelp.BooleanValue || args.Length == 0)
    {
        Console.Error.WriteLine(Usage);
        return(0);
    }
    if (!File.Exists(args[0]))
    {
        Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]);
        Console.Error.WriteLine(Usage);
        return(-1);
    }
    AzureSubscriptions subscriptionManagement = new AzureSubscriptions();
    // An explicit subscription can be supplied via flags; otherwise the default
    // subscriptions discovered by AzureSubscriptions are used.
    if (AzureSubscriptionId.IsSet && AzureCertificateThumbprint.IsSet)
    {
        subscriptionManagement.AddSubscription(AzureSubscriptionId.StringValue, AzureCertificateThumbprint.StringValue);
    }
    string clusterName = null;
    if (AzureClusterName.IsSet)
    {
        clusterName = AzureClusterName.StringValue;
        // Storage account/key flags override the cluster's configured account.
        if (AzureStorageAccountName.IsSet && AzureStorageAccountKey.IsSet)
        {
            subscriptionManagement.SetClusterAccountAsync(clusterName, AzureStorageAccountName.StringValue, AzureStorageAccountKey.StringValue).Wait();
        }
    }
    else
    {
        // No name given: acceptable only when exactly one cluster is configured.
        IEnumerable <AzureCluster> clusters = subscriptionManagement.GetClusters();
        if (clusters.Count() == 1)
        {
            clusterName = clusters.Single().Name;
        }
        else
        {
            Console.Error.WriteLine("Error: Cluster name must be specified unless there is a single configured cluster in default and supplied subscriptions");
            Console.Error.WriteLine(Usage);
            return(-1);
        }
    }
    AzureCluster cluster;
    try
    {
        cluster = subscriptionManagement.GetClusterAsync(clusterName).Result;
    }
    catch (Exception)
    {
        // Lookup failure is reported to the user; details are intentionally dropped.
        Console.Error.WriteLine("Error: Failed to find cluster " + clusterName + " in default or supplied subscriptions");
        Console.Error.WriteLine(Usage);
        return(-1);
    }
    if (cluster == null)
    {
        Console.Error.WriteLine("Error: Failed to find cluster {0} in default or supplied subscriptions", clusterName);
        Console.Error.WriteLine(Usage);
        return(-1);
    }
    // Staging container defaults to "staging" unless overridden by flag.
    string containerName = "staging";
    if (AzureStorageContainerName.IsSet)
    {
        containerName = AzureStorageContainerName.StringValue;
    }
    // The args are augmented with an additional setting containing the Azure connection string.
    args = args.Concat(new string[] {
        "--addsetting",
        "Microsoft.Research.Naiad.Cluster.Azure.DefaultConnectionString",
        string.Format("\"DefaultEndpointsProtocol=https;AccountName={0};AccountKey={1}\"",
                      cluster.StorageAccount.Split('.').First(), cluster.StorageKey)
    }).ToArray();
    Console.Error.WriteLine("Submitting job with args: {0}", string.Join(" ", args));
    AzureDfsClient azureDfs = new AzureDfsClient(cluster.StorageAccount, cluster.StorageKey, containerName);
    AzureYarnClient azureYarn = new AzureYarnClient(subscriptionManagement, azureDfs, ConfigHelpers.GetPPMHome(null), clusterName);
    AzureYarnSubmission submission = new AzureYarnSubmission(azureDfs, azureYarn, NumHosts, args);
    submission.Submit();
    // Join blocks until the submitted job completes and yields its exit status.
    return(submission.Join());
}
/// <summary>
/// Creates a submission targeting an Azure YARN cluster; all arguments are
/// forwarded unchanged to the base class.
/// </summary>
/// <param name="dfsClient">Client for the cluster's Azure DFS storage.</param>
/// <param name="yarnClient">Client used to communicate with the YARN cluster.</param>
/// <param name="numberOfProcesses">Process count forwarded to the base submission.</param>
/// <param name="args">Command-line arguments forwarded to the base submission.</param>
public AzureYarnSubmission(AzureDfsClient dfsClient, AzureYarnClient yarnClient, int numberOfProcesses, string[] args)
    : base(dfsClient, yarnClient, numberOfProcesses, args)
{
}
/// <summary>
/// Must be called after setting all properties.
/// Creates the Azure DFS client and the base container URI.
/// </summary>
/// <returns>null if initialization succeeds; otherwise the error message
/// describing the failure.</returns>
public override string Initialize()
{
    try
    {
        var dfsClient = new AzureDfsClient(this.AccountName, this.AccountKey, this.Container);
        this.AzureClient = dfsClient;

        Uri containerUri = Microsoft.Research.Peloponnese.Azure.Utils.ToAzureUri(
            this.AccountName, this.Container, "", null, this.AccountKey);
        this.baseUri = containerUri;

        return null;
    }
    catch (Exception ex)
    {
        // Dump the full exception for diagnosis, but hand back just the message.
        Console.WriteLine(ex);
        return ex.Message;
    }
}