/// <summary>
/// Writes every record of <paramref name="source"/> to the Azure blob
/// addressed by <paramref name="dataSetUri"/>.
/// </summary>
/// <typeparam name="T">Record type; used to look up the record factory.</typeparam>
/// <param name="context">Context passed to the factory lookup.</param>
/// <param name="source">Records to write, enumerated once in order.</param>
/// <param name="dataSetUri">Azure URI from which account, key, container and blob are extracted.</param>
/// <param name="metaData">Not used by this implementation.</param>
/// <param name="compressionScheme">Must be <c>CompressionScheme.None</c>.</param>
/// <param name="isTemp">Not used by this implementation.</param>
/// <exception cref="DryadLinqException">
/// Thrown when <paramref name="compressionScheme"/> is anything other than <c>CompressionScheme.None</c>.
/// </exception>
public override void Ingress<T>(DryadLinqContext context, IEnumerable<T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme compressionScheme, bool isTemp = false)
{
    string storageAccount, storageKey, containerName, blobPath;
    AzureUtils.FromAzureUri(dataSetUri, out storageAccount, out storageKey, out containerName, out blobPath);

    // Compressed output is rejected only after the URI has been parsed.
    bool compressionRequested = compressionScheme != CompressionScheme.None;
    if (compressionRequested)
    {
        throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
    }

    AzureDfsClient dfsClient = new AzureDfsClient(storageAccount, storageKey, containerName);
    DryadLinqFactory<T> recordFactory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

    // This override is synchronous, so the asynchronous open is waited on here.
    using (Stream blobStream = dfsClient.GetFileStreamWriterAsync(blobPath).Result)
    {
        DryadLinqRecordWriter<T> recordWriter = recordFactory.MakeWriter(new DryadLinqBlockStream(blobStream));

        foreach (T record in source)
        {
            recordWriter.WriteRecordSync(record);
        }

        // The writer is closed before the using block disposes the blob stream.
        recordWriter.Close();
    }
}
/// <summary>
/// Prepares the reader: builds the Azure client from the <c>source</c> URI,
/// initialises <c>blobIndex</c>, and opens the first blob via <c>OpenBlob</c>.
/// </summary>
/// <returns>A task that completes once <c>OpenBlob</c> has finished.</returns>
protected override async Task Open()
{
    Log.LogInformation(string.Concat("Opening read for ", source.AbsoluteUri));

    string storageAccount, storageKey, containerName, blobPath;
    AzureUtils.FromAzureUri(source, out storageAccount, out storageKey, out containerName, out blobPath);

    PeloponneseLogger clientLogger = new PeloponneseLogger(Log.Logger);
    client = new AzureDfsClient(storageAccount, storageKey, containerName, false, clientLogger);
    client.SetParallelThreadCount(4);

    NameValueCollection queryParameters = System.Web.HttpUtility.ParseQueryString(source.Query);

    // blobIndex is 0 when the URI carries a "blobs" parameter and -1 when it
    // does not; OpenBlob only consults the blob list for a non-negative index.
    if (queryParameters["blobs"] != null)
    {
        blobIndex = 0;
    }
    else
    {
        blobIndex = -1;
    }

    await OpenBlob();
}
/// <summary>
/// Returns a copy of <paramref name="dataSetUri"/> whose query string has been
/// augmented with the account key (when the URI carries none) and, for
/// non-write access to <c>LineRecord</c> data, a <c>seekBoundaries</c> marker.
/// </summary>
/// <typeparam name="T">Record type of the data set.</typeparam>
/// <param name="context">Supplies the account key when the URI does not contain one.</param>
/// <param name="dataSetUri">Azure URI to rewrite.</param>
/// <param name="access">Requested access mode.</param>
/// <returns>The rewritten URI.</returns>
public override Uri RewriteUri<T>(DryadLinqContext context, Uri dataSetUri, FileAccess access)
{
    string storageAccount, storageKey, containerName, blobPath;
    AzureUtils.FromAzureUri(dataSetUri, out storageAccount, out storageKey, out containerName, out blobPath);

    UriBuilder rewritten = new UriBuilder(dataSetUri);
    NameValueCollection queryParameters = System.Web.HttpUtility.ParseQueryString(rewritten.Query);

    bool keyMissing = storageKey == null;
    if (keyMissing)
    {
        queryParameters["key"] = context.AzureAccountKey(storageAccount);
    }

    // Only non-write access to line records gets the seekBoundaries marker.
    bool isWrite = access == FileAccess.Write;
    bool isLineRecord = typeof(T) == typeof(Microsoft.Research.DryadLinq.LineRecord);
    if (!isWrite && isLineRecord)
    {
        queryParameters["seekBoundaries"] = "Microsoft.Research.DryadLinq.LineRecord";
    }

    rewritten.Query = queryParameters.ToString();
    return rewritten.Uri;
}
/// <summary>
/// Opens the next blob to read, positions <c>readStream</c> according to the
/// <c>offset</c>, <c>length</c> and <c>seekboundaries</c> query parameters of
/// <c>source</c>, and adds the number of bytes this blob contributes to
/// <c>TotalLength</c>.
/// </summary>
/// <remarks>
/// When <c>blobIndex</c> is non-negative, the entry at that index of the
/// comma-separated <c>blobs</c> query parameter is appended to the blob name
/// taken from the URI, and <c>blobIndex</c> is advanced by one.
/// </remarks>
/// <returns>A task that completes once the stream is open and positioned.</returns>
/// <exception cref="ApplicationException">
/// Thrown when <c>seekboundaries</c> is present without both <c>offset</c> and
/// <c>length</c>, or names a record type other than
/// <c>Microsoft.Research.DryadLinq.LineRecord</c>.
/// </exception>
private async Task OpenBlob()
{
    string account, key, container, blobName;
    AzureUtils.FromAzureUri(source, out account, out key, out container, out blobName);

    NameValueCollection query = System.Web.HttpUtility.ParseQueryString(source.Query);

    if (blobIndex >= 0)
    {
        string[] blobs = query["blobs"].Split(',');
        blobName += blobs[blobIndex];
        ++blobIndex;
    }

    Log.LogInformation("Opening read for blob " + blobName);
    readStream = (await client.GetDfsFileStreamAsync(blobName, ExecutionTimeout, new PeloponneseLogger(Log.Logger))).Stream;

    // offset == -1 and bytesToRead == Int64.MaxValue both mean "not specified".
    // The values come from a machine-generated URI, so they are parsed with the
    // invariant culture rather than the current thread culture.
    long offset = -1;
    string offsetText = query["offset"];
    if (offsetText != null)
    {
        offset = Int64.Parse(offsetText, System.Globalization.CultureInfo.InvariantCulture);
        readStream.Seek(offset, SeekOrigin.Begin);
    }

    bytesToRead = Int64.MaxValue;
    string lengthText = query["length"];
    if (lengthText != null)
    {
        bytesToRead = Int64.Parse(lengthText, System.Globalization.CultureInfo.InvariantCulture);
    }

    string boundaryType = query["seekboundaries"];
    if (boundaryType != null)
    {
        if (offset == -1 || bytesToRead == Int64.MaxValue)
        {
            throw new ApplicationException("Reading " + source.AbsoluteUri + ": Can't look for line endings without block start and end metadata");
        }

        if (boundaryType != "Microsoft.Research.DryadLinq.LineRecord")
        {
            throw new ApplicationException("Reading " + source.AbsoluteUri + ": Don't know how to seek for record boundaries of type " + boundaryType);
        }

        // SeekLineRecordBoundaries updates bytesToRead
        offset = await SeekLineRecordBoundaries(offset);
        readStream.Seek(offset, SeekOrigin.Begin);
    }

    // Without an explicit length the whole stream is counted.
    long thisLength;
    if (bytesToRead == Int64.MaxValue)
    {
        thisLength = readStream.Length;
    }
    else
    {
        thisLength = bytesToRead;
    }

    // TotalLength == -1 means no blob has been counted yet.
    long currentLength = TotalLength;
    if (currentLength == -1)
    {
        currentLength = thisLength;
        Log.LogInformation("Setting Azure read total length to " + currentLength);
    }
    else
    {
        currentLength += thisLength;
        // Report the new accumulated total, not just this blob's contribution.
        Log.LogInformation("Increasing Azure read total length to " + currentLength);
    }

    TotalLength = currentLength;
}