Ejemplo n.º 1
0
        /// <summary>
        /// Synchronously writes every record of <paramref name="source"/> to the Azure blob
        /// identified by <paramref name="dataSetUri"/>.
        /// </summary>
        /// <typeparam name="T">Record type; its writer is obtained from the generated DryadLinq factory.</typeparam>
        /// <param name="context">Context handed to <c>DryadLinqCodeGen.GetFactory</c>.</param>
        /// <param name="source">Records to write, in enumeration order.</param>
        /// <param name="dataSetUri">Azure URI that is decomposed into account, key, container and blob name.</param>
        /// <param name="metaData">Not used by this implementation.</param>
        /// <param name="compressionScheme">Must be <c>CompressionScheme.None</c>; anything else is rejected.</param>
        /// <param name="isTemp">Not used by this implementation.</param>
        /// <exception cref="DryadLinqException">Thrown when a compression scheme other than None is requested.</exception>
        public override void Ingress <T>(DryadLinqContext context,
                                         IEnumerable <T> source,
                                         Uri dataSetUri,
                                         DryadLinqMetaData metaData,
                                         CompressionScheme compressionScheme,
                                         bool isTemp = false)
        {
            string accountName, accountKey, containerName, blobName;
            AzureUtils.FromAzureUri(dataSetUri, out accountName, out accountKey, out containerName, out blobName);

            // Compressed output is not supported; reject it before any client or blob is created.
            bool compressionRequested = compressionScheme != CompressionScheme.None;
            if (compressionRequested)
            {
                throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
            }

            AzureDfsClient dfsClient = new AzureDfsClient(accountName, accountKey, containerName);
            DryadLinqFactory<T> recordFactory =
                (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            // .Result blocks the calling thread until the blob writer stream is available.
            using (Stream blobStream = dfsClient.GetFileStreamWriterAsync(blobName).Result)
            {
                DryadLinqRecordWriter<T> recordWriter =
                    recordFactory.MakeWriter(new DryadLinqBlockStream(blobStream));

                foreach (T record in source)
                {
                    recordWriter.WriteRecordSync(record);
                }

                // Close the writer while the underlying stream is still open.
                recordWriter.Close();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the Azure client for <c>source</c>, decides whether the URI carries a
        /// "blobs" query parameter, and then opens the first blob via <c>OpenBlob</c>.
        /// </summary>
        /// <remarks>
        /// <c>blobIndex</c> is set to 0 when the "blobs" parameter is present and to -1 otherwise.
        /// </remarks>
        protected override async Task Open()
        {
            Log.LogInformation("Opening read for " + source.AbsoluteUri);

            // Only account, key and container are needed here; the blob name is resolved in OpenBlob.
            string accountName, accountKey, containerName, unusedBlobName;
            AzureUtils.FromAzureUri(source, out accountName, out accountKey, out containerName, out unusedBlobName);

            client = new AzureDfsClient(accountName, accountKey, containerName, false, new PeloponneseLogger(Log.Logger));
            client.SetParallelThreadCount(4);

            NameValueCollection parameters = System.Web.HttpUtility.ParseQueryString(source.Query);
            bool hasBlobList = parameters["blobs"] != null;
            blobIndex = hasBlobList ? 0 : -1;

            await OpenBlob();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Returns a copy of <paramref name="dataSetUri"/> whose query string has been augmented
        /// with the account key (when the URI did not supply one) and, for non-write access to
        /// <c>LineRecord</c> data, a "seekBoundaries" marker.
        /// </summary>
        /// <typeparam name="T">Record type of the data set.</typeparam>
        /// <param name="context">Used to look up the account key when the URI does not contain one.</param>
        /// <param name="dataSetUri">Azure URI to rewrite.</param>
        /// <param name="access">Intended access mode; the seek marker is added for anything other than Write.</param>
        /// <returns>The rewritten URI.</returns>
        public override Uri RewriteUri <T>(DryadLinqContext context, Uri dataSetUri, FileAccess access)
        {
            string accountName, accountKey, containerName, blobName;
            AzureUtils.FromAzureUri(dataSetUri, out accountName, out accountKey, out containerName, out blobName);

            UriBuilder rewritten = new UriBuilder(dataSetUri);
            NameValueCollection parameters = System.Web.HttpUtility.ParseQueryString(rewritten.Query);

            // Fill in the key from the context only when the URI itself did not carry one.
            if (accountKey == null)
            {
                parameters["key"] = context.AzureAccountKey(accountName);
            }

            bool notWriting = access != FileAccess.Write;
            bool isLineRecord = typeof(T) == typeof(Microsoft.Research.DryadLinq.LineRecord);
            if (notWriting && isLineRecord)
            {
                parameters["seekBoundaries"] = "Microsoft.Research.DryadLinq.LineRecord";
            }

            rewritten.Query = parameters.ToString();
            return rewritten.Uri;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Opens the next blob described by <c>source</c>, positions <c>readStream</c> according to
        /// the optional "offset", "length" and "seekboundaries" query parameters, and adds the
        /// number of bytes that will be read to <c>TotalLength</c>.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// Thrown when "seekboundaries" is present without both "offset" and "length", or when
        /// it names a record type other than Microsoft.Research.DryadLinq.LineRecord.
        /// </exception>
        private async Task OpenBlob()
        {
            string account, key, container, blobName;

            AzureUtils.FromAzureUri(source, out account, out key, out container, out blobName);

            NameValueCollection query = System.Web.HttpUtility.ParseQueryString(source.Query);

            if (blobIndex >= 0)
            {
                // A non-negative index selects an entry of the comma-separated "blobs" parameter;
                // that entry is appended to the blob name and the index advances for the next call.
                string[] blobs = query["blobs"].Split(',');
                blobName += blobs[blobIndex];
                ++blobIndex;
            }

            Log.LogInformation("Opening read for blob " + blobName);

            readStream = (await client.GetDfsFileStreamAsync(blobName, ExecutionTimeout, new PeloponneseLogger(Log.Logger))).Stream;

            // -1 marks "no offset supplied"; it is checked below before seeking for record boundaries.
            long offset = -1;

            if (query["offset"] != null)
            {
                // Query values are machine-generated, so parse them independently of the thread culture.
                offset = Int64.Parse(query["offset"], System.Globalization.CultureInfo.InvariantCulture);
                readStream.Seek(offset, SeekOrigin.Begin);
            }

            // Int64.MaxValue marks "no length supplied".
            bytesToRead = Int64.MaxValue;
            if (query["length"] != null)
            {
                bytesToRead = Int64.Parse(query["length"], System.Globalization.CultureInfo.InvariantCulture);
            }

            if (query["seekboundaries"] != null)
            {
                if (offset == -1 || bytesToRead == Int64.MaxValue)
                {
                    throw new ApplicationException("Reading " + source.AbsoluteUri + ": Can't look for line endings without block start and end metadata");
                }

                if (query["seekboundaries"] != "Microsoft.Research.DryadLinq.LineRecord")
                {
                    throw new ApplicationException("Reading " + source.AbsoluteUri + ": Don't know how to seek for record boundaries of type " + query["seekboundaries"]);
                }

                // SeekLineRecordBoundaries updates bytesToRead
                offset = await SeekLineRecordBoundaries(offset);

                readStream.Seek(offset, SeekOrigin.Begin);
            }

            // With no explicit length, the whole stream length is counted for this blob.
            long thisLength;

            if (bytesToRead == Int64.MaxValue)
            {
                thisLength = readStream.Length;
            }
            else
            {
                thisLength = bytesToRead;
            }

            long currentLength = TotalLength;

            if (currentLength == -1)
            {
                // -1 means no total has been recorded yet, so this blob's length becomes the total.
                currentLength = thisLength;
                Log.LogInformation("Setting Azure read total length to " + currentLength);
            }
            else
            {
                currentLength += thisLength;
                // Log the new running total (previously the increment was logged by mistake).
                Log.LogInformation("Increasing Azure read total length to " + currentLength);
            }

            TotalLength = currentLength;
        }