Example #1
0
 /// <summary>
 /// Ingress a .NET collection to a specified store location, optionally using a
 /// caller-supplied stream-based serializer.
 /// </summary>
 /// <typeparam name="T">The record type of the collection.</typeparam>
 /// <param name="context">An instance of DryadLinqContext.</param>
 /// <param name="source">The collection to be ingressed.</param>
 /// <param name="dataSetUri">The URI to store the collection.</param>
 /// <param name="metaData">The metadata for the collection.</param>
 /// <param name="outputScheme">The compression scheme used to store the collection.</param>
 /// <param name="isTemp">true to only store the collection temporarily with a time lease.</param>
 /// <param name="serializer">
 /// A stream-based serializer, given as an expression tree that takes the whole collection
 /// and the destination stream.
 /// NOTE(review): presumably a null value selects the provider's default record writer --
 /// confirm against the concrete implementations.
 /// </param>
 /// <remarks>
 /// This member is abstract; each concrete data provider supplies the storage-specific behavior.
 /// </remarks>
 public abstract void Ingress <T>(DryadLinqContext context,
                                  IEnumerable <T> source,
                                  Uri dataSetUri,
                                  DryadLinqMetaData metaData,
                                  CompressionScheme outputScheme,
                                  bool isTemp,
                                  Expression <Action <IEnumerable <T>, Stream> > serializer);
Example #2
0
        /// <summary>
        /// Ingresses a .NET collection into the Azure blob identified by <paramref name="dataSetUri"/>.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">The context used to obtain the generated record factory for T.</param>
        /// <param name="source">The collection to be ingressed.</param>
        /// <param name="dataSetUri">The Azure URI; it is decomposed into account, key, container and blob.</param>
        /// <param name="metaData">The metadata for the collection (not used by this implementation).</param>
        /// <param name="compressionScheme">Must be <c>CompressionScheme.None</c>.</param>
        /// <param name="isTemp">Temporary-storage flag (not used by this implementation).</param>
        /// <exception cref="DryadLinqException">
        /// Thrown when <paramref name="compressionScheme"/> is anything other than <c>CompressionScheme.None</c>.
        /// </exception>
        public override void Ingress<T>(DryadLinqContext context,
                                        IEnumerable<T> source,
                                        Uri dataSetUri,
                                        DryadLinqMetaData metaData,
                                        CompressionScheme compressionScheme,
                                        bool isTemp = false)
        {
            string account, key, container, blob;

            AzureUtils.FromAzureUri(dataSetUri, out account, out key, out container, out blob);
            if (compressionScheme != CompressionScheme.None)
            {
                throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
            }

            AzureDfsClient client = new AzureDfsClient(account, key, container);
            DryadLinqFactory<T> factory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            // The method is synchronous by contract, so the async open is awaited by blocking on Result.
            using (Stream stream = client.GetFileStreamWriterAsync(blob).Result)
            {
                DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream);
                DryadLinqRecordWriter<T> writer = factory.MakeWriter(nativeStream);
                try
                {
                    foreach (T rec in source)
                    {
                        writer.WriteRecordSync(rec);
                    }
                }
                finally
                {
                    // Close the writer even if enumerating the source or writing a record fails,
                    // so it is never left open when an exception propagates.
                    writer.Close();
                }
            }
        }
Example #3
0
        /// <summary>
        /// Loads a DryadLinqMetaData from an existing DSC stream.
        /// </summary>
        /// <param name="context">The context whose DSC service is queried for the file set.</param>
        /// <param name="dscStreamName">The name of the DSC file set to read metadata from.</param>
        /// <returns>
        /// Metadata carrying the context, the stream name, the file set's compression scheme and,
        /// when the file set stores a record-type entry, the resolved record type.
        /// </returns>
        /// <exception cref="DryadLinqException">
        /// Thrown with <c>HpcLinqErrorCode.ErrorReadingMetadata</c> when any step fails;
        /// the original exception is attached as the inner exception.
        /// </exception>
        internal static DryadLinqMetaData FromDscStream(HpcLinqContext context, string dscStreamName)
        {
            DryadLinqMetaData metaData;

            try
            {
                DscFileSet fs = context.DscService.GetFileSet(dscStreamName);
                metaData                 = new DryadLinqMetaData();
                metaData.m_context       = context;
                metaData.m_dscStreamName = dscStreamName;

                // Record type: stored as a UTF-8 encoded type name; absent entries leave m_elemType unset.
                byte[] metaDataBytes = fs.GetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME);
                if (metaDataBytes != null)
                {
                    string recordTypeString = Encoding.UTF8.GetString(metaDataBytes);
                    metaData.m_elemType = Type.GetType(recordTypeString);
                }

                // Compression scheme comes straight from the file set.
                metaData.m_compressionScheme = fs.CompressionScheme;
            }
            catch (Exception e)
            {
                // Pass the resource string directly: running it through String.Format with no
                // arguments adds nothing and would itself throw if the text ever contained a
                // format placeholder, hiding the real failure.
                throw new DryadLinqException(HpcLinqErrorCode.ErrorReadingMetadata,
                                             SR.ErrorReadingMetadata, e);
            }

            return metaData;
        }
Example #4
0
        /// <summary>
        /// Wraps an in-memory sequence as a DryadLinq-backed IQueryable{T}.
        /// </summary>
        /// <typeparam name="T">The type of the records in the table.</typeparam>
        /// <param name="data">The source data.</param>
        /// <returns>An IQueryable{T} over the stored data, bound to the DryadLinq query provider.</returns>
        /// <remarks>
        /// The data is stored at a freshly generated temporary stream URI using this context's
        /// output compression scheme, and the store is requested as temporary.
        /// </remarks>
        public IQueryable<T> FromEnumerable<T>(IEnumerable<T> data)
        {
            Uri tempUri = this.MakeTemporaryStreamUri();
            CompressionScheme scheme = this.OutputDataCompressionScheme;

            // Describe the data set before handing it to the provider.
            DryadLinqMetaData description = new DryadLinqMetaData(this, typeof(T), tempUri, scheme);

            IQueryable<T> stored = DataProvider.StoreData(this, data, tempUri, description, scheme, true);
            return stored;
        }
Example #5
0
        /// <summary>
        /// Wraps an in-memory sequence as a LINQ-to-HPC IQueryable{T}.
        /// </summary>
        /// <typeparam name="T">The type of the records in the table.</typeparam>
        /// <param name="data">The source data.</param>
        /// <returns>An IQueryable{T} over the ingressed data, bound to the HPC LINQ query provider.</returns>
        /// <remarks>
        /// The data is ingressed into a DSC file set with an auto-generated temporary name,
        /// using the configured intermediate-data compression scheme.
        /// </remarks>
        public IQueryable<T> FromEnumerable<T>(IEnumerable<T> data)
        {
            string tempFileSet = DataPath.MakeUniqueTemporaryDscFileSetName();
            DscCompressionScheme scheme = Configuration.IntermediateDataCompressionScheme;

            // Describe the file set before handing it to the provider.
            DryadLinqMetaData description = DryadLinqMetaData.ForLocalDebug(this, typeof(T), tempFileSet, scheme);

            IQueryable<T> ingressed =
                DataProvider.IngressTemporaryDataDirectlyToDsc(this, data, tempFileSet, description, scheme);
            return ingressed;
        }
Example #6
0
 /// <summary>
 /// Placeholder override: ingress is not available for this provider and always throws.
 /// </summary>
 /// <typeparam name="T">The record type of the collection.</typeparam>
 /// <exception cref="DryadLinqException">Always thrown, with the message "TBA".</exception>
 public override void Ingress<T>(DryadLinqContext context,
                                 IEnumerable<T> source,
                                 Uri dataSetUri,
                                 DryadLinqMetaData metaData,
                                 CompressionScheme outputScheme,
                                 bool isTemp = false)
 {
     // None of the arguments are inspected; the call fails unconditionally.
     DryadLinqException notAvailable = new DryadLinqException("TBA");
     throw notAvailable;
 }
Example #7
0
        /// <summary>
        /// Ingresses a .NET collection into a single-partition data set on the file system.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">The context used to obtain the generated record factory for T.</param>
        /// <param name="source">The collection to be ingressed.</param>
        /// <param name="dataSetUri">
        /// The URI of the partfile to create; when it carries a host, the path is turned into a UNC path.
        /// </param>
        /// <param name="metaData">The metadata for the collection (not used by this implementation).</param>
        /// <param name="compressionScheme">The compression scheme passed to the block stream of the default writer.</param>
        /// <param name="isTemp">Temporary-storage flag (not used by this implementation).</param>
        /// <param name="serializer">
        /// Optional custom serializer. When null the generated record writer is used; otherwise the
        /// compiled serializer writes the whole collection directly to the partition file stream.
        /// </param>
        /// <remarks>
        /// The records go to "Part.00000000" inside a uniquely named subdirectory next to the partfile.
        /// The partfile itself then receives three lines: the partition base path, the partition
        /// count ("1"), and "0,&lt;partition size in bytes&gt;".
        /// </remarks>
        public override void Ingress<T>(DryadLinqContext context,
                                        IEnumerable<T> source,
                                        Uri dataSetUri,
                                        DryadLinqMetaData metaData,
                                        CompressionScheme compressionScheme,
                                        bool isTemp,
                                        Expression<Action<IEnumerable<T>, Stream>> serializer)
        {
            string fileName = dataSetUri.LocalPath;

            if (!String.IsNullOrEmpty(dataSetUri.Host))
            {
                fileName = @"\\" + dataSetUri.Host + fileName;
            }

            // Write the partition:
            string partDir = Path.GetDirectoryName(fileName);

            partDir = Path.Combine(partDir, DryadLinqUtil.MakeUniqueName());
            Directory.CreateDirectory(partDir);
            string uncPath = Path.Combine(partDir, "Part");
            string partitionPath = uncPath + ".00000000";
            DryadLinqFactory<T> factory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            using (FileStream fstream = new FileStream(partitionPath, FileMode.CreateNew, FileAccess.Write))
            {
                if (serializer == null)
                {
                    DryadLinqFileBlockStream nativeStream = new DryadLinqFileBlockStream(fstream, compressionScheme);
                    DryadLinqRecordWriter<T> recordWriter = factory.MakeWriter(nativeStream);
                    try
                    {
                        foreach (T rec in source)
                        {
                            recordWriter.WriteRecordSync(rec);
                        }
                    }
                    finally
                    {
                        // Close the writer even when enumeration or a write fails, so it is
                        // never left open while the exception propagates.
                        recordWriter.Close();
                    }
                }
                else
                {
                    Action<IEnumerable<T>, Stream> serializerFunc = serializer.Compile();
                    serializerFunc(source, fstream);
                }
            }

            // Write the partfile:
            long partSize = new FileInfo(partitionPath).Length;

            using (StreamWriter writer = File.CreateText(fileName))
            {
                writer.WriteLine(uncPath);
                writer.WriteLine("1");
                writer.WriteLine("{0},{1}", 0, partSize);
            }
        }
Example #8
0
        /// <summary>
        /// Stores an IEnumerable{T} at a specified location and returns a query over the stored data.
        /// </summary>
        /// <typeparam name="T">The record type of the data.</typeparam>
        /// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
        /// <param name="source">The data to store.</param>
        /// <param name="dataSetUri">The URI of the store location; its scheme selects the data provider.</param>
        /// <param name="metaData">The metadata of the data.</param>
        /// <param name="outputScheme">The compression scheme.</param>
        /// <param name="isTemp">true if the data is only stored temporarily.</param>
        /// <returns>A DryadLinqQuery{T} over the data at the provider-rewritten URI.</returns>
        internal static DryadLinqQuery<T> StoreData<T>(DryadLinqContext context,
                                                       IEnumerable<T> source,
                                                       Uri dataSetUri,
                                                       DryadLinqMetaData metaData,
                                                       CompressionScheme outputScheme,
                                                       bool isTemp = false)
        {
            // Pick the provider from the URI scheme.
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));

            // The provider may rewrite the URI; both the write and the read-back use the rewritten one.
            Uri storeUri = provider.RewriteUri<T>(context, dataSetUri);
            provider.Ingress(context, source, storeUri, metaData, outputScheme, isTemp);

            DryadLinqQuery<T> table = DataProvider.GetPartitionedTable<T>(context, storeUri);
            return table;
        }
Example #9
0
        /// <summary>
        /// Ingresses a .NET collection into a single-partition data set on the file system.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">
        /// Supplies the record factory and, when set, the partition directory (<c>PartitionUncPath</c>).
        /// </param>
        /// <param name="source">The collection to be ingressed.</param>
        /// <param name="dataSetUri">
        /// The URI of the partfile to create; its directory is used for the partition when the
        /// context has no partition path.
        /// </param>
        /// <param name="metaData">The metadata for the collection (not used by this implementation).</param>
        /// <param name="compressionScheme">The compression scheme passed to the block stream.</param>
        /// <param name="isTemp">Temporary-storage flag (not used by this implementation).</param>
        /// <remarks>
        /// The records go to "Part.00000000" inside a uniquely named subdirectory. The partfile then
        /// receives three lines: the partition base path, the partition count ("1"), and
        /// "0,&lt;partition size in bytes&gt;,&lt;machine name&gt;".
        /// </remarks>
        public override void Ingress<T>(DryadLinqContext context,
                                        IEnumerable<T> source,
                                        Uri dataSetUri,
                                        DryadLinqMetaData metaData,
                                        CompressionScheme compressionScheme,
                                        bool isTemp = false)
        {
            // Write the partition:
            string partDir = context.PartitionUncPath;

            if (partDir == null)
            {
                partDir = Path.GetDirectoryName(dataSetUri.LocalPath);
            }

            if (!Path.IsPathRooted(partDir))
            {
                partDir = Path.Combine("/", partDir);
            }
            partDir = Path.Combine(partDir, DryadLinqUtil.MakeUniqueName());
            Directory.CreateDirectory(partDir);
            string partPath = Path.Combine(partDir, "Part");
            string partFilePath = partPath + ".00000000";
            DryadLinqFactory<T> factory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            using (FileStream fstream = new FileStream(partFilePath, FileMode.CreateNew, FileAccess.Write))
            {
                DryadLinqFileBlockStream nativeStream = new DryadLinqFileBlockStream(fstream, compressionScheme);
                DryadLinqRecordWriter<T> recordWriter = factory.MakeWriter(nativeStream);
                try
                {
                    foreach (T rec in source)
                    {
                        recordWriter.WriteRecordSync(rec);
                    }
                }
                finally
                {
                    // Close the writer even when enumeration or a write fails, so it is
                    // never left open while the exception propagates.
                    recordWriter.Close();
                }
            }

            // Write the partfile:
            FileInfo finfo = new FileInfo(partFilePath);

            using (StreamWriter writer = File.CreateText(dataSetUri.LocalPath))
            {
                writer.WriteLine(partPath);
                writer.WriteLine("1");
                writer.WriteLine("{0},{1},{2}", 0, finfo.Length, Environment.MachineName);
            }
        }
Example #10
0
        /// <summary>
        /// Stores an IEnumerable{T} at a specified location using caller-supplied stream
        /// (de)serializers, and returns an initialized query over the stored data.
        /// </summary>
        /// <typeparam name="T">The record type of the data.</typeparam>
        /// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
        /// <param name="source">The data to store.</param>
        /// <param name="dataSetUri">The URI of the store location; its scheme selects the data provider.</param>
        /// <param name="metaData">The metadata of the data.</param>
        /// <param name="outputScheme">The compression scheme.</param>
        /// <param name="isTemp">true if the data is only stored temporarily.</param>
        /// <param name="serializer">A stream-based serializer, forwarded to the provider's Ingress.</param>
        /// <param name="deserializer">A stream-based deserializer, attached to the returned query.</param>
        /// <returns>A DryadLinqQuery{T} over the data at the provider-rewritten URI.</returns>
        internal static DryadLinqQuery<T> StoreData<T>(DryadLinqContext context,
                                                       IEnumerable<T> source,
                                                       Uri dataSetUri,
                                                       DryadLinqMetaData metaData,
                                                       CompressionScheme outputScheme,
                                                       bool isTemp,
                                                       Expression<Action<IEnumerable<T>, Stream>> serializer,
                                                       Expression<Func<Stream, IEnumerable<T>>> deserializer)
        {
            // Pick the provider from the URI scheme.
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));

            // The provider may rewrite the URI; both the write and the read-back use the rewritten one.
            Uri storeUri = provider.RewriteUri<T>(context, dataSetUri);
            provider.Ingress(context, source, storeUri, metaData, outputScheme, isTemp, serializer);

            DryadLinqQuery<T> table = DataProvider.GetPartitionedTable<T>(context, storeUri, deserializer);

            // must initialize
            table.CheckAndInitialize();
            return table;
        }
Example #11
0
        /// <summary>
        /// Builds a DryadLinqMetaData directly from the supplied values.
        /// </summary>
        /// <param name="context">The owning context.</param>
        /// <param name="recordType">The record (element) type of the data.</param>
        /// <param name="dscStreamName">The DSC stream name the metadata refers to.</param>
        /// <param name="compressionScheme">The compression scheme of the data.</param>
        /// <returns>A metadata object holding exactly the four supplied values.</returns>
        internal static DryadLinqMetaData ForLocalDebug(HpcLinqContext context,
                                                        Type recordType,
                                                        string dscStreamName,
                                                        DscCompressionScheme compressionScheme)
        {
            // Version, flags, fingerprint and data-set info are not populated here.
            return new DryadLinqMetaData
            {
                m_context = context,
                m_dscStreamName = dscStreamName,
                m_elemType = recordType,
                m_compressionScheme = compressionScheme
            };
        }
Example #12
0
        /// <summary>
        /// Returns an enumerator over the records of the DSC file set backing this table.
        /// </summary>
        /// <returns>A TableEnumerator built from the file set's read paths and compression scheme.</returns>
        /// <exception cref="DryadLinqException">
        /// Thrown with <c>HpcLinqErrorCode.FailedToGetReadPathsForStream</c> when the file set,
        /// its read paths, or its metadata cannot be obtained.
        /// </exception>
        public IEnumerator<T> GetEnumerator()
        {
            // One entry per DSC file; each entry is the array of replica paths for that file.
            List<string[]> replicaPathsPerFile;
            DscCompressionScheme scheme;

            try
            {
                DscFileSet dscFileSet = m_context.DscService.GetFileSet(m_fileSetName);
                replicaPathsPerFile = (from file in dscFileSet.GetFiles()
                                       select file.ReadPaths).ToList();

                scheme = DryadLinqMetaData.FromDscStream(m_context, m_fileSetName).CompressionScheme;
            }
            catch (Exception e)
            {
                string message = String.Format(SR.FailedToGetReadPathsForStream, this.m_fileSetName);
                throw new DryadLinqException(HpcLinqErrorCode.FailedToGetReadPathsForStream, message, e);
            }

            return new TableEnumerator(m_context, replicaPathsPerFile, m_fileSetName, scheme);
        }
        /// <summary>
        /// Creates a DryadLinqMetaData from a query output node.
        /// </summary>
        /// <param name="context">The owning context.</param>
        /// <param name="node">The output node supplying the URI, first output type and compression scheme.</param>
        /// <returns>Metadata describing the node's output.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the node's output URI is not a valid data path.
        /// </exception>
        internal static DryadLinqMetaData Get(DryadLinqContext context, DLinqOutputNode node)
        {
            bool validPath = DataPath.IsValidDataPath(node.OutputUri);
            if (!validPath)
            {
                throw new InvalidOperationException();
            }

            // Version, flags, fingerprint and data-set info are not populated here.
            return new DryadLinqMetaData
            {
                m_context = context,
                m_dataSetUri = node.OutputUri,
                m_elemType = node.OutputTypes[0],
                m_compressionScheme = node.OutputCompressionScheme
            };
        }
Example #14
0
        /// <summary>
        /// Creates a DryadLinqMetaData from a query output node.
        /// </summary>
        /// <param name="context">The owning context.</param>
        /// <param name="node">The output node supplying the metadata URI, first output type and compression scheme.</param>
        /// <returns>Metadata describing the node's output.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the node's metadata URI is neither a DSC nor an HDFS path.
        /// </exception>
        internal static DryadLinqMetaData FromOutputNode(HpcLinqContext context, DryadOutputNode node)
        {
            // Only DSC and HDFS targets are accepted.
            if (!DataPath.IsDsc(node.MetaDataUri) && !DataPath.IsHdfs(node.MetaDataUri))
            {
                throw new InvalidOperationException();
            }

            // Version, flags, fingerprint and data-set info are not populated here.
            return new DryadLinqMetaData
            {
                m_context = context,
                m_dscStreamName = node.MetaDataUri,
                m_elemType = node.OutputTypes[0],
                m_compressionScheme = node.OutputCompressionScheme
            };
        }
Example #15
0
        /// <summary>
        /// Creates a DryadLinqMetaData from a query output node.
        /// </summary>
        /// <param name="context">The owning context.</param>
        /// <param name="node">The output node supplying the URI, first output type and compression scheme.</param>
        /// <returns>Metadata describing the node's output.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the node's output URI is not a valid data path.
        /// </exception>
        internal static DryadLinqMetaData Get(DryadLinqContext context, DLinqOutputNode node)
        {
            bool validPath = DataPath.IsValidDataPath(node.OutputUri);
            if (!validPath)
            {
                throw new InvalidOperationException();
            }

            // Version, flags, fingerprint and data-set info are not populated here.
            return new DryadLinqMetaData
            {
                m_context = context,
                m_dataSetUri = node.OutputUri,
                m_elemType = node.OutputTypes[0],
                m_compressionScheme = node.OutputCompressionScheme
            };
        }
Example #16
0
        /// <summary>
        /// Ingresses a .NET collection into the HDFS location identified by <paramref name="dataSetUri"/>.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">Supplies the HDFS client and the generated record factory for T.</param>
        /// <param name="source">The collection to be ingressed.</param>
        /// <param name="dataSetUri">The URI to store the collection.</param>
        /// <param name="metaData">The metadata for the collection (not used by this implementation).</param>
        /// <param name="outputScheme">The compression scheme (not used by this implementation).</param>
        /// <param name="isTemp">Temporary-storage flag (not used by this implementation).</param>
        /// <param name="serializer">
        /// A stream-based serializer (not used by this implementation; records are always written
        /// with the generated record writer).
        /// NOTE(review): callers that pair this with a custom deserializer may not get a matching
        /// format -- confirm whether the serializer should be honored here.
        /// </param>
        public override void Ingress<T>(DryadLinqContext context,
                                        IEnumerable<T> source,
                                        Uri dataSetUri,
                                        DryadLinqMetaData metaData,
                                        CompressionScheme outputScheme,
                                        bool isTemp,
                                        Expression<Action<IEnumerable<T>, Stream>> serializer)
        {
            DryadLinqFactory<T> factory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            using (Stream stream = context.GetHdfsClient.GetDfsStreamWriter(dataSetUri))
            {
                DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream);
                DryadLinqRecordWriter<T> writer = factory.MakeWriter(nativeStream);
                try
                {
                    foreach (T rec in source)
                    {
                        writer.WriteRecordSync(rec);
                    }
                }
                finally
                {
                    // Close the writer even when enumeration or a write fails, so it is
                    // never left open while the exception propagates.
                    writer.Close();
                }
            }
        }
Example #17
0
 /// <summary>
 /// Ingress a .NET collection to a specified store location.
 /// </summary>
 /// <typeparam name="T">The record type of the collection.</typeparam>
 /// <param name="context">An instance of DryadLinqContext.</param>
 /// <param name="source">The collection to be ingressed.</param>
 /// <param name="dataSetUri">The URI to store the collection.</param>
 /// <param name="metaData">The metadata for the collection.</param>
 /// <param name="outputScheme">The compression scheme used to store the collection.</param>
 /// <param name="isTemp">
 /// true to only store the collection temporarily with a time lease; defaults to false.
 /// </param>
 /// <remarks>
 /// This member is abstract; each concrete data provider supplies the storage-specific behavior.
 /// </remarks>
 public abstract void Ingress <T>(DryadLinqContext context,
                                  IEnumerable <T> source,
                                  Uri dataSetUri,
                                  DryadLinqMetaData metaData,
                                  CompressionScheme outputScheme,
                                  bool isTemp = false);
Example #18
0
        /// <summary>
        /// Creates a DryadLinqMetaData from a query output node.
        /// </summary>
        /// <param name="context">The owning context.</param>
        /// <param name="node">The output node supplying the metadata URI, first output type and compression scheme.</param>
        /// <returns>Metadata describing the node's output.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the node's metadata URI is neither a DSC nor an HDFS path.
        /// </exception>
        internal static DryadLinqMetaData FromOutputNode(HpcLinqContext context, DryadOutputNode node)
        {
            // Only DSC and HDFS targets are accepted.
            if (!DataPath.IsDsc(node.MetaDataUri) && !DataPath.IsHdfs(node.MetaDataUri))
            {
                throw new InvalidOperationException();
            }

            // Version, flags, fingerprint and data-set info are not populated here.
            return new DryadLinqMetaData
            {
                m_context = context,
                m_dscStreamName = node.MetaDataUri,
                m_elemType = node.OutputTypes[0],
                m_compressionScheme = node.OutputCompressionScheme
            };
        }
Example #19
0
        /// <summary>
        /// Loads a DryadLinqMetaData from an existing DSC stream.
        /// </summary>
        /// <param name="context">The context whose DSC service is queried for the file set.</param>
        /// <param name="dscStreamName">The name of the DSC file set to read metadata from.</param>
        /// <returns>
        /// Metadata carrying the context, the stream name, the file set's compression scheme and,
        /// when the file set stores a record-type entry, the resolved record type.
        /// </returns>
        /// <exception cref="DryadLinqException">
        /// Thrown with <c>HpcLinqErrorCode.ErrorReadingMetadata</c> when any step fails;
        /// the original exception is attached as the inner exception.
        /// </exception>
        internal static DryadLinqMetaData FromDscStream(HpcLinqContext context, string dscStreamName)
        {
            DryadLinqMetaData metaData;
            try
            {
                DscFileSet fs = context.DscService.GetFileSet(dscStreamName);
                metaData = new DryadLinqMetaData();
                metaData.m_context = context;
                metaData.m_dscStreamName = dscStreamName;

                // Record type: stored as a UTF-8 encoded type name; absent entries leave m_elemType unset.
                byte[] metaDataBytes = fs.GetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME);
                if (metaDataBytes != null)
                {
                    string recordTypeString = Encoding.UTF8.GetString(metaDataBytes);
                    metaData.m_elemType = Type.GetType(recordTypeString);
                }

                // Compression scheme comes straight from the file set.
                metaData.m_compressionScheme = fs.CompressionScheme;
            }
            catch (Exception e)
            {
                // Pass the resource string directly: running it through String.Format with no
                // arguments adds nothing and would itself throw if the text ever contained a
                // format placeholder, hiding the real failure.
                throw new DryadLinqException(HpcLinqErrorCode.ErrorReadingMetadata,
                                             SR.ErrorReadingMetadata, e);
            }

            return metaData;
        }
Example #20
0
        /// <summary>
        /// Builds a DryadLinqMetaData directly from the supplied values.
        /// </summary>
        /// <param name="context">The owning context.</param>
        /// <param name="recordType">The record (element) type of the data.</param>
        /// <param name="dscStreamName">The DSC stream name the metadata refers to.</param>
        /// <param name="compressionScheme">The compression scheme of the data.</param>
        /// <returns>A metadata object holding exactly the four supplied values.</returns>
        internal static DryadLinqMetaData ForLocalDebug(HpcLinqContext context,
                                                        Type recordType,
                                                        string dscStreamName,
                                                        DscCompressionScheme compressionScheme)
        {
            // Version, flags, fingerprint and data-set info are not populated here.
            return new DryadLinqMetaData
            {
                m_context = context,
                m_dscStreamName = dscStreamName,
                m_elemType = recordType,
                m_compressionScheme = compressionScheme
            };
        }
Example #21
0
        /// <summary>
        /// Ingresses data directly to DSC and then tries to put a temporary-length lease on the result.
        /// </summary>
        /// <typeparam name="T">The record type of the data.</typeparam>
        /// <param name="context">The context whose DSC service is used.</param>
        /// <param name="source">The data to ingress.</param>
        /// <param name="dscFileSetName">The name of the DSC file set to create.</param>
        /// <param name="metaData">The metadata of the data.</param>
        /// <param name="outputScheme">The compression scheme.</param>
        /// <returns>The query returned by the underlying ingress.</returns>
        /// <remarks>
        /// Setting the lease is best effort: a DscException raised while doing so is ignored.
        /// </remarks>
        internal static DryadLinqQuery<T> IngressTemporaryDataDirectlyToDsc<T>(HpcLinqContext context,
                                                                               IEnumerable<T> source,
                                                                               string dscFileSetName,
                                                                               DryadLinqMetaData metaData,
                                                                               DscCompressionScheme outputScheme)
        {
            DryadLinqQuery<T> ingressed = IngressDataDirectlyToDsc(context, source, dscFileSetName, metaData, outputScheme);

            try
            {
                // Lease ends a configured duration from now.
                DscFileSet tempFileSet = context.DscService.GetFileSet(dscFileSetName);
                DateTime leaseEnd = DateTime.Now.Add(StaticConfig.LeaseDurationForTempFiles);
                tempFileSet.SetLeaseEndTime(leaseEnd);
            }
            catch (DscException)
            {
                // Deliberately ignored: the data is already ingressed; only the lease is missing.
            }

            return ingressed;
        }
Example #22
0
        /// <summary>
        /// Streams plain enumerable data directly into a DSC file set and returns a query over it.
        /// </summary>
        /// <typeparam name="T">The record type of the data.</typeparam>
        /// <param name="context">The context whose DSC service and configuration are used.</param>
        /// <param name="source">
        /// The data to ingress. It is enumerated twice when non-empty: once to probe for
        /// emptiness and once to write the records.
        /// </param>
        /// <param name="dscFileSetName">The name of the DSC file set to create.</param>
        /// <param name="metaData">
        /// Optional metadata; when non-null its element type's assembly-qualified name is stored
        /// on the file set.
        /// </param>
        /// <param name="outputScheme">The compression scheme of the file set.</param>
        /// <returns>A partitioned-table query over the resulting file set.</returns>
        /// <remarks>
        /// On any failure the method makes a best-effort attempt to delete the file set and then
        /// rethrows the original exception.
        /// </remarks>
        internal static DryadLinqQuery<T> IngressDataDirectlyToDsc<T>(HpcLinqContext context,
                                                                      IEnumerable<T> source,
                                                                      string dscFileSetName,
                                                                      DryadLinqMetaData metaData,
                                                                      DscCompressionScheme outputScheme)
        {
            try
            {
                string dscFileSetUri = DataPath.MakeDscStreamUri(context.DscService.HostName, dscFileSetName);
                if (!source.Any())
                {
                    // There is no data: we must create a file set with an empty file ourselves
                    // (the factory/stream approach opens files lazily and thus never opens a file
                    // if there is no data).
                    if (context.DscService.FileSetExists(dscFileSetName))
                    {
                        context.DscService.DeleteFileSet(dscFileSetName);
                    }
                    DscFileSet fileSet = context.DscService.CreateFileSet(dscFileSetName, outputScheme);
                    DscFile file = fileSet.AddNewFile(0);
                    string writePath = file.WritePath;

                    if (outputScheme == DscCompressionScheme.Gzip)
                    {
                        // Even a zero-byte file must go through the gzip compressor (for headers etc).
                        using (Stream s = new FileStream(writePath, FileMode.Create))
                        using (GZipStream gzipStream = new GZipStream(s, CompressionMode.Compress))
                        {
                            // Nothing to write; disposing the stream finalizes the gzip output.
                        }
                    }
                    else
                    {
                        using (StreamWriter sw = new StreamWriter(writePath, false))
                        {
                            // Nothing to write; creating and closing the writer produces the empty file.
                        }
                    }
                    fileSet.Seal();
                }
                else
                {
                    HpcLinqFactory<T> factory = (HpcLinqFactory<T>)HpcLinqCodeGen.GetFactory(context, typeof(T));

                    // new DscBlockStream(uri,Create,Write,compress) provides a DSC stream with one partition.
                    NativeBlockStream nativeStream = new DscBlockStream(dscFileSetUri, FileMode.Create, FileAccess.Write, outputScheme);
                    HpcRecordWriter<T> writer = factory.MakeWriter(nativeStream);
                    try
                    {
                        if (context.Configuration.AllowConcurrentUserDelegatesInSingleProcess)
                        {
                            foreach (T item in source)
                            {
                                writer.WriteRecordAsync(item);
                            }
                        }
                        else
                        {
                            foreach (T item in source)
                            {
                                writer.WriteRecordSync(item);
                            }
                        }
                    }
                    finally
                    {
                        writer.Close(); // closes the NativeBlockStream, which seals the dsc stream.
                    }
                }

                if (metaData != null)
                {
                    DscFileSet fileSet = context.DscService.GetFileSet(dscFileSetName);
                    fileSet.SetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME, Encoding.UTF8.GetBytes(metaData.ElemType.AssemblyQualifiedName));
                }

                return DataProvider.GetPartitionedTable<T>(context, dscFileSetUri);
            }
            catch
            {
                // Something failed while creating, writing or annotating the file set: try to
                // delete it to avoid cruft being left in DSC.
                try
                {
                    context.DscService.DeleteFileSet(dscFileSetName);
                }
                catch
                {
                    // suppress error during delete
                }

                throw; // rethrow the original exception.
            }
        }