/// <summary>
/// Writes a .NET collection to the given store location, optionally using a
/// caller-supplied stream-based serializer.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="source">The collection to be ingressed.</param>
/// <param name="dataSetUri">The URI at which the collection is stored.</param>
/// <param name="metaData">The metadata describing the collection.</param>
/// <param name="outputScheme">The compression scheme used to store the collection.</param>
/// <param name="isTemp">true to store the collection only temporarily, with a time lease.</param>
/// <param name="serializer">A stream-based serializer for the collection.</param>
public abstract void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme outputScheme,
    bool isTemp,
    Expression<Action<IEnumerable<T>, Stream>> serializer);
/// <summary>
/// Writes a .NET collection to an Azure blob identified by <paramref name="dataSetUri"/>.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">The context used to look up the record factory for T.</param>
/// <param name="source">The records to write.</param>
/// <param name="dataSetUri">Azure URI that is split into account, key, container and blob.</param>
/// <param name="metaData">Not used by this implementation.</param>
/// <param name="compressionScheme">Must be CompressionScheme.None; anything else is rejected.</param>
/// <param name="isTemp">Not used by this implementation.</param>
/// <exception cref="DryadLinqException">Thrown when compression is requested.</exception>
public override void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme compressionScheme,
    bool isTemp = false)
{
    // The URI is decomposed first, so a malformed URI is reported before the compression check.
    string accountName, accountKey, containerName, blobName;
    AzureUtils.FromAzureUri(dataSetUri, out accountName, out accountKey, out containerName, out blobName);

    bool compressionRequested = compressionScheme != CompressionScheme.None;
    if (compressionRequested)
    {
        throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
    }

    AzureDfsClient dfsClient = new AzureDfsClient(accountName, accountKey, containerName);
    DryadLinqFactory<T> recordFactory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

    // Blocks until the asynchronous open completes.
    var openTask = dfsClient.GetFileStreamWriterAsync(blobName);
    using (Stream blobStream = openTask.Result)
    {
        DryadLinqRecordWriter<T> recordWriter = recordFactory.MakeWriter(new DryadLinqBlockStream(blobStream));
        foreach (T record in source)
        {
            recordWriter.WriteRecordSync(record);
        }
        recordWriter.Close();
    }
}
/// <summary>
/// Builds a DryadLinqMetaData from an existing DSC stream (file set).
/// </summary>
/// <param name="context">The context whose DscService is queried; stored on the result.</param>
/// <param name="dscStreamName">The name of the DSC file set to read metadata from.</param>
/// <returns>The metadata populated with the record type (when present) and compression scheme.</returns>
/// <exception cref="DryadLinqException">
/// Wraps any exception raised while reading the metadata (error code ErrorReadingMetadata).
/// </exception>
internal static DryadLinqMetaData FromDscStream(HpcLinqContext context, string dscStreamName)
{
    try
    {
        DscFileSet fileSet = context.DscService.GetFileSet(dscStreamName);

        DryadLinqMetaData result = new DryadLinqMetaData();
        result.m_context = context;
        result.m_dscStreamName = dscStreamName;

        // Record type: stored as a UTF-8 encoded type name; left unset when the entry is absent.
        byte[] recordTypeBytes = fileSet.GetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME);
        if (recordTypeBytes != null)
        {
            result.m_elemType = Type.GetType(Encoding.UTF8.GetString(recordTypeBytes));
        }

        // Compression scheme comes straight from the file set.
        result.m_compressionScheme = fileSet.CompressionScheme;
        return result;
    }
    catch (Exception e)
    {
        throw new DryadLinqException(HpcLinqErrorCode.ErrorReadingMetadata,
                                     String.Format(SR.ErrorReadingMetadata),
                                     e);
    }
}
/// <summary>
/// Converts an IEnumerable{T} to a DryadLinq specialized IQueryable{T}.
/// </summary>
/// <typeparam name="T">The type of the records in the table.</typeparam>
/// <param name="data">The source data.</param>
/// <returns>An IQueryable{T} representing the data with DryadLinq query provider.</returns>
/// <remarks>
/// The data is stored at a freshly generated temporary stream URI, using this context's
/// output compression scheme, and is flagged as temporary.
/// </remarks>
public IQueryable<T> FromEnumerable<T>(IEnumerable<T> data)
{
    Uri tempUri = this.MakeTemporaryStreamUri();
    CompressionScheme scheme = this.OutputDataCompressionScheme;
    DryadLinqMetaData info = new DryadLinqMetaData(this, typeof(T), tempUri, scheme);

    IQueryable<T> stored = DataProvider.StoreData(this, data, tempUri, info, scheme, true);
    return stored;
}
/// <summary>
/// Converts an IEnumerable{T} to a LINQ-to-HPC IQueryable{T}.
/// </summary>
/// <typeparam name="T">The type of the records in the table.</typeparam>
/// <param name="data">The source data.</param>
/// <returns>An IQueryable{T} representing the data and associated with the HPC LINQ query provider.</returns>
/// <remarks>
/// The data is ingressed into a DSC file set with an auto-generated temporary name,
/// using the configured intermediate-data compression scheme, and given a temporary lease.
/// </remarks>
public IQueryable<T> FromEnumerable<T>(IEnumerable<T> data)
{
    string tempFileSetName = DataPath.MakeUniqueTemporaryDscFileSetName();
    DscCompressionScheme scheme = Configuration.IntermediateDataCompressionScheme;
    DryadLinqMetaData info = DryadLinqMetaData.ForLocalDebug(this, typeof(T), tempFileSetName, scheme);

    IQueryable<T> ingressed =
        DataProvider.IngressTemporaryDataDirectlyToDsc(this, data, tempFileSetName, info, scheme);
    return ingressed;
}
/// <summary>
/// Ingress is not available for this provider: the method always throws.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">Unused.</param>
/// <param name="source">Unused.</param>
/// <param name="dataSetUri">Unused.</param>
/// <param name="metaData">Unused.</param>
/// <param name="outputScheme">Unused.</param>
/// <param name="isTemp">Unused.</param>
/// <exception cref="DryadLinqException">Always thrown, with the message "TBA".</exception>
public override void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme outputScheme,
    bool isTemp = false)
{
    throw new DryadLinqException("TBA");
}
/// <summary>
/// Writes a .NET collection to the file system as a single partition plus a partfile
/// that points at it.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">The context used to look up the record factory for T.</param>
/// <param name="source">The records to write.</param>
/// <param name="dataSetUri">Location of the partfile; a non-empty host yields a \\host path.</param>
/// <param name="metaData">Not used by this implementation.</param>
/// <param name="compressionScheme">Applied only when the default record writer is used.</param>
/// <param name="isTemp">Not used by this implementation.</param>
/// <param name="serializer">
/// Optional custom serializer; when non-null it receives the whole collection and the raw file stream.
/// </param>
public override void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme compressionScheme,
    bool isTemp,
    Expression<Action<IEnumerable<T>, Stream>> serializer)
{
    string partFileName = dataSetUri.LocalPath;
    string hostName = dataSetUri.Host;
    if (String.IsNullOrEmpty(hostName) == false)
    {
        partFileName = @"\\" + hostName + partFileName;
    }

    // Write the partition into a uniquely named directory next to the partfile.
    string partitionDir = Path.Combine(Path.GetDirectoryName(partFileName), DryadLinqUtil.MakeUniqueName());
    Directory.CreateDirectory(partitionDir);
    string partitionBase = Path.Combine(partitionDir, "Part");
    string partitionFile = partitionBase + ".00000000";

    DryadLinqFactory<T> recordFactory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));
    using (FileStream output = new FileStream(partitionFile, FileMode.CreateNew, FileAccess.Write))
    {
        if (serializer != null)
        {
            // Custom path: the caller's serializer writes directly to the file stream.
            Action<IEnumerable<T>, Stream> customWrite = serializer.Compile();
            customWrite(source, output);
        }
        else
        {
            // Default path: record-by-record through the generated writer.
            DryadLinqRecordWriter<T> recordWriter =
                recordFactory.MakeWriter(new DryadLinqFileBlockStream(output, compressionScheme));
            foreach (T record in source)
            {
                recordWriter.WriteRecordSync(record);
            }
            recordWriter.Close();
        }
    }

    // Write the partfile: base path, partition count, then "index,size" for the single partition.
    long partitionLength = new FileInfo(partitionFile).Length;
    using (StreamWriter partFile = File.CreateText(partFileName))
    {
        partFile.WriteLine(partitionBase);
        partFile.WriteLine("1");
        partFile.WriteLine("{0},{1}", 0, partitionLength);
    }
}
/// <summary>
/// Stores an IEnumerable{T} at a specified location.
/// </summary>
/// <typeparam name="T">The record type of the data.</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
/// <param name="source">The data to store.</param>
/// <param name="dataSetUri">The URI of the store location; its scheme selects the data provider.</param>
/// <param name="metaData">The metadata of the data.</param>
/// <param name="outputScheme">The compression scheme.</param>
/// <param name="isTemp">true if the data is only stored temporarily.</param>
/// <returns>The partitioned table opened over the (provider-rewritten) URI.</returns>
internal static DryadLinqQuery<T> StoreData<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme outputScheme,
    bool isTemp = false)
{
    DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));

    // The provider may rewrite the URI; the rewritten form is used from here on.
    Uri targetUri = provider.RewriteUri<T>(context, dataSetUri);
    provider.Ingress(context, source, targetUri, metaData, outputScheme, isTemp);

    return DataProvider.GetPartitionedTable<T>(context, targetUri);
}
/// <summary>
/// Writes a .NET collection as a single local partition file and then writes a partfile
/// at the local path of <paramref name="dataSetUri"/> that references it.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">Supplies PartitionUncPath and the record factory for T.</param>
/// <param name="source">The records to write.</param>
/// <param name="dataSetUri">The URI whose local path receives the partfile.</param>
/// <param name="metaData">Not used by this implementation.</param>
/// <param name="compressionScheme">Compression applied to the partition file stream.</param>
/// <param name="isTemp">Not used by this implementation.</param>
public override void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme compressionScheme,
    bool isTemp = false)
{
    // Write the partition: prefer the context's partition path, else the partfile's directory.
    string baseDir = context.PartitionUncPath ?? Path.GetDirectoryName(dataSetUri.LocalPath);
    if (Path.IsPathRooted(baseDir) == false)
    {
        baseDir = Path.Combine("/", baseDir);
    }

    string partitionDir = Path.Combine(baseDir, DryadLinqUtil.MakeUniqueName());
    Directory.CreateDirectory(partitionDir);
    string partitionBase = Path.Combine(partitionDir, "Part");
    string partitionFile = partitionBase + ".00000000";

    DryadLinqFactory<T> recordFactory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));
    using (FileStream output = new FileStream(partitionFile, FileMode.CreateNew, FileAccess.Write))
    {
        DryadLinqRecordWriter<T> recordWriter =
            recordFactory.MakeWriter(new DryadLinqFileBlockStream(output, compressionScheme));
        foreach (T record in source)
        {
            recordWriter.WriteRecordSync(record);
        }
        recordWriter.Close();
    }

    // Write the partfile: base path, partition count, then "index,size,machine".
    FileInfo partitionInfo = new FileInfo(partitionFile);
    using (StreamWriter partFile = File.CreateText(dataSetUri.LocalPath))
    {
        partFile.WriteLine(partitionBase);
        partFile.WriteLine("1");
        partFile.WriteLine("{0},{1},{2}", 0, partitionInfo.Length, Environment.MachineName);
    }
}
/// <summary>
/// Stores an IEnumerable{T} at a specified location using custom stream-based
/// serialization, and returns an initialized query over the stored data.
/// </summary>
/// <typeparam name="T">The record type of the data.</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
/// <param name="source">The data to store.</param>
/// <param name="dataSetUri">The URI of the store location; its scheme selects the data provider.</param>
/// <param name="metaData">The metadata of the data.</param>
/// <param name="outputScheme">The compression scheme.</param>
/// <param name="isTemp">true if the data is only stored temporarily.</param>
/// <param name="serializer">A stream-based serializer</param>
/// <param name="deserializer">A stream-based deserializer</param>
/// <returns>An instance of IQueryable{T} for the data.</returns>
internal static DryadLinqQuery<T> StoreData<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme outputScheme,
    bool isTemp,
    Expression<Action<IEnumerable<T>, Stream>> serializer,
    Expression<Func<Stream, IEnumerable<T>>> deserializer)
{
    DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));

    // The provider may rewrite the URI; the rewritten form is used from here on.
    Uri targetUri = provider.RewriteUri<T>(context, dataSetUri);
    provider.Ingress(context, source, targetUri, metaData, outputScheme, isTemp, serializer);

    DryadLinqQuery<T> table = DataProvider.GetPartitionedTable<T>(context, targetUri, deserializer);
    table.CheckAndInitialize();   // must initialize
    return table;
}
/// <summary>
/// Creates a DryadLinqMetaData directly from the supplied values, without contacting any service.
/// </summary>
/// <param name="context">The context stored on the metadata.</param>
/// <param name="recordType">The element type of the data set.</param>
/// <param name="dscStreamName">The DSC stream name of the data set.</param>
/// <param name="compressionScheme">The compression scheme of the data set.</param>
/// <returns>The populated metadata object.</returns>
internal static DryadLinqMetaData ForLocalDebug(
    HpcLinqContext context,
    Type recordType,
    string dscStreamName,
    DscCompressionScheme compressionScheme)
{
    // Only these four fields are populated here; version, flags, fingerprint and
    // data-set info are not assigned by this method.
    return new DryadLinqMetaData
    {
        m_context = context,
        m_dscStreamName = dscStreamName,
        m_elemType = recordType,
        m_compressionScheme = compressionScheme
    };
}
/// <summary>
/// Returns an enumerator over the records of the DSC file set backing this table.
/// </summary>
/// <returns>A TableEnumerator over the file set's files, using the file set's compression scheme.</returns>
/// <exception cref="DryadLinqException">
/// Wraps any failure while fetching the read paths or metadata (error code FailedToGetReadPathsForStream).
/// </exception>
public IEnumerator<T> GetEnumerator()
{
    // One entry per dsc file; each entry is the array of replica paths for that file.
    List<string[]> replicaPathsPerFile;
    DscCompressionScheme scheme;

    try
    {
        DscFileSet dscFileSet = m_context.DscService.GetFileSet(m_fileSetName);
        replicaPathsPerFile = (from dscFile in dscFileSet.GetFiles()
                               select dscFile.ReadPaths).ToList();

        scheme = DryadLinqMetaData.FromDscStream(m_context, m_fileSetName).CompressionScheme;
    }
    catch (Exception e)
    {
        throw new DryadLinqException(HpcLinqErrorCode.FailedToGetReadPathsForStream,
                                     String.Format(SR.FailedToGetReadPathsForStream, this.m_fileSetName),
                                     e);
    }

    // Constructed outside the try so enumerator-construction failures are not re-wrapped.
    return new TableEnumerator(m_context, replicaPathsPerFile, m_fileSetName, scheme);
}
/// <summary>
/// Creates a DryadLinqMetaData from a query output node.
/// </summary>
/// <param name="context">The context stored on the metadata.</param>
/// <param name="node">The output node supplying the URI, first output type and compression scheme.</param>
/// <returns>The populated metadata object.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the node's output URI is not a valid data path.
/// </exception>
internal static DryadLinqMetaData Get(DryadLinqContext context, DLinqOutputNode node)
{
    DryadLinqMetaData result = new DryadLinqMetaData();

    if (DataPath.IsValidDataPath(node.OutputUri))
    {
        // Version, flags, fingerprint and data-set info are not assigned by this method.
        result.m_context = context;
        result.m_dataSetUri = node.OutputUri;
        result.m_elemType = node.OutputTypes[0];
        result.m_compressionScheme = node.OutputCompressionScheme;
        return result;
    }

    throw new InvalidOperationException();
}
/// <summary>
/// Creates a DryadLinqMetaData from a query output node.
/// </summary>
/// <param name="context">The context stored on the metadata.</param>
/// <param name="node">The output node supplying the metadata URI, first output type and compression scheme.</param>
/// <returns>The populated metadata object.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the node's metadata URI is neither a DSC nor an HDFS path.
/// </exception>
internal static DryadLinqMetaData FromOutputNode(HpcLinqContext context, DryadOutputNode node)
{
    DryadLinqMetaData result = new DryadLinqMetaData();

    bool unsupportedUri = !DataPath.IsDsc(node.MetaDataUri) && !DataPath.IsHdfs(node.MetaDataUri);
    if (unsupportedUri)
    {
        throw new InvalidOperationException();
    }

    // Version, flags, fingerprint and data-set info are not assigned by this method.
    result.m_context = context;
    result.m_dscStreamName = node.MetaDataUri;
    result.m_elemType = node.OutputTypes[0];
    result.m_compressionScheme = node.OutputCompressionScheme;
    return result;
}
/// <summary>
/// Creates a DryadLinqMetaData from a query output node.
/// </summary>
/// <param name="context">The context stored on the metadata.</param>
/// <param name="node">The output node supplying the URI, first output type and compression scheme.</param>
/// <returns>The populated metadata object.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the node's output URI is not a valid data path.
/// </exception>
internal static DryadLinqMetaData Get(DryadLinqContext context, DLinqOutputNode node)
{
    DryadLinqMetaData built = new DryadLinqMetaData();

    if (DataPath.IsValidDataPath(node.OutputUri))
    {
        // Version, flags, fingerprint and data-set info are not assigned by this method.
        built.m_context = context;
        built.m_dataSetUri = node.OutputUri;
        built.m_elemType = node.OutputTypes[0];
        built.m_compressionScheme = node.OutputCompressionScheme;
        return built;
    }

    throw new InvalidOperationException();
}
/// <summary>
/// Writes a .NET collection to the HDFS location identified by <paramref name="dataSetUri"/>.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">Supplies the HDFS client and the record factory for T.</param>
/// <param name="source">The records to write.</param>
/// <param name="dataSetUri">The URI passed to the HDFS client to open the output stream.</param>
/// <param name="metaData">Not used by this implementation.</param>
/// <param name="outputScheme">Not used by this implementation.</param>
/// <param name="isTemp">Not used by this implementation.</param>
/// <param name="serializer">
/// Optional custom serializer. When non-null it is compiled and handed the whole collection and
/// the output stream; when null the generated record writer is used.
/// </param>
/// <remarks>
/// Earlier this overload ignored <paramref name="serializer"/> and always used the generated
/// record writer, so data stored with a custom serializer could not be read back by the paired
/// deserializer. The serializer is now honored in the same way as in the file-based override.
/// </remarks>
public override void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme outputScheme,
    bool isTemp,
    Expression<Action<IEnumerable<T>, Stream>> serializer)
{
    // Resolved up front (as before) so factory errors surface before the stream is opened.
    DryadLinqFactory<T> factory = (DryadLinqFactory<T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

    using (Stream stream = context.GetHdfsClient.GetDfsStreamWriter(dataSetUri))
    {
        if (serializer == null)
        {
            // Default path: record-by-record through the generated writer.
            DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream);
            DryadLinqRecordWriter<T> writer = factory.MakeWriter(nativeStream);
            foreach (T rec in source)
            {
                writer.WriteRecordSync(rec);
            }
            writer.Close();
        }
        else
        {
            // Custom path: the caller's serializer writes directly to the HDFS stream.
            Action<IEnumerable<T>, Stream> serializerFunc = serializer.Compile();
            serializerFunc(source, stream);
        }
    }
}
/// <summary>
/// Writes a .NET collection to the given store location.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="source">The collection to be ingressed.</param>
/// <param name="dataSetUri">The URI at which the collection is stored.</param>
/// <param name="metaData">The metadata describing the collection.</param>
/// <param name="outputScheme">The compression scheme used to store the collection.</param>
/// <param name="isTemp">true to store the collection only temporarily, with a time lease.</param>
public abstract void Ingress<T>(
    DryadLinqContext context,
    IEnumerable<T> source,
    Uri dataSetUri,
    DryadLinqMetaData metaData,
    CompressionScheme outputScheme,
    bool isTemp = false);
/// <summary>
/// Creates a DryadLinqMetaData from a query output node.
/// </summary>
/// <param name="context">The context stored on the metadata.</param>
/// <param name="node">The output node supplying the metadata URI, first output type and compression scheme.</param>
/// <returns>The populated metadata object.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the node's metadata URI is neither a DSC nor an HDFS path.
/// </exception>
internal static DryadLinqMetaData FromOutputNode(HpcLinqContext context, DryadOutputNode node)
{
    DryadLinqMetaData built = new DryadLinqMetaData();

    bool unsupportedUri = !DataPath.IsDsc(node.MetaDataUri) && !DataPath.IsHdfs(node.MetaDataUri);
    if (unsupportedUri)
    {
        throw new InvalidOperationException();
    }

    // Version, flags, fingerprint and data-set info are not assigned by this method.
    built.m_context = context;
    built.m_dscStreamName = node.MetaDataUri;
    built.m_elemType = node.OutputTypes[0];
    built.m_compressionScheme = node.OutputCompressionScheme;
    return built;
}
/// <summary>
/// Builds a DryadLinqMetaData from an existing DSC stream (file set).
/// </summary>
/// <param name="context">The context whose DscService is queried; stored on the result.</param>
/// <param name="dscStreamName">The name of the DSC file set to read metadata from.</param>
/// <returns>The metadata populated with the record type (when present) and compression scheme.</returns>
/// <exception cref="DryadLinqException">
/// Wraps any exception raised while reading the metadata (error code ErrorReadingMetadata).
/// </exception>
internal static DryadLinqMetaData FromDscStream(HpcLinqContext context, string dscStreamName)
{
    try
    {
        DscFileSet dscFileSet = context.DscService.GetFileSet(dscStreamName);

        DryadLinqMetaData loaded = new DryadLinqMetaData();
        loaded.m_context = context;
        loaded.m_dscStreamName = dscStreamName;

        // Record type: stored as a UTF-8 encoded type name; left unset when the entry is absent.
        byte[] typeNameBytes = dscFileSet.GetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME);
        if (typeNameBytes != null)
        {
            loaded.m_elemType = Type.GetType(Encoding.UTF8.GetString(typeNameBytes));
        }

        // Compression scheme comes straight from the file set.
        loaded.m_compressionScheme = dscFileSet.CompressionScheme;
        return loaded;
    }
    catch (Exception e)
    {
        throw new DryadLinqException(HpcLinqErrorCode.ErrorReadingMetadata,
                                     String.Format(SR.ErrorReadingMetadata),
                                     e);
    }
}
/// <summary>
/// Creates a DryadLinqMetaData directly from the supplied values, without contacting any service.
/// </summary>
/// <param name="context">The context stored on the metadata.</param>
/// <param name="recordType">The element type of the data set.</param>
/// <param name="dscStreamName">The DSC stream name of the data set.</param>
/// <param name="compressionScheme">The compression scheme of the data set.</param>
/// <returns>The populated metadata object.</returns>
internal static DryadLinqMetaData ForLocalDebug(
    HpcLinqContext context,
    Type recordType,
    string dscStreamName,
    DscCompressionScheme compressionScheme)
{
    // Only these four fields are populated here; version, flags, fingerprint and
    // data-set info are not assigned by this method.
    DryadLinqMetaData built = new DryadLinqMetaData
    {
        m_context = context,
        m_dscStreamName = dscStreamName,
        m_elemType = recordType,
        m_compressionScheme = compressionScheme
    };
    return built;
}
/// <summary>
/// Ingresses data directly to DSC and then tries to put a temporary-length lease on the
/// resulting file set.
/// </summary>
/// <typeparam name="T">The record type of the data.</typeparam>
/// <param name="context">The context whose DscService is used.</param>
/// <param name="source">The data to ingress.</param>
/// <param name="dscFileSetName">The name of the DSC file set to create.</param>
/// <param name="metaData">The metadata forwarded to the ingress call.</param>
/// <param name="outputScheme">The compression scheme forwarded to the ingress call.</param>
/// <returns>The query returned by the ingress call.</returns>
internal static DryadLinqQuery<T> IngressTemporaryDataDirectlyToDsc<T>(
    HpcLinqContext context,
    IEnumerable<T> source,
    string dscFileSetName,
    DryadLinqMetaData metaData,
    DscCompressionScheme outputScheme)
{
    DryadLinqQuery<T> ingressed =
        IngressDataDirectlyToDsc(context, source, dscFileSetName, metaData, outputScheme);

    // Best effort: a DscException while setting the lease is deliberately ignored.
    try
    {
        DscFileSet ingressedFileSet = context.DscService.GetFileSet(dscFileSetName);
        DateTime leaseEnd = DateTime.Now.Add(StaticConfig.LeaseDurationForTempFiles);
        ingressedFileSet.SetLeaseEndTime(leaseEnd);
    }
    catch (DscException)
    {
        // suppress
    }

    return ingressed;
}
/// <summary>
/// Streams plain enumerable data directly to a DSC file set and returns a query over it.
/// </summary>
/// <typeparam name="T">The record type of the data.</typeparam>
/// <param name="context">The context whose DscService and configuration are used.</param>
/// <param name="source">
/// The data to ingress. It is enumerated twice when non-empty: once (first element only) for the
/// emptiness check and once to write the records.
/// </param>
/// <param name="dscFileSetName">The name of the DSC file set to write.</param>
/// <param name="metaData">
/// When non-null, the element type's assembly-qualified name is stored as file-set metadata.
/// </param>
/// <param name="outputScheme">The compression scheme of the file set.</param>
/// <returns>The partitioned table opened over the new file set's URI.</returns>
/// <remarks>
/// On any failure the method tries to delete the file set (ignoring errors from the delete)
/// and then rethrows the original exception.
/// </remarks>
internal static DryadLinqQuery<T> IngressDataDirectlyToDsc<T>(
    HpcLinqContext context,
    IEnumerable<T> source,
    string dscFileSetName,
    DryadLinqMetaData metaData,
    DscCompressionScheme outputScheme)
{
    try
    {
        string dscFileSetUri = DataPath.MakeDscStreamUri(context.DscService.HostName, dscFileSetName);

        if (!source.Any())
        {
            // There is no data: we must create a FileSet with an empty file
            // (the factory/stream approach opens files lazily and thus never opens a file if there is no data).
            if (context.DscService.FileSetExists(dscFileSetName))
            {
                context.DscService.DeleteFileSet(dscFileSetName);
            }

            DscFileSet fileSet = context.DscService.CreateFileSet(dscFileSetName, outputScheme);
            DscFile file = fileSet.AddNewFile(0);
            string writePath = file.WritePath;

            if (outputScheme == DscCompressionScheme.Gzip)
            {
                // Even a zero-byte file must go through the gzip compressor (for headers etc).
                using (Stream s = new FileStream(writePath, FileMode.Create))
                using (GZipStream gzipStream = new GZipStream(s, CompressionMode.Compress))
                {
                    // Nothing to write; disposing the compressor finalizes the stream.
                }
            }
            else
            {
                using (StreamWriter sw = new StreamWriter(writePath, false))
                {
                    // Nothing to write; this only creates the empty file.
                }
            }

            fileSet.Seal();
        }
        else
        {
            HpcLinqFactory<T> factory = (HpcLinqFactory<T>)HpcLinqCodeGen.GetFactory(context, typeof(T));

            // new DscBlockStream(uri,Create,Write,compress) provides a DSC stream with one partition.
            NativeBlockStream nativeStream = new DscBlockStream(dscFileSetUri, FileMode.Create, FileAccess.Write, outputScheme);
            HpcRecordWriter<T> writer = factory.MakeWriter(nativeStream);
            try
            {
                if (context.Configuration.AllowConcurrentUserDelegatesInSingleProcess)
                {
                    foreach (T item in source)
                    {
                        writer.WriteRecordAsync(item);
                    }
                }
                else
                {
                    foreach (T item in source)
                    {
                        writer.WriteRecordSync(item);
                    }
                }
            }
            finally
            {
                writer.Close(); // closes the NativeBlockStream, which seals the dsc stream.
            }
        }

        if (metaData != null)
        {
            DscFileSet fileSet = context.DscService.GetFileSet(dscFileSetName);
            fileSet.SetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME,
                                Encoding.UTF8.GetBytes(metaData.ElemType.AssemblyQualifiedName));
        }

        return DataProvider.GetPartitionedTable<T>(context, dscFileSetUri);
    }
    catch
    {
        // If anything went wrong, try to delete the fileset to avoid cruft being left in DSC.
        try
        {
            context.DscService.DeleteFileSet(dscFileSetName);
        }
        catch
        {
            // suppress error during delete
        }
        throw; // rethrow the original exception.
    }
}