/// <summary>
/// Ingress a .NET collection into Azure temporary storage as a single blob.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="source">The collection to be ingressed.</param>
/// <param name="dataSetUri">The Azure URI to store the collection.</param>
/// <param name="metaData">The metadata for the collection (currently unused here).</param>
/// <param name="compressionScheme">Must be CompressionScheme.None; compression is not supported.</param>
/// <param name="isTemp">true to only store the collection temporarily with a time lease.</param>
/// <exception cref="DryadLinqException">Thrown when a compression scheme is requested.</exception>
public override void Ingress <T>(DryadLinqContext context, IEnumerable <T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme compressionScheme, bool isTemp = false)
{
    // Reject unsupported options up front, before parsing the URI or creating a client.
    if (compressionScheme != CompressionScheme.None)
    {
        throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
    }

    string account, key, container, blob;
    AzureUtils.FromAzureUri(dataSetUri, out account, out key, out container, out blob);

    AzureDfsClient client = new AzureDfsClient(account, key, container);
    DryadLinqFactory <T> factory = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

    // GetAwaiter().GetResult() instead of .Result: on failure it surfaces the
    // original exception rather than wrapping it in an AggregateException.
    using (Stream stream = client.GetFileStreamWriterAsync(blob).GetAwaiter().GetResult())
    {
        DryadLinqBlockStream nativeStream = new DryadLinqBlockStream(stream);
        DryadLinqRecordWriter <T> writer = factory.MakeWriter(nativeStream);
        foreach (T rec in source)
        {
            writer.WriteRecordSync(rec);
        }
        writer.Close();
    }
}
/// <summary>
/// Checks whether a partitioned-file collection already exists at the target URI,
/// deleting it (and its partitions) when requested.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="dataSetUri">The URI of the partitioned-file collection.</param>
/// <param name="deleteIfExists">true to delete an existing collection instead of failing.</param>
/// <exception cref="DryadLinqException">Thrown when the collection exists and deletion was not requested.</exception>
public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
{
    // Resolve the part file's local (or UNC) path from the URI.
    string partFilePath = dataSetUri.LocalPath;
    if (!String.IsNullOrEmpty(dataSetUri.Host))
    {
        partFilePath = @"\\" + dataSetUri.Host + partFilePath;
    }

    if (!File.Exists(partFilePath))
    {
        return;
    }

    if (!deleteIfExists)
    {
        throw new DryadLinqException("Can't output to existing Partitioned File collection " + dataSetUri.AbsoluteUri);
    }

    // Best-effort removal of every partition listed in the part file; failures
    // are deliberately ignored so the part file itself still gets removed.
    var partFileLines = File.ReadAllLines(partFilePath);
    try
    {
        foreach (string partitionPath in this.GetPartitionPaths(partFileLines))
        {
            if (File.Exists(partitionPath))
            {
                File.Delete(partitionPath);
            }
        }
    }
    catch (Exception) { /*skip*/ }

    File.Delete(partFilePath);
}
/// <summary>
/// Ingress a .NET collection to a specified store location.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/>.</param>
/// <param name="source">The collection to be ingressed.</param>
/// <param name="dataSetUri">The URI at which to store the collection.</param>
/// <param name="metaData">The metadata for the collection.</param>
/// <param name="outputScheme">The compression scheme used to store the collection.</param>
/// <param name="isTemp">true to store the collection only temporarily, with a time lease.</param>
/// <param name="serializer">A stream-based serializer; concrete providers appear to treat
/// null as "use the default record serializer" — confirm against each implementation.</param>
public abstract void Ingress <T>(DryadLinqContext context, IEnumerable <T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme outputScheme, bool isTemp, Expression <Action <IEnumerable <T>, Stream> > serializer);
/// <summary>
/// Retrieves the metadata for the dataset at the given URI.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="dataSetUri">The URI of the dataset.</param>
/// <returns>The dataset's metadata, as reported by its data provider.</returns>
internal static DryadLinqMetaData Get(DryadLinqContext context, Uri dataSetUri)
{
    // Dispatch on the URI scheme to find the provider that owns this dataset.
    DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
    return provider.GetMetaData(context, dataSetUri);
}
/// <summary>
/// Creates a job submission object, partitioning the Peloponnese and Dryad
/// dependency files into graph-manager and worker resource sets.
/// </summary>
/// <param name="context">The DryadLinq context supplying home directories.</param>
public PeloponneseJobSubmission(DryadLinqContext context)
{
    m_context = context;
    m_localResources = new Dictionary <string, HashSet <string> >();

    // File names are compared case-insensitively by lower-casing both sides.
    // Use ToLowerInvariant(): ToLower() is culture-sensitive and can corrupt
    // the set operations below under locales such as tr-TR ("I" != "i").
    // NOTE(review): these fields hold lazy LINQ queries, so the listers may be
    // re-invoked on each enumeration — confirm that is intended.
    m_peloponneseGMFiles = Peloponnese.ClusterUtils.ConfigHelpers
        .ListPeloponneseResources(context.PeloponneseHomeDirectory)
        .Select(r => r.ToLowerInvariant());

    IEnumerable <string> graphManagerFiles = Peloponnese.Shared.DependencyLister.Lister
        .ListDependencies(Path.Combine(context.DryadHomeDirectory, "Microsoft.Research.Dryad.GraphManager.exe"))
        .Select(r => r.ToLowerInvariant());
    // Graph-manager dependencies that Peloponnese does not already ship.
    m_dryadGMFiles = graphManagerFiles.Except(m_peloponneseGMFiles);

    string[] additionalWorkerFiles = { "Microsoft.Research.Dryadlinq.dll", "Microsoft.Research.Dryad.DryadLinq.NativeWrapper.dll" };

    IEnumerable <string> processServiceFiles = Peloponnese.Shared.DependencyLister.Lister
        .ListDependencies(Path.Combine(context.DryadHomeDirectory, "Microsoft.Research.Dryad.ProcessService.exe"))
        .Select(r => r.ToLowerInvariant());

    IEnumerable <string> vertexHostFiles = Peloponnese.Shared.DependencyLister.Lister
        .ListDependencies(Path.Combine(context.DryadHomeDirectory, "Microsoft.Research.Dryad.VertexHost.exe"))
        .Concat(additionalWorkerFiles.Select(f => Path.Combine(context.DryadHomeDirectory, f)))
        .Select(r => r.ToLowerInvariant());

    // Worker files are split into those Peloponnese provides and those Dryad must supply.
    IEnumerable <string> workerFiles = processServiceFiles.Union(vertexHostFiles);
    m_peloponneseWorkerFiles = workerFiles.Intersect(m_peloponneseGMFiles);
    m_dryadWorkerFiles = workerFiles.Except(m_peloponneseGMFiles);
}
/// <summary>
/// Reads a specified dataset.
/// </summary>
/// <typeparam name="T">The record type of the dataset.</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
/// <param name="dataSetUri">The URI of the dataset.</param>
/// <returns>A lazy sequence of records as IEnumerable{T}.</returns>
public static IEnumerable <T> ReadData <T>(DryadLinqContext context, Uri dataSetUri)
{
    // Find the provider for this URI scheme, let it normalize the URI,
    // then hand back a lazy enumerable over the dataset.
    DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
    Uri rewrittenUri = provider.RewriteUri <T>(context, dataSetUri);
    return new DryadLinqQueryEnumerable <T>(context, provider, rewrittenUri);
}
/// <summary>
/// Returns the cluster client, constructing the native YARN client on first use.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <returns>The lazily created cluster client.</returns>
public ClusterClient Client(DryadLinqContext context)
{
    // Lazy initialization: reuse the client once it has been built.
    // NOTE(review): not synchronized — confirm single-threaded use by callers.
    return _clusterClient ?? (_clusterClient = new NativeYarnClient(HeadNode, HdfsPort, LauncherPort));
}
/// <summary>
/// Get the dataset specified by a URI.
/// </summary>
/// <typeparam name="T">The record type of the dataset.</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
/// <param name="dataSetUri">The URI of the dataset</param>
/// <returns>A query object representing the dataset.</returns>
internal static DryadLinqQuery <T> GetPartitionedTable <T>(DryadLinqContext context, Uri dataSetUri)
{
    // Resolve the provider from the URI scheme and normalize the URI before
    // wrapping both in a query object.
    DataProvider dataProvider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
    Uri rewrittenUri = dataProvider.RewriteUri <T>(context, dataSetUri);
    return new DryadLinqQuery <T>(null, new DryadLinqProvider(context), dataProvider, rewrittenUri);
}
/// <summary>
/// Decides whether automatic type inference applies to a record type.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="type">The record type to inspect.</param>
/// <returns>true when the global switch is on and the type carries
/// AutoTypeInferenceAttribute (inherited attributes included).</returns>
internal static bool DoAutoTypeInference(DryadLinqContext context, Type type)
{
    return StaticConfig.AllowAutoTypeInference
           && type.GetCustomAttributes(typeof(AutoTypeInferenceAttribute), true).Length > 0;
}
/// <summary>
/// Ingress is not implemented for this provider; always throws.
/// </summary>
/// <exception cref="DryadLinqException">Always thrown (message "TBA").</exception>
public override void Ingress <T>(DryadLinqContext context, IEnumerable <T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme outputScheme, bool isTemp = false) { throw new DryadLinqException("TBA"); }
/// <summary>
/// Returns the cluster client, creating an AzureYarnClient on first use by
/// chaining onto the DFS-client task.
/// </summary>
/// <param name="context">Supplies the Peloponnese home directory for client creation.</param>
/// <returns>The completed cluster client.</returns>
// NOTE(review): .Result blocks the calling thread until the chained task
// completes, and failures surface wrapped in an AggregateException — confirm
// callers tolerate both. Initialization is not synchronized.
public ClusterClient Client(DryadLinqContext context) { if (_clusterClient == null) { _clusterClient = _dfsClient.ContinueWith( c => new AzureYarnClient(_azureSubscriptions, c.Result, context.PeloponneseHomeDirectory, Cluster.Name)); } return(_clusterClient.Result); }
/// <summary>
/// Creates a lazy enumerable over a dataset, decoded with a custom deserializer.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="dataProvider">The provider that can open the dataset.</param>
/// <param name="dataSetUri">The URI of the dataset.</param>
/// <param name="deserializer">A stream-based deserializer expression.</param>
public DryadLinqQueryEnumerable(DryadLinqContext context, DataProvider dataProvider, Uri dataSetUri, Expression <Func <Stream, IEnumerable <T> > > deserializer)
{
    // Capture everything needed to open and decode the dataset on demand.
    m_context = context;
    m_dataProvider = dataProvider;
    m_dataSetUri = dataSetUri;
    m_deserializer = deserializer;
}
/// <summary>
/// Reads the dataset specified by a URI using a custom deserializer.
/// </summary>
/// <typeparam name="T">The record type of the dataset</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
/// <param name="dataSetUri">The URI of the dataset</param>
/// <param name="deserializer">A stream-based deserializer</param>
/// <returns>A lazy sequence of records as IEnumerable{T}</returns>
internal static IEnumerable <T> ReadData <T>(DryadLinqContext context, Uri dataSetUri, Expression <Func <Stream, IEnumerable <T> > > deserializer)
{
    // Same flow as the public ReadData, but the caller controls decoding.
    DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
    Uri rewrittenUri = provider.RewriteUri <T>(context, dataSetUri);
    return new DryadLinqQueryEnumerable <T>(context, provider, rewrittenUri, deserializer);
}
/// <summary>
/// Creates an enumerator over a stored dataset and starts the background reader.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="dataProvider">The provider used to open the dataset stream.</param>
/// <param name="dataSetUri">The URI of the dataset.</param>
internal TableEnumerator(DryadLinqContext context, DataProvider dataProvider, Uri dataSetUri)
{
    this.m_current = default(T);
    this.m_factory = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));
    // Open the raw dataset stream and wrap it in the block stream the reader expects.
    Stream rawStream = dataProvider.Egress(context, dataSetUri);
    this.m_reader = this.m_factory.MakeReader(new DryadLinqBlockStream(rawStream));
    this.m_reader.StartWorker();
}
/// <summary>
/// Builds a URI for a temporary stream rooted at the current drive's
/// temporary-stream directory, creating the directory as needed.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="path">The relative path of the temporary stream.</param>
/// <returns>The URI of the temporary stream location.</returns>
public override Uri GetTemporaryStreamUri(DryadLinqContext context, string path)
{
    // Anchor under <drive-root>/<TEMPORARY_STREAM_NAME_PREFIX>/<path>.
    string driveRoot = Path.GetPathRoot(Directory.GetCurrentDirectory());
    string fullPath = Path.Combine(driveRoot, DataPath.TEMPORARY_STREAM_NAME_PREFIX, path);
    Directory.CreateDirectory(Path.GetDirectoryName(fullPath));
    return new Uri(this.Scheme + ":///" + fullPath);
}
/// <summary>
/// Checks whether an HDFS collection already exists, deleting it when requested.
/// </summary>
/// <param name="context">Supplies the HDFS client.</param>
/// <param name="dataSetUri">The URI of the HDFS collection.</param>
/// <param name="deleteIfExists">true to delete an existing collection instead of failing.</param>
/// <exception cref="DryadLinqException">Thrown when the collection exists and deletion was not requested.</exception>
public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
{
    // Nothing to do unless the target already exists.
    if (!context.GetHdfsClient.IsFileExists(dataSetUri))
    {
        return;
    }
    if (!deleteIfExists)
    {
        throw new DryadLinqException("Can't output to existing HDFS collection " + dataSetUri.AbsoluteUri);
    }
    context.GetHdfsClient.DeleteDfsFile(dataSetUri, true);
}
/// <summary>
/// Writes a collection to the local/UNC file system as a single partition plus
/// a part file describing it.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="source">The collection to be ingressed.</param>
/// <param name="dataSetUri">The URI of the part file to create.</param>
/// <param name="metaData">The metadata for the collection (unused here).</param>
/// <param name="compressionScheme">Compression applied by the default record writer.</param>
/// <param name="isTemp">true to store the collection only temporarily (unused here).</param>
/// <param name="serializer">Optional stream serializer; when null the default
/// record writer (with compression) is used.</param>
public override void Ingress <T>(DryadLinqContext context, IEnumerable <T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme compressionScheme, bool isTemp, Expression <Action <IEnumerable <T>, Stream> > serializer)
{
    // Resolve the part file's local (or UNC) path.
    string partFileName = dataSetUri.LocalPath;
    if (!String.IsNullOrEmpty(dataSetUri.Host))
    {
        partFileName = @"\\" + dataSetUri.Host + partFileName;
    }

    // Write the single partition into a fresh, uniquely named directory.
    string partitionDir = Path.Combine(Path.GetDirectoryName(partFileName), DryadLinqUtil.MakeUniqueName());
    Directory.CreateDirectory(partitionDir);
    string basePath = Path.Combine(partitionDir, "Part");
    string partitionFile = basePath + ".00000000";

    DryadLinqFactory <T> factory = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));
    using (FileStream fstream = new FileStream(partitionFile, FileMode.CreateNew, FileAccess.Write))
    {
        if (serializer != null)
        {
            // A caller-supplied serializer takes over the whole stream.
            serializer.Compile()(source, fstream);
        }
        else
        {
            DryadLinqFileBlockStream blockStream = new DryadLinqFileBlockStream(fstream, compressionScheme);
            DryadLinqRecordWriter <T> recordWriter = factory.MakeWriter(blockStream);
            foreach (T record in source)
            {
                recordWriter.WriteRecordSync(record);
            }
            recordWriter.Close();
        }
    }

    // Write the part file: base path, partition count, then "<index>,<size>".
    long partitionSize = new FileInfo(partitionFile).Length;
    using (StreamWriter partFileWriter = File.CreateText(partFileName))
    {
        partFileWriter.WriteLine(basePath);
        partFileWriter.WriteLine("1");
        partFileWriter.WriteLine("{0},{1}", 0, partitionSize);
    }
}
/// <summary>
/// Queries the cluster DFS for the size and partition count of a dataset.
/// </summary>
/// <param name="context">Supplies the cluster DFS client.</param>
/// <param name="dataSetUri">The URI of the dataset.</param>
/// <returns>The stream info (partition count and total size).</returns>
/// <exception cref="DryadLinqException">Thrown when the DFS reports zero partitions.</exception>
public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri)
{
    Int32 partitionCount = 0;
    Int64 totalSize = -1;
    context.Cluster.DfsClient.GetContentSummary(dataSetUri.AbsolutePath, ref totalSize, ref partitionCount);
    if (partitionCount == 0)
    {
        throw new DryadLinqException("Got 0 partition count for " + dataSetUri.AbsoluteUri);
    }
    return new DryadLinqStreamInfo(partitionCount, totalSize);
}
/// <summary>
/// Opens a read stream over all partitions listed in a part file.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="dataSetUri">The URI of the part file.</param>
/// <returns>A stream concatenating all partition files, uncompressed.</returns>
/// <exception cref="DryadLinqException">Thrown when the part file is malformed.</exception>
public override Stream Egress(DryadLinqContext context, Uri dataSetUri)
{
    // A valid part file has at least: base path, partition count, one partition entry.
    var partFileLines = File.ReadAllLines(dataSetUri.LocalPath);
    if (partFileLines.Length < 3)
    {
        throw new DryadLinqException("The partition file " + dataSetUri + " is malformed.");
    }
    return new DryadLinqMultiFileStream(this.GetPartitionPaths(partFileLines), CompressionScheme.None);
}
/// <summary>
/// Checks whether an HDFS collection already exists (via WebHDFS), deleting it when requested.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="dataSetUri">The URI of the HDFS collection.</param>
/// <param name="deleteIfExists">true to delete an existing collection instead of failing.</param>
/// <exception cref="DryadLinqException">Thrown when the collection exists and deletion was not requested.</exception>
public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
{
    // NOTE(review): ports are hard-coded (8033, 50070; 50070 is the stock HDFS
    // NameNode HTTP port) — consider making these configurable.
    WebHdfsClient client = new WebHdfsClient(dataSetUri.Host, 8033, 50070);
    if (!client.IsFileExists(dataSetUri.AbsolutePath))
    {
        return;
    }
    if (!deleteIfExists)
    {
        throw new DryadLinqException("Can't output to existing HDFS collection " + dataSetUri.AbsoluteUri);
    }
    client.DeleteDfsFile(dataSetUri.AbsolutePath);
}
/// <summary>
/// Checks whether an Azure blob collection already exists, deleting it when requested.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="dataSetUri">The URI of the Azure blob collection.</param>
/// <param name="deleteIfExists">true to delete an existing collection instead of failing.</param>
/// <exception cref="DryadLinqException">Thrown when the collection exists and deletion was not requested.</exception>
public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
{
    AzureCollectionPartition partition = new AzureCollectionPartition(dataSetUri);
    if (!partition.IsCollectionExists())
    {
        return;
    }
    if (!deleteIfExists)
    {
        throw new DryadLinqException("Can't output to existing Azure Blob collection " + dataSetUri.AbsoluteUri);
    }
    partition.DeleteCollection();
}
/// <summary>
/// Determines whether records of a type may be null.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="type">The record type to inspect.</param>
/// <returns>false for value types (and a null type); otherwise the value of the
/// type's [Nullable] attribute, falling back to the global default when absent.</returns>
internal static bool RecordCanBeNull(DryadLinqContext context, Type type)
{
    // Value types can never hold null records.
    if (type == null || type.IsValueType)
    {
        return false;
    }
    object[] attribs = type.GetCustomAttributes(typeof(NullableAttribute), true);
    return attribs.Length == 0
        ? StaticConfig.AllowNullRecords
        : ((NullableAttribute)attribs[0]).CanBeNull;
}
/// <summary>
/// Create a new job executor object. A fresh job-submission object is used
/// for every query, chosen by the context's execution mode.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
public DryadLinqJobExecutor(DryadLinqContext context)
{
    this.m_context = context;
    this.m_currentStatus = JobStatus.NotSubmitted;
    if (!context.LocalExecution)
    {
        this.m_jobSubmission = new YarnJobSubmission(context);
    }
    else
    {
        this.m_jobSubmission = new LocalJobSubmission(context);
    }
}
/// <summary>
/// Stores an IEnumerable{T} at a specified location.
/// </summary>
/// <typeparam name="T">The record type of the data.</typeparam>
/// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
/// <param name="source">The data to store.</param>
/// <param name="dataSetUri">The URI of the store location.</param>
/// <param name="metaData">The metadata of the data.</param>
/// <param name="outputScheme">The compression scheme.</param>
/// <param name="isTemp">true if the data is only stored temporarily.</param>
/// <returns>An instance of IQueryable{T} for the data.</returns>
internal static DryadLinqQuery <T> StoreData <T>(DryadLinqContext context, IEnumerable <T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme outputScheme, bool isTemp = false)
{
    // Resolve the provider for the scheme, normalize the URI, materialize the
    // data there, then hand back a query over the stored dataset.
    DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
    Uri storeUri = provider.RewriteUri <T>(context, dataSetUri);
    provider.Ingress(context, source, storeUri, metaData, outputScheme, isTemp);
    return DataProvider.GetPartitionedTable <T>(context, storeUri);
}
/// <summary>
/// Rewrites a dataset URI, tagging reads of LineRecord data with a
/// "seekBoundaries" query parameter.
/// </summary>
/// <typeparam name="T">The record type of the dataset.</typeparam>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="dataSetUri">The URI to rewrite.</param>
/// <param name="access">The intended file access; the tag is only added for non-write access.</param>
/// <returns>The rewritten URI.</returns>
public override Uri RewriteUri <T>(DryadLinqContext context, Uri dataSetUri, FileAccess access)
{
    UriBuilder builder = new UriBuilder(dataSetUri);
    NameValueCollection queryParams = System.Web.HttpUtility.ParseQueryString(builder.Query);
    // Presumably the tag makes downstream readers split partitions on line
    // boundaries (see GetStreamInfo's expandBlocks) — confirm.
    bool isLineRecordRead = access != FileAccess.Write
        && typeof(T) == typeof(Microsoft.Research.DryadLinq.LineRecord);
    if (isLineRecordRead)
    {
        queryParams["seekBoundaries"] = "Microsoft.Research.DryadLinq.LineRecord";
    }
    builder.Query = queryParams.ToString();
    return builder.Uri;
}
/// <summary>
/// Queries HDFS for the size and partition count of a dataset, expanding
/// blocks when the URI was tagged for LineRecord seek boundaries.
/// </summary>
/// <param name="context">Supplies the HDFS client.</param>
/// <param name="dataSetUri">The URI of the dataset.</param>
/// <returns>The stream info (partition count and total size).</returns>
/// <exception cref="DryadLinqException">Thrown when HDFS reports zero partitions.</exception>
public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri)
{
    Int32 parCnt = 0;
    Int64 size = -1;
    NameValueCollection query = System.Web.HttpUtility.ParseQueryString(dataSetUri.Query);
    // Consistency fix: use the same key casing RewriteUri writes ("seekBoundaries").
    // ParseQueryString lookups are case-insensitive, so behavior is unchanged,
    // but keeping reader and writer in sync removes a fragile mismatch.
    bool expandBlocks = (query["seekBoundaries"] == "Microsoft.Research.DryadLinq.LineRecord");
    context.GetHdfsClient.GetDirectoryContentSummary(dataSetUri, expandBlocks, ref size, ref parCnt);
    if (parCnt == 0)
    {
        throw new DryadLinqException("Got 0 partition count for " + dataSetUri.AbsoluteUri);
    }
    return new DryadLinqStreamInfo(parCnt, size);
}
/// <summary>
/// Create a new job executor object. A fresh job-submission object is used
/// for every query, chosen by the context's platform kind.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
public DryadLinqJobExecutor(DryadLinqContext context)
{
    this.m_context = context;
    this.m_currentStatus = JobStatus.NotSubmitted;
    if (context.PlatformKind != PlatformKind.LOCAL)
    {
        // Cluster runs stage their files under the cluster's tmp/staging area.
        this.m_jobSubmission = new YarnJobSubmission(
            context, context.Cluster.MakeInternalClusterUri("tmp", "staging"));
    }
    else
    {
        this.m_jobSubmission = new LocalJobSubmission(context);
    }
}
/// <summary>
/// Creates a metadata record describing a stored dataset.
/// </summary>
/// <param name="context">An instance of DryadLinqContext.</param>
/// <param name="recordType">The element type of the dataset.</param>
/// <param name="dataSetUri">The URI of the dataset.</param>
/// <param name="compressionScheme">The compression scheme used to store the dataset.</param>
internal DryadLinqMetaData(DryadLinqContext context, Type recordType, Uri dataSetUri, CompressionScheme compressionScheme)
{
    this.m_context = context;
    this.m_elemType = recordType;
    this.m_dataSetUri = dataSetUri;
    this.m_compressionScheme = compressionScheme;
    // NOTE(review): version, flags, fingerprint and dataset-info initialization
    // were commented out in the original source and remain unset here.
}
/// <summary>
/// Writes the source collection to the local file system as one partition file
/// plus a part file listing it. The partition goes into a uniquely named
/// directory under context.PartitionUncPath (or, when that is null, the
/// directory of the target URI); the part file records the partition base
/// path, a count of 1, and "index,size,machine" for the single partition.
/// </summary>
/// <typeparam name="T">The record type of the collection.</typeparam>
/// <param name="context">Supplies PartitionUncPath and the record factory.</param>
/// <param name="source">The collection to be ingressed.</param>
/// <param name="dataSetUri">The URI of the part file to create.</param>
/// <param name="metaData">The metadata for the collection (unused here).</param>
/// <param name="compressionScheme">Compression applied by the record writer.</param>
/// <param name="isTemp">true to store only temporarily (unused here).</param>
public override void Ingress <T>(DryadLinqContext context, IEnumerable <T> source, Uri dataSetUri, DryadLinqMetaData metaData, CompressionScheme compressionScheme, bool isTemp = false) { // Write the partition: string partDir = context.PartitionUncPath; if (partDir == null) { partDir = Path.GetDirectoryName(dataSetUri.LocalPath); } if (!Path.IsPathRooted(partDir)) { partDir = Path.Combine("/", partDir); } partDir = Path.Combine(partDir, DryadLinqUtil.MakeUniqueName()); Directory.CreateDirectory(partDir); string partPath = Path.Combine(partDir, "Part"); string partFilePath = partPath + ".00000000"; DryadLinqFactory <T> factory = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T)); using (FileStream fstream = new FileStream(partFilePath, FileMode.CreateNew, FileAccess.Write)) { DryadLinqFileBlockStream nativeStream = new DryadLinqFileBlockStream(fstream, compressionScheme); DryadLinqRecordWriter <T> writer = factory.MakeWriter(nativeStream); foreach (T rec in source) { writer.WriteRecordSync(rec); } writer.Close(); } // Write the partfile: FileInfo finfo = new FileInfo(partFilePath); using (StreamWriter writer = File.CreateText(dataSetUri.LocalPath)) { writer.WriteLine(partPath); writer.WriteLine("1"); writer.WriteLine("{0},{1},{2}", 0, finfo.Length, Environment.MachineName); } }
/// <summary>
/// Opens a read stream over an Azure blob collection.
/// </summary>
/// <param name="context">An instance of DryadLinqContext (unused here).</param>
/// <param name="dataSetUri">The URI of the Azure blob collection.</param>
/// <returns>A read stream over the collection.</returns>
/// <exception cref="DryadLinqException">Thrown when the collection does not
/// exist, or (wrapping the cause) when opening the stream fails.</exception>
public override Stream Egress(DryadLinqContext context, Uri dataSetUri)
{
    try
    {
        AzureCollectionPartition partition = new AzureCollectionPartition(dataSetUri);
        if (!partition.IsCollectionExists())
        {
            throw new DryadLinqException("Input collection " + dataSetUri + " does not exist");
        }
        return partition.GetReadStream();
    }
    catch (DryadLinqException)
    {
        // Bug fix: previously the "does not exist" exception above was caught
        // by the generic handler below and re-wrapped, burying its message.
        throw;
    }
    catch (Exception e)
    {
        throw new DryadLinqException("Can't get Azure stream info for " + dataSetUri, e);
    }
}