Exemple #1
0
        /// <summary>
        /// Writes every record of <paramref name="source"/> to the Azure blob
        /// identified by <paramref name="dataSetUri"/>.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">Context used to look up the record factory for <typeparamref name="T"/>.</param>
        /// <param name="source">The records to write.</param>
        /// <param name="dataSetUri">Azure URI that is split into account, key, container and blob.</param>
        /// <param name="metaData">Not used by this implementation.</param>
        /// <param name="compressionScheme">Must be <c>CompressionScheme.None</c>.</param>
        /// <param name="isTemp">Not used by this implementation.</param>
        /// <exception cref="DryadLinqException">A compression scheme other than None was requested.</exception>
        public override void Ingress <T>(DryadLinqContext context,
                                         IEnumerable <T> source,
                                         Uri dataSetUri,
                                         DryadLinqMetaData metaData,
                                         CompressionScheme compressionScheme,
                                         bool isTemp = false)
        {
            string accountName, accountKey, containerName, blobName;
            AzureUtils.FromAzureUri(dataSetUri, out accountName, out accountKey, out containerName, out blobName);

            // Compressed output is not supported on this path.
            if (compressionScheme != CompressionScheme.None)
            {
                throw new DryadLinqException("Not implemented: writing to Azure temporary storage with compression enabled");
            }

            var dfsClient     = new AzureDfsClient(accountName, accountKey, containerName);
            var recordFactory = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            // Blocks until the blob writer stream is available.
            using (Stream blobStream = dfsClient.GetFileStreamWriterAsync(blobName).Result)
            {
                DryadLinqRecordWriter <T> recordWriter = recordFactory.MakeWriter(new DryadLinqBlockStream(blobStream));
                foreach (T record in source)
                {
                    recordWriter.WriteRecordSync(record);
                }
                recordWriter.Close();
            }
        }
Exemple #2
0
        /// <summary>
        /// Checks whether the partitioned file named by <paramref name="dataSetUri"/> exists and,
        /// if requested, deletes it together with the partition files it lists.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="dataSetUri">URI of the partition file; a non-empty host is turned into a UNC prefix.</param>
        /// <param name="deleteIfExists">true to delete an existing collection; false to fail if it exists.</param>
        /// <exception cref="DryadLinqException">
        /// The partition file exists and <paramref name="deleteIfExists"/> is false.
        /// </exception>
        public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
        {
            string fileName = dataSetUri.LocalPath;

            if (!String.IsNullOrEmpty(dataSetUri.Host))
            {
                fileName = @"\\" + dataSetUri.Host + fileName;
            }
            if (!File.Exists(fileName))
            {
                return;
            }
            if (!deleteIfExists)
            {
                throw new DryadLinqException("Can't output to existing Partitioned File collection " + dataSetUri.AbsoluteUri);
            }

            // Note: We delete all the partitions!
            var lines = File.ReadAllLines(fileName);
            try
            {
                foreach (string path in this.GetPartitionPaths(lines))
                {
                    // Best effort per partition: a failure to delete one partition
                    // must not prevent the remaining partitions from being removed.
                    try
                    {
                        if (File.Exists(path))
                        {
                            File.Delete(path);
                        }
                    }
                    catch (Exception) { /* skip this partition and continue */ }
                }
            }
            catch (Exception) { /* partition list could not be obtained; still remove the partition file */ }
            File.Delete(fileName);
        }
Exemple #3
0
 /// <summary>
 /// Ingresses a .NET collection to a specified store location.
 /// Abstract: each concrete data provider supplies its own implementation.
 /// </summary>
 /// <typeparam name="T">The record type of the collection.</typeparam>
 /// <param name="context">An instance of DryadLinqContext.</param>
 /// <param name="source">The collection to be ingressed.</param>
 /// <param name="dataSetUri">The URI at which to store the collection.</param>
 /// <param name="metaData">The metadata for the collection.</param>
 /// <param name="outputScheme">The compression scheme used to store the collection.</param>
 /// <param name="isTemp">true to only store the collection temporarily with a time lease.</param>
 /// <param name="serializer">
 /// A stream-based serializer: an expression over an action that takes the
 /// collection and the destination stream.
 /// </param>
 public abstract void Ingress <T>(DryadLinqContext context,
                                  IEnumerable <T> source,
                                  Uri dataSetUri,
                                  DryadLinqMetaData metaData,
                                  CompressionScheme outputScheme,
                                  bool isTemp,
                                  Expression <Action <IEnumerable <T>, Stream> > serializer);
Exemple #4
0
        /// <summary>
        /// Resolves the data provider registered for the URI's scheme and asks it
        /// for the metadata of the dataset.
        /// </summary>
        /// <param name="context">An instance of DryadLinqContext, passed through to the provider.</param>
        /// <param name="dataSetUri">The URI of the dataset.</param>
        /// <returns>The metadata returned by the provider.</returns>
        internal static DryadLinqMetaData Get(DryadLinqContext context, Uri dataSetUri)
        {
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
            return provider.GetMetaData(context, dataSetUri);
        }
Exemple #5
0
        /// <summary>
        /// Creates a job submission for the given context and computes the file sets
        /// needed by the graph manager and by the workers.
        /// </summary>
        /// <remarks>
        /// File names are lower-cased with the invariant culture so that the result does
        /// not depend on the current thread culture (a culture-sensitive ToLower can map
        /// 'I' to a dotless 'i' and yield a path that does not exist).
        /// </remarks>
        /// <param name="context">
        /// Supplies PeloponneseHomeDirectory and DryadHomeDirectory, from which the
        /// resource and dependency lists are built.
        /// </param>
        public PeloponneseJobSubmission(DryadLinqContext context)
        {
            m_context        = context;
            m_localResources = new Dictionary <string, HashSet <string> >();

            // Resources shipped with Peloponnese itself.
            m_peloponneseGMFiles = Peloponnese.ClusterUtils.ConfigHelpers
                                   .ListPeloponneseResources(context.PeloponneseHomeDirectory)
                                   .Select(r => r.ToLowerInvariant());
            IEnumerable <string> graphManagerFiles = Peloponnese.Shared.DependencyLister.Lister
                                                     .ListDependencies(Path.Combine(context.DryadHomeDirectory, "Microsoft.Research.Dryad.GraphManager.exe"))
                                                     .Select(r => r.ToLowerInvariant());

            // Graph manager dependencies that are not already Peloponnese resources.
            m_dryadGMFiles = graphManagerFiles.Except(m_peloponneseGMFiles);

            // Worker files that are added explicitly to the vertex host dependency list.
            string[] additionalWorkerFiles =
            {
                "Microsoft.Research.Dryadlinq.dll",
                "Microsoft.Research.Dryad.DryadLinq.NativeWrapper.dll"
            };

            IEnumerable <string> processServiceFiles = Peloponnese.Shared.DependencyLister.Lister
                                                       .ListDependencies(Path.Combine(context.DryadHomeDirectory, "Microsoft.Research.Dryad.ProcessService.exe"))
                                                       .Select(r => r.ToLowerInvariant());
            IEnumerable <string> vertexHostFiles = Peloponnese.Shared.DependencyLister.Lister
                                                   .ListDependencies(Path.Combine(context.DryadHomeDirectory, "Microsoft.Research.Dryad.VertexHost.exe"))
                                                   .Concat(additionalWorkerFiles.Select(f => Path.Combine(context.DryadHomeDirectory, f)))
                                                   .Select(r => r.ToLowerInvariant());
            IEnumerable <string> workerFiles = processServiceFiles.Union(vertexHostFiles);

            // Split worker files into those shared with Peloponnese and those specific to Dryad.
            m_peloponneseWorkerFiles = workerFiles.Intersect(m_peloponneseGMFiles);
            m_dryadWorkerFiles       = workerFiles.Except(m_peloponneseGMFiles);
        }
Exemple #6
0
        /// <summary>
        /// Reads the dataset stored at a given URI.
        /// </summary>
        /// <typeparam name="T">The record type of the dataset.</typeparam>
        /// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
        /// <param name="dataSetUri">The URI of the dataset; it is rewritten by the provider before use.</param>
        /// <returns>A sequence of records as IEnumerable{T}.</returns>
        public static IEnumerable <T> ReadData <T>(DryadLinqContext context, Uri dataSetUri)
        {
            // The provider is selected by the URI scheme.
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
            Uri rewrittenUri = provider.RewriteUri <T>(context, dataSetUri);

            return new DryadLinqQueryEnumerable <T>(context, provider, rewrittenUri);
        }
 /// <summary>
 /// Returns the cluster client, creating a NativeYarnClient from HeadNode,
 /// HdfsPort and LauncherPort the first time it is requested.
 /// </summary>
 /// <param name="context">Not used by this implementation.</param>
 /// <returns>The cached cluster client.</returns>
 public ClusterClient Client(DryadLinqContext context)
 {
     if (_clusterClient != null)
     {
         return _clusterClient;
     }

     _clusterClient = new NativeYarnClient(HeadNode, HdfsPort, LauncherPort);
     return _clusterClient;
 }
Exemple #8
0
        /// <summary>
        /// Builds a query object over the dataset stored at a given URI.
        /// </summary>
        /// <typeparam name="T">The record type of the dataset.</typeparam>
        /// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
        /// <param name="dataSetUri">The URI of the dataset; it is rewritten by the provider before use.</param>
        /// <returns>A query object representing the dataset.</returns>
        internal static DryadLinqQuery <T> GetPartitionedTable <T>(DryadLinqContext context, Uri dataSetUri)
        {
            // The data provider is selected by the URI scheme.
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
            DryadLinqProvider linqProvider = new DryadLinqProvider(context);
            Uri rewrittenUri = provider.RewriteUri <T>(context, dataSetUri);

            return new DryadLinqQuery <T>(null, linqProvider, provider, rewrittenUri);
        }
Exemple #9
0
 /// <summary>
 /// Tells whether automatic type inference applies to <paramref name="type"/>.
 /// </summary>
 /// <param name="context">Not used by this implementation.</param>
 /// <param name="type">The type to inspect for AutoTypeInferenceAttribute (inherited attributes included).</param>
 /// <returns>
 /// true when StaticConfig.AllowAutoTypeInference is set and the type carries
 /// at least one AutoTypeInferenceAttribute; otherwise false.
 /// </returns>
 internal static bool DoAutoTypeInference(DryadLinqContext context, Type type)
 {
     if (StaticConfig.AllowAutoTypeInference)
     {
         object[] markers = type.GetCustomAttributes(typeof(AutoTypeInferenceAttribute), true);
         return markers.Length > 0;
     }
     return false;
 }
Exemple #10
0
 /// <summary>
 /// Placeholder: ingress is not provided by this data provider.
 /// </summary>
 /// <typeparam name="T">The record type of the collection.</typeparam>
 /// <param name="context">Not used.</param>
 /// <param name="source">Not used.</param>
 /// <param name="dataSetUri">Not used.</param>
 /// <param name="metaData">Not used.</param>
 /// <param name="outputScheme">Not used.</param>
 /// <param name="isTemp">Not used.</param>
 /// <exception cref="DryadLinqException">Always thrown.</exception>
 public override void Ingress <T>(DryadLinqContext context,
                                  IEnumerable <T> source,
                                  Uri dataSetUri,
                                  DryadLinqMetaData metaData,
                                  CompressionScheme outputScheme,
                                  bool isTemp = false)
 {
     // Unconditionally rejects the call.
     DryadLinqException notAvailable = new DryadLinqException("TBA");
     throw notAvailable;
 }
 /// <summary>
 /// Returns the cluster client. On first use, a continuation on the DFS client
 /// task is scheduled to build an AzureYarnClient; the call then blocks on its result.
 /// </summary>
 /// <param name="context">Supplies PeloponneseHomeDirectory when the client is built.</param>
 /// <returns>The result of the cached client task.</returns>
 public ClusterClient Client(DryadLinqContext context)
 {
     if (_clusterClient == null)
     {
         _clusterClient = _dfsClient.ContinueWith(dfsTask =>
         {
             return new AzureYarnClient(_azureSubscriptions, dfsTask.Result, context.PeloponneseHomeDirectory, Cluster.Name);
         });
     }

     // Blocks until the continuation has produced the client.
     return _clusterClient.Result;
 }
Exemple #12
0
 /// <summary>
 /// Creates an enumerable over a dataset, remembering the context, provider,
 /// URI and deserializer for later use.
 /// </summary>
 /// <param name="context">An instance of DryadLinqContext.</param>
 /// <param name="dataProvider">The data provider for the dataset.</param>
 /// <param name="dataSetUri">The URI of the dataset.</param>
 /// <param name="deserializer">A stream-based deserializer expression.</param>
 public DryadLinqQueryEnumerable(DryadLinqContext context,
                                 DataProvider dataProvider,
                                 Uri dataSetUri,
                                 Expression <Func <Stream, IEnumerable <T> > > deserializer)
 {
     // Plain field capture; nothing is read or opened here.
     m_context      = context;
     m_dataProvider = dataProvider;
     m_dataSetUri   = dataSetUri;
     m_deserializer = deserializer;
 }
Exemple #13
0
        /// <summary>
        /// Reads the dataset stored at a given URI using a caller-supplied deserializer.
        /// </summary>
        /// <typeparam name="T">The record type of the dataset</typeparam>
        /// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
        /// <param name="dataSetUri">The URI of the dataset; it is rewritten by the provider before use.</param>
        /// <param name="deserializer">A stream-based deserializer</param>
        /// <returns>A sequence of records as IEnumerable{T}</returns>
        internal static IEnumerable <T> ReadData <T>(DryadLinqContext context,
                                                     Uri dataSetUri,
                                                     Expression <Func <Stream, IEnumerable <T> > > deserializer)
        {
            // The provider is selected by the URI scheme.
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
            Uri rewrittenUri = provider.RewriteUri <T>(context, dataSetUri);

            return new DryadLinqQueryEnumerable <T>(context, provider, rewrittenUri, deserializer);
        }
Exemple #14
0
            /// <summary>
            /// Creates an enumerator that reads records of type T from the stream
            /// the data provider returns for <paramref name="dataSetUri"/>.
            /// </summary>
            /// <param name="context">Used to obtain the record factory and passed to the provider.</param>
            /// <param name="dataProvider">The provider whose Egress supplies the input stream.</param>
            /// <param name="dataSetUri">The URI of the dataset to read.</param>
            internal TableEnumerator(DryadLinqContext context, DataProvider dataProvider, Uri dataSetUri)
            {
                m_current = default(T);
                m_factory = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

                // Wrap the provider's stream in a block stream and hand it to a record reader.
                Stream input = dataProvider.Egress(context, dataSetUri);
                m_reader = m_factory.MakeReader(new DryadLinqBlockStream(input));
                m_reader.StartWorker();
            }
Exemple #15
0
        /// <summary>
        /// Builds the URI of a temporary stream located under the temporary-stream
        /// prefix directory at the root of the current working directory's drive,
        /// creating the containing directory as a side effect.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="path">Path of the stream relative to the temporary-stream prefix.</param>
        /// <returns>A URI of the form <c>scheme:///fullPath</c>.</returns>
        public override Uri GetTemporaryStreamUri(DryadLinqContext context, string path)
        {
            string root     = Path.GetPathRoot(Directory.GetCurrentDirectory());
            string fullPath = Path.Combine(root, DataPath.TEMPORARY_STREAM_NAME_PREFIX, path);

            // Make sure the parent directory exists before handing out the URI.
            Directory.CreateDirectory(Path.GetDirectoryName(fullPath));
            return new Uri(this.Scheme + ":///" + fullPath);
        }
Exemple #16
0
 /// <summary>
 /// Checks whether the HDFS collection exists and, if requested, deletes it.
 /// </summary>
 /// <param name="context">Supplies the HDFS client.</param>
 /// <param name="dataSetUri">The URI of the collection.</param>
 /// <param name="deleteIfExists">true to delete an existing collection; false to fail if it exists.</param>
 /// <exception cref="DryadLinqException">
 /// The collection exists and <paramref name="deleteIfExists"/> is false.
 /// </exception>
 public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
 {
     bool alreadyThere = context.GetHdfsClient.IsFileExists(dataSetUri);
     if (!alreadyThere)
     {
         return;
     }

     if (deleteIfExists)
     {
         context.GetHdfsClient.DeleteDfsFile(dataSetUri, true);
         return;
     }

     throw new DryadLinqException("Can't output to existing HDFS collection " + dataSetUri.AbsoluteUri);
 }
Exemple #17
0
        /// <summary>
        /// Writes <paramref name="source"/> as a single-partition collection: one data file in a
        /// freshly created, uniquely named directory next to the partition file, plus the
        /// partition file itself describing that data file.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">Used to obtain the record factory for <typeparamref name="T"/>.</param>
        /// <param name="source">The records to write.</param>
        /// <param name="dataSetUri">URI of the partition file; a non-empty host is turned into a UNC prefix.</param>
        /// <param name="metaData">Not used by this implementation.</param>
        /// <param name="compressionScheme">Compression applied when the built-in record writer is used.</param>
        /// <param name="isTemp">Not used by this implementation.</param>
        /// <param name="serializer">
        /// Optional custom serializer; when non-null it is compiled and writes directly to the file stream.
        /// </param>
        public override void Ingress <T>(DryadLinqContext context,
                                         IEnumerable <T> source,
                                         Uri dataSetUri,
                                         DryadLinqMetaData metaData,
                                         CompressionScheme compressionScheme,
                                         bool isTemp,
                                         Expression <Action <IEnumerable <T>, Stream> > serializer)
        {
            string partFileName = dataSetUri.LocalPath;
            if (!String.IsNullOrEmpty(dataSetUri.Host))
            {
                partFileName = @"\\" + dataSetUri.Host + partFileName;
            }

            // Write the partition:
            string dataDir = Path.Combine(Path.GetDirectoryName(partFileName), DryadLinqUtil.MakeUniqueName());
            Directory.CreateDirectory(dataDir);

            string partitionBase = Path.Combine(dataDir, "Part");
            string dataFilePath  = partitionBase + ".00000000";
            var recordFactory    = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            using (FileStream output = new FileStream(dataFilePath, FileMode.CreateNew, FileAccess.Write))
            {
                if (serializer != null)
                {
                    // Caller-supplied serialization writes straight to the file stream.
                    Action <IEnumerable <T>, Stream> writeAll = serializer.Compile();
                    writeAll(source, output);
                }
                else
                {
                    var blockStream = new DryadLinqFileBlockStream(output, compressionScheme);
                    DryadLinqRecordWriter <T> recordWriter = recordFactory.MakeWriter(blockStream);
                    foreach (T record in source)
                    {
                        recordWriter.WriteRecordSync(record);
                    }
                    recordWriter.Close();
                }
            }

            // Write the partfile:
            long dataFileSize = new FileInfo(dataFilePath).Length;

            using (StreamWriter partFile = File.CreateText(partFileName))
            {
                partFile.WriteLine(partitionBase);
                partFile.WriteLine("1");
                partFile.WriteLine("{0},{1}", 0, dataFileSize);
            }
        }
Exemple #18
0
        /// <summary>
        /// Asks the cluster's DFS client for the content summary of the dataset and
        /// returns it as stream info.
        /// </summary>
        /// <param name="context">Supplies the cluster DFS client.</param>
        /// <param name="dataSetUri">The URI of the dataset; its absolute path is queried.</param>
        /// <returns>Stream info holding the partition count and size.</returns>
        /// <exception cref="DryadLinqException">The reported partition count is 0.</exception>
        public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri)
        {
            Int64 totalSize      = -1;
            Int32 partitionCount = 0;

            context.Cluster.DfsClient.GetContentSummary(dataSetUri.AbsolutePath, ref totalSize, ref partitionCount);
            if (partitionCount != 0)
            {
                return new DryadLinqStreamInfo(partitionCount, totalSize);
            }

            throw new DryadLinqException("Got 0 partition count for " + dataSetUri.AbsoluteUri);
        }
Exemple #19
0
        /// <summary>
        /// Opens the partitioned collection described by the partition file at
        /// <paramref name="dataSetUri"/> as one stream over all of its partition files.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="dataSetUri">URI whose local path names the partition file.</param>
        /// <returns>A multi-file stream over the partition paths, read without compression.</returns>
        /// <exception cref="DryadLinqException">The partition file has fewer than three lines.</exception>
        public override Stream Egress(DryadLinqContext context, Uri dataSetUri)
        {
            string[] partFileLines = File.ReadAllLines(dataSetUri.LocalPath);

            // Fewer than three lines is rejected as malformed.
            bool wellFormed = partFileLines.Length >= 3;
            if (!wellFormed)
            {
                throw new DryadLinqException("The partition file " + dataSetUri + " is malformed.");
            }

            return new DryadLinqMultiFileStream(this.GetPartitionPaths(partFileLines), CompressionScheme.None);
        }
Exemple #20
0
        /// <summary>
        /// Checks whether the HDFS collection exists and, if requested, deletes it,
        /// using a WebHdfsClient created for the URI's host.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="dataSetUri">The URI of the collection; host and absolute path are used.</param>
        /// <param name="deleteIfExists">true to delete an existing collection; false to fail if it exists.</param>
        /// <exception cref="DryadLinqException">
        /// The collection exists and <paramref name="deleteIfExists"/> is false.
        /// </exception>
        public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
        {
            string hdfsPath = dataSetUri.AbsolutePath;
            WebHdfsClient hdfs = new WebHdfsClient(dataSetUri.Host, 8033, 50070);

            if (!hdfs.IsFileExists(hdfsPath))
            {
                return;
            }

            if (deleteIfExists)
            {
                hdfs.DeleteDfsFile(hdfsPath);
                return;
            }

            throw new DryadLinqException("Can't output to existing HDFS collection " + dataSetUri.AbsoluteUri);
        }
Exemple #21
0
        /// <summary>
        /// Checks whether the Azure Blob collection exists and, if requested, deletes it.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="dataSetUri">The URI of the collection.</param>
        /// <param name="deleteIfExists">true to delete an existing collection; false to fail if it exists.</param>
        /// <exception cref="DryadLinqException">
        /// The collection exists and <paramref name="deleteIfExists"/> is false.
        /// </exception>
        public override void CheckExistence(DryadLinqContext context, Uri dataSetUri, bool deleteIfExists)
        {
            AzureCollectionPartition collection = new AzureCollectionPartition(dataSetUri);
            if (!collection.IsCollectionExists())
            {
                return;
            }

            if (deleteIfExists)
            {
                collection.DeleteCollection();
                return;
            }

            throw new DryadLinqException("Can't output to existing Azure Blob collection " + dataSetUri.AbsoluteUri);
        }
Exemple #22
0
        /// <summary>
        /// Tells whether records of <paramref name="type"/> may be null.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="type">The record type; null and value types are never nullable.</param>
        /// <returns>
        /// false for a null type or a value type; otherwise the CanBeNull setting of the
        /// first NullableAttribute found (inherited attributes included), or
        /// StaticConfig.AllowNullRecords when the type carries no such attribute.
        /// </returns>
        internal static bool RecordCanBeNull(DryadLinqContext context, Type type)
        {
            if (type != null && !type.IsValueType)
            {
                object[] nullableMarkers = type.GetCustomAttributes(typeof(NullableAttribute), true);
                if (nullableMarkers.Length > 0)
                {
                    NullableAttribute marker = (NullableAttribute)nullableMarkers[0];
                    return marker.CanBeNull;
                }
                return StaticConfig.AllowNullRecords;
            }
            return false;
        }
Exemple #23
0
 /// <summary>
 /// Creates a job executor in the NotSubmitted state with a fresh job submission
 /// object: local when the context requests local execution, YARN otherwise.
 /// </summary>
 /// <param name="context">The context that selects and configures the job submission.</param>
 public DryadLinqJobExecutor(DryadLinqContext context)
 {
     m_context       = context;
     m_currentStatus = JobStatus.NotSubmitted;

     // A new job submission object is used for each query.
     if (!context.LocalExecution)
     {
         m_jobSubmission = new YarnJobSubmission(context);
     }
     else
     {
         m_jobSubmission = new LocalJobSubmission(context);
     }
 }
Exemple #24
0
        /// <summary>
        /// Ingresses an IEnumerable{T} to a given location through the data provider
        /// selected by the URI scheme, then returns a query over the stored data.
        /// </summary>
        /// <typeparam name="T">The record type of the data.</typeparam>
        /// <param name="context">An instance of <see cref="DryadLinqContext"/></param>
        /// <param name="source">The data to store.</param>
        /// <param name="dataSetUri">The URI of the store location; it is rewritten by the provider before use.</param>
        /// <param name="metaData">The metadata of the data.</param>
        /// <param name="outputScheme">The compression scheme.</param>
        /// <param name="isTemp">true if the data is only stored temporarily.</param>
        /// <returns>An instance of IQueryable{T} for the data.</returns>
        internal static DryadLinqQuery <T> StoreData <T>(DryadLinqContext context,
                                                         IEnumerable <T> source,
                                                         Uri dataSetUri,
                                                         DryadLinqMetaData metaData,
                                                         CompressionScheme outputScheme,
                                                         bool isTemp = false)
        {
            DataProvider provider = DataProvider.GetDataProvider(DataPath.GetScheme(dataSetUri));
            Uri targetUri = provider.RewriteUri <T>(context, dataSetUri);

            provider.Ingress(context, source, targetUri, metaData, outputScheme, isTemp);

            // The table is looked up again from the rewritten URI.
            return DataProvider.GetPartitionedTable <T>(context, targetUri);
        }
Exemple #25
0
        /// <summary>
        /// Rebuilds the URI from its parsed query string, adding a
        /// <c>seekBoundaries</c> parameter when LineRecord data is accessed for
        /// anything other than writing.
        /// </summary>
        /// <typeparam name="T">The record type of the dataset.</typeparam>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="dataSetUri">The URI to rewrite.</param>
        /// <param name="access">The intended access; Write suppresses the extra parameter.</param>
        /// <returns>The URI rebuilt from the (possibly extended) query string.</returns>
        public override Uri RewriteUri <T>(DryadLinqContext context, Uri dataSetUri, FileAccess access)
        {
            UriBuilder rewritten = new UriBuilder(dataSetUri);
            NameValueCollection parameters = System.Web.HttpUtility.ParseQueryString(rewritten.Query);

            bool isWrite      = (access == FileAccess.Write);
            bool isLineRecord = (typeof(T) == typeof(Microsoft.Research.DryadLinq.LineRecord));
            if (!isWrite && isLineRecord)
            {
                parameters["seekBoundaries"] = "Microsoft.Research.DryadLinq.LineRecord";
            }

            rewritten.Query = parameters.ToString();
            return rewritten.Uri;
        }
Exemple #26
0
        /// <summary>
        /// Asks the HDFS client for the directory content summary of the dataset and
        /// returns it as stream info.
        /// </summary>
        /// <param name="context">Supplies the HDFS client.</param>
        /// <param name="dataSetUri">
        /// The URI of the dataset; blocks are expanded when its <c>seekboundaries</c>
        /// query parameter names the LineRecord type.
        /// </param>
        /// <returns>Stream info holding the partition count and size.</returns>
        /// <exception cref="DryadLinqException">The reported partition count is 0.</exception>
        public override DryadLinqStreamInfo GetStreamInfo(DryadLinqContext context, Uri dataSetUri)
        {
            Int64 totalSize      = -1;
            Int32 partitionCount = 0;

            NameValueCollection parameters = System.Web.HttpUtility.ParseQueryString(dataSetUri.Query);
            string seekBoundaries          = parameters["seekboundaries"];
            bool expandBlocks              = (seekBoundaries == "Microsoft.Research.DryadLinq.LineRecord");

            context.GetHdfsClient.GetDirectoryContentSummary(dataSetUri, expandBlocks, ref totalSize, ref partitionCount);
            if (partitionCount != 0)
            {
                return new DryadLinqStreamInfo(partitionCount, totalSize);
            }

            throw new DryadLinqException("Got 0 partition count for " + dataSetUri.AbsoluteUri);
        }
Exemple #27
0
 /// <summary>
 /// Creates a job executor in the NotSubmitted state with a fresh job submission
 /// object: local for the LOCAL platform, otherwise YARN with a staging URI
 /// obtained from the cluster.
 /// </summary>
 /// <param name="context">The context that selects and configures the job submission.</param>
 public DryadLinqJobExecutor(DryadLinqContext context)
 {
     m_context       = context;
     m_currentStatus = JobStatus.NotSubmitted;

     // A new job submission object is used for each query.
     if (context.PlatformKind != PlatformKind.LOCAL)
     {
         m_jobSubmission = new YarnJobSubmission(
             context, context.Cluster.MakeInternalClusterUri("tmp", "staging"));
     }
     else
     {
         m_jobSubmission = new LocalJobSubmission(context);
     }
 }
Exemple #28
0
        /// <summary>
        /// Creates metadata for a dataset by recording the context, dataset URI,
        /// record type and compression scheme.
        /// </summary>
        /// <param name="context">An instance of DryadLinqContext.</param>
        /// <param name="recordType">The record type of the dataset.</param>
        /// <param name="dataSetUri">The URI of the dataset.</param>
        /// <param name="compressionScheme">The compression scheme of the dataset.</param>
        internal DryadLinqMetaData(DryadLinqContext context,
                                   Type recordType,
                                   Uri dataSetUri,
                                   CompressionScheme compressionScheme)
        {
            m_context           = context;
            m_elemType          = recordType;
            m_dataSetUri        = dataSetUri;
            m_compressionScheme = compressionScheme;

            // Currently disabled initialisation, kept for reference:
            //   m_version     = context.ClientVersion();
            //   InitializeFlags();
            //   m_fp          = 0UL;
            //   m_dataSetInfo = node.OutputDataSetInfo;
        }
Exemple #29
0
        /// <summary>
        /// Writes <paramref name="source"/> as a single-partition collection: one data file in a
        /// freshly created, uniquely named directory, plus a partition file at the URI's
        /// local path describing that data file.
        /// </summary>
        /// <typeparam name="T">The record type of the collection.</typeparam>
        /// <param name="context">
        /// Supplies PartitionUncPath (the preferred partition directory) and the record factory.
        /// </param>
        /// <param name="source">The records to write.</param>
        /// <param name="dataSetUri">
        /// URI whose local path names the partition file; its directory is the
        /// fallback partition directory when the context provides none.
        /// </param>
        /// <param name="metaData">Not used by this implementation.</param>
        /// <param name="compressionScheme">Compression applied to the data file.</param>
        /// <param name="isTemp">Not used by this implementation.</param>
        public override void Ingress <T>(DryadLinqContext context,
                                         IEnumerable <T> source,
                                         Uri dataSetUri,
                                         DryadLinqMetaData metaData,
                                         CompressionScheme compressionScheme,
                                         bool isTemp = false)
        {
            // Write the partition:
            string dataDir = context.PartitionUncPath ?? Path.GetDirectoryName(dataSetUri.LocalPath);
            if (!Path.IsPathRooted(dataDir))
            {
                dataDir = Path.Combine("/", dataDir);
            }
            dataDir = Path.Combine(dataDir, DryadLinqUtil.MakeUniqueName());
            Directory.CreateDirectory(dataDir);

            string partitionBase = Path.Combine(dataDir, "Part");
            string dataFilePath  = partitionBase + ".00000000";
            var recordFactory    = (DryadLinqFactory <T>)DryadLinqCodeGen.GetFactory(context, typeof(T));

            using (FileStream output = new FileStream(dataFilePath, FileMode.CreateNew, FileAccess.Write))
            {
                var blockStream = new DryadLinqFileBlockStream(output, compressionScheme);
                DryadLinqRecordWriter <T> recordWriter = recordFactory.MakeWriter(blockStream);
                foreach (T record in source)
                {
                    recordWriter.WriteRecordSync(record);
                }
                recordWriter.Close();
            }

            // Write the partfile:
            FileInfo dataFileInfo = new FileInfo(dataFilePath);

            using (StreamWriter partFile = File.CreateText(dataSetUri.LocalPath))
            {
                partFile.WriteLine(partitionBase);
                partFile.WriteLine("1");
                partFile.WriteLine("{0},{1},{2}", 0, dataFileInfo.Length, Environment.MachineName);
            }
        }
Exemple #30
0
        /// <summary>
        /// Opens a read stream over the Azure collection at <paramref name="dataSetUri"/>.
        /// </summary>
        /// <param name="context">Not used by this implementation.</param>
        /// <param name="dataSetUri">The URI of the collection to read.</param>
        /// <returns>The read stream provided by the collection partition.</returns>
        /// <exception cref="DryadLinqException">
        /// Any failure, including a missing collection; the original exception is
        /// attached as the inner exception.
        /// </exception>
        public override Stream Egress(DryadLinqContext context, Uri dataSetUri)
        {
            try
            {
                AzureCollectionPartition collection = new AzureCollectionPartition(dataSetUri);
                if (collection.IsCollectionExists())
                {
                    return collection.GetReadStream();
                }

                // Caught by the handler below and wrapped like every other failure.
                throw new DryadLinqException("Input collection " + dataSetUri + " does not exist");
            }
            catch (Exception failure)
            {
                throw new DryadLinqException("Can't get Azure stream info for " + dataSetUri, failure);
            }
        }