Example #1
0
            /// <summary>
            /// Creates an enumerator over the records of type T stored in the given files.
            /// </summary>
            /// <param name="context">Context used to look up the record factory and the configuration.</param>
            /// <param name="filePathList">File paths handed to the underlying <c>MultiBlockStream</c>.</param>
            /// <param name="associatedDscStreamName">DSC stream name handed to the underlying <c>MultiBlockStream</c>.</param>
            /// <param name="scheme">Compression scheme handed to the underlying <c>MultiBlockStream</c>.</param>
            internal TableEnumerator(HpcLinqContext context,
                                     List <string[]> filePathList,
                                     string associatedDscStreamName,
                                     DscCompressionScheme scheme)
            {
                // Capture the constructor arguments.
                this.m_context                 = context;
                this.m_filePathList            = filePathList;
                this.m_associatedDscStreamName = associatedDscStreamName;
                this.m_compressionScheme       = scheme;
                this.m_current                 = default(T);

                // Record factory for T; it builds the reader below.
                this.m_factory = (HpcLinqFactory <T>)HpcLinqCodeGen.GetFactory(context, typeof(T));

                // The append-new-lines flag is set only when T is LineRecord.
                bool appendNewLines = typeof(T) == typeof(LineRecord);

                NativeBlockStream blockStream = new MultiBlockStream(this.m_filePathList,
                                                                     this.m_associatedDscStreamName,
                                                                     FileAccess.Read,
                                                                     this.m_compressionScheme,
                                                                     appendNewLines);
                this.m_reader = this.m_factory.MakeReader(blockStream);

                // The reader's worker is started only when concurrent user delegates are allowed.
                if (!context.Configuration.AllowConcurrentUserDelegatesInSingleProcess)
                {
                    return;
                }

                this.m_reader.StartWorker();
            }
Example #2
0
        /// <summary>
        /// Streams plain enumerable data directly to DSC and returns a partitioned table over the
        /// resulting file set.
        /// </summary>
        /// <remarks>
        /// <paramref name="source"/> is enumerated exactly once: the same enumerator is used both to
        /// detect an empty sequence and to write the records, so lazy or one-shot sequences are not
        /// evaluated twice.
        /// If any step fails, a best-effort attempt is made to delete the file set before the
        /// original exception is rethrown.
        /// </remarks>
        /// <param name="context">Context providing the DSC service, the record factory and the configuration.</param>
        /// <param name="source">Records to write.</param>
        /// <param name="dscFileSetName">Name of the DSC file set to create.</param>
        /// <param name="metaData">Optional metadata; when non-null, the element type's assembly-qualified name is stored on the file set.</param>
        /// <param name="outputScheme">Compression scheme for the file set.</param>
        /// <returns>The result of <c>DataProvider.GetPartitionedTable</c> for the file set's URI.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
        internal static DryadLinqQuery <T> IngressDataDirectlyToDsc <T>(HpcLinqContext context,
                                                                        IEnumerable <T> source,
                                                                        string dscFileSetName,
                                                                        DryadLinqMetaData metaData,
                                                                        DscCompressionScheme outputScheme)
        {
            try
            {
                string dscFileSetUri = DataPath.MakeDscStreamUri(context.DscService.HostName, dscFileSetName);

                // Same exception type (and parameter name) that the former source.Take(1) probe produced.
                if (source == null)
                {
                    throw new System.ArgumentNullException("source");
                }

                // One enumerator serves both the emptiness probe and the write loop, so the
                // source is never enumerated twice.
                using (IEnumerator <T> items = source.GetEnumerator())
                {
                    if (!items.MoveNext())
                    {
                        //there is no data.. we must create a FileSet with an empty file
                        //(the factory/stream approach opens files lazily and thus never opens a file if there is no data)
                        WriteEmptyDscFileSet(context, dscFileSetName, outputScheme);
                    }
                    else
                    {
                        HpcLinqFactory <T> factory = (HpcLinqFactory <T>)HpcLinqCodeGen.GetFactory(context, typeof(T));

                        // new DscBlockStream(uri,Create,Write,compress) provides a DSC stream with one partition.
                        NativeBlockStream   nativeStream = new DscBlockStream(dscFileSetUri, FileMode.Create, FileAccess.Write, outputScheme);
                        HpcRecordWriter <T> writer       = factory.MakeWriter(nativeStream);
                        try
                        {
                            // The enumerator is already positioned on the first record, hence do/while.
                            if (context.Configuration.AllowConcurrentUserDelegatesInSingleProcess)
                            {
                                do
                                {
                                    writer.WriteRecordAsync(items.Current);
                                }
                                while (items.MoveNext());
                            }
                            else
                            {
                                do
                                {
                                    writer.WriteRecordSync(items.Current);
                                }
                                while (items.MoveNext());
                            }
                        }
                        finally
                        {
                            writer.Close(); // closes the NativeBlockStream, which seals the dsc stream.
                        }
                    }
                }

                if (metaData != null)
                {
                    DscFileSet fileSet = context.DscService.GetFileSet(dscFileSetName);
                    fileSet.SetMetadata(DryadLinqMetaData.RECORD_TYPE_NAME, Encoding.UTF8.GetBytes(metaData.ElemType.AssemblyQualifiedName));
                }

                return(DataProvider.GetPartitionedTable <T>(context, dscFileSetUri));
            }
            catch
            {
                // if we had a problem creating the fileset, try to delete it to avoid cruft being left in DSC.
                try
                {
                    context.DscService.DeleteFileSet(dscFileSetName);
                }
                catch
                {
                    // suppress error during delete
                }

                throw; // rethrow the original exception.
            }
        }

        /// <summary>
        /// Creates and seals a DSC file set containing a single empty file, replacing any
        /// existing file set of the same name.
        /// </summary>
        private static void WriteEmptyDscFileSet(HpcLinqContext context,
                                                 string dscFileSetName,
                                                 DscCompressionScheme outputScheme)
        {
            if (context.DscService.FileSetExists(dscFileSetName))
            {
                context.DscService.DeleteFileSet(dscFileSetName);
            }

            DscFileSet fileSet   = context.DscService.CreateFileSet(dscFileSetName, outputScheme);
            DscFile    file      = fileSet.AddNewFile(0);
            string     writePath = file.WritePath;

            if (outputScheme == DscCompressionScheme.Gzip)
            {
                //even zero-byte file must go through the gzip-compressor (for headers etc).
                using (Stream s = new FileStream(writePath, FileMode.Create))
                using (GZipStream gzipStream = new GZipStream(s, CompressionMode.Compress))
                {
                    // nothing to write; disposing the compressor finalizes the file.
                }
            }
            else
            {
                using (StreamWriter sw = new StreamWriter(writePath, false))
                {
                    // nothing to write; creating and disposing the writer leaves an empty file.
                }
            }

            fileSet.Seal();
        }