Beispiel #1
0
        /// <summary>
        /// Prints the "genes" section of the ClinVar test resource through a
        /// <c>QueryProcessor</c> and checks that exactly 127 lines are emitted.
        /// </summary>
        public void GetGeneSection()
        {
            var jsonStream  = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("Clinvar20150901.json.gz")), CompressionMode.Decompress);
            var jasixStream = ResourceUtilities.GetReadStream(Resources.TopPath("Clinvar20150901.json.gz.jsi"));
            var output      = new MemoryStream();

            // the final 'true' keeps the MemoryStream open after the writer is disposed
            using (var writer = new StreamWriter(output, Encoding.UTF8, 512, true))
            {
                using (var processor = new QueryProcessor(new StreamReader(jsonStream), jasixStream, writer))
                {
                    writer.NewLine = "\r\n";
                    processor.PrintSection("genes");
                }
            }

            Assert.NotEqual(0, output.Length);
            output.Position = 0;

            // count every line that was written to the output
            var lineCount = 0;
            using (var reader = new StreamReader(output))
            {
                while (reader.ReadLine() != null)
                {
                    lineCount++;
                }
            }

            Assert.Equal(127, lineCount);
        }
Beispiel #2
0
        /// <summary>
        /// Compresses 100 random bytes, then reads them back through a 60-byte buffer and
        /// checks the byte count returned by each read: 0 for a zero-length request, a full
        /// buffer, the remaining bytes, and finally 0 once the data is exhausted.
        /// </summary>
        public void EndOfFile()
        {
            var payload = ByteUtilities.GetRandomBytes(100);
            var chunk   = new byte[60];

            using (var memoryStream = new MemoryStream())
            {
                using (var compressor = new BlockGZipStream(memoryStream, CompressionMode.Compress, true))
                {
                    compressor.Write(payload, 0, payload.Length);
                }

                memoryStream.Position = 0;

                using (var decompressor = new BlockGZipStream(memoryStream, CompressionMode.Decompress))
                {
                    // a zero-length request yields nothing
                    Assert.Equal(0, decompressor.Read(chunk, 0, 0));

                    // first read fills the whole buffer
                    Assert.Equal(chunk.Length, decompressor.Read(chunk, 0, chunk.Length));

                    // second read returns whatever is left of the payload
                    Assert.Equal(payload.Length - chunk.Length, decompressor.Read(chunk, 0, chunk.Length));

                    // nothing remains after the payload has been consumed
                    Assert.Equal(0, decompressor.Read(chunk, 0, chunk.Length));
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Builds an index for the COSMIC indels resource into memory, reads it back and
        /// checks the first-variant positions reported for four chromosomes.
        /// </summary>
        public void IndexCreation_multiChromosome()
        {
            var compressedJson = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("cosmicv72.indels.json.gz")), CompressionMode.Decompress);
            var indexBuffer    = new MemoryStream();

            using (var creator = new IndexCreator(compressedJson, indexBuffer))
            {
                creator.CreateIndex();
            }

            // re-open the serialized index from a copy of the written bytes
            JasixIndex index;
            using (var indexReader = new MemoryStream(indexBuffer.ToArray()))
            {
                indexReader.Seek(0, SeekOrigin.Begin);
                index = new JasixIndex(indexReader);
            }

            Assert.Equal(2268, index.GetFirstVariantPosition("chr1", 9775924, 9775924));
            Assert.Equal(14035925971, index.GetFirstVariantPosition("chr2", 16081096, 16081096));
            Assert.Equal(433156622693, index.GetFirstVariantPosition("chr20", 36026164, 36026164));
            Assert.Equal(439602269527, index.GetFirstVariantPosition("chrX", 66765044, 66765044));
        }
Beispiel #4
0
        /// <summary>
        /// Reads lines from a block-gzip compressed stream and appends each non-empty line
        /// to <c>_geneLines</c>, stopping at <c>FooterLine</c> or the end of the stream.
        /// Every stored line is guaranteed to end with a comma.
        /// </summary>
        /// <param name="jsonStream">compressed stream to read from</param>
        /// <returns>the number of lines added to <c>_geneLines</c></returns>
        private int ReadGeneLines(Stream jsonStream)
        {
            var added = 0;

            using (var bgzip = new BlockGZipStream(jsonStream, CompressionMode.Decompress))
            {
                using (var reader = new StreamReader(bgzip))
                {
                    string current;
                    while ((current = reader.ReadLine()) != null)
                    {
                        // blank lines are skipped before the footer check
                        if (current.Length == 0)
                        {
                            continue;
                        }

                        if (current == FooterLine)
                        {
                            break;
                        }

                        // normalize: every stored line carries a trailing comma
                        var geneLine = current.EndsWith(',') ? current : current + ',';

                        _geneLines.Add(geneLine);
                        added++;
                    }
                }
            }

            return added;
        }
Beispiel #5
0
 /// <summary>
 /// Initializes the creator: wraps <paramref name="readStream"/> in a
 /// <c>BgzipTextReader</c>, keeps <paramref name="writeStream"/> for later use and
 /// sets up the two benchmark instances.
 /// </summary>
 /// <param name="readStream">block-gzip stream that is read through a text reader</param>
 /// <param name="writeStream">stream stored in <c>_writeStream</c></param>
 public IndexCreator(BlockGZipStream readStream, Stream writeStream)
 {
     _reader = new BgzipTextReader(readStream);
     _writeStream = writeStream;

     // one benchmark per chromosome, one for the overall run (judging by the field names)
     _chromBenchmark = new Benchmark();
     _benchmark = new Benchmark();
 }
Beispiel #6
0
        /// <summary>
        /// Builds an in-memory block-gzip JSON stream (header, position lines for one
        /// chromosome, footer) together with its matching Jasix index stream.
        /// Both returned streams are rewound to position 0.
        /// </summary>
        /// <param name="chromNumber">chromosome number used in the names and to derive the positions</param>
        /// <returns>the compressed JSON stream and the index stream</returns>
        private static (Stream jsonStream, Stream jasixStream) GetNirvanaJsonStreamWithoutGenes(int chromNumber)
        {
            var jsonBuffer  = new MemoryStream();
            var indexBuffer = new MemoryStream();

            int firstPosition = 100 * chromNumber;
            int endPosition   = 123 * chromNumber;

            using (var bgZipStream = new BlockGZipStream(jsonBuffer, CompressionMode.Compress, true))
            {
                using (var writer = new BgzipTextWriter(bgZipStream))
                {
                    using (var jasixIndex = new JasixIndex())
                    {
                        writer.Write(NirvanaHeader);
                        writer.Flush();
                        jasixIndex.BeginSection(JasixCommons.PositionsSectionTag, writer.Position);

                        for (int position = firstPosition; position < endPosition; position++)
                        {
                            writer.WriteLine($"{{\"chromosome\":\"chr{chromNumber}\",\"position\":{position}}},");

                            // flushing on every 50th position starts another block
                            if (position % 50 == 0)
                            {
                                writer.Flush();
                            }
                        }

                        // closing entry of the positions section, written without a trailing comma
                        writer.WriteLine($"{{\"chromosome\":\"chr{chromNumber}\",\"position\":{firstPosition + 25}}}");
                        writer.Flush();
                        jasixIndex.EndSection(JasixCommons.PositionsSectionTag, writer.Position);

                        writer.Write(NirvanaFooter);
                        jasixIndex.Write(indexBuffer);
                    }
                }
            }

            jsonBuffer.Position  = 0;
            indexBuffer.Position = 0;
            return (jsonBuffer, indexBuffer);
        }
Beispiel #7
0
 /// <summary>
 /// Initializes the creator: wraps <paramref name="readStream"/> in a
 /// <c>BgzipTextReader</c>, keeps <paramref name="writeStream"/> for later use, and
 /// creates an empty set of processed chromosome names plus two benchmark instances.
 /// </summary>
 /// <param name="readStream">block-gzip stream that is read through a text reader</param>
 /// <param name="writeStream">stream stored in <c>_writeStream</c></param>
 public IndexCreator(BlockGZipStream readStream, Stream writeStream)
 {
     _reader = new BgzipTextReader(readStream);
     _writeStream = writeStream;

     // starts empty
     _processedChromosome = new HashSet<string>();

     _chromBenchmark = new Benchmark();
     _benchmark = new Benchmark();
 }
Beispiel #8
0
 /// <summary>
 /// Disposes the same compressing <c>BlockGZipStream</c> twice; the test passes as
 /// long as neither call throws.
 /// </summary>
 public void DoubleDispose()
 {
     using (var backingStream = new MemoryStream())
     {
         var compressor = new BlockGZipStream(backingStream, CompressionMode.Compress);

         for (var attempt = 0; attempt < 2; attempt++)
         {
             compressor.Dispose();
         }
     }
 }
Beispiel #9
0
        /// <summary>
        /// Stitches three per-chromosome JSON streams (each with genes) into one output,
        /// decompresses it, and checks that the header, the first/last positions, and all
        /// gene names are present and that the result parses as JSON.
        /// </summary>
        public void All_jsons_with_genes()
        {
            var jsonInputs  = new Stream[3];
            var jasixInputs = new Stream[3];

            (jsonInputs[0], jasixInputs[0]) = GetJsonStreams(ChromosomeUtilities.Chr1, true);
            (jsonInputs[1], jasixInputs[1]) = GetJsonStreams(ChromosomeUtilities.Chr2, true);
            (jsonInputs[2], jasixInputs[2]) = GetJsonStreams(ChromosomeUtilities.Chr3, true);

            var stitched = new MemoryStream();

            using (var stitcher = new JsonStitcher(jsonInputs, jasixInputs, stitched, true))
            {
                stitcher.Stitch();
            }

            stitched.Position = 0;

            // decompress the stitched output, normalizing every line ending to '\n'
            var builder = new StringBuilder();
            using (var bgZipStream = new BlockGZipStream(stitched, CompressionMode.Decompress))
            {
                using (var reader = new StreamReader(bgZipStream))
                {
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        builder.Append(line).Append('\n');
                    }
                }
            }

            var fullJson = builder.ToString();

            // the header plus the first and last position of each chromosome must survive the merge
            var expectedFragments = new[]
            {
                "\"header\":{\"annotator\":\"NirvanaTest\"",
                "{\"chromosome\":\"chr1\",\"position\":100}",
                "{\"chromosome\":\"chr1\",\"position\":122}",
                "{\"chromosome\":\"chr2\",\"position\":200}",
                "{\"chromosome\":\"chr2\",\"position\":222}",
                "{\"chromosome\":\"chr3\",\"position\":300}",
                "{\"chromosome\":\"chr3\",\"position\":322}"
            };

            foreach (string fragment in expectedFragments)
            {
                Assert.Contains(fragment, fullJson);
            }

            // every gene from every input must be present
            var expectedGenes = new[] { "gene1A", "gene1B", "gene2A", "gene2B", "gene3A", "gene3B" };

            foreach (string gene in expectedGenes)
            {
                Assert.Contains(gene, fullJson);
            }

            // finally, the merged text has to be well-formed JSON
            Assert.NotNull(JObject.Parse(fullJson));
        }
        /// <summary>
        /// Writes a mix of short lines, empty writes and three block-sized lines through a
        /// <c>BgzipTextWriter</c>, reads them back with a <c>BgzipTextReader</c>, and checks
        /// both the recovered lines and the reader positions observed after each read.
        /// </summary>
        public void BgzipTextWriter_EndToEnd()
        {
            var blockSizedLine = new string('*', BlockGZipStream.BlockGZipFormatCommon.BlockSize);
            var lines          = new List<string>();
            var positions      = new List<long>();

            using (var memoryStream = new MemoryStream())
            {
                using (var compressor = new BlockGZipStream(memoryStream, CompressionMode.Compress, true))
                {
                    using (var writer = new BgzipTextWriter(compressor))
                    {
                        writer.Flush();
                        writer.WriteLine("BOB");
                        writer.WriteLine();
                        writer.Flush();
                        writer.Write("AB");
                        writer.Write("");
                        writer.Write("C");
                        writer.Write(" ");
                        writer.WriteLine("123");
                        writer.WriteLine(blockSizedLine);
                        writer.WriteLine(blockSizedLine);
                        writer.WriteLine(blockSizedLine);
                    }
                }

                memoryStream.Position = 0;

                using (var reader = new BgzipTextReader(new BlockGZipStream(memoryStream, CompressionMode.Decompress)))
                {
                    string line;
                    do
                    {
                        line = reader.ReadLine();

                        // the position is recorded after every read, including the final null read
                        positions.Add(reader.Position);

                        if (line != null)
                        {
                            lines.Add(line);
                        }
                    } while (line != null);
                }
            }

            Assert.Equal(6, lines.Count);
            Assert.Equal("BOB", lines[0]);
            Assert.Equal(0, lines[1].Length);
            Assert.Equal("ABC 123", lines[2]);
            Assert.Equal(blockSizedLine, lines[3]);
            Assert.Equal(4, positions[0]);
            Assert.Equal(5, positions[1]);
            Assert.Equal(13, positions[2]);
            Assert.Equal(43843598, positions[3]);
            Assert.Equal(87097359, positions[4]);
        }
Beispiel #11
0
        /// <summary>
        /// Writes the genes to the universal gene file through a block-gzip compressed
        /// stream, logging a progress message before and after.
        /// </summary>
        /// <param name="logger">receives the progress messages</param>
        /// <param name="genes">genes handed to the <c>UgaGeneWriter</c></param>
        private static void WriteGenes(ILogger logger, UgaGene[] genes)
        {
            logger.Write($"- writing genes to {Path.GetFileName(ExternalFiles.UniversalGeneFilePath)}... ");

            using (var bgzipStream = new BlockGZipStream(FileUtilities.GetCreateStream(ExternalFiles.UniversalGeneFilePath), CompressionMode.Compress))
            {
                using (var geneWriter = new UgaGeneWriter(bgzipStream))
                {
                    geneWriter.Write(genes);
                }
            }

            logger.WriteLine("finished");
        }
Beispiel #12
0
        /// <summary>
        /// Compresses a string into block-gzip format and returns the compressed bytes.
        /// </summary>
        /// <param name="s">text to compress; encoded with <c>StreamWriter</c>'s default encoding</param>
        /// <param name="compressionLevel">
        /// compression level forwarded to the <c>BlockGZipStream</c> (previously this
        /// argument was accepted but silently ignored)
        /// </param>
        /// <returns>the compressed representation of <paramref name="s"/></returns>
        public static byte[] GetCompressedBlock(string s, int compressionLevel = 1)
        {
            using (var buffer = new MemoryStream())
            {
                // the buffer is left open so it can be copied after the compressor is disposed
                using (var bgzipStream = new BlockGZipStream(buffer, CompressionMode.Compress, true, compressionLevel))
                {
                    using (var writer = new StreamWriter(bgzipStream))
                    {
                        writer.Write(s);
                    }
                }

                return buffer.ToArray();
            }
        }
Beispiel #13
0
        /// <summary>
        /// Round-trips <c>_expectedDecompressedBuffer</c> through a block-gzip file on disk.
        /// Also checks that reading from a compressing stream and writing to a decompressing
        /// stream both raise <c>CompressionException</c>, and that the writer reports the
        /// expected position after the data has been written.
        /// The temporary file is removed when the test finishes, whether it passes or fails.
        /// </summary>
        public void FileIO()
        {
            var    observedDecompressedBuffer = new byte[_expectedDecompressedBuffer.Length];
            string randomPath = RandomPath.GetRandomPath();

            try
            {
                // compress the data
                long observedPosition;

                using (var writer = new BlockGZipStream(FileUtilities.GetCreateStream(randomPath), CompressionMode.Compress))
                {
                    writer.Write(_expectedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);
                    observedPosition = writer.Position;

                    // reading from a compressing stream is not allowed
                    var exception = Record.Exception(() =>
                    {
                        var buffer = new byte[10];
                        // ReSharper disable once AccessToDisposedClosure
                        writer.Read(buffer, 0, 1);
                    });

                    Assert.NotNull(exception);
                    Assert.IsType<CompressionException>(exception);
                }

                const long expectedPosition = 979042574;

                Assert.Equal(expectedPosition, observedPosition);

                // decompress the data
                using (var reader = new BlockGZipStream(FileUtilities.GetReadStream(randomPath), CompressionMode.Decompress))
                {
                    reader.Read(observedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);

                    // writing to a decompressing stream is not allowed
                    var exception = Record.Exception(() =>
                    {
                        var buffer = new byte[10];
                        // ReSharper disable once AccessToDisposedClosure
                        reader.Write(buffer, 0, 1);
                    });

                    Assert.NotNull(exception);
                    Assert.IsType<CompressionException>(exception);
                }

                Assert.Equal(_expectedDecompressedBuffer, observedDecompressedBuffer);
            }
            finally
            {
                // don't leave the scratch file behind; File.Delete is a no-op if it was never created
                System.IO.File.Delete(randomPath);
            }
        }
Beispiel #14
0
        /// <summary>
        /// Round-trips <c>_expectedDecompressedBuffer</c> through an in-memory block-gzip
        /// stream at compression level 9. Verifies the capability flags of both directions,
        /// that the unsupported direction throws <c>CompressionException</c>, and that the
        /// compressed output is 9629 bytes long.
        /// </summary>
        public void StreamIO()
        {
            var    roundTripped = new byte[_expectedDecompressedBuffer.Length];
            byte[] compressed;

            using (var buffer = new MemoryStream())
            {
                // compression pass
                using (var compressor = new BlockGZipStream(buffer, CompressionMode.Compress, true, 9))
                {
                    // ReSharper disable once AccessToDisposedClosure
                    Assert.Throws<CompressionException>(() => { compressor.Read(roundTripped, 0, 1); });

                    Assert.True(compressor.CanWrite);
                    Assert.False(compressor.CanRead);
                    Assert.False(compressor.CanSeek);

                    compressor.Write(_expectedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);
                }

                compressed      = buffer.ToArray();
                buffer.Position = 0;

                // decompression pass
                using (var decompressor = new BlockGZipStream(buffer, CompressionMode.Decompress))
                {
                    // ReSharper disable once AccessToDisposedClosure
                    Assert.Throws<CompressionException>(() => { decompressor.Write(_expectedDecompressedBuffer, 0, 1); });

                    Assert.False(decompressor.CanWrite);
                    Assert.True(decompressor.CanRead);
                    Assert.True(decompressor.CanSeek);

                    decompressor.Read(roundTripped, 0, _expectedDecompressedBuffer.Length);
                }
            }

            Assert.Equal(_expectedDecompressedBuffer, roundTripped);
            Assert.Equal(9629, compressed.Length);
        }
Beispiel #15
0
 /// <summary>
 /// Creates the result file and writes every block taken from <c>writingQueue</c>
 /// to it through a compressing <c>BlockGZipStream</c>, until the
 /// <c>stopWorkingTask</c> marker is dequeued.
 /// </summary>
 public void WritingTask()
 {
     using (var resultFile = new FileStream(options.ResultFileName, FileMode.Create))
     using (var compressingStream = new BlockGZipStream(resultFile, BlockGZipStreamMode.Compressing))
     {
         // the stop marker itself is never written
         for (var block = writingQueue.Dequeue(); block != stopWorkingTask; block = writingQueue.Dequeue())
         {
             compressingStream.WriteBlock(block);
         }
     }
 }
Beispiel #16
0
        /// <summary>
        /// Opens the source file for decompression and enqueues each of its blocks, in
        /// order, onto <c>readingQueue</c>. Afterwards enqueues the <c>stopWorkingTask</c>
        /// marker <c>options.ThreadsCount</c> times.
        /// </summary>
        private void ReadingTask()
        {
            using (var sourceFile = new FileStream(options.SourceFileName, FileMode.Open, FileAccess.Read))
            using (var gzipStream = new BlockGZipStream(sourceFile, BlockGZipStreamMode.Decompressing))
            {
                var blockIndex = 0;
                while (blockIndex < gzipStream.Lenght)
                {
                    readingQueue.Enqueue(gzipStream.ReadBlock(blockIndex));
                    blockIndex++;
                }
            }

            // one stop marker per configured thread
            var markersLeft = options.ThreadsCount;
            while (markersLeft > 0)
            {
                readingQueue.Enqueue(stopWorkingTask);
                markersLeft--;
            }
        }
Beispiel #17
0
        /// <summary>
        /// Verifies that <c>Length</c>, <c>SetLength</c> and <c>Seek</c> all throw
        /// <c>NotSupportedException</c> on a compressing <c>BlockGZipStream</c>.
        /// </summary>
        public void NotImplementedMethods()
        {
            using (var buffer = new MemoryStream())
            {
                // ReSharper disable AccessToDisposedClosure
                using (var compressor = new BlockGZipStream(buffer, CompressionMode.Compress, true))
                {
                    Assert.Throws<NotSupportedException>(() =>
                    {
                        // ReSharper disable once UnusedVariable
                        long length = compressor.Length;
                    });

                    Assert.Throws<NotSupportedException>(() => { compressor.SetLength(10); });

                    Assert.Throws<NotSupportedException>(() => { compressor.Seek(0, SeekOrigin.Begin); });
                }
                // ReSharper restore AccessToDisposedClosure
            }
        }
        /// <summary>
        /// For every block-based offset, opens the VCF at that offset, takes the first
        /// genomic position found there and adjusts it via <c>FindProperStartPosition</c>.
        /// Consecutive equal results are merged before returning.
        /// </summary>
        /// <param name="blockBasedOffsets">offsets at which the VCF is opened</param>
        /// <param name="vcfUrl">location of the VCF</param>
        /// <param name="geneIntervalForest">passed through to <c>FindProperStartPosition</c></param>
        /// <param name="refNameToChromosome">passed through to <c>FindProperStartPosition</c></param>
        /// <returns>the adjusted start positions with consecutive duplicates merged</returns>
        private static AnnotationPosition[] AdjustPartitionGenomicStarts(IReadOnlyList <long> blockBasedOffsets, string vcfUrl,
                                                                         IIntervalForest <IGene> geneIntervalForest, IDictionary <string, IChromosome> refNameToChromosome)
        {
            var adjusted = new AnnotationPosition[blockBasedOffsets.Count];

            for (var i = 0; i < blockBasedOffsets.Count; i++)
            {
                // only the very first partition is flagged as such for GetFirstGenomicPosition
                bool isFirstPartition = i == 0;

                using (var vcfStream = PersistentStreamUtils.GetReadStream(vcfUrl, blockBasedOffsets[i]))
                {
                    using (var gzipStream = new BlockGZipStream(vcfStream, CompressionMode.Decompress))
                    {
                        var firstPosition = GetFirstGenomicPosition(gzipStream, isFirstPartition);
                        adjusted[i] = FindProperStartPosition(firstPosition, geneIntervalForest, refNameToChromosome);
                    }
                }
            }

            return MergeConsecutiveEqualValues(adjusted).ToArray();
        }
        /// <summary>
        /// Compresses one line of text, then derives two damaged copies: one with the last
        /// 10 bytes cut off, and one that keeps only the block header followed by 500
        /// random bytes. Reading either copy must throw <c>CompressionException</c>.
        /// </summary>
        public void ReadBlockCorrupted()
        {
            using (var validStream = new MemoryStream())
            using (var truncatedStream = new MemoryStream())
            using (var corruptStream = new MemoryStream())
            {
                using (var compressor = new BlockGZipStream(validStream, CompressionMode.Compress, true))
                {
                    using (var writer = new StreamWriter(compressor, Encoding.ASCII, 4096))
                    {
                        writer.WriteLine("The quick brown fox jumped over the lazy dog.");
                    }
                }

                var compressed = validStream.ToArray();

                // truncated copy: everything except the last 10 bytes
                truncatedStream.Write(compressed, 0, compressed.Length - 10);
                truncatedStream.Position = 0;

                // corrupt copy: the original block header followed by random bytes
                corruptStream.Write(compressed, 0, BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength);
                var noise = BlockStreamTests.GetRandomBytes(500);
                corruptStream.Write(noise, 0, noise.Length);
                corruptStream.Position = 0;

                var scratch = new byte[60];

                Assert.Throws<CompressionException>(() =>
                {
                    using (var decompressor = new BlockGZipStream(truncatedStream, CompressionMode.Decompress))
                    {
                        decompressor.Read(scratch, 0, scratch.Length);
                    }
                });

                Assert.Throws<CompressionException>(() =>
                {
                    using (var decompressor = new BlockGZipStream(corruptStream, CompressionMode.Decompress))
                    {
                        decompressor.Read(scratch, 0, scratch.Length);
                    }
                });
            }
        }
Beispiel #20
0
        /// <summary>
        /// Prints only the header of the ClinVar test resource through a
        /// <c>QueryProcessor</c> and compares it, character for character, with the
        /// expected header JSON followed by a CRLF.
        /// </summary>
        public void GetHeaderOnly()
        {
            // kept on a single line: a regular string literal must not contain a raw line break
            const string expectedHeader = "{\"header\":{\"annotator\":\"Nirvana 2.0.9.0\",\"creationTime\":\"2018-04-30 17:17:23\",\"genomeAssembly\":\"GRCh37\",\"schemaVersion\":6,\"dataVersion\":\"91.26.45\",\"dataSources\":[{\"name\":\"VEP\",\"version\":\"91\",\"description\":\"Ensembl\",\"releaseDate\":\"2018-03-05\"},{\"name\":\"ClinVar\",\"version\":\"20180129\",\"description\":\"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\",\"releaseDate\":\"2018-01-29\"},{\"name\":\"COSMIC\",\"version\":\"84\",\"description\":\"somatic mutation and related details and information relating to human cancers\",\"releaseDate\":\"2018-02-13\"},{\"name\":\"dbSNP\",\"version\":\"150\",\"description\":\"Identifiers for observed variants\",\"releaseDate\":\"2017-04-03\"},{\"name\":\"gnomAD_exome\",\"version\":\"2.0.2\",\"description\":\"Exome allele frequencies from Genome Aggregation Database (gnomAD)\",\"releaseDate\":\"2017-10-05\"},{\"name\":\"gnomAD\",\"version\":\"2.0.2\",\"description\":\"Whole genome allele frequencies from Genome Aggregation Database (gnomAD)\",\"releaseDate\":\"2017-10-05\"},{\"name\":\"MITOMAP\",\"version\":\"20180228\",\"description\":\"Small variants in the MITOMAP human mitochondrial genome database\",\"releaseDate\":\"2018-02-28\"},{\"name\":\"1000 Genomes Project\",\"version\":\"Phase 3 v5a\",\"description\":\"A public catalogue of human variation and genotype data\",\"releaseDate\":\"2013-05-27\"},{\"name\":\"TOPMed\",\"version\":\"freeze_5\",\"description\":\"Allele frequencies from TOPMed data lifted over using dbSNP ids.\",\"releaseDate\":\"2017-08-28\"},{\"name\":\"ClinGen\",\"version\":\"20160414\",\"releaseDate\":\"2016-04-14\"},{\"name\":\"DGV\",\"version\":\"20160515\",\"description\":\"Provides a comprehensive summary of structural variation in the human genome\",\"releaseDate\":\"2016-05-15\"},{\"name\":\"MITOMAP\",\"version\":\"20180228\",\"description\":\"Large structural variants in the MITOMAP human mitochondrial genome database\",\"releaseDate\":\"2018-02-28\"},{\"name\":\"ExAC\",\"version\":\"0.3.1\",\"description\":\"Gene scores from the ExAC project\",\"releaseDate\":\"2016-03-16\"},{\"name\":\"OMIM\",\"version\":\"20180213\",\"description\":\"An Online Catalog of Human Genes and Genetic Disorders\",\"releaseDate\":\"2018-02-13\"},{\"name\":\"phyloP\",\"version\":\"hg19\",\"description\":\"46 way conservation score between humans and 45 other vertebrates\",\"releaseDate\":\"2009-11-10\"}]}}\r\n";

            var readStream  = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("Clinvar20150901.json.gz")), CompressionMode.Decompress);
            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath("Clinvar20150901.json.gz.jsi"));

            var outStream = new MemoryStream();

            // the final 'true' keeps outStream open after the writer is disposed
            using (var writer = new StreamWriter(outStream, Encoding.UTF8, 512, true))
            {
                using (var qp = new QueryProcessor(new StreamReader(readStream), indexStream, writer))
                {
                    // pin the line terminator so the CRLF expectation does not depend on the platform default
                    writer.NewLine = "\r\n";
                    qp.PrintHeaderOnly();
                }
            }

            Assert.NotEqual(0, outStream.Length);
            outStream.Position = 0;

            using (var reader = new StreamReader(outStream))
            {
                string headerLine = reader.ReadToEnd();
                Assert.Equal(expectedHeader, headerLine);
            }
        }
Beispiel #21
0
        /// <summary>
        /// Prints the chromosome list of the ClinVar test resource through a
        /// <c>QueryProcessor</c> and checks that it is 1-21, X and Y, one name per
        /// CRLF-terminated line.
        /// </summary>
        public void GetChromosomeList()
        {
            var readStream  = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("Clinvar20150901.json.gz")), CompressionMode.Decompress);
            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath("Clinvar20150901.json.gz.jsi"));

            var outStream = new MemoryStream();

            // the final 'true' keeps outStream open after the writer is disposed
            using (var writer = new StreamWriter(outStream, Encoding.UTF8, 512, true))
            {
                using (var qp = new QueryProcessor(new StreamReader(readStream), indexStream, writer))
                {
                    // pin the line terminator so the CRLF expectation does not depend on the platform default
                    writer.NewLine = "\r\n";
                    qp.PrintChromosomeList();
                }
            }

            Assert.NotEqual(0, outStream.Length);
            outStream.Position = 0;

            using (var reader = new StreamReader(outStream))
            {
                string chromList = reader.ReadToEnd();
                Assert.Equal("1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n7\r\n8\r\n9\r\n10\r\n11\r\n12\r\n13\r\n14\r\n15\r\n16\r\n17\r\n18\r\n19\r\n20\r\n21\r\nX\r\nY\r\n", chromList);
            }
        }
Beispiel #22
0
        /// <summary>
        /// Wraps <paramref name="stream"/> in the decompressing stream that matches the
        /// given compression algorithm.
        /// </summary>
        /// <param name="stream">the underlying stream</param>
        /// <param name="compressionAlgorithm">selects which wrapper, if any, is applied</param>
        /// <returns>
        /// a <c>BlockGZipStream</c> or <c>GZipStream</c> in decompress mode, or the original
        /// stream itself for any other algorithm
        /// </returns>
        private static Stream GetAppropriateStream(Stream stream, CompressionAlgorithm compressionAlgorithm)
        {
            switch (compressionAlgorithm)
            {
            case CompressionAlgorithm.BlockGZip:
                return new BlockGZipStream(stream, CompressionMode.Decompress);

            case CompressionAlgorithm.GZip:
                return new GZipStream(stream, CompressionMode.Decompress);

            default:
                // any other algorithm: hand the stream back untouched
                return stream;
            }
        }
Beispiel #23
0
 /// <summary>
 /// Writes the collected gene lines, in sorted order, to a block-gzip compressed
 /// stream. When there are gene lines, they are preceded by <c>GeneHeaderLine</c> and
 /// the last one has its trailing comma removed; <c>FooterLine</c> is always written.
 /// </summary>
 /// <param name="stream">destination stream that is wrapped for compression</param>
 private void WriteGeneBlocks(Stream stream)
 {
     using (var bGzipStream = new BlockGZipStream(stream, CompressionMode.Compress, _leaveOutStreamOpen))
     {
         using (var writer = new StreamWriter(bGzipStream))
         {
             var total = _geneLines.Count;

             if (total > 0)
             {
                 writer.WriteLine(GeneHeaderLine);

                 var written = 0;
                 foreach (var geneLine in _geneLines.OrderBy(x => x))
                 {
                     written++;

                     // the final gene line must not end with a comma
                     var isLast = written == total;
                     writer.WriteLine(isLast ? geneLine.TrimEnd(',') : geneLine);
                 }
             }

             // the footer closes the output whether or not any genes were written
             writer.WriteLine(FooterLine);
         }
     }
 }
Beispiel #24
0
        /// <summary>
        /// Picks the decompressing wrapper that matches the compression algorithm.
        /// </summary>
        /// <param name="stream">the underlying stream</param>
        /// <param name="compressionAlgorithm">selects which wrapper, if any, is applied</param>
        /// <returns>
        /// a decompressing <c>BlockGZipStream</c> or <c>GZipStream</c> around
        /// <paramref name="stream"/>, or <paramref name="stream"/> itself for any other algorithm
        /// </returns>
        private static Stream GetAppropriateStream(Stream stream, CompressionAlgorithm compressionAlgorithm)
        {
            if (compressionAlgorithm == CompressionAlgorithm.BlockGZip)
            {
                return new BlockGZipStream(stream, CompressionMode.Decompress);
            }

            if (compressionAlgorithm == CompressionAlgorithm.GZip)
            {
                return new GZipStream(stream, CompressionMode.Decompress);
            }

            // every other algorithm gets the stream back unchanged
            return stream;
        }
Beispiel #25
0
        /// <summary>
        /// Derives the block size, thread count and task count from the available RAM.
        /// Half of the available RAM is used as the budget. For "compress" the block size
        /// is the per-thread share of that budget capped at 1 MiB; otherwise it is the
        /// largest stored block size reported by the source archive. If the budget cannot
        /// hold even one block, the problem is reported through <c>BadOption</c> and the
        /// remaining configuration is skipped.
        /// </summary>
        private void ConfigureResourceUtilizationOptions()
        {
            // only half of the available RAM is budgeted for block buffers
            availableRamBytes = availableRamBytes / 2;

            if (CommandName == "compress")
            {
                ReadingBlockSizeBytes = availableRamBytes / ThreadsCount;
                int maxBlockSizeBytes = 1 * 1024 * 1024;
                ReadingBlockSizeBytes = ReadingBlockSizeBytes > maxBlockSizeBytes ? maxBlockSizeBytes : ReadingBlockSizeBytes;
            }
            else
            {
                using (Stream fileStream = new FileStream(SourceFileName, FileMode.Open, FileAccess.Read))
                {
                    using (BlockGZipStream gzipStream = new BlockGZipStream(fileStream, BlockGZipStreamMode.Decompressing))
                    {
                        ReadingBlockSizeBytes = gzipStream.GetMaxStoredBlockSizeBytes();
                    }
                }
            }

            // A non-positive block size, or a block that does not fit into the budget,
            // would make the divisions below throw DivideByZeroException (directly, or via
            // a thread count clamped to 0). Report it and stop instead of crashing.
            // NOTE(review): BadOption may already terminate the program; the return only
            // matters if it does not - confirm.
            if (ReadingBlockSizeBytes <= 0 || availableRamBytes < ReadingBlockSizeBytes)
            {
                BadOption("source-filename", "Sorry. You don't have enough ram.");
                return;
            }

            // never run more threads than there are blocks that fit into the budget
            if (availableRamBytes / ReadingBlockSizeBytes < ThreadsCount)
            {
                ThreadsCount = availableRamBytes / ReadingBlockSizeBytes;
            }

            var taskByThread = availableRamBytes / ReadingBlockSizeBytes / ThreadsCount;

            if (taskByThread > TasksByTreadByDefault)
            {
                taskByThread = TasksByTreadByDefault;
            }
            TaskCount = taskByThread * ThreadsCount;
        }
Beispiel #26
0
        /// <summary>
        /// Builds an index for the COSMIC indels resource into a temporary file, reads it
        /// back and checks the first-variant positions reported for four chromosomes.
        /// The temporary file is deleted even when the test fails.
        /// </summary>
        public void TestIndexCreation()
        {
            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("cosmicv72.indels.json.gz")), CompressionMode.Decompress);
            var tempFile   = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                using (var indexCreator = new IndexCreator(readStream, FileUtilities.GetCreateStream(tempFile)))
                {
                    indexCreator.CreateIndex();
                }

                JasixIndex readBackIndex;

                using (var stream = FileUtilities.GetReadStream(tempFile))
                {
                    readBackIndex = new JasixIndex(stream);
                }

                Assert.Equal(1591, readBackIndex.GetFirstVariantPosition("chr1", 9775924, 9775924));
                Assert.Equal(11500956299, readBackIndex.GetFirstVariantPosition("chr2", 16081096, 16081096));
                Assert.Equal(372100991296, readBackIndex.GetFirstVariantPosition("chr20", 36026164, 36026164));
                Assert.Equal(377682846863, readBackIndex.GetFirstVariantPosition("chrX", 66765044, 66765044));
            }
            finally
            {
                // clean up regardless of the outcome; File.Delete does nothing if the file was never created
                File.Delete(tempFile);
            }
        }
Beispiel #27
0
        /// <summary>
        /// Feeds plain (uncompressed) ASCII text, and a copy of it cut down to its first
        /// 17 bytes, to a decompressing <c>BlockGZipStream</c>. Both attempts must throw
        /// <c>CompressionException</c>.
        /// </summary>
        public void InvalidHeader()
        {
            const string dummyString = "The quick brown fox jumped over the lazy dog.";

            using (var plainStream = new MemoryStream())
            using (var truncatedStream = new MemoryStream())
            {
                // the stream holds raw text, not block-gzip data
                using (var writer = new StreamWriter(plainStream, Encoding.ASCII, 4096, true))
                {
                    writer.WriteLine(dummyString);
                }

                var scratch = plainStream.ToArray();
                truncatedStream.Write(scratch, 0, 17);

                plainStream.Position     = 0;
                truncatedStream.Position = 0;

                // attempt to decompress the full-length text
                Assert.Throws<CompressionException>(() =>
                {
                    using (var reader = new BlockGZipStream(plainStream, CompressionMode.Decompress, true))
                    {
                        reader.Read(scratch, 0, scratch.Length);
                    }
                });

                // attempt to decompress the truncated text
                Assert.Throws<CompressionException>(() =>
                {
                    using (var reader = new BlockGZipStream(truncatedStream, CompressionMode.Decompress, true))
                    {
                        reader.Read(scratch, 0, scratch.Length);
                    }
                });
            }
        }
Beispiel #28
0
        /// <summary>
        /// Annotates the VCF referenced by <paramref name="config"/> and uploads the resulting JSON file and
        /// its index to the output location named in the configuration.
        /// </summary>
        /// <param name="config">Annotation request; supplies the id, VCF URL, annotation range and output location.</param>
        /// <param name="context">Lambda context; supplies the logger and the remaining execution time.</param>
        /// <returns>
        /// The value of <c>GetSuccessOutput</c> when the run completes, or the value of <c>HandleException</c>
        /// when any step inside the try block throws.
        /// </returns>
        // ReSharper disable once UnusedMember.Global
        public AnnotationResult Run(AnnotationConfig config, ILambdaContext context)
        {
            var    output   = new AnnotationResult { id = config.id };
            string topicArn = null;
            var    log      = new StringBuilder();

            try
            {
                LogUtilities.UpdateLogger(context.Logger, log);
                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);
                LogUtilities.LogObject("Config", config);
                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });

                // start from a clean slate before any output is produced
                LambdaUtilities.GarbageCollect();
                LambdaUtilities.DeleteTempOutput();

                topicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);

                string inputUrl = config.vcfUrl;

                using (var resources = GetAnnotationResources(config))
                {
                    // NOTE(review): -1 presumably means there is nothing to annotate in the requested range - confirm
                    if (resources.InputStartVirtualPosition == -1)
                    {
                        return GetSuccessOutput(output);
                    }

                    var  startPosition = VirtualPosition.From(resources.InputStartVirtualPosition);
                    long vcfOffset     = startPosition.FileOffset;

                    // first pass over the VCF: collect the variant positions for the requested range
                    using (var preloadSource = PersistentStreamUtils.GetReadStream(inputUrl, vcfOffset))
                    {
                        var preloadVcf = new BlockGZipStream(preloadSource, CompressionMode.Decompress);
                        // NOTE(review): annotationRange is dereferenced here without the null check used further down - confirm intended
                        var preloadRange = config.annotationRange.ToGenomicRange(resources.SequenceProvider.RefNameToChromosome);
                        resources.GetVariantPositions(preloadVcf, preloadRange);
                    }

                    Logger.WriteLine("Scan for positions to preload complete.");

                    using (var aes = new AesCryptoServiceProvider())
                    {
                        string       jsonPath  = Path.GetTempPath() + LambdaUrlHelper.JsonSuffix;
                        string       jasixPath = jsonPath + LambdaUrlHelper.JsonIndexSuffix;
                        FileMetadata jsonMetadata, jasixMetadata;

                        // second pass over the VCF; the header stream is only opened when a range was requested
                        using (var vcfStream = new BlockGZipStream(PersistentStreamUtils.GetReadStream(inputUrl, vcfOffset), CompressionMode.Decompress))
                        using (var headerStream = config.annotationRange != null
                            ? new BlockGZipStream(PersistentStreamUtils.GetReadStream(inputUrl), CompressionMode.Decompress)
                            : null)
                        // JSON output: temp file <- AES encryption <- MD5 bookkeeping
                        using (var jsonFile = FileUtilities.GetCreateStream(jsonPath))
                        using (var jsonCrypto = new CryptoStream(jsonFile, aes.CreateEncryptor(), CryptoStreamMode.Write))
                        using (var jsonMd5 = new MD5Stream(jsonCrypto))
                        // index output: temp file <- AES encryption <- MD5 bookkeeping
                        using (var jasixFile = FileUtilities.GetCreateStream(jasixPath))
                        using (var jasixCrypto = new CryptoStream(jasixFile, aes.CreateEncryptor(), CryptoStreamMode.Write))
                        using (var jasixMd5 = new MD5Stream(jasixCrypto))
                        {
                            IVcfFilter vcfFilter;
                            if (config.annotationRange == null)
                            {
                                vcfFilter = new NullVcfFilter();
                            }
                            else
                            {
                                vcfFilter = new VcfFilter(config.annotationRange.ToGenomicRange(resources.SequenceProvider.RefNameToChromosome));
                            }

                            // the compressor is disposed before the metadata is read from the MD5 streams
                            using (var jsonCompressor = new BlockGZipStream(jsonMd5, CompressionMode.Compress))
                            {
                                StreamAnnotation.Annotate(headerStream, vcfStream, jsonCompressor, jasixMd5, resources, vcfFilter, true);
                            }

                            Logger.WriteLine("Annotation done.");

                            jsonMetadata  = jsonMd5.GetFileMetadata();
                            jasixMetadata = jasixMd5.GetFileMetadata();
                        }

                        // every output stream is closed at this point, so the temp files can be uploaded
                        output.filePath = S3Utilities.GetKey(config.outputDir.path, config.outputPrefix + LambdaUrlHelper.JsonSuffix);
                        string jasixKey = output.filePath + LambdaUrlHelper.JsonIndexSuffix;
                        string bucket   = config.outputDir.bucketName;

                        var uploader = config.outputDir.GetS3Client(context.RemainingTime);
                        uploader.DecryptUpload(bucket, jasixKey, jasixPath, aes, jasixMetadata);
                        uploader.DecryptUpload(bucket, output.filePath, jsonPath, aes, jsonMetadata);

                        Logger.WriteLine("Nirvana result files uploaded.");
                    }
                }

                LambdaUtilities.DeleteTempOutput();

                // a missing output key is routed through the same exception handling as any other failure
                if (string.IsNullOrEmpty(output.filePath))
                {
                    throw new FileNotFoundException();
                }

                return GetSuccessOutput(output);
            }
            catch (Exception exception)
            {
                LambdaUtilities.DeleteTempOutput();
                return HandleException(log, output, exception, topicArn);
            }
        }