/// <summary>
/// Checks that Seek throws NotSupportedException on both the compression stream and
/// the decompression stream, for each of the three SeekOrigin values.
/// </summary>
public void Gzip_Seek()
{
    // Shared check: the call has to throw, and the exception has to be a NotSupportedException
    Action<Action> expectNotSupported = call =>
    {
        try
        {
            call();
            Assert.Fail("Method call should have thrown an exception");
        }
        catch (Exception ex)
        {
            Assert.IsInstanceOfType(ex, typeof(NotSupportedException));
        }
    };

    // Compression stream
    using (GzipWriter stream = new GzipWriter(new MemoryStream()))
    {
        expectNotSupported(() => stream.Seek(50, SeekOrigin.Begin));
        expectNotSupported(() => stream.Seek(0, SeekOrigin.Current));
        expectNotSupported(() => stream.Seek(-50, SeekOrigin.End));
    }

    // Decompression stream
    using (GzipReader stream = new GzipReader(new MemoryStream(s_sampledata)))
    {
        expectNotSupported(() => stream.Seek(50, SeekOrigin.Begin));
        expectNotSupported(() => stream.Seek(0, SeekOrigin.Current));
        expectNotSupported(() => stream.Seek(-50, SeekOrigin.End));
    }
}
/// <summary>
/// Round-trips the sample data through GzipWriter and GzipReader and checks that the
/// compressed form is smaller and the decompressed form matches the original bytes.
/// </summary>
public void Gzip_CompressDecompress()
{
    using (MemoryStream source = new MemoryStream(s_sampledata))
    using (MemoryStream compressed = new MemoryStream())
    using (MemoryStream restored = new MemoryStream())
    {
        // Compress the sample data into the intermediate stream
        using (GzipWriter compressor = new GzipWriter(compressed, CompressionLevel.Optimal, true))
        {
            source.CopyTo(compressor);
        }

        // The compressed data should be smaller than the source data
        Assert.IsTrue(compressed.Length < source.Length);

        // Rewind and decompress into a SEPARATE stream. Decompressing back into "source"
        // would write straight into the s_sampledata array that MemoryStream wraps, so the
        // content comparison below would compare that array with a copy of itself.
        compressed.Position = 0;
        using (GzipReader decompressor = new GzipReader(compressed, true))
        {
            decompressor.CopyTo(restored);
        }

        // Ensure that the original data has been restored (expected value first)
        Assert.AreEqual(s_sampledata.LongLength, restored.Length);
        Assert.IsTrue(s_sampledata.SequenceEqual(restored.ToArray()));
    }
}
/// <summary>
/// Checks that Flush on a compression stream pushes pending output to the underlying
/// stream exactly once, leaves the stream writable, and that Flush on a decompression
/// stream does not throw.
/// </summary>
public void Gzip_Flush()
{
    // Compression stream
    using (MemoryStream compressed = new MemoryStream())
    {
        using (GzipWriter stream = new GzipWriter(compressed, true))
        {
            stream.Write(s_sampledata, 0, s_sampledata.Length);

            // Output length before and after the first flush
            long unflushed = compressed.Length;
            stream.Flush();
            long flushedOnce = compressed.Length;

            // The expectation is that the output stream is longer after the flush
            Assert.IsTrue(flushedOnce > unflushed);

            // A second flush with no new data should not change the output
            // (expected value first, so a failure message reads correctly)
            stream.Flush();
            Assert.AreEqual(flushedOnce, compressed.Length);

            // The stream should still be writable after a flush operation
            stream.Write(s_sampledata, 0, s_sampledata.Length / 10);
        }
    }

    // Decompression stream: Flush only has to return without throwing
    using (GzipReader stream = new GzipReader(new MemoryStream(s_sampledata)))
    {
        stream.Flush();
    }
}
/// <summary>
/// Merges several binned-count files into one. A single input is copied as-is (when it
/// exists); otherwise column 4 of every line of the first input is replaced by the
/// weighted average of the per-sample counts, where each sample's weight is the
/// reciprocal of its on-target median bin count, normalized so the weights sum to 1.
/// </summary>
/// <param name="binnedPaths">Paths of the per-sample binned files.</param>
/// <param name="mergedBinnedPath">Path of the merged output file.</param>
/// <param name="manifest">Optional manifest passed through to BinCounts.</param>
private static void GetWeightedAverageBinCount(IEnumerable<string> binnedPaths, string mergedBinnedPath,
    NexteraManifest manifest = null)
{
    // Materialize once: the sequence was previously re-enumerated by Count(), First()
    // and ElementAt() inside the loop, which is quadratic and unsafe for lazy sources.
    List<string> paths = binnedPaths.ToList();
    int sampleCount = paths.Count;

    if (sampleCount == 1) // copy file
    {
        string onlyPath = paths[0];
        if (File.Exists(onlyPath))
        {
            // Overwrite in one call instead of delete-then-copy
            File.Copy(onlyPath, mergedBinnedPath, true);
        }
        return;
    }

    // merge normal samples
    double[] weights = new double[sampleCount];
    List<double>[] binCountsBySample = new List<double>[sampleCount];
    for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
    {
        var binCounts = new BinCounts(paths[sampleIndex], manifest: manifest);
        List<double> counts = binCounts.AllCounts;

        // If a manifest is available, get the median of bins overlapping the targeted regions only.
        // For small panels, there could be a lot of bins with zero count and the median would be 0
        // if taken over all the bins, resulting in division by zero.
        double median = binCounts.OnTargetMedianBinCount;
        weights[sampleIndex] = median > 0 ? 1.0 / median : 0;
        binCountsBySample[sampleIndex] = counts;
    }

    // Normalize so weights sum to 1.
    // NOTE(review): if every median is 0 the sum is 0 and the weights become NaN — unchanged
    // from the previous behavior; confirm whether callers can hit this.
    double weightSum = weights.Sum();
    for (int i = 0; i < sampleCount; i++)
    {
        weights[i] /= weightSum;
    }

    // Compute the weighted average of bin counts across samples, using the first file as
    // the template for every column except the count. First() keeps the
    // InvalidOperationException for an empty input list.
    using (GzipReader reader = new GzipReader(paths.First()))
    using (GzipWriter writer = new GzipWriter(mergedBinnedPath))
    {
        string line;
        int lineIdx = 0;
        while ((line = reader.ReadLine()) != null)
        {
            string[] toks = line.Split('\t');
            double weightedBinCount = 0;
            for (int i = 0; i < sampleCount; i++)
            {
                weightedBinCount += weights[i] * binCountsBySample[i][lineIdx];
            }
            toks[3] = String.Format("{0}", weightedBinCount);
            writer.WriteLine(String.Join("\t", toks));
            lineIdx++;
        }
    }
}
/// <summary>
/// Writes the sample data to "thethreemusketeers.gz" in the current directory so the
/// result can be checked with an external tool.
/// </summary>
public void Gzip_CompressExternal()
{
    // "thethreemusketeers.txt" is set to Copy Always to the output directory; the file
    // produced here can be diffed against it after running the external tool.
    string outputPath = Path.Combine(Environment.CurrentDirectory, "thethreemusketeers.gz");
    FileStream outputFile = File.Create(outputPath);

    using (GzipWriter writer = new GzipWriter(outputFile))
    {
        writer.Write(s_sampledata);
        writer.Flush();
    }
}
/// <summary>
/// Writes one tab-separated line per bin (chromosome, start, stop, count with two
/// decimals, GC) to a file opened through GzipWriter.
/// </summary>
/// <param name="outfile">Path of the output file.</param>
/// <param name="bins">Bins to write, in order.</param>
public static void WriteToTextFile(string outfile, List<GenomicBin> bins)
{
    using (GzipWriter writer = new GzipWriter(outfile))
    {
        foreach (GenomicBin current in bins)
        {
            string row = string.Format(
                "{0}\t{1}\t{2}\t{3:F2}\t{4}",
                current.Chromosome,
                current.Start,
                current.Stop,
                current.Count,
                current.GC);
            writer.WriteLine(row);
        }
    }
}
/// <summary>
/// Writes the bytes of <paramref name="val"/> to the stream obtained from
/// <paramref name="self"/> and returns how many bytes were written.
/// </summary>
public static int Write(GzipWriter /*!*/ self, [NotNull] MutableString /*!*/ val)
{
    var target = self.GetStream();

    // TODO: this can be optimized (add MutableString.WriteTo(Stream))
    byte[] payload = val.ToByteArray();
    int byteCount = payload.Length;

    target.Write(payload, 0, byteCount);
    return byteCount;
}
/// <summary>
/// Compresses the sample data at NoCompression, Fastest and Optimal and checks the
/// expected ordering of the resulting sizes relative to each other and to the input.
/// </summary>
public void Gzip_CompressionLevel()
{
    // Compresses the sample data at the given level and reports the output length as
    // observed after Flush, while the writer is still open.
    Func<CompressionLevel, long> compressedSize = level =>
    {
        using (MemoryStream compressed = new MemoryStream())
        using (GzipWriter compressor = new GzipWriter(compressed, level))
        {
            compressor.Write(s_sampledata, 0, s_sampledata.Length);
            compressor.Flush();
            return compressed.Length;
        }
    };

    using (MemoryStream source = new MemoryStream(s_sampledata))
    {
        long sizeNone = compressedSize(CompressionLevel.NoCompression);
        long sizeFastest = compressedSize(CompressionLevel.Fastest);
        long sizeOptimal = compressedSize(CompressionLevel.Optimal);

        // Fastest should produce better results than no compression
        Assert.IsTrue(sizeFastest < sizeNone);

        // Optimal should produce better results than fastest
        Assert.IsTrue(sizeOptimal < sizeFastest);

        // Fastest should be smaller than the original length
        Assert.IsTrue(sizeFastest < source.Length);

        // No compression should be BIGGER than the original length
        Assert.IsTrue(source.Length < sizeNone);
    }
}
/// <summary>
/// Writes one tab-separated line per bin (chromosome, start, end, score, segment number)
/// to <paramref name="outPath"/>. The segment number is incremented whenever
/// IsNewSegment reports that a bin opens a new segment.
/// </summary>
/// <param name="outPath">Path of the output file.</param>
/// <param name="segmentationResults">Segments whose start positions mark segment boundaries.</param>
public void WriteCanvasPartitionResults(string outPath, GenomeSegmentationResults segmentationResults)
{
    // Index every segment start as "chr:start". The parallel "stops" index that used to be
    // built here was never read and has been removed.
    Dictionary<string, bool> starts = new Dictionary<string, bool>();
    foreach (string chr in segmentationResults.SegmentByChr.Keys)
    {
        for (int segmentIndex = 0; segmentIndex < segmentationResults.SegmentByChr[chr].Length; segmentIndex++)
        {
            Segmentation.Segment segment = segmentationResults.SegmentByChr[chr][segmentIndex];
            starts[chr + ":" + segment.start] = true;
        }
    }

    Dictionary<string, List<SampleGenomicBin>> excludedIntervals = new Dictionary<string, List<SampleGenomicBin>>();
    if (!string.IsNullOrEmpty(ForbiddenIntervalBedPath))
    {
        excludedIntervals = CanvasCommon.Utilities.LoadBedFile(ForbiddenIntervalBedPath);
    }

    using (GzipWriter writer = new GzipWriter(outPath))
    {
        int segmentNum = -1;
        foreach (string chr in StartByChr.Keys)
        {
            // Single lookup; stays null when the chromosome has no excluded intervals
            List<SampleGenomicBin> excludeIntervals;
            excludedIntervals.TryGetValue(chr, out excludeIntervals);

            int excludeIndex = 0; // Points to the first interval which *doesn't* end before our current position
            uint previousBinEnd = 0;
            for (int pos = 0; pos < StartByChr[chr].Length; pos++)
            {
                uint start = StartByChr[chr][pos];
                uint end = EndByChr[chr][pos];
                string key = chr + ":" + start;
                bool newSegment = IsNewSegment(starts, key, excludeIntervals, previousBinEnd, end, start, ref excludeIndex);
                if (newSegment)
                {
                    segmentNum++;
                }

                // The interpolated string is already final text. Passing it through
                // string.Format again would re-parse it as a format string and throw if any
                // value ever contained '{' or '}'.
                writer.WriteLine($"{chr}\t{start}\t{end}\t{ScoreByChr[chr][pos]}\t{segmentNum}");
                previousBinEnd = end;
            }
        }
    }
}
/// <summary>
/// Checks that CanWrite is true for a compression stream and false for a
/// decompression stream.
/// </summary>
public void Gzip_CanWrite()
{
    // Compression stream reports itself as writable
    using (GzipWriter stream = new GzipWriter(new MemoryStream()))
    {
        bool writable = stream.CanWrite;
        Assert.IsTrue(writable);
    }

    // Decompression stream does not
    using (GzipReader stream = new GzipReader(new MemoryStream(s_sampledata)))
    {
        bool writable = stream.CanWrite;
        Assert.IsFalse(writable);
    }
}
/// <summary>
/// Checks that SetLength throws NotSupportedException on both the compression and the
/// decompression stream.
/// </summary>
public void Gzip_SetLength()
{
    // Shared check: the call has to throw, and the exception has to be a NotSupportedException
    Action<Action> expectNotSupported = call =>
    {
        try
        {
            call();
            Assert.Fail("Method call should have thrown an exception");
        }
        catch (Exception ex)
        {
            Assert.IsInstanceOfType(ex, typeof(NotSupportedException));
        }
    };

    // Compression stream
    using (GzipWriter stream = new GzipWriter(new MemoryStream()))
    {
        expectNotSupported(() => stream.SetLength(12345L));
    }

    // Decompression stream
    using (GzipReader stream = new GzipReader(new MemoryStream(s_sampledata)))
    {
        expectNotSupported(() => stream.SetLength(12345L));
    }
}
/// <summary>
/// Checks that BaseStream on both the writer and the reader is non-null and is the very
/// stream instance that was passed to the constructor.
/// </summary>
public void Gzip_BaseStream()
{
    using (MemoryStream source = new MemoryStream())
    {
        // Writer side
        using (GzipWriter stream = new GzipWriter(source, CompressionLevel.Optimal))
        {
            var exposed = stream.BaseStream;
            Assert.IsNotNull(exposed);
            Assert.AreSame(source, exposed);
        }

        // Reader side.
        // NOTE(review): the writer above is built without the extra boolean argument used
        // here; if that argument means "leave open", source may already be closed at this
        // point — only BaseStream identity is checked, so confirm this is intended.
        using (GzipReader stream = new GzipReader(source, true))
        {
            var exposed = stream.BaseStream;
            Assert.IsNotNull(exposed);
            Assert.AreSame(source, exposed);
        }
    }
}
/// <summary>
/// Provokes a GzipException by reading from the middle of a compressed stream, checks
/// its error code, and checks that it survives a BinaryFormatter round trip.
/// </summary>
public void Gzip_GzipException()
{
    using (MemoryStream compressed = new MemoryStream())
    {
        // Start with a compressed MemoryStream created from the sample data
        using (GzipWriter compressor = new GzipWriter(compressed, CompressionLevel.Optimal, true))
        {
            compressor.Write(s_sampledata, 0, s_sampledata.Length);
            compressor.Flush();
        }

        byte[] scratch = new byte[8192];

        // Create a decompressor to test exception cases
        using (GzipReader decompressor = new GzipReader(compressed, true))
        {
            // Reading from the middle of the compressed stream should throw a GzipException
            compressed.Position = compressed.Length / 2;

            GzipException caught = null;
            try
            {
                decompressor.Read(scratch, 0, 8192);
                Assert.Fail("Method call should have thrown an exception");
            }
            catch (GzipException ex)
            {
                caught = ex;
            }

            Assert.IsNotNull(caught);
            Assert.IsInstanceOfType(caught, typeof(GzipException));

            // Check the error code property
            Assert.AreEqual(-3, caught.ErrorCode); // Z_DATA_ERROR (-3)

            // Serialize and de-serialize the exception with a BinaryFormatter
            GzipException roundTripped;
            BinaryFormatter formatter = new BinaryFormatter();
            using (MemoryStream memstream = new MemoryStream())
            {
                formatter.Serialize(memstream, caught);
                memstream.Seek(0, SeekOrigin.Begin);
                roundTripped = (GzipException)formatter.Deserialize(memstream);
            }

            // Check that the exceptions are equivalent
            Assert.AreEqual(caught.ErrorCode, roundTripped.ErrorCode);
            Assert.AreEqual(caught.StackTrace, roundTripped.StackTrace);
            Assert.AreEqual(caught.ToString(), roundTripped.ToString());
        }
    }
}
/// <summary>
/// Writes one tab-separated line per bin (chromosome, bin start, bin end, bin coverage,
/// identifier of the owning segment) to <paramref name="outPath"/>.
/// </summary>
/// <param name="outPath">Path of the output file.</param>
/// <param name="segmentsByChromosome">Segments, with their bins, keyed by chromosome.</param>
public void WriteCanvasPartitionResults(string outPath, Dictionary<string, List<SegmentWithBins>> segmentsByChromosome)
{
    using (var writer = new GzipWriter(outPath))
    {
        // Enumerate key/value pairs directly instead of Keys plus a second lookup per key
        foreach (var chromosomeEntry in segmentsByChromosome)
        {
            var chr = chromosomeEntry.Key;
            foreach (var segment in chromosomeEntry.Value)
            {
                foreach (var bin in segment.Bins)
                {
                    // The interpolated string is already final text. Passing it through
                    // string.Format again would re-parse it as a format string and throw
                    // if any value ever contained '{' or '}'.
                    writer.WriteLine($"{chr}\t{bin.Start}\t{bin.End}\t{bin.Coverage}\t{segment.Identifier}");
                }
            }
        }
    }
}
/// <summary>
/// Exercises GzipWriter construction and Write argument validation, performs a real
/// write, and checks that Write on a GzipReader throws NotSupportedException.
/// </summary>
public void Gzip_Write()
{
    const string methodShouldThrow = "Method call should have thrown an exception";

    // Shared check: the call has to throw, and the exception has to be of the expected type
    Action<Action, Type, string> mustThrow = (call, expectedType, failureMessage) =>
    {
        try
        {
            call();
            Assert.Fail(failureMessage);
        }
        catch (Exception ex)
        {
            Assert.IsInstanceOfType(ex, expectedType);
        }
    };

    byte[] buffer = new byte[8192]; // 8KiB data buffer

    // Compress the sample data using a call to Write directly
    using (MemoryStream compressed = new MemoryStream())
    {
        // Check the constructor for ArgumentNullException while we're here
        mustThrow(
            () => { using (GzipWriter rejected = new GzipWriter(null)) { } },
            typeof(ArgumentNullException),
            "Constructor should have thrown an exception");

        using (GzipWriter compressor = new GzipWriter(compressed, CompressionLevel.Optimal, true))
        {
            // Send in some bum arguments to Write() to check they are caught
            mustThrow(() => compressor.Write(null), typeof(ArgumentNullException), methodShouldThrow);
            mustThrow(() => compressor.Write(null, 0, 0), typeof(ArgumentNullException), methodShouldThrow);
            mustThrow(() => compressor.Write(s_sampledata, -1, 0), typeof(ArgumentOutOfRangeException), methodShouldThrow);
            mustThrow(() => compressor.Write(s_sampledata, 0, -1), typeof(ArgumentOutOfRangeException), methodShouldThrow);
            mustThrow(() => compressor.Write(s_sampledata, 0, s_sampledata.Length + 1024), typeof(ArgumentException), methodShouldThrow);

            // Not writing anything shouldn't throw an exception
            compressor.Write(s_sampledata, 0, 0);

            // Compress the data; there really isn't much that can go wrong with Write() itself
            compressor.Write(s_sampledata, 0, s_sampledata.Length);
            compressor.Flush();
        }

        // A decompression stream must refuse writes
        using (GzipReader reader = new GzipReader(compressed, true))
        {
            mustThrow(() => reader.Write(buffer, 0, buffer.Length), typeof(NotSupportedException), methodShouldThrow);
        }
    }
}
/// <summary>
/// Reads the tumor and normal binned files line by line in lockstep and writes the normal
/// line with column 4 replaced by tumorCount / normalCount, scaled by the ploidy factor
/// and by a library size factor derived from the two on-target median bin counts.
/// Lines whose normal count is below 1 are skipped.
/// </summary>
/// <param name="tumorBinnedPath">Binned counts of the tumor sample.</param>
/// <param name="normalBinnedPath">Binned counts of the normal sample.</param>
/// <param name="ratioBinnedPath">Output path for the ratio file.</param>
/// <param name="ploidyBedPath">Optional ploidy BED file; null or empty means no reference ploidy.</param>
/// <param name="manifest">Optional manifest passed through to BinCounts.</param>
/// <exception cref="System.IO.InvalidDataException">
/// The tumor file has fewer lines than the normal file.
/// </exception>
private static void GetBinRatio(string tumorBinnedPath, string normalBinnedPath, string ratioBinnedPath,
    string ploidyBedPath, NexteraManifest manifest = null)
{
    PloidyInfo referencePloidy = String.IsNullOrEmpty(ploidyBedPath) ? null : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
    double tumorMedian = (new BinCounts(tumorBinnedPath, manifest: manifest)).OnTargetMedianBinCount;
    double normalMedian = (new BinCounts(normalBinnedPath, manifest: manifest)).OnTargetMedianBinCount;

    // Only rescale when both medians are usable; otherwise leave the ratio unscaled
    double librarySizeFactor = (tumorMedian > 0 && normalMedian > 0) ? normalMedian / tumorMedian : 1;

    using (GzipReader tumorReader = new GzipReader(tumorBinnedPath))
    using (GzipReader normalReader = new GzipReader(normalBinnedPath))
    using (GzipWriter writer = new GzipWriter(ratioBinnedPath))
    {
        string normalLine;
        while ((normalLine = normalReader.ReadLine()) != null)
        {
            // The two files are consumed in lockstep. Fail with a clear message instead of
            // a NullReferenceException when the tumor file runs out first.
            string tumorLine = tumorReader.ReadLine();
            if (tumorLine == null)
            {
                throw new System.IO.InvalidDataException(String.Format(
                    "Tumor bin file '{0}' has fewer lines than normal bin file '{1}'.",
                    tumorBinnedPath, normalBinnedPath));
            }

            string[] normalToks = normalLine.Split('\t');
            string[] tumorToks = tumorLine.Split('\t');
            double normalCount = double.Parse(normalToks[3]);
            double tumorCount = double.Parse(tumorToks[3]);

            // The weighted average count of a bin could be less than 1.
            // Using these small counts for coverage normalization creates large ratios.
            // It would be better to just drop these bins so we don't introduce too much noise
            // into segmentation and CNV calling.
            if (normalCount < 1)
            {
                continue; // skip the bin
            }

            string chrom = normalToks[0];
            int start = int.Parse(normalToks[1]);
            int end = int.Parse(normalToks[2]);

            // Scale by the reference ploidy of the interval relative to diploid
            double factor = CanvasDiploidBinRatioFactor * GetPloidy(referencePloidy, chrom, start, end) / 2.0;
            double ratio = tumorCount / normalCount * factor * librarySizeFactor;
            normalToks[3] = String.Format("{0}", ratio);
            writer.WriteLine(String.Join("\t", normalToks));
        }
    }
}
/// <summary>
/// Step 3: Summarize results to a simple tab-delimited file. Writes a header line, then
/// one line for every variant whose variant count is greater than 5.
/// </summary>
/// <param name="outputPath">Path of the output file.</param>
protected void WriteResults(string outputPath)
{
    using (GzipWriter writer = new GzipWriter(outputPath))
    {
        writer.WriteLine("#Chromosome\tPosition\tRef\tAlt\tCountRef\tCountAlt");

        int variantTotal = this.Variants.Count;
        for (int index = 0; index < variantTotal; index++)
        {
            VcfVariant variant = this.Variants[index];
            var altCount = this.VariantCounts[index];

            // Positions at or below the threshold are not reported
            // (original note: skip HOM REF positions)
            if (altCount > 5)
            {
                string row = string.Format(
                    "{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                    variant.ReferenceName,
                    variant.ReferencePosition,
                    variant.ReferenceAllele,
                    variant.VariantAlleles[0],
                    this.ReferenceCounts[index],
                    altCount);
                writer.WriteLine(row);
            }
        }
    }

    Console.WriteLine("{0} Results written to {1}", DateTime.Now, outputPath);
}
/// <summary>
/// Checks that Position starts at zero, is non-zero after data has passed through, and
/// cannot be assigned, for both the compression and the decompression stream.
/// </summary>
public void Gzip_Position()
{
    // Shared check: assigning Position has to throw a NotSupportedException
    Action<Action> expectNotSupported = assignment =>
    {
        try
        {
            assignment();
            Assert.Fail("Property should have thrown an exception");
        }
        catch (Exception ex)
        {
            Assert.IsInstanceOfType(ex, typeof(NotSupportedException));
        }
    };

    // Start with a MemoryStream created from the sample data
    using (MemoryStream source = new MemoryStream(s_sampledata))
    using (MemoryStream dest = new MemoryStream())
    {
        // Compression stream
        using (GzipWriter compressor = new GzipWriter(dest, CompressionLevel.Optimal, true))
        {
            // Position is zero before anything has been compressed
            Assert.AreEqual(0L, compressor.Position);

            source.CopyTo(compressor);

            // ... and non-zero afterwards
            Assert.AreNotEqual(0L, compressor.Position);

            expectNotSupported(() => { compressor.Position = 12345L; });
        }

        source.SetLength(0);    // Clear the source stream
        dest.Position = 0;      // Reset the destination stream

        // Decompression stream
        using (GzipReader decompressor = new GzipReader(dest, true))
        {
            // Position is zero before anything has been decompressed
            Assert.AreEqual(0L, decompressor.Position);

            decompressor.CopyTo(source);

            // ... and non-zero afterwards
            Assert.AreNotEqual(0L, decompressor.Position);

            expectNotSupported(() => { decompressor.Position = 12345L; });
        }
    }
}
/// <summary>
/// Converts <paramref name="obj"/> with Protocols.ConvertToString and forwards the result
/// to the two-argument Write, returning that call's result.
/// </summary>
public static int Write(ConversionStorage<MutableString> /*!*/ tosConversion, GzipWriter /*!*/ self, object obj)
{
    var text = Protocols.ConvertToString(tosConversion, obj);
    int written = Write(self, text);
    return written;
}
/// <summary>
/// Writes the bytes of <paramref name="val"/> to the stream obtained from
/// <paramref name="self"/> and returns how many bytes were written.
/// </summary>
public static int Write(GzipWriter/*!*/ self, [NotNull]MutableString/*!*/ val)
{
    var output = self.GetStream();

    // TODO: this can be optimized (add MutableString.WriteTo(Stream))
    byte[] bytes = val.ToByteArray();
    int length = bytes.Length;

    output.Write(bytes, 0, length);
    return length;
}
/// <summary>
/// Writes one tab-separated line per bin (chromosome, start, end, score, segment number)
/// to <paramref name="outPath"/>. A new segment number is started when a bin begins at
/// a segmentation start position, when it crosses the midpoint of an excluded interval,
/// or when the gap to the previous bin exceeds MaxInterBinDistInSegment.
/// </summary>
/// <param name="outPath">Path of the output file.</param>
private void WriteCanvasPartitionResults(string outPath)
{
    // Set of "chr:start" keys for every segment start. Only membership is ever tested, so
    // a HashSet replaces the former Dictionary<string, bool>; the parallel "stops" index
    // was never read and has been removed.
    HashSet<string> segmentStarts = new HashSet<string>();
    foreach (string chr in SegmentationResults.SegmentByChr.Keys)
    {
        for (int segmentIndex = 0; segmentIndex < SegmentationResults.SegmentByChr[chr].Length; segmentIndex++)
        {
            Segment segment = SegmentationResults.SegmentByChr[chr][segmentIndex];
            segmentStarts.Add(chr + ":" + segment.start);
        }
    }

    Dictionary<string, List<GenomicBin>> excludedIntervals = new Dictionary<string, List<GenomicBin>>();
    if (!string.IsNullOrEmpty(ForbiddenIntervalBedPath))
    {
        excludedIntervals = CanvasCommon.Utilities.LoadBedFile(ForbiddenIntervalBedPath);
    }

    using (GzipWriter writer = new GzipWriter(outPath))
    {
        int segmentNum = -1;
        foreach (string chr in StartByChr.Keys)
        {
            // Single lookup; stays null when the chromosome has no excluded intervals
            List<GenomicBin> excludeIntervals;
            excludedIntervals.TryGetValue(chr, out excludeIntervals);

            int excludeIndex = 0; // Points to the first interval which *doesn't* end before our current position
            uint previousBinEnd = 0;
            for (int pos = 0; pos < StartByChr[chr].Length; pos++)
            {
                uint start = StartByChr[chr][pos];
                uint end = EndByChr[chr][pos];

                // Rule 1: the bin begins exactly at a segmentation start
                bool newSegment = segmentStarts.Contains(chr + ":" + start);

                // Rule 2: the bin crosses the midpoint of the next excluded interval
                if (excludeIntervals != null)
                {
                    while (excludeIndex < excludeIntervals.Count && excludeIntervals[excludeIndex].Stop < previousBinEnd)
                    {
                        excludeIndex++;
                    }
                    if (excludeIndex < excludeIntervals.Count)
                    {
                        // Note: forbiddenZoneMid should never fall inside a bin, because these intervals
                        // were already excluded from consideration during the call to CanvasBin.
                        int forbiddenZoneMid = (excludeIntervals[excludeIndex].Start + excludeIntervals[excludeIndex].Stop) / 2;
                        if (previousBinEnd < forbiddenZoneMid && end >= forbiddenZoneMid)
                        {
                            newSegment = true;
                        }
                    }
                }

                // Rule 3: the gap since the previous bin is larger than allowed (check disabled
                // when MaxInterBinDistInSegment is negative, and for the first bin)
                if (previousBinEnd > 0 && MaxInterBinDistInSegment >= 0
                    && previousBinEnd + MaxInterBinDistInSegment < start && !newSegment)
                {
                    newSegment = true;
                }

                if (newSegment)
                {
                    segmentNum++;
                }
                writer.WriteLine(string.Format("{0}\t{1}\t{2}\t{3}\t{4}", chr, start, end, ScoreByChr[chr][pos], segmentNum));
                previousBinEnd = end;
            }
        }
    }
}
/// <summary>
/// Returns the Position reported by the wrapper obtained from <paramref name="self"/>.
/// </summary>
public static long Pos(GzipWriter/*!*/ self)
{
    var wrapper = self.GetWrapper();
    return wrapper.Position;
}
/// <summary>
/// Converts <paramref name="obj"/> with Protocols.ConvertToString and forwards the result
/// to the two-argument Write, returning that call's result.
/// </summary>
public static int Write(ConversionStorage<MutableString>/*!*/ tosConversion, GzipWriter/*!*/ self, object obj)
{
    var converted = Protocols.ConvertToString(tosConversion, obj);
    int byteCount = Write(self, converted);
    return byteCount;
}
/// <summary>
/// Returns <paramref name="self"/> unchanged. The <paramref name="flush"/> argument
/// (default SYNC_FLUSH) is accepted but not used by this body.
/// </summary>
public static GzipWriter Flush(GzipWriter/*!*/ self, [DefaultParameterValue(SYNC_FLUSH)]int flush) { return self; }
/// <summary>
/// Calls Write(tosConversion, self, value), discards its return value, and returns
/// <paramref name="self"/>.
/// </summary>
public static GzipWriter Output(ConversionStorage<MutableString>/*!*/ tosConversion, GzipWriter/*!*/ self, object value) { Write(tosConversion, self, value); return self; }
/// <summary>
/// Returns <paramref name="self"/> unchanged. The <paramref name="flush"/> argument
/// (default SYNC_FLUSH) is accepted but not used by this body.
/// </summary>
public static GzipWriter /*!*/ Flush(GzipWriter /*!*/ self, [DefaultParameterValue(SYNC_FLUSH)] int flush) { return(self); }
/// <summary>
/// Computes fragment-based GC normalization correction factor.
/// For every GC bin, counts how many positions fall in the bin (expected) and sums the
/// alignment hits at those positions (observed), over the whole genome or, when a
/// manifest is supplied, over the manifest regions only.
/// </summary>
/// <param name="observedAlignments">Per-chromosome hit data, indexed by position.</param>
/// <param name="readGCContent">Per-chromosome GC bin index for each position.</param>
/// <param name="manifest">Optional manifest restricting the positions considered.</param>
/// <param name="debugGC">When true, the per-bin statistics are also written to a file.</param>
/// <param name="outFile">Base path; the statistics file is outFile + ".gcstat".</param>
/// <returns>An array of observed vs expected GC counts.</returns>
static float[] ComputeObservedVsExpectedGC(Dictionary<string, HitArray> observedAlignments,
    Dictionary<string, byte[]> readGCContent, NexteraManifest manifest, bool debugGC, string outFile)
{
    Dictionary<string, List<NexteraManifest.ManifestRegion>> regionsByChrom =
        manifest != null ? manifest.GetManifestRegionsByChromosome() : null;

    long[] expectedReadCountsByGC = new long[numberOfGCbins];
    long[] observedReadCountsByGC = new long[numberOfGCbins];

    foreach (KeyValuePair<string, byte[]> perChromosome in readGCContent)
    {
        string chr = perChromosome.Key;
        byte[] gcByPosition = perChromosome.Value;

        HitArray hits;
        if (!observedAlignments.TryGetValue(chr, out hits))
        {
            continue;
        }

        if (manifest == null)
        {
            // look at the entire genome
            for (int position = 0; position < gcByPosition.Length; position++)
            {
                byte gcBin = gcByPosition[position];
                expectedReadCountsByGC[gcBin]++;
                observedReadCountsByGC[gcBin] += hits.Data[position];
            }
            continue;
        }

        // look at only the targeted regions
        List<NexteraManifest.ManifestRegion> regions;
        if (!regionsByChrom.TryGetValue(chr, out regions))
        {
            continue;
        }

        // The cursor only ever moves forward, so a position shared by overlapping
        // targeted regions is counted once.
        int cursor = -1;
        foreach (var region in regions)
        {
            if (cursor < region.Start)
            {
                cursor = region.Start - 1; // cursor is 0-based; manifest coordinates are 1-based.
            }
            while (cursor < gcByPosition.Length && cursor < region.End)
            {
                byte gcBin = gcByPosition[cursor];
                expectedReadCountsByGC[gcBin]++;
                observedReadCountsByGC[gcBin] += hits.Data[cursor];
                cursor++;
            }
        }
    }

    // Totals are taken BEFORE empty bins are bumped to 1 below
    long sumObserved = 0;
    for (int bin = 0; bin < observedReadCountsByGC.Length; bin++)
    {
        sumObserved += observedReadCountsByGC[bin];
    }
    long sumExpected = 0;
    for (int bin = 0; bin < expectedReadCountsByGC.Length; bin++)
    {
        sumExpected += expectedReadCountsByGC[bin];
    }

    // calculate ratio of observed to expected read counts for each read GC bin;
    // every entry is assigned in this loop, so no separate pre-fill is needed
    float[] observedVsExpectedGC = new float[numberOfGCbins];
    for (int bin = 0; bin < numberOfGCbins; bin++)
    {
        // Empty bins are treated as holding a single count
        if (expectedReadCountsByGC[bin] == 0)
        {
            expectedReadCountsByGC[bin] = 1;
        }
        if (observedReadCountsByGC[bin] == 0)
        {
            observedReadCountsByGC[bin] = 1;
        }
        observedVsExpectedGC[bin] = ((float)observedReadCountsByGC[bin] / (float)expectedReadCountsByGC[bin]) * ((float)sumExpected / (float)sumObserved);
    }

    if (debugGC)
    {
        using (GzipWriter writer = new GzipWriter(outFile + ".gcstat"))
        {
            for (int bin = 0; bin < numberOfGCbins; bin++)
            {
                string row = string.Format("{0}\t{1}\t{2}", expectedReadCountsByGC[bin], observedReadCountsByGC[bin], observedVsExpectedGC[bin]);
                writer.WriteLine(row);
            }
        }
    }

    return observedVsExpectedGC;
}
/// <summary>
/// Intersect bins with the targeted regions defined in callset.Manifest.
/// Assumes that the targeted regions don't intersect, the bins are sorted by genomic location
/// and the bins don't intersect. The original file is moved to a ".raw" sibling and the
/// intersected bins are written back to the original path, one line per (bin, region) overlap.
/// </summary>
/// <param name="callset">Callset whose Manifest supplies the targeted regions.</param>
/// <param name="partitionedPath">Output of CanvasPartition. Bins are assumed to be sorted</param>
/// <returns><paramref name="partitionedPath"/>, unchanged if the file does not exist.</returns>
private IFileLocation IntersectBinsWithTargetedRegions(CanvasCallset callset, IFileLocation partitionedPath)
{
    if (!partitionedPath.Exists)
    {
        return partitionedPath;
    }

    // Keep the untouched partition output next to the rewritten file
    var rawPartitionedPath = partitionedPath.AppendName(".raw");
    if (rawPartitionedPath.Exists)
    {
        rawPartitionedPath.Delete();
    }
    partitionedPath.MoveTo(rawPartitionedPath);

    Dictionary<string, List<NexteraManifest.ManifestRegion>> manifestRegionsByChrom = callset.Manifest.GetManifestRegionsByChromosome();

    // CanvasPartition output file is in the BED format
    //   start: 0-based, inclusive
    //   end: 0-based, exclusive
    // Manifest
    //   start: 1-based, inclusive
    //   end: 1-based, inclusive
    using (GzipReader reader = new GzipReader(rawPartitionedPath.FullName))
    using (GzipWriter writer = new GzipWriter(partitionedPath.FullName))
    {
        string currentChrom = null;
        List<NexteraManifest.ManifestRegion> regions = null; // regions of currentChrom, null if none
        int firstCandidateIdx = 0; // first region that does not end before the current bin
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] toks = line.Split('\t');
            string chrom = toks[0];
            int start = int.Parse(toks[1]) + 1; // 1-based, inclusive
            int end = int.Parse(toks[2]); // 1-based, inclusive

            if (chrom != currentChrom)
            {
                currentChrom = chrom;
                firstCandidateIdx = 0;
                manifestRegionsByChrom.TryGetValue(chrom, out regions);
            }
            if (regions == null)
            {
                continue; // no targeted regions on this chromosome
            }

            // Permanently drop regions that end before this bin starts; with sorted bins
            // they cannot overlap any later bin either.
            // |- manifest region -| |- bin -|
            while (firstCandidateIdx < regions.Count && regions[firstCandidateIdx].End < start)
            {
                firstCandidateIdx++;
            }

            // Emit every overlap of this bin with the remaining regions. A separate index is
            // used so that a region extending past the end of this bin is NOT consumed:
            // advancing the shared index here caused later bins overlapping the same region
            // to be silently dropped. The loop body does not run when the bin lies entirely
            // before the next region or after the last one.
            for (int regionIdx = firstCandidateIdx;
                 regionIdx < regions.Count && end >= regions[regionIdx].Start;
                 regionIdx++)
            {
                // calculate intersection
                int intersectionStart = Math.Max(start, regions[regionIdx].Start); // 1-based, inclusive
                int intersectionEnd = Math.Min(end, regions[regionIdx].End); // 1-based, inclusive

                // start/end in BED format
                toks[1] = String.Format("{0}", intersectionStart - 1); // 0-based, inclusive
                toks[2] = String.Format("{0}", intersectionEnd); // 0-based, exclusive

                // write intersected bin
                writer.WriteLine(String.Join("\t", toks));
            }
        }
    }

    return partitionedPath;
}
/// <summary>
/// Exercises GzipReader construction and Read argument validation, the errors raised when
/// reading from the end and from the middle of compressed data, and finally a full,
/// successful read with a fresh reader.
/// </summary>
public void Gzip_Read()
{
    const string methodShouldThrow = "Method call should have thrown an exception";

    // Shared check: the call has to throw, and the exception has to be of the expected type
    Action<Action, Type, string> mustThrow = (call, expectedType, failureMessage) =>
    {
        try
        {
            call();
            Assert.Fail(failureMessage);
        }
        catch (Exception ex)
        {
            Assert.IsInstanceOfType(ex, expectedType);
        }
    };

    byte[] buffer = new byte[8192]; // 8KiB data buffer

    using (MemoryStream compressed = new MemoryStream())
    {
        // Start with a compressed MemoryStream created from the sample data
        using (GzipWriter compressor = new GzipWriter(compressed, CompressionLevel.Optimal, true))
        {
            // A compression stream must refuse reads
            mustThrow(() => compressor.Read(buffer, 0, 8192), typeof(NotSupportedException), methodShouldThrow);

            compressor.Write(s_sampledata, 0, s_sampledata.Length);
            compressor.Flush();
        }

        // Check the constructor for ArgumentNullException while we're here
        mustThrow(
            () => { using (GzipReader rejected = new GzipReader(null, false)) { } },
            typeof(ArgumentNullException),
            "Constructor should have thrown an exception");

        // Create a decompressor to test some of the error cases
        using (GzipReader decompressor = new GzipReader(compressed, true))
        {
            // Send in some bum arguments to Read() to check they are caught
            mustThrow(() => decompressor.Read(null, 0, 0), typeof(ArgumentNullException), methodShouldThrow);
            mustThrow(() => decompressor.Read(buffer, -1, 0), typeof(ArgumentOutOfRangeException), methodShouldThrow);
            mustThrow(() => decompressor.Read(buffer, 0, -1), typeof(ArgumentOutOfRangeException), methodShouldThrow);
            mustThrow(() => decompressor.Read(buffer, 0, buffer.Length + 1024), typeof(ArgumentException), methodShouldThrow);

            // Attempting to read from the end of the compressed stream should throw an InvalidDataException
            mustThrow(() => decompressor.Read(buffer, 0, 8192), typeof(InvalidDataException), methodShouldThrow);

            // Attempting to read from the middle of the compressed stream should throw a GzipException
            compressed.Position = compressed.Position / 2;
            mustThrow(() => decompressor.Read(buffer, 0, 8192), typeof(GzipException), methodShouldThrow);

            // The decompression stream is trashed at this point since the input buffer was filled
            // with data from the middle. Thought about a special case handler for that, but it's
            // a fringe case. Verify that the stream is indeed trashed ...
            compressed.Position = 0;
            mustThrow(() => decompressor.Read(buffer, 0, 8192), typeof(GzipException), methodShouldThrow);
        }

        // Create a new decompressor against the same stream and make sure it doesn't throw
        using (GzipReader decompressor = new GzipReader(compressed, true))
        {
            // Reading zero bytes should not throw an exception
            decompressor.Read(buffer, 0, 0);

            // Drain the stream to the end
            while (decompressor.Read(buffer, 0, 8192) != 0)
            {
            }
        }
    }
}
/// <summary>
/// Covers GzipEncoder: default property values, property validation, null-argument validation
/// on every Encode overload, and that each overload produces the same bytes as a GzipWriter
/// created with CompressionLevel.Fastest.
/// </summary>
public void Gzip_Encoder()
{
    const string propertyShouldThrow = "Property should have thrown an exception";
    const string methodShouldThrow = "Method call should have thrown an exception";

    // Runs the action and asserts that it raised an exception of the expected type. Assert.Fail
    // is issued inside the try block, so a missing exception also ends up in the type assertion.
    Action<Action, Type, string> expectThrow = (action, expectedType, failureMessage) =>
    {
        try
        {
            action();
            Assert.Fail(failureMessage);
        }
        catch (Exception caught)
        {
            Assert.IsInstanceOfType(caught, expectedType);
        }
    };

    // The GzipEncoder is just a wrapper around GzipWriter that provides
    // more complete control over the compression/encoder parameters
    GzipEncoder encoder = new GzipEncoder();

    // Defaults
    Assert.AreEqual(65536, encoder.BufferSize);
    Assert.AreEqual(GzipCompressionLevel.Default, encoder.CompressionLevel);

    // Out-of-range values must be rejected by the property setters
    expectThrow(() => encoder.BufferSize = -1, typeof(ArgumentOutOfRangeException), propertyShouldThrow);
    expectThrow(() => encoder.CompressionLevel = new GzipCompressionLevel(-2), typeof(ArgumentOutOfRangeException), propertyShouldThrow);
    expectThrow(() => encoder.CompressionLevel = new GzipCompressionLevel(10), typeof(ArgumentOutOfRangeException), propertyShouldThrow);

    // Valid values must round-trip through the properties
    encoder.BufferSize = 8192;
    Assert.AreEqual(8192, encoder.BufferSize);

    encoder.CompressionLevel = GzipCompressionLevel.Fastest;
    Assert.AreEqual(GzipCompressionLevel.Fastest, encoder.CompressionLevel);

    encoder.CompressionStrategy = GzipCompressionStrategy.HuffmanOnly;
    Assert.AreEqual(GzipCompressionStrategy.HuffmanOnly, encoder.CompressionStrategy);
    encoder.CompressionStrategy = GzipCompressionStrategy.Default;      // put this back

    encoder.MemoryUsage = GzipMemoryUsageLevel.Optimal;
    Assert.AreEqual(GzipMemoryUsageLevel.Optimal, encoder.MemoryUsage);
    encoder.MemoryUsage = GzipMemoryUsageLevel.Default;                 // put this back

    // Reference output: the sample data compressed through a GzipWriter
    byte[] expected, actual;
    using (MemoryStream reference = new MemoryStream())
    {
        using (var writer = new GzipWriter(reference, CompressionLevel.Fastest, true))
        {
            writer.Write(s_sampledata, 0, s_sampledata.Length);
        }

        expected = reference.ToArray();
    }

    // Null arguments must be rejected by every Encode overload
    expectThrow(() => encoder.Encode((byte[])null), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode((Stream)null), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode(null, 0, 0), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode(s_sampledata, null), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode((byte[])null, new MemoryStream()), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode((byte[])null, 0, 0, new MemoryStream()), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode(s_sampledata, 0, s_sampledata.Length, (Stream)null), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode((Stream)null, new MemoryStream()), typeof(ArgumentNullException), methodShouldThrow);
    expectThrow(() => encoder.Encode(new MemoryStream(), (Stream)null), typeof(ArgumentNullException), methodShouldThrow);

    // Asserts that the produced bytes are identical to the reference output
    Action<byte[]> assertMatchesReference = produced =>
        Assert.IsTrue(Enumerable.SequenceEqual(expected, produced));

    // Overloads that return the encoded data
    actual = encoder.Encode(s_sampledata);
    assertMatchesReference(actual);

    actual = encoder.Encode(new MemoryStream(s_sampledata));
    assertMatchesReference(actual);

    actual = encoder.Encode(s_sampledata, 0, s_sampledata.Length);
    assertMatchesReference(actual);

    // Overloads that write the encoded data into a destination stream
    using (MemoryStream dest = new MemoryStream())
    {
        encoder.Encode(s_sampledata, dest);
        assertMatchesReference(dest.ToArray());
    }

    using (MemoryStream dest = new MemoryStream())
    {
        encoder.Encode(new MemoryStream(s_sampledata), dest);
        assertMatchesReference(dest.ToArray());
    }

    using (MemoryStream dest = new MemoryStream())
    {
        encoder.Encode(s_sampledata, 0, s_sampledata.Length, dest);
        assertMatchesReference(dest.ToArray());
    }
}
/// <summary>
/// Forwards <paramref name="value"/> to Write together with the supplied conversion storage
/// and then hands back the same writer instance that was passed in.
/// </summary>
public static GzipWriter/*!*/ Output(ConversionStorage<MutableString>/*!*/ tosConversion, GzipWriter/*!*/ self, object value)
{
    Write(tosConversion, self, value);
    return self;
}
/// <summary>
/// Returns the Position reported by the object that the writer's GetWrapper() call yields.
/// </summary>
public static long Pos(GzipWriter/*!*/ self)
{
    var wrapper = self.GetWrapper();
    return wrapper.Position;
}
/// <summary>
/// Verifies GzipWriter disposal: a second Dispose() is harmless, every property and method of a
/// disposed writer throws ObjectDisposedException, and the base stream is disposed along with
/// the writer unless the leave-open constructor argument is set.
/// </summary>
public void Gzip_WriterDispose()
{
    const string propertyShouldThrow = "Property access should have thrown an exception";
    const string methodShouldThrow = "Method call should have thrown an exception";

    // Runs the action and asserts that it raised an ObjectDisposedException. Assert.Fail is
    // issued inside the try block, so a missing exception also ends up in the type assertion.
    Action<Action, string> expectDisposed = (action, failureMessage) =>
    {
        try
        {
            action();
            Assert.Fail(failureMessage);
        }
        catch (Exception caught)
        {
            Assert.IsInstanceOfType(caught, typeof(ObjectDisposedException));
        }
    };

    byte[] buffer = new byte[8192];         // 8KiB data buffer

    // A MemoryStream constructed over an existing array writes straight into that array, and
    // this test writes to its base streams. Work on a private copy so the shared sample data
    // used by the other tests is left untouched.
    byte[] sample = (byte[])s_sampledata.Clone();

    // Create a dummy stream and immediately dispose of it
    GzipWriter stream = new GzipWriter(new MemoryStream(sample), CompressionLevel.Optimal);
    stream.Dispose();

    // Test double dispose
    stream.Dispose();

    // All properties and methods should throw an ObjectDisposedException
    expectDisposed(() => { var bs = stream.BaseStream; }, propertyShouldThrow);
    expectDisposed(() => { var b = stream.CanRead; }, propertyShouldThrow);
    expectDisposed(() => { var b = stream.CanSeek; }, propertyShouldThrow);
    expectDisposed(() => { var b = stream.CanWrite; }, propertyShouldThrow);
    expectDisposed(() => stream.Flush(), methodShouldThrow);
    expectDisposed(() => { var l = stream.Length; }, propertyShouldThrow);
    expectDisposed(() => { var l = stream.Position; }, propertyShouldThrow);
    expectDisposed(() => { stream.Position = 12345L; }, propertyShouldThrow);
    expectDisposed(() => stream.Read(buffer, 0, 8192), methodShouldThrow);
    expectDisposed(() => stream.Seek(0, SeekOrigin.Current), methodShouldThrow);
    expectDisposed(() => stream.SetLength(12345L), methodShouldThrow);
    expectDisposed(() => stream.Write(buffer), methodShouldThrow);
    expectDisposed(() => stream.Write(buffer, 0, 8192), methodShouldThrow);

    // Ensure that an underlying stream is disposed of properly if leaveopen is not set
    MemoryStream owned = new MemoryStream(sample);
    using (GzipWriter compressor = new GzipWriter(owned, CompressionLevel.Fastest)) { }
    expectDisposed(() => owned.Write(buffer, 0, 8192), methodShouldThrow);

    // Ensure that an underlying stream is not disposed of if leaveopen is set; the using block
    // releases the stream even when the write below fails
    using (MemoryStream shared = new MemoryStream(sample))
    {
        using (GzipWriter compressor = new GzipWriter(shared, CompressionLevel.Fastest, true)) { }
        shared.Write(buffer, 0, 8192);
    }
}
/// <summary>
/// Runs CanvasSNV once per autosome/allosome of the callset's genome and then concatenates the
/// per-chromosome outputs into a single gzip file at callset.VfSummaryPath. Only the first line
/// starting with '#' encountered across all outputs is copied; later '#' lines are dropped.
/// An expected output file that does not exist is reported on the console and skipped.
/// This method does not return a value.
/// </summary>
/// <param name="callset">Supplies the genome metadata, input paths, temp folder, id and summary path.</param>
protected void InvokeCanvasSnv(CanvasCallset callset)
{
    List<UnitOfWork> jobs = new List<UnitOfWork>();
    List<string> chromosomeOutputs = new List<string>();
    GenomeMetadata genomeMetadata = callset.GenomeMetadata;
    string tumorBamPath = callset.Bam.BamFile.FullName;
    string normalVcfPath = callset.NormalVcfPath.FullName;

    // Build one job per chromosome of interest
    foreach (GenomeMetadata.SequenceMetadata chromosome in genomeMetadata.Sequences)
    {
        // Only autosomes and allosomes are processed; mitochondrial chromosome,
        // extra contigs and decoys are left out
        bool isCallable = chromosome.Type == GenomeMetadata.SequenceType.Allosome || chromosome.IsAutosome();
        if (!isCallable)
        {
            continue;
        }

        UnitOfWork job = new UnitOfWork();
        job.ExecutablePath = Path.Combine(_canvasFolder, "CanvasSNV.exe");
        if (CrossPlatform.IsThisMono())
        {
            // Under Mono the executable becomes the first command-line argument of the Mono runtime
            job.CommandLine = job.ExecutablePath;
            job.ExecutablePath = Utilities.GetMonoPath();
        }

        string outputFileName = string.Format("{0}-{1}.SNV.txt.gz", chromosome.Name, callset.Id);
        string outputPath = Path.Combine(callset.TempFolder, outputFileName);
        chromosomeOutputs.Add(outputPath);

        job.CommandLine += $" {chromosome.Name} {normalVcfPath} {tumorBamPath} {outputPath}";
        if (_customParameters.ContainsKey("CanvasSNV"))
        {
            job.CommandLine = Utilities.MergeCommandLineOptions(job.CommandLine, _customParameters["CanvasSNV"], true);
        }

        job.LoggingFolder = _workManager.LoggingFolder.FullName;
        job.LoggingStub = string.Format("CanvasSNV-{0}-{1}", callset.Id, chromosome.Name);
        jobs.Add(job);
    }

    Console.WriteLine("Invoking {0} processor jobs...", jobs.Count);

    // Run the CanvasSNV jobs
    Console.WriteLine(">>>CanvasSNV start...");
    _workManager.DoWorkParallelThreads(jobs);
    Console.WriteLine(">>>CanvasSNV complete!");

    // Merge the per-chromosome results into the summary file
    using (GzipWriter summary = new GzipWriter(callset.VfSummaryPath))
    {
        bool headerWritten = false;
        foreach (string outputPath in chromosomeOutputs)
        {
            if (!File.Exists(outputPath))
            {
                Console.WriteLine("Error: Expected output file not found at {0}", outputPath);
                continue;
            }

            using (GzipReader reader = new GzipReader(outputPath))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    bool isHeader = line.Length > 0 && line[0] == '#';
                    if (isHeader)
                    {
                        if (headerWritten)
                        {
                            continue;
                        }

                        headerWritten = true;
                    }

                    summary.WriteLine(line);
                }
            }
        }
    }
}