/// <summary>
/// Writes one mocked chr1 position through an <c>OnTheFlyIndexCreator</c> and verifies that
/// the section start and the variant's file location can be read back from the resulting index.
/// </summary>
public void Add_one_chrom()
{
    var positionMock = new Mock<IPosition>();
    positionMock.SetupGet(p => p.Chromosome).Returns(ChromosomeUtilities.Chr1);
    positionMock.SetupGet(p => p.Start).Returns(100);
    positionMock.SetupGet(p => p.RefAllele).Returns("A");
    positionMock.SetupGet(p => p.AltAlleles).Returns(new[] { "C" });

    var indexBuffer = new MemoryStream();
    using (var creator = new OnTheFlyIndexCreator(indexBuffer))
    {
        creator.BeginSection("positions", 100);
        creator.Add(positionMock.Object, 2588);
        creator.EndSection("positions", 2699);
    }

    // the bytes are copied only after the creator has been disposed
    byte[] indexBytes = indexBuffer.ToArray();
    var indexReadStream = new MemoryStream(indexBytes);
    indexReadStream.Seek(0, SeekOrigin.Begin);
    var readBackIndex = new JasixIndex(indexReadStream);

    Assert.Equal(100, readBackIndex.GetSectionBegin("positions"));
    Assert.Equal(2588, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
}
/// <summary>
/// Creates a query processor over a JSON reader and its index stream.
/// </summary>
/// <param name="jsonReader">Reader over the JSON content to query.</param>
/// <param name="indexStream">Stream from which the <c>JasixIndex</c> is loaded.</param>
/// <param name="writer">Destination for output; when null, a writer over standard output is created.</param>
public QueryProcessor(StreamReader jsonReader, Stream indexStream, StreamWriter writer = null)
{
    _jsonReader = jsonReader;

    if (writer == null)
    {
        writer = new StreamWriter(Console.OpenStandardOutput());
    }
    _writer = writer;

    _indexStream = indexStream;
    _jasixIndex  = new JasixIndex(indexStream);
}
/// <summary>
/// Reads lines from <c>_reader</c> until the line that opens the positions section and records
/// the begin and end locations of the header section in <paramref name="index"/>.
/// </summary>
/// <param name="index">Index that receives the header section boundaries.</param>
/// <remarks>
/// The location stored for a line is the reader position captured before that line was read.
/// Tag matching is ordinal: the tags are JSON syntax, not linguistic text, so culture rules
/// must not influence the result.
/// </remarks>
private void IndexHeader(JasixIndex index)
{
    string searchTag = $"\"{JasixCommons.PositionsSectionTag}\":[";
    string headerTag = $"{{\"{JasixCommons.HeaderSectionTag}\":";

    string line;
    long previousPosition = _reader.Position;

    while ((line = _reader.ReadLine()) != null)
    {
        if (line.StartsWith(headerTag, StringComparison.Ordinal))
        {
            index.BeginSection(JasixCommons.HeaderSectionTag, previousPosition);
            Console.WriteLine($"section:{JasixCommons.HeaderSectionTag} starts at {previousPosition}");
        }

        // the header ends on the line that opens the positions array
        if (line.EndsWith(searchTag, StringComparison.Ordinal))
        {
            index.EndSection(JasixCommons.HeaderSectionTag, previousPosition);
            Console.WriteLine($"section:{JasixCommons.HeaderSectionTag} ends at {previousPosition}");
            break;
        }

        previousPosition = _reader.Position;
    }
}
/// <summary>
/// Indexes every entry of the positions section, starting at the reader's current location.
/// </summary>
/// <param name="index">Index that receives the section boundaries and one entry per line.</param>
/// <returns>
/// The line starting with ']' that terminated the section, or null when the reader ran out
/// of lines before such a line was seen.
/// </returns>
private string IndexPositions(JasixIndex index)
{
    // the location has to be captured before the line itself is read
    long entryOffset = _reader.Position;
    index.BeginSection(JasixCommons.PositionsSectionTag, entryOffset);
    Console.WriteLine($"section:{JasixCommons.PositionsSectionTag} starts at {entryOffset}");

    string lastChromosome = "";
    int lastStart = 0;

    string currentLine = _reader.ReadLine();
    while (currentLine != null)
    {
        if (currentLine.OptimizedStartsWith(']'))
        {
            index.EndSection(JasixCommons.PositionsSectionTag, entryOffset);
            Console.WriteLine($"section:{JasixCommons.PositionsSectionTag} ends at {entryOffset}");
            return (currentLine);
        }

        string entry = currentLine.TrimEnd(',');
        (string chr, int position, int end) = GetChromPosition(entry);
        CheckSorting(chr, position, lastChromosome, lastStart);
        index.Add(chr, position, end, entryOffset);

        // remember where the next line starts before reading it
        entryOffset    = _reader.Position;
        lastChromosome = chr;
        lastStart      = position;

        currentLine = _reader.ReadLine();
    }

    return (null);
}
/// <summary>
/// Builds an index for the cosmicv72 indels resource into memory, reads it back and checks
/// the first-variant locations for queries on four different chromosomes.
/// </summary>
public void IndexCreation_multiChromosome()
{
    var compressedJson = ResourceUtilities.GetReadStream(Resources.TopPath("cosmicv72.indels.json.gz"));
    var jsonStream     = new BlockGZipStream(compressedJson, CompressionMode.Decompress);
    var indexBuffer    = new MemoryStream();

    using (var creator = new IndexCreator(jsonStream, indexBuffer))
    {
        creator.CreateIndex();
    }

    JasixIndex index;
    var indexReadStream = new MemoryStream(indexBuffer.ToArray());
    using (indexReadStream)
    {
        indexReadStream.Seek(0, SeekOrigin.Begin);
        index = new JasixIndex(indexReadStream);
    }

    Assert.Equal(2268,         index.GetFirstVariantPosition("chr1", 9775924, 9775924));
    Assert.Equal(14035925971,  index.GetFirstVariantPosition("chr2", 16081096, 16081096));
    Assert.Equal(433156622693, index.GetFirstVariantPosition("chr20", 36026164, 36026164));
    Assert.Equal(439602269527, index.GetFirstVariantPosition("chrX", 66765044, 66765044));
}
/// <summary>
/// Builds an in-memory block-compressed JSON stream (header, a positions section for one
/// chromosome, footer) together with the matching Jasix index stream.
/// </summary>
/// <param name="chromNumber">Number used both in the "chr{n}" name and to scale the positions written.</param>
/// <returns>Both streams, each rewound to position 0.</returns>
private static (Stream jsonStream, Stream jasixStream) GetNirvanaJsonStreamWithoutGenes(int chromNumber)
{
    var jsonBuffer  = new MemoryStream();
    var indexBuffer = new MemoryStream();

    using (var compressor = new BlockGZipStream(jsonBuffer, CompressionMode.Compress, true))
    using (var jsonWriter = new BgzipTextWriter(compressor))
    using (var index = new JasixIndex())
    {
        jsonWriter.Write(NirvanaHeader);
        jsonWriter.Flush();
        index.BeginSection(JasixCommons.PositionsSectionTag, jsonWriter.Position);

        int firstPosition = 100 * chromNumber;
        int positionLimit = 123 * chromNumber;
        for (int position = firstPosition; position < positionLimit; position++)
        {
            jsonWriter.WriteLine($"{{\"chromosome\":\"chr{chromNumber}\",\"position\":{position}}},");

            // flushing here creates another block
            if (position % 50 == 0)
            {
                jsonWriter.Flush();
            }
        }

        // final entry is written without the trailing comma
        int closingPosition = 100 * chromNumber + 25;
        jsonWriter.WriteLine($"{{\"chromosome\":\"chr{chromNumber}\",\"position\":{closingPosition}}}");
        jsonWriter.Flush();
        index.EndSection(JasixCommons.PositionsSectionTag, jsonWriter.Position);

        jsonWriter.Write(NirvanaFooter);
        index.Write(indexBuffer);
    }

    jsonBuffer.Position  = 0;
    indexBuffer.Position = 0;
    return (jsonBuffer, indexBuffer);
}
/// <summary>
/// Builds the index for the section named by <c>SectionToIndex</c>, writes it to
/// <c>_writeStream</c> and prints peak memory usage and elapsed time to the console.
/// </summary>
/// <remarks>
/// Tag matching is ordinal: the tags are JSON syntax, so culture-specific comparison rules
/// must not influence which lines are recognised.
/// </remarks>
public void CreateIndex()
{
    var searchTag = $"\"{SectionToIndex}\":[";
    const string headerTag = "{\"header\":";
    var index = new JasixIndex();
    string line;

    // skipping lines before the sectionToIndex arrives
    while ((line = _reader.ReadLine()) != null)
    {
        if (line.StartsWith(headerTag, StringComparison.Ordinal))
        {
            index.HeaderLine = ExtractHeader(line);
        }

        if (line.EndsWith(searchTag, StringComparison.Ordinal))
        {
            break;
        }
    }

    // we need the location before accessing the line
    var fileLoc = _reader.Position;
    string previousChr = "";
    int previousPos = 0;

    while ((line = _reader.ReadLine()) != null)
    {
        // a line starting with ']' closes the indexed section
        if (line.StartsWith("]", StringComparison.Ordinal))
        {
            break;
        }

        line = line.TrimEnd(',');
        var chrPos = GetChromPosition(line);
        CheckFileSorted(chrPos.chr, chrPos.position, previousChr, previousPos);
        index.Add(chrPos.chr, chrPos.position, chrPos.end, fileLoc);

        fileLoc = _reader.Position;
        previousChr = chrPos.chr;
        previousPos = chrPos.position;
    }

    index.Write(_writeStream);
    Console.WriteLine();

    var peakMemoryUsageBytes = MemoryUtilities.GetPeakMemoryUsage();
    var wallTimeSpan = _benchmark.GetElapsedTime();
    Console.WriteLine();

    if (peakMemoryUsageBytes > 0)
    {
        Console.WriteLine("Peak memory usage: {0}", MemoryUtilities.ToHumanReadable(peakMemoryUsageBytes));
    }

    Console.WriteLine("Time: {0}", Benchmark.ToHumanReadable(wallTimeSpan));
}
/// <summary>
/// Copies compressed blocks from <paramref name="jsonStream"/> to <paramref name="writer"/>:
/// the first block is written only while <c>_isFirstHeaderBlock</c> is set, and later blocks
/// are written when their shifted reader position lies strictly between the start of the
/// positions section and the start of the genes section.
/// </summary>
/// <param name="jsonStream">Block-compressed JSON input.</param>
/// <param name="jasixStream">Stream holding the Jasix index for <paramref name="jsonStream"/>.</param>
/// <param name="writer">Receives the selected compressed blocks.</param>
/// <returns>Number of blocks written, not counting the first (header) block.</returns>
private static int WritePositionBlocks(Stream jsonStream, Stream jasixStream, BinaryWriter writer)
{
    int writtenBlocks = 0;

    using (var blockReader = new BgzBlockReader(jsonStream, true))
    using (var index = new JasixIndex(jasixStream))
    {
        var positionsStart = index.GetSectionBegin(JasixCommons.PositionsSectionTag);
        var genesStart     = index.GetSectionBegin(JasixCommons.GenesSectionTag);
        var genesEnd       = index.GetSectionEnd(JasixCommons.GenesSectionTag);

        bool atFirstBlock = true;

        while (true)
        {
            int blockLength = blockReader.ReadCompressedBlock(BgzBlock);

            if (atFirstBlock)
            {
                atFirstBlock = false;

                if (_isFirstHeaderBlock)
                {
                    writer.Write(BgzBlock, 0, blockLength);
                    _isFirstHeaderBlock = false;
                }

                if (blockLength <= 0)
                {
                    break;
                }
                continue;
            }

            if (blockLength <= 0)
            {
                break;
            }

            // the 16 bit left shift is due to the format of bgzip file
            var shiftedPosition = blockReader.Position << 16;

            if (shiftedPosition > genesStart && shiftedPosition <= genesEnd)
            {
                // setting back the stream to the gene section begin
                jsonStream.Position = genesStart >> 16;
                return (writtenBlocks);
            }

            bool outsidePositions = shiftedPosition <= positionsStart || shiftedPosition >= genesStart;
            if (outsidePositions)
            {
                continue;
            }

            writtenBlocks++;
            writer.Write(BgzBlock, 0, blockLength);
        }
    }

    return (writtenBlocks);
}
/// <summary>
/// Copies compressed blocks from <paramref name="jsonStream"/> to <paramref name="writer"/>
/// up to and including the last block of the positions section.
/// </summary>
/// <param name="jsonStream">Block-compressed JSON input; repositioned to the genes section (when one is indexed) after the last position block.</param>
/// <param name="jasixStream">Stream holding the Jasix index for <paramref name="jsonStream"/>.</param>
/// <param name="writer">Receives the selected compressed blocks.</param>
/// <returns>
/// Number of blocks written, not counting the first (header) block; 0 when the index has no
/// positions section.
/// </returns>
private int WritePositionBlocks(Stream jsonStream, Stream jasixStream, BinaryWriter writer)
{
    int writtenBlocks = 0;

    using (var blockReader = new BgzBlockReader(jsonStream, true))
    using (var index = new JasixIndex(jasixStream))
    {
        var positionsStart = index.GetSectionBegin(JasixCommons.PositionsSectionTag);
        if (positionsStart == -1)
        {
            // no positions found. and therefore, cannot have genes either.
            return (0);
        }

        var positionsEnd = index.GetSectionEnd(JasixCommons.PositionsSectionTag);
        var genesStart   = index.GetSectionBegin(JasixCommons.GenesSectionTag);

        bool atFirstBlock = true;

        while (true)
        {
            int blockLength = blockReader.ReadCompressedBlock(BgzBlock);
            if (blockLength <= 0)
            {
                break;
            }

            if (atFirstBlock)
            {
                atFirstBlock = false;

                // the first block is emitted only while the header flag is still set
                if (_isFirstHeaderBlock)
                {
                    writer.Write(BgzBlock, 0, blockLength);
                    _isFirstHeaderBlock = false;
                }
                continue;
            }

            // One block sits between the positions and the genes blocks and must be skipped:
            // the block that contains  ],"genes":[...
            // The 16 bit shift is due to the representation of the position in a bgzip file.
            // The check is evaluated before the block is written, as in the original flow.
            bool isLastPositionBlock = blockReader.Position >= (positionsEnd >> 16);

            writtenBlocks++;
            writer.Write(BgzBlock, 0, blockLength);

            if (!isLastPositionBlock)
            {
                continue;
            }

            // we have read the last position block
            if (genesStart != -1)
            {
                jsonStream.Position = genesStart >> 16;
            }
            return (writtenBlocks);
        }
    }

    return (writtenBlocks);
}
/// <summary>
/// Verifies that a section can be begun and ended exactly once, that a second begin or end
/// throws <c>UserErrorException</c>, and that the recorded boundaries can be read back.
/// </summary>
public void Begin_end_section_and_readback()
{
    const string sectionName = "section1";
    var jasixIndex = new JasixIndex();

    jasixIndex.BeginSection(sectionName, 0);
    Assert.Throws<UserErrorException>(() => jasixIndex.BeginSection(sectionName, 1));

    jasixIndex.EndSection(sectionName, 100);
    Assert.Throws<UserErrorException>(() => jasixIndex.EndSection(sectionName, 101));

    Assert.Equal(0, jasixIndex.GetSectionBegin(sectionName));
    Assert.Equal(100, jasixIndex.GetSectionEnd(sectionName));
}
/// <summary>
/// Verifies that a chromosome-only query, parsed by <c>Utilities.ParseQuery</c>, resolves to
/// the file location of the first entry added for that chromosome.
/// </summary>
public void GetFirstVariantPosition_multi_chrom_index()
{
    var jasixIndex = new JasixIndex();

    jasixIndex.Add("chr1", 100, 101, 100000);
    jasixIndex.Add("chr1", 105, 109, 100050);
    jasixIndex.Add("chr1", 160, 166, 100100);
    jasixIndex.Add("chr2", 100, 100, 100150);
    jasixIndex.Add("chr2", 102, 105, 100200);
    jasixIndex.Flush();

    var chr1Query = Utilities.ParseQuery("chr1");
    Assert.Equal(100000, jasixIndex.GetFirstVariantPosition(chr1Query.Item1, chr1Query.Item2, chr1Query.Item3));

    var chr2Query = Utilities.ParseQuery("chr2");
    Assert.Equal(100150, jasixIndex.GetFirstVariantPosition(chr2Query.Item1, chr2Query.Item2, chr2Query.Item3));
}
/// <summary>
/// Writes an index containing small variants and one large variant to memory, reads it back,
/// and checks both first-variant lookups and large-variant lookups.
/// </summary>
public void Write_and_read_back()
{
    var original = new JasixIndex();
    original.Add("chr1", 100, 101, 100000, "1");
    original.Add("chr1", 105, 109, 100050, "1");
    original.Add("chr1", 150, 1000, 100075, "1"); // large variant
    original.Add("chr1", 160, 166, 100100, "1");
    original.Add("chr2", 100, 100, 100150, "2");
    original.Add("chr2", 102, 105, 100200, "2");

    var outputBuffer = new MemoryStream();
    using (outputBuffer)
    {
        original.Write(outputBuffer);
    }

    // the written bytes are copied after the output buffer has been disposed
    byte[] serialized = outputBuffer.ToArray();

    JasixIndex restored;
    var inputBuffer = new MemoryStream(serialized);
    inputBuffer.Seek(0, SeekOrigin.Begin);
    using (inputBuffer)
    {
        restored = new JasixIndex(inputBuffer);
    }

    Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 100, 102));
    Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 103, 104));
    Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 120, 124));
    Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 158, 160));
    Assert.Equal(100150, restored.GetFirstVariantPosition("chr2", 103, 105));

    // checking large variants
    Assert.Null(restored.LargeVariantPositions("chr1", 100, 149));

    var largeVariantLocations = restored.LargeVariantPositions("chr1", 100, 201);
    Assert.NotNull(largeVariantLocations);
    Assert.Single(largeVariantLocations);
    Assert.Equal(100075, largeVariantLocations[0]);
}
/// <summary>
/// Verifies that <c>LargeVariantPositions</c> returns null for a range covering only small
/// variants and returns exactly the one large variant once the range reaches it.
/// </summary>
public void FindLargeVaritants_method_does_not_return_small_variants()
{
    var jasixIndex = new JasixIndex();

    jasixIndex.Add("chr1", 100, 101, 100_000);
    jasixIndex.Add("chr1", 105, 109, 100_050);
    jasixIndex.Add("chr1", 160, 166, 100_100);
    jasixIndex.Add("chr1", 200, 1000, 100_075); // large variant
    jasixIndex.Add("chr2", 100, 100, 100_150);
    jasixIndex.Add("chr2", 102, 105, 100_200);
    jasixIndex.Flush();

    // range ends just before the large variant starts
    var beforeLargeVariant = jasixIndex.LargeVariantPositions("chr1", 100, 199);
    Assert.Null(beforeLargeVariant);

    // range reaches into the large variant
    var overlappingLargeVariant = jasixIndex.LargeVariantPositions("chr1", 100, 201);
    Assert.NotNull(overlappingLargeVariant);
    Assert.Single(overlappingLargeVariant);
    Assert.Equal(100075, overlappingLargeVariant[0]);
}
/// <summary>
/// Writes an index to a temporary file, reads it back, and checks first-variant and
/// large-variant lookups on the restored index.
/// </summary>
/// <remarks>
/// The temporary file is removed in a <c>finally</c> block so it does not linger when writing
/// or reading the index throws.
/// </remarks>
public void IndexWriteRead()
{
    var index = new JasixIndex();
    index.Add("chr1", 100, 101, 100000);
    index.Add("chr1", 105, 109, 100050);
    index.Add("chr1", 150, 1000, 100075); // large variant
    index.Add("chr1", 160, 166, 100100);
    index.Add("chr2", 100, 100, 100150);
    index.Add("chr2", 102, 105, 100200);

    var tempFile = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
    JasixIndex readBackIndex;

    try
    {
        using (var writer = FileUtilities.GetCreateStream(tempFile))
        {
            index.Write(writer);
        }

        using (var stream = FileUtilities.GetReadStream(tempFile))
        {
            readBackIndex = new JasixIndex(stream);
        }
    }
    finally
    {
        // File.Delete is a no-op when the file was never created
        File.Delete(tempFile);
    }

    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 103, 104));
    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 120, 124));
    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 158, 160));
    Assert.Equal(100150, readBackIndex.GetFirstVariantPosition("chr2", 103, 105));

    // checking large variants
    Assert.Null(readBackIndex.LargeVariantPositions("chr1", 100, 149));

    var largeVariants = readBackIndex.LargeVariantPositions("chr1", 100, 201);
    Assert.NotNull(largeVariants);
    Assert.Single(largeVariants);
    Assert.Equal(100075, largeVariants[0]);
}
/// <summary>
/// Adds 2 * <c>JasixCommons.PreferredNodeCount</c> consecutive chr1 entries followed by a few
/// more entries, then checks which file location each query resolves to.
/// </summary>
public void Add_fill_node_and_start_another()
{
    var nodeSize   = JasixCommons.PreferredNodeCount;
    var twoNodes   = 2 * nodeSize;
    var jasixIndex = new JasixIndex();

    // enough consecutive entries to fill two nodes (per the original author's note)
    for (var offset = 0; offset < twoNodes; offset++)
    {
        jasixIndex.Add("chr1", 100 + offset, 101 + offset, 100_000 + offset);
    }

    jasixIndex.Add("chr1", 160 + twoNodes, 166 + twoNodes, 200_100);
    jasixIndex.Add("chr2", 100, 100, 200_150);
    jasixIndex.Add("chr2", 102, 105, 200_200);
    jasixIndex.Flush();

    Assert.Equal(100_000, jasixIndex.GetFirstVariantPosition("chr1", 100, 102));
    Assert.Equal(100_000 + nodeSize, jasixIndex.GetFirstVariantPosition("chr1", twoNodes + 55, twoNodes + 55));
    Assert.Equal(-1, jasixIndex.GetFirstVariantPosition("chr1", twoNodes + 120, twoNodes + 124));
    Assert.Equal(200_100, jasixIndex.GetFirstVariantPosition("chr1", twoNodes + 158, twoNodes + 160));
    Assert.Equal(200_150, jasixIndex.GetFirstVariantPosition("chr2", 103, 105));
}
/// <summary>
/// Builds an index for the cosmicv72 indels resource into a temporary file, reads it back and
/// checks the first-variant locations for queries on four different chromosomes.
/// </summary>
/// <remarks>
/// The temporary file is removed in a <c>finally</c> block so that a failing assertion or an
/// exception during index creation does not leave it behind.
/// </remarks>
public void TestIndexCreation()
{
    var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("cosmicv72.indels.json.gz")), CompressionMode.Decompress);
    var tempFile = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    try
    {
        using (var indexCreator = new IndexCreator(readStream, FileUtilities.GetCreateStream(tempFile)))
        {
            indexCreator.CreateIndex();
        }

        JasixIndex readBackIndex;
        using (var stream = FileUtilities.GetReadStream(tempFile))
        {
            readBackIndex = new JasixIndex(stream);
        }

        Assert.Equal(1591, readBackIndex.GetFirstVariantPosition("chr1", 9775924, 9775924));
        Assert.Equal(11500956299, readBackIndex.GetFirstVariantPosition("chr2", 16081096, 16081096));
        Assert.Equal(372100991296, readBackIndex.GetFirstVariantPosition("chr20", 36026164, 36026164));
        Assert.Equal(377682846863, readBackIndex.GetFirstVariantPosition("chrX", 66765044, 66765044));
    }
    finally
    {
        // File.Delete is a no-op when the file was never created
        File.Delete(tempFile);
    }
}
public void Combination_of_large_and_small_variants() { var index = new JasixIndex(); //query range 10,000- 10,020 index.Add("chr1", 8_000, 9_900, 90_000); //SV not overlapping the query index.Add("chr1", 9_000, 10_005, 90_100); // partially overlapping index.Add("chr1", 9_500, 10_050, 90_200); //completely overlapping index.Add("chr1", 10_000, 10_001, 100_000); index.Add("chr1", 10_004, 10_006, 100_100); index.Add("chr1", 10_009, 10_550, 100_200);//SV starting from the middle of the range index.Add("chr1", 10_008, 10_010, 100_300); index.Add("chr1", 10_011, 10_020, 100_400); index.Add("chr1", 10_039, 10_550, 100_200);//SV past the range index.Flush(); var firstSmallVarLocation = index.GetFirstVariantPosition("chr1", 10_000, 10_020); var largeVariantLocations = index.LargeVariantPositions("chr1", 10_000, 10_020); Assert.Equal(90_000, firstSmallVarLocation); Assert.True(largeVariantLocations.SequenceEqual(new List <long> { 90_100, 90_200, 100_200 }));
/// <summary>
/// Records the begin and end locations of the genes section in <paramref name="index"/>,
/// starting from the last line already consumed by the caller.
/// </summary>
/// <param name="lastLine">
/// The most recently read line; when null nothing is indexed.
/// </param>
/// <param name="index">Index that receives the genes section boundaries.</param>
/// <remarks>
/// Tag matching is ordinal because the tags are JSON syntax rather than linguistic text.
/// </remarks>
private void IndexGenes(string lastLine, JasixIndex index)
{
    if (lastLine == null)
    {
        return;
    }

    // loop-invariant: build the opening tag once rather than on every line
    string genesOpeningTag = $",\"{JasixCommons.GenesSectionTag}\":[";

    do
    {
        // reader location after the line under inspection has been consumed
        long linePosition = _reader.Position;

        if (lastLine.EndsWith(genesOpeningTag, StringComparison.Ordinal))
        {
            index.BeginSection(JasixCommons.GenesSectionTag, linePosition);
            Console.WriteLine($"section:{JasixCommons.GenesSectionTag} starts at {linePosition}");
        }

        if (lastLine.EndsWith("]}", StringComparison.Ordinal))
        {
            index.EndSection(JasixCommons.GenesSectionTag, linePosition);
            Console.WriteLine($"section:{JasixCommons.GenesSectionTag} ends at {linePosition}");
            break;
        }
    } while ((lastLine = _reader.ReadLine()) != null);
}
/// <summary>
/// Indexes the header, positions and genes sections in that order, writes the resulting index
/// to <c>_writeStream</c>, and prints peak memory usage and elapsed time to the console.
/// </summary>
public void CreateIndex()
{
    var jasixIndex = new JasixIndex();

    IndexHeader(jasixIndex);
    string lineAfterPositions = IndexPositions(jasixIndex);
    IndexGenes(lineAfterPositions, jasixIndex);

    jasixIndex.Write(_writeStream);
    Console.WriteLine();

    long peakBytes = MemoryUtilities.GetPeakMemoryUsage();
    var elapsed    = _benchmark.GetElapsedTime();
    Console.WriteLine();

    // peak memory is reported only when a positive value is available
    bool hasPeakMemory = peakBytes > 0;
    if (hasPeakMemory)
    {
        string peakText = MemoryUtilities.ToHumanReadable(peakBytes);
        Console.WriteLine("Peak memory usage: {0}", peakText);
    }

    Console.WriteLine("Time: {0}", Benchmark.ToHumanReadable(elapsed));
}
/// <summary>
/// Creates an index creator that starts with an empty <c>JasixIndex</c> and keeps
/// <paramref name="indexStream"/> for later use.
/// </summary>
/// <param name="indexStream">Stream associated with the index being built.</param>
public OnTheFlyIndexCreator(Stream indexStream)
{
    _jasixIndex  = new JasixIndex();
    _indexStream = indexStream;
}