// Indexes a single mocked chr1 position through OnTheFlyIndexCreator, reads the
// serialized index back and verifies the section start and the variant's location.
public void Add_one_chrom()
        {
            const string sectionName = "positions";

            var mockPosition = new Mock<IPosition>();
            mockPosition.SetupGet(p => p.Chromosome).Returns(ChromosomeUtilities.Chr1);
            mockPosition.SetupGet(p => p.Start).Returns(100);
            mockPosition.SetupGet(p => p.RefAllele).Returns("A");
            mockPosition.SetupGet(p => p.AltAlleles).Returns(new[] { "C" });

            var writeStream = new MemoryStream();
            using (var creator = new OnTheFlyIndexCreator(writeStream))
            {
                creator.BeginSection(sectionName, 100);
                creator.Add(mockPosition.Object, 2588);
                creator.EndSection(sectionName, 2699);
            }

            // copy the written bytes into a fresh stream for reading the index back
            byte[] indexBytes = writeStream.ToArray();
            var    readStream = new MemoryStream(indexBytes);
            readStream.Seek(0, SeekOrigin.Begin);

            var readBackIndex = new JasixIndex(readStream);

            Assert.Equal(100, readBackIndex.GetSectionBegin(sectionName));
            Assert.Equal(2588, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
        }
 /// <summary>
 /// Creates a query processor over a JSON reader and the stream holding its index.
 /// </summary>
 /// <param name="jsonReader">reader kept for later use</param>
 /// <param name="indexStream">stream from which the <c>JasixIndex</c> is constructed</param>
 /// <param name="writer">output writer; when null, a writer over standard output is created</param>
 public QueryProcessor(StreamReader jsonReader, Stream indexStream, StreamWriter writer = null)
 {
     if (writer == null)
     {
         writer = new StreamWriter(Console.OpenStandardOutput());
     }

     _jsonReader  = jsonReader;
     _writer      = writer;
     _indexStream = indexStream;
     _jasixIndex  = new JasixIndex(indexStream);
 }
Beispiel #3
0
        /// <summary>
        /// Reads lines from the reader's current position and records in <paramref name="index"/>
        /// where the header section begins and ends. Reading stops at the first line that ends
        /// with the opening tag of the positions section, or at end of input.
        /// </summary>
        /// <param name="index">index that receives the header section boundaries</param>
        private void IndexHeader(JasixIndex index)
        {
            string headerTag    = $"{{\"{JasixCommons.HeaderSectionTag}\":";
            string positionsTag = $"\"{JasixCommons.PositionsSectionTag}\":[";

            // offset of the start of the line that is about to be read
            long lineStart = _reader.Position;

            string line;
            while ((line = _reader.ReadLine()) != null)
            {
                // the tags are machine-generated JSON tokens: compare ordinally, not by current culture
                if (line.StartsWith(headerTag, StringComparison.Ordinal))
                {
                    index.BeginSection(JasixCommons.HeaderSectionTag, lineStart);
                    Console.WriteLine($"section:{JasixCommons.HeaderSectionTag} starts at {lineStart}");
                }

                if (line.EndsWith(positionsTag, StringComparison.Ordinal))
                {
                    // the header ends at the start of the line that opens the positions section
                    index.EndSection(JasixCommons.HeaderSectionTag, lineStart);
                    Console.WriteLine($"section:{JasixCommons.HeaderSectionTag} ends at {lineStart}");
                    break;
                }

                lineStart = _reader.Position;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Records the start of the positions section at the reader's current offset, then adds
        /// every position line to <paramref name="index"/> until a line starting with ']' closes
        /// the section.
        /// </summary>
        /// <param name="index">index that receives the section boundaries and the entries</param>
        /// <returns>the line that closed the section, or null if the input ended first</returns>
        private string IndexPositions(JasixIndex index)
        {
            // the offset has to be captured before the line is read
            long lineStart = _reader.Position;

            index.BeginSection(JasixCommons.PositionsSectionTag, lineStart);
            Console.WriteLine($"section:{JasixCommons.PositionsSectionTag} starts at {lineStart}");

            string lastChromosome = "";
            int    lastPosition   = 0;

            string line = _reader.ReadLine();
            while (line != null)
            {
                if (line.OptimizedStartsWith(']'))
                {
                    index.EndSection(JasixCommons.PositionsSectionTag, lineStart);
                    Console.WriteLine($"section:{JasixCommons.PositionsSectionTag} ends at {lineStart}");
                    break;
                }

                // entries are separated by a trailing comma, which is dropped before parsing
                (string chr, int position, int end) = GetChromPosition(line.TrimEnd(','));

                CheckSorting(chr, position, lastChromosome, lastPosition);
                index.Add(chr, position, end, lineStart);

                lastChromosome = chr;
                lastPosition   = position;
                lineStart      = _reader.Position;

                line = _reader.ReadLine();
            }

            return line;
        }
Beispiel #5
0
        // Builds an index for the cosmicv72 indels resource into memory, reads it back and
        // checks the location of the first variant for queries on several chromosomes.
        public void IndexCreation_multiChromosome()
        {
            var resourceStream = ResourceUtilities.GetReadStream(Resources.TopPath("cosmicv72.indels.json.gz"));
            var jsonStream     = new BlockGZipStream(resourceStream, CompressionMode.Decompress);
            var indexStream    = new MemoryStream();

            using (var creator = new IndexCreator(jsonStream, indexStream))
            {
                creator.CreateIndex();
            }

            JasixIndex index;
            using (var readStream = new MemoryStream(indexStream.ToArray()))
            {
                readStream.Seek(0, SeekOrigin.Begin);
                index = new JasixIndex(readStream);
            }

            Assert.Equal(2268, index.GetFirstVariantPosition("chr1", 9775924, 9775924));
            Assert.Equal(14035925971, index.GetFirstVariantPosition("chr2", 16081096, 16081096));
            Assert.Equal(433156622693, index.GetFirstVariantPosition("chr20", 36026164, 36026164));
            Assert.Equal(439602269527, index.GetFirstVariantPosition("chrX", 66765044, 66765044));
        }
Beispiel #6
0
        /// <summary>
        /// Writes an in-memory, block-compressed JSON document (header, a positions section for
        /// one chromosome, footer) together with a Jasix index that records the positions
        /// section boundaries.
        /// </summary>
        /// <param name="chromNumber">number used for the chromosome name and to derive the positions</param>
        /// <returns>the JSON stream and the index stream, both rewound to position 0</returns>
        private static (Stream jsonStream, Stream jasixStream) GetNirvanaJsonStreamWithoutGenes(int chromNumber)
        {
            var compressedJson = new MemoryStream();
            var indexStream    = new MemoryStream();

            using (var bgzip = new BlockGZipStream(compressedJson, CompressionMode.Compress, true))
                using (var textWriter = new BgzipTextWriter(bgzip))
                    using (var index = new JasixIndex())
                    {
                        textWriter.Write(NirvanaHeader);
                        textWriter.Flush();
                        index.BeginSection(JasixCommons.PositionsSectionTag, textWriter.Position);

                        int firstPosition = 100 * chromNumber;
                        int positionLimit = 123 * chromNumber;
                        for (int position = firstPosition; position < positionLimit; position++)
                        {
                            textWriter.WriteLine($"{{\"chromosome\":\"chr{chromNumber}\",\"position\":{position}}},");
                            if (position % 50 != 0)
                            {
                                continue;
                            }

                            // flushing here starts another block
                            textWriter.Flush();
                        }

                        // final entry of the section, written without a trailing comma
                        textWriter.WriteLine($"{{\"chromosome\":\"chr{chromNumber}\",\"position\":{100 * chromNumber + 25}}}");
                        textWriter.Flush();
                        index.EndSection(JasixCommons.PositionsSectionTag, textWriter.Position);

                        textWriter.Write(NirvanaFooter);
                        index.Write(indexStream);
                    }

            compressedJson.Position = 0;
            indexStream.Position    = 0;
            return (compressedJson, indexStream);
        }
Beispiel #7
0
        /// <summary>
        /// Builds a Jasix index from the JSON reader: stores the extracted header line, adds one
        /// entry per line of the section named by <c>SectionToIndex</c>, writes the index to the
        /// output stream and reports peak memory usage and elapsed time on the console.
        /// </summary>
        public void CreateIndex()
        {
            var          searchTag = $"\"{SectionToIndex}\":[";
            const string headerTag = "{\"header\":";
            var          index     = new JasixIndex();
            string       line;

            // skip everything before the section to index, picking up the header on the way;
            // tags are machine-generated JSON tokens, so they are compared ordinally
            while ((line = _reader.ReadLine()) != null)
            {
                if (line.StartsWith(headerTag, StringComparison.Ordinal))
                {
                    index.HeaderLine = ExtractHeader(line);
                }

                if (line.EndsWith(searchTag, StringComparison.Ordinal))
                {
                    break;
                }
            }

            // we need the location before accessing the line
            var fileLoc = _reader.Position;

            string previousChr = "";
            int    previousPos = 0;

            while ((line = _reader.ReadLine()) != null)
            {
                // a line starting with ']' closes the indexed section
                if (line.StartsWith("]", StringComparison.Ordinal))
                {
                    break;
                }

                line = line.TrimEnd(',');
                var chrPos = GetChromPosition(line);

                CheckFileSorted(chrPos.chr, chrPos.position, previousChr, previousPos);

                index.Add(chrPos.chr, chrPos.position, chrPos.end, fileLoc);
                fileLoc     = _reader.Position;
                previousChr = chrPos.chr;
                previousPos = chrPos.position;
            }

            index.Write(_writeStream);

            Console.WriteLine();

            var peakMemoryUsageBytes = MemoryUtilities.GetPeakMemoryUsage();
            var wallTimeSpan         = _benchmark.GetElapsedTime();

            Console.WriteLine();
            if (peakMemoryUsageBytes > 0)
            {
                Console.WriteLine("Peak memory usage: {0}", MemoryUtilities.ToHumanReadable(peakMemoryUsageBytes));
            }
            Console.WriteLine("Time: {0}", Benchmark.ToHumanReadable(wallTimeSpan));
        }
Beispiel #8
0
        /// <summary>
        /// Reads compressed blocks from <paramref name="jsonStream"/> and copies selected ones to
        /// <paramref name="writer"/>, using the section offsets stored in the index read from
        /// <paramref name="jasixStream"/>.
        /// </summary>
        /// <param name="jsonStream">block-compressed JSON input; its Position is reset to the genes section start when that section is reached</param>
        /// <param name="jasixStream">stream from which the JasixIndex is constructed</param>
        /// <param name="writer">destination for the copied blocks</param>
        /// <returns>number of blocks written after the first block (the first block is never counted)</returns>
        private static int WritePositionBlocks(Stream jsonStream, Stream jasixStream,
                                               BinaryWriter writer)
        {
            var blockCount = 0;

            using (var reader = new BgzBlockReader(jsonStream, true))
                using (var jasixIndex = new JasixIndex(jasixStream))
                {
                    int count;
                    var isFirstBlock         = true;
                    var positionSectionBegin = jasixIndex.GetSectionBegin(JasixCommons.PositionsSectionTag);
                    var geneSectionBegin     = jasixIndex.GetSectionBegin(JasixCommons.GenesSectionTag);
                    var geneSectionEnd       = jasixIndex.GetSectionEnd(JasixCommons.GenesSectionTag);
                    do
                    {
                        count = reader.ReadCompressedBlock(BgzBlock);
                        if (isFirstBlock)
                        {
                            // the first block is written only while _isFirstHeaderBlock is set,
                            // and the flag is cleared once that has happened
                            if (_isFirstHeaderBlock)
                            {
                                writer.Write(BgzBlock, 0, count);
                                _isFirstHeaderBlock = false;
                            }

                            isFirstBlock = false;
                        }
                        else
                        {
                            // 'continue' jumps to the loop condition, so a non-positive count ends the loop
                            if (count <= 0)
                            {
                                continue;
                            }
                            // the 16 bit left shift is due to the format of bgzip file
                            if (reader.Position << 16 > geneSectionBegin && reader.Position << 16 <= geneSectionEnd)
                            {
                                //setting back the stream to the gene section begin
                                jsonStream.Position = geneSectionBegin >> 16;
                                return(blockCount);
                            }


                            // skip blocks at or before the positions section start, or at or past the genes section start
                            // NOTE(review): what GetSectionBegin returns for a missing genes section is not visible
                            // here; if it is negative, the second comparison is true for every block - confirm
                            if (reader.Position << 16 <= positionSectionBegin ||
                                reader.Position << 16 >= geneSectionBegin)
                            {
                                continue;
                            }
                            blockCount++;
                            writer.Write(BgzBlock, 0, count);
                        }
                    } while (count > 0);
                }

            return(blockCount);
        }
Beispiel #9
0
        /// <summary>
        /// Copies compressed blocks from <paramref name="jsonStream"/> to <paramref name="writer"/>
        /// until the block that reaches the end of the positions section has been written.
        /// The first block is written only while <c>_isFirstHeaderBlock</c> is set and is never counted.
        /// </summary>
        /// <param name="jsonStream">block-compressed JSON input; repositioned to the genes section start when one is indexed</param>
        /// <param name="jasixStream">stream from which the JasixIndex is constructed</param>
        /// <param name="writer">destination for the copied blocks</param>
        /// <returns>number of blocks written after the first block; 0 when no positions section is indexed</returns>
        private int WritePositionBlocks(Stream jsonStream, Stream jasixStream,
                                        BinaryWriter writer)
        {
            var written = 0;

            using (var blockReader = new BgzBlockReader(jsonStream, true))
                using (var index = new JasixIndex(jasixStream))
                {
                    var positionsStart = index.GetSectionBegin(JasixCommons.PositionsSectionTag);
                    if (positionsStart == -1)
                    {
                        // no positions found, and therefore there cannot be genes either
                        return 0;
                    }

                    var positionsEnd = index.GetSectionEnd(JasixCommons.PositionsSectionTag);
                    var genesStart   = index.GetSectionBegin(JasixCommons.GenesSectionTag);

                    var headerBlockPending = true;
                    int count;

                    while ((count = blockReader.ReadCompressedBlock(BgzBlock)) > 0)
                    {
                        if (headerBlockPending)
                        {
                            headerBlockPending = false;

                            if (_isFirstHeaderBlock)
                            {
                                writer.Write(BgzBlock, 0, count);
                                _isFirstHeaderBlock = false;
                            }

                            continue;
                        }

                        // This check is needed because one block sits between the positions and the genes
                        // blocks and must be skipped: the block that contains ],"genes":[...
                        // The 16 bit shift is due to the representation of the position in a bgzip file.
                        // The test is evaluated before the block is written, then acted on afterwards.
                        bool isLastPositionBlock = blockReader.Position >= positionsEnd >> 16;

                        written++;
                        writer.Write(BgzBlock, 0, count);

                        if (!isLastPositionBlock)
                        {
                            continue;
                        }

                        // the last position block has been written
                        if (genesStart != -1)
                        {
                            jsonStream.Position = genesStart >> 16;
                        }

                        return written;
                    }
                }

            return written;
        }
Beispiel #10
0
        // A section can be begun and ended once; a second BeginSection or EndSection for the
        // same name throws, and the first recorded boundaries are what is read back.
        public void Begin_end_section_and_readback()
        {
            const string sectionName = "section1";
            var          jasixIndex  = new JasixIndex();

            Action beginAgain = () => jasixIndex.BeginSection(sectionName, 1);
            Action endAgain   = () => jasixIndex.EndSection(sectionName, 101);

            jasixIndex.BeginSection(sectionName, 0);
            Assert.Throws<UserErrorException>(beginAgain);

            jasixIndex.EndSection(sectionName, 100);
            Assert.Throws<UserErrorException>(endAgain);

            Assert.Equal(0, jasixIndex.GetSectionBegin(sectionName));
            Assert.Equal(100, jasixIndex.GetSectionEnd(sectionName));
        }
Beispiel #11
0
        // Queries parsed from a bare chromosome name should resolve to the location of the
        // first entry that was added for that chromosome.
        public void GetFirstVariantPosition_multi_chrom_index()
        {
            var jasixIndex = new JasixIndex();

            // chr1 entries
            jasixIndex.Add("chr1", 100, 101, 100000);
            jasixIndex.Add("chr1", 105, 109, 100050);
            jasixIndex.Add("chr1", 160, 166, 100100);
            // chr2 entries
            jasixIndex.Add("chr2", 100, 100, 100150);
            jasixIndex.Add("chr2", 102, 105, 100200);

            jasixIndex.Flush();

            var query = Utilities.ParseQuery("chr1");
            var chr1Location = jasixIndex.GetFirstVariantPosition(query.Item1, query.Item2, query.Item3);
            Assert.Equal(100000, chr1Location);

            query = Utilities.ParseQuery("chr2");
            var chr2Location = jasixIndex.GetFirstVariantPosition(query.Item1, query.Item2, query.Item3);
            Assert.Equal(100150, chr2Location);
        }
Beispiel #12
0
        // Serializes an index with small variants and one large variant to memory, reads it
        // back, and checks both first-variant lookups and large-variant lookups.
        public void Write_and_read_back()
        {
            var original = new JasixIndex();

            original.Add("chr1", 100, 101, 100000, "1");
            original.Add("chr1", 105, 109, 100050, "1");
            original.Add("chr1", 150, 1000, 100075, "1"); // large variant
            original.Add("chr1", 160, 166, 100100, "1");
            original.Add("chr2", 100, 100, 100150, "2");
            original.Add("chr2", 102, 105, 100200, "2");

            var memoryStream = new MemoryStream();
            using (memoryStream)
            {
                original.Write(memoryStream);
            }

            // the written bytes are copied out after the write stream has been disposed
            byte[] serialized = memoryStream.ToArray();

            JasixIndex readBackIndex;
            using (var readStream = new MemoryStream(serialized))
            {
                readStream.Seek(0, SeekOrigin.Begin);
                readBackIndex = new JasixIndex(readStream);
            }

            // first-variant lookups
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 103, 104));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 120, 124));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 158, 160));
            Assert.Equal(100150, readBackIndex.GetFirstVariantPosition("chr2", 103, 105));

            // large-variant lookups: a range ending before the large variant yields null
            Assert.Null(readBackIndex.LargeVariantPositions("chr1", 100, 149));

            var overlappingLargeVariants = readBackIndex.LargeVariantPositions("chr1", 100, 201);
            Assert.NotNull(overlappingLargeVariants);
            Assert.Single(overlappingLargeVariants);
            Assert.Equal(100075, overlappingLargeVariants[0]);
        }
Beispiel #13
0
        // LargeVariantPositions should report only the large variant, and only for a query
        // range that reaches it; the small variants in the range are not returned.
        public void FindLargeVaritants_method_does_not_return_small_variants()
        {
            var jasixIndex = new JasixIndex();

            jasixIndex.Add("chr1", 100, 101, 100_000);
            jasixIndex.Add("chr1", 105, 109, 100_050);
            jasixIndex.Add("chr1", 160, 166, 100_100);
            jasixIndex.Add("chr1", 200, 1000, 100_075); // large variant
            jasixIndex.Add("chr2", 100, 100, 100_150);
            jasixIndex.Add("chr2", 102, 105, 100_200);

            jasixIndex.Flush();

            // a range that ends before the large variant starts yields null
            var beforeLargeVariant = jasixIndex.LargeVariantPositions("chr1", 100, 199);
            Assert.Null(beforeLargeVariant);

            // a range that reaches the large variant yields exactly its location
            var overlapping = jasixIndex.LargeVariantPositions("chr1", 100, 201);
            Assert.NotNull(overlapping);
            Assert.Single(overlapping);
            Assert.Equal(100075, overlapping[0]);
        }
Beispiel #14
0
        // Writes an index to a temporary file, reads it back and checks first-variant and
        // large-variant lookups. The temporary file is removed even when writing or reading
        // throws.
        public void IndexWriteRead()
        {
            var index = new JasixIndex();

            index.Add("chr1", 100, 101, 100000);
            index.Add("chr1", 105, 109, 100050);
            index.Add("chr1", 150, 1000, 100075);//large variant
            index.Add("chr1", 160, 166, 100100);
            index.Add("chr2", 100, 100, 100150);
            index.Add("chr2", 102, 105, 100200);

            var tempFile = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            JasixIndex readBackIndex;

            try
            {
                using (var writer = FileUtilities.GetCreateStream(tempFile))
                {
                    index.Write(writer);
                }

                using (var stream = FileUtilities.GetReadStream(tempFile))
                {
                    readBackIndex = new JasixIndex(stream);
                }
            }
            finally
            {
                // File.Delete does not throw when the file does not exist, so this is safe
                // even if creating the file failed
                File.Delete(tempFile);
            }

            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 103, 104));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 120, 124));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 158, 160));
            Assert.Equal(100150, readBackIndex.GetFirstVariantPosition("chr2", 103, 105));

            //checking large variants
            Assert.Null(readBackIndex.LargeVariantPositions("chr1", 100, 149));
            var largeVariants = readBackIndex.LargeVariantPositions("chr1", 100, 201);

            Assert.NotNull(largeVariants);
            Assert.Single(largeVariants);
            Assert.Equal(100075, largeVariants[0]);
        }
Beispiel #15
0
        // Adds 2 * PreferredNodeCount consecutive chr1 entries followed by a few more entries,
        // then checks which stored location each query resolves to (-1 when nothing matches).
        public void Add_fill_node_and_start_another()
        {
            var nodeSize   = JasixCommons.PreferredNodeCount;
            var entryCount = 2 * nodeSize;

            var jasixIndex = new JasixIndex();

            // consecutive entries, twice the preferred node count
            for (var offset = 0; offset < entryCount; offset++)
            {
                jasixIndex.Add("chr1", 100 + offset, 101 + offset, 100_000 + offset);
            }

            jasixIndex.Add("chr1", 160 + entryCount, 166 + entryCount, 200_100);
            jasixIndex.Add("chr2", 100, 100, 200_150);
            jasixIndex.Add("chr2", 102, 105, 200_200);

            jasixIndex.Flush();

            Assert.Equal(100_000, jasixIndex.GetFirstVariantPosition("chr1", 100, 102));
            Assert.Equal(100_000 + nodeSize, jasixIndex.GetFirstVariantPosition("chr1", entryCount + 55, entryCount + 55));
            Assert.Equal(-1, jasixIndex.GetFirstVariantPosition("chr1", entryCount + 120, entryCount + 124));
            Assert.Equal(200_100, jasixIndex.GetFirstVariantPosition("chr1", entryCount + 158, entryCount + 160));
            Assert.Equal(200_150, jasixIndex.GetFirstVariantPosition("chr2", 103, 105));
        }
Beispiel #16
0
        // Builds an index for the cosmicv72 indels resource into a temporary file, reads it back
        // and checks first-variant locations. The temporary file is deleted in a finally block so
        // that a failing assertion or an exception does not leave it behind.
        public void TestIndexCreation()
        {
            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath("cosmicv72.indels.json.gz")), CompressionMode.Decompress);
            var tempFile   = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            try
            {
                using (var indexCreator = new IndexCreator(readStream, FileUtilities.GetCreateStream(tempFile)))
                {
                    indexCreator.CreateIndex();
                }

                JasixIndex readBackIndex;

                using (var stream = FileUtilities.GetReadStream(tempFile))
                {
                    readBackIndex = new JasixIndex(stream);
                }

                Assert.Equal(1591, readBackIndex.GetFirstVariantPosition("chr1", 9775924, 9775924));
                Assert.Equal(11500956299, readBackIndex.GetFirstVariantPosition("chr2", 16081096, 16081096));
                Assert.Equal(372100991296, readBackIndex.GetFirstVariantPosition("chr20", 36026164, 36026164));
                Assert.Equal(377682846863, readBackIndex.GetFirstVariantPosition("chrX", 66765044, 66765044));
            }
            finally
            {
                // File.Delete does not throw when the file does not exist
                File.Delete(tempFile);
            }
        }
        public void Combination_of_large_and_small_variants()
        {
            var index = new JasixIndex();

            //query range 10,000- 10,020
            index.Add("chr1", 8_000, 9_900, 90_000);  //SV not overlapping the query
            index.Add("chr1", 9_000, 10_005, 90_100); // partially overlapping
            index.Add("chr1", 9_500, 10_050, 90_200); //completely overlapping
            index.Add("chr1", 10_000, 10_001, 100_000);
            index.Add("chr1", 10_004, 10_006, 100_100);
            index.Add("chr1", 10_009, 10_550, 100_200);//SV starting from the middle of the range
            index.Add("chr1", 10_008, 10_010, 100_300);
            index.Add("chr1", 10_011, 10_020, 100_400);
            index.Add("chr1", 10_039, 10_550, 100_200);//SV past the range

            index.Flush();

            var firstSmallVarLocation = index.GetFirstVariantPosition("chr1", 10_000, 10_020);
            var largeVariantLocations = index.LargeVariantPositions("chr1", 10_000, 10_020);

            Assert.Equal(90_000, firstSmallVarLocation);
            Assert.True(largeVariantLocations.SequenceEqual(new List <long> {
                90_100, 90_200, 100_200
            }));
Beispiel #18
0
        /// <summary>
        /// Starting with <paramref name="lastLine"/>, scans lines for the opening tag of the genes
        /// section and for the closing "]}" and records the corresponding offsets in
        /// <paramref name="index"/>. Does nothing when <paramref name="lastLine"/> is null.
        /// </summary>
        /// <param name="lastLine">the line most recently read by the caller; may be null</param>
        /// <param name="index">index that receives the genes section boundaries</param>
        private void IndexGenes(string lastLine, JasixIndex index)
        {
            if (lastLine == null)
            {
                return;
            }

            // loop-invariant, so built once rather than on every iteration
            string genesOpenTag = $",\"{JasixCommons.GenesSectionTag}\":[";

            do
            {
                // offset just past the line being examined
                long linePosition = _reader.Position;

                // tags are machine-generated JSON tokens: compare ordinally, not by current culture
                if (lastLine.EndsWith(genesOpenTag, StringComparison.Ordinal))
                {
                    index.BeginSection(JasixCommons.GenesSectionTag, linePosition);
                    Console.WriteLine($"section:{JasixCommons.GenesSectionTag} starts at {linePosition}");
                }

                if (lastLine.EndsWith("]}", StringComparison.Ordinal))
                {
                    index.EndSection(JasixCommons.GenesSectionTag, linePosition);
                    Console.WriteLine($"section:{JasixCommons.GenesSectionTag} ends at {linePosition}");
                    break;
                }
            } while ((lastLine = _reader.ReadLine()) != null);
        }
Beispiel #19
0
        /// <summary>
        /// Builds a Jasix index by indexing the header, the positions and the genes in that order,
        /// writes it to the output stream, then prints peak memory usage (when positive) and the
        /// elapsed time to the console.
        /// </summary>
        public void CreateIndex()
        {
            var jasixIndex = new JasixIndex();

            IndexHeader(jasixIndex);

            // the line that ended the positions section is where the genes scan starts
            string positionsLastLine = IndexPositions(jasixIndex);
            IndexGenes(positionsLastLine, jasixIndex);

            jasixIndex.Write(_writeStream);

            Console.WriteLine();

            long peakBytes = MemoryUtilities.GetPeakMemoryUsage();
            var  elapsed   = _benchmark.GetElapsedTime();

            Console.WriteLine();

            bool hasPeakMemory = peakBytes > 0;
            if (hasPeakMemory)
            {
                Console.WriteLine("Peak memory usage: {0}", MemoryUtilities.ToHumanReadable(peakBytes));
            }

            Console.WriteLine("Time: {0}", Benchmark.ToHumanReadable(elapsed));
        }
Beispiel #20
0
 /// <summary>
 /// Creates an index creator that starts from an empty <c>JasixIndex</c> and keeps a
 /// reference to the stream given for the index.
 /// </summary>
 /// <param name="indexStream">stream associated with the index being built</param>
 public OnTheFlyIndexCreator(Stream indexStream)
 {
     _jasixIndex  = new JasixIndex();
     _indexStream = indexStream;
 }