Exemplo n.º 1
0
        /// <summary>
        /// Registers the positions section in <paramref name="index"/> and adds one index
        /// entry per line read from the reader, pairing each entry with the reader offset
        /// captured before that line was read.
        /// </summary>
        /// <param name="index">The index receiving the section boundaries and the entries.</param>
        /// <returns>
        /// The line starting with ']' that terminated the section, or null when the reader
        /// ran out of lines before such a line was seen.
        /// </returns>
        private string IndexPositions(JasixIndex index)
        {
            // the offset has to be captured before the corresponding line is consumed
            long lineOffset = _reader.Position;

            index.BeginSection(JasixCommons.PositionsSectionTag, lineOffset);
            Console.WriteLine($"section:{JasixCommons.PositionsSectionTag} starts at {lineOffset}");

            string lastChromosome = "";
            int    lastStart      = 0;
            string line;

            for (line = _reader.ReadLine(); line != null; line = _reader.ReadLine())
            {
                bool sectionClosed = line.OptimizedStartsWith(']');
                if (sectionClosed)
                {
                    // lineOffset still refers to the start of the closing line
                    index.EndSection(JasixCommons.PositionsSectionTag, lineOffset);
                    Console.WriteLine($"section:{JasixCommons.PositionsSectionTag} ends at {lineOffset}");
                    break;
                }

                // entries are separated by a trailing comma, which is removed before parsing
                string entry = line.TrimEnd(',');
                (string chromosome, int start, int stop) = GetChromPosition(entry);

                // NOTE(review): presumably rejects out-of-order input - confirm against CheckSorting
                CheckSorting(chromosome, start, lastChromosome, lastStart);
                index.Add(chromosome, start, stop, lineOffset);

                lastChromosome = chromosome;
                lastStart      = start;
                lineOffset     = _reader.Position;
            }

            return line;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds an index for the section named by SectionToIndex: skips ahead to the
        /// section start (recording the header line on the way), adds one entry per line
        /// until a line starting with "]" is found, writes the index to the output stream
        /// and prints peak memory usage and elapsed time to the console.
        /// </summary>
        public void CreateIndex()
        {
            var          searchTag = $"\"{SectionToIndex}\":[";
            const string headerTag = "{\"header\":";
            var          index     = new JasixIndex();
            string       line;

            // Skip the lines that precede the section to index.
            // The tags are fixed JSON tokens, so they are matched ordinally rather than
            // with the culture-sensitive default of the string overloads.
            while ((line = _reader.ReadLine()) != null)
            {
                if (line.StartsWith(headerTag, StringComparison.Ordinal))
                {
                    index.HeaderLine = ExtractHeader(line);
                }
                if (line.EndsWith(searchTag, StringComparison.Ordinal))
                {
                    break;
                }
            }

            // we need the location before accessing the line
            var fileLoc = _reader.Position;

            string previousChr = "";
            int    previousPos = 0;

            while ((line = _reader.ReadLine()) != null)
            {
                // a line starting with the closing bracket terminates the section
                if (line.StartsWith("]", StringComparison.Ordinal))
                {
                    break;
                }

                // entries are comma separated; drop the trailing separator before parsing
                line = line.TrimEnd(',');
                var chrPos = GetChromPosition(line);

                // NOTE(review): presumably rejects out-of-order input - confirm against CheckFileSorted
                CheckFileSorted(chrPos.chr, chrPos.position, previousChr, previousPos);

                index.Add(chrPos.chr, chrPos.position, chrPos.end, fileLoc);
                fileLoc     = _reader.Position;
                previousChr = chrPos.chr;
                previousPos = chrPos.position;
            }

            index.Write(_writeStream);

            Console.WriteLine();

            var peakMemoryUsageBytes = MemoryUtilities.GetPeakMemoryUsage();
            var wallTimeSpan         = _benchmark.GetElapsedTime();

            Console.WriteLine();
            // the memory line is only printed when a positive value was reported
            if (peakMemoryUsageBytes > 0)
            {
                Console.WriteLine("Peak memory usage: {0}", MemoryUtilities.ToHumanReadable(peakMemoryUsageBytes));
            }
            Console.WriteLine("Time: {0}", Benchmark.ToHumanReadable(wallTimeSpan));
        }
Exemplo n.º 3
0
        /// <summary>
        /// Adds entries for two chromosomes and checks that a chromosome-only query
        /// returns the file location given for the first entry of that chromosome.
        /// </summary>
        public void GetFirstVariantPosition_multi_chrom_index()
        {
            var jasixIndex = new JasixIndex();

            // three chr1 entries followed by two chr2 entries
            jasixIndex.Add("chr1", 100, 101, 100000);
            jasixIndex.Add("chr1", 105, 109, 100050);
            jasixIndex.Add("chr1", 160, 166, 100100);
            jasixIndex.Add("chr2", 100, 100, 100150);
            jasixIndex.Add("chr2", 102, 105, 100200);

            jasixIndex.Flush();

            var chr1Query    = Utilities.ParseQuery("chr1");
            var chr1Location = jasixIndex.GetFirstVariantPosition(chr1Query.Item1, chr1Query.Item2, chr1Query.Item3);

            Assert.Equal(100000, chr1Location);

            var chr2Query    = Utilities.ParseQuery("chr2");
            var chr2Location = jasixIndex.GetFirstVariantPosition(chr2Query.Item1, chr2Query.Item2, chr2Query.Item3);

            Assert.Equal(100150, chr2Location);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Adds twice JasixCommons.PreferredNodeCount consecutive chr1 entries plus a few
        /// trailing ones, then checks the locations returned for queries at the start,
        /// inside, past and at the end of that run, and for chr2.
        /// </summary>
        public void Add_fill_node_and_start_another()
        {
            var nodeSize     = JasixCommons.PreferredNodeCount;
            var doubleNode   = 2 * nodeSize;
            var jasixIndex   = new JasixIndex();

            // enough consecutive entries for two nodes of the preferred size
            for (var offset = 0; offset < doubleNode; offset++)
            {
                jasixIndex.Add("chr1", 100 + offset, 101 + offset, 100_000 + offset);
            }

            // one more chr1 entry beyond the run, followed by two chr2 entries
            jasixIndex.Add("chr1", 160 + doubleNode, 166 + doubleNode, 200_100);
            jasixIndex.Add("chr2", 100, 100, 200_150);
            jasixIndex.Add("chr2", 102, 105, 200_200);

            jasixIndex.Flush();

            var atRunStart   = jasixIndex.GetFirstVariantPosition("chr1", 100, 102);
            var insideRun    = jasixIndex.GetFirstVariantPosition("chr1", doubleNode + 55, doubleNode + 55);
            var inGap        = jasixIndex.GetFirstVariantPosition("chr1", doubleNode + 120, doubleNode + 124);
            var atLastEntry  = jasixIndex.GetFirstVariantPosition("chr1", doubleNode + 158, doubleNode + 160);
            var onSecondChr  = jasixIndex.GetFirstVariantPosition("chr2", 103, 105);

            Assert.Equal(100_000, atRunStart);
            Assert.Equal(100_000 + nodeSize, insideRun);
            // -1 is the value expected when the query range matches no entry
            Assert.Equal(-1, inGap);
            Assert.Equal(200_100, atLastEntry);
            Assert.Equal(200_150, onSecondChr);
        }
        // Adds a mix of short and long chr1 entries around the query range 10,000-10,020,
        // then checks both the first-variant location and the list of large-variant
        // locations returned for that range.
        public void Combination_of_large_and_small_variants()
        {
            var index = new JasixIndex();

            // query range: 10,000 - 10,020
            index.Add("chr1", 8_000, 9_900, 90_000);  // SV not overlapping the query
            index.Add("chr1", 9_000, 10_005, 90_100); // partially overlapping
            index.Add("chr1", 9_500, 10_050, 90_200); // completely overlapping
            index.Add("chr1", 10_000, 10_001, 100_000);
            index.Add("chr1", 10_004, 10_006, 100_100);
            index.Add("chr1", 10_009, 10_550, 100_200);// SV starting from the middle of the range
            index.Add("chr1", 10_008, 10_010, 100_300);
            index.Add("chr1", 10_011, 10_020, 100_400);
            index.Add("chr1", 10_039, 10_550, 100_200);// SV past the range

            index.Flush();

            var firstSmallVarLocation = index.GetFirstVariantPosition("chr1", 10_000, 10_020);
            var largeVariantLocations = index.LargeVariantPositions("chr1", 10_000, 10_020);

            // expected: first location 90_000, large-variant locations 90_100, 90_200 and 100_200 in that order
            Assert.Equal(90_000, firstSmallVarLocation);
            Assert.True(largeVariantLocations.SequenceEqual(new List <long> {
                90_100, 90_200, 100_200
            }));
Exemplo n.º 6
0
        /// <summary>
        /// Adds <paramref name="position"/> to the underlying index at
        /// <paramref name="fileLocation"/>, after checking that starts do not decrease
        /// within a chromosome.
        /// </summary>
        /// <param name="position">The position whose chromosome, start and end are indexed.</param>
        /// <param name="fileLocation">The file location stored with the entry.</param>
        /// <exception cref="UserErrorException">
        /// Thrown when the position is on the same chromosome as the previous one but
        /// starts before it.
        /// </exception>
        public void Add(IPosition position, long fileLocation)
        {
            string ensemblName = position.Chromosome.EnsemblName;
            int    begin       = position.Start;
            int?   explicitEnd = position.InfoData?.End;

            bool outOfOrder = ensemblName == _lastChromName && begin < _lastPosition;
            if (outOfOrder)
            {
                throw new UserErrorException($"The Json file is not sorted at {position.Chromosome.UcscName}: {begin}");
            }

            // remember this entry for the ordering check of the next call
            _lastPosition  = begin;
            _lastChromName = ensemblName;

            int stop;
            if (explicitEnd.HasValue)
            {
                stop = explicitEnd.Value;
            }
            else
            {
                // no end supplied: derive it from the allele lengths.
                // The offset is 1 only when every alt allele passes IsNucleotideAllele
                // and at least one of them is longer than a single character.
                string[] alts   = position.AltAlleles;
                int      offset = 0;
                if (alts != null && alts.All(Utilities.IsNucleotideAllele) && alts.Any(allele => allele.Length > 1))
                {
                    offset = 1;
                }

                stop = Math.Max(position.RefAllele.Length - 1, offset) + begin;
            }

            _jasixIndex.Add(position.Chromosome.EnsemblName, begin, stop, fileLocation, position.Chromosome.UcscName);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Writes an index to an in-memory stream, reads it back from the resulting bytes
        /// and checks that first-variant and large-variant queries on the copy return the
        /// expected file locations.
        /// </summary>
        public void Write_and_read_back()
        {
            var original = new JasixIndex();

            original.Add("chr1", 100, 101, 100000, "1");
            original.Add("chr1", 105, 109, 100050, "1");
            original.Add("chr1", 150, 1000, 100075, "1"); // large variant
            original.Add("chr1", 160, 166, 100100, "1");
            original.Add("chr2", 100, 100, 100150, "2");
            original.Add("chr2", 102, 105, 100200, "2");

            // serialize into memory and keep the produced bytes
            byte[] serialized;
            using (var output = new MemoryStream())
            {
                original.Write(output);
                serialized = output.ToArray();
            }

            // deserialize from the start of a fresh stream over the same bytes
            JasixIndex restored;
            using (var input = new MemoryStream(serialized))
            {
                input.Seek(0, SeekOrigin.Begin);
                restored = new JasixIndex(input);
            }

            Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 100, 102));
            Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 103, 104));
            Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 120, 124));
            Assert.Equal(100000, restored.GetFirstVariantPosition("chr1", 158, 160));
            Assert.Equal(100150, restored.GetFirstVariantPosition("chr2", 103, 105));

            // a range ending before 150 yields no large variants; one reaching past it yields exactly one
            Assert.Null(restored.LargeVariantPositions("chr1", 100, 149));

            var overlappingLarge = restored.LargeVariantPositions("chr1", 100, 201);

            Assert.NotNull(overlappingLarge);
            Assert.Single(overlappingLarge);
            Assert.Equal(100075, overlappingLarge[0]);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Checks that LargeVariantPositions returns null for a range covering only the
        /// short entries and returns just the long entry once the range reaches it.
        /// </summary>
        public void FindLargeVaritants_method_does_not_return_small_variants()
        {
            var jasixIndex = new JasixIndex();

            jasixIndex.Add("chr1", 100, 101, 100_000);
            jasixIndex.Add("chr1", 105, 109, 100_050);
            jasixIndex.Add("chr1", 160, 166, 100_100);
            jasixIndex.Add("chr1", 200, 1000, 100_075); // the only large variant
            jasixIndex.Add("chr2", 100, 100, 100_150);
            jasixIndex.Add("chr2", 102, 105, 100_200);

            jasixIndex.Flush();

            // range that stops just before the large variant starts
            var beforeLargeVariant = jasixIndex.LargeVariantPositions("chr1", 100, 199);
            Assert.Null(beforeLargeVariant);

            // range that extends past the start of the large variant
            var reachingLargeVariant = jasixIndex.LargeVariantPositions("chr1", 100, 201);

            Assert.NotNull(reachingLargeVariant);
            Assert.Single(reachingLargeVariant);
            Assert.Equal(100075, reachingLargeVariant[0]);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Writes an index to a temporary file, reads it back and checks that
        /// first-variant and large-variant queries on the copy return the expected file
        /// locations. The temporary file is removed even when writing or reading fails.
        /// </summary>
        public void IndexWriteRead()
        {
            var index = new JasixIndex();

            index.Add("chr1", 100, 101, 100000);
            index.Add("chr1", 105, 109, 100050);
            index.Add("chr1", 150, 1000, 100075);//large variant
            index.Add("chr1", 160, 166, 100100);
            index.Add("chr2", 100, 100, 100150);
            index.Add("chr2", 102, 105, 100200);

            var tempFile = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

            JasixIndex readBackIndex;

            try
            {
                using (var writer = FileUtilities.GetCreateStream(tempFile))
                {
                    index.Write(writer);
                }

                using (var stream = FileUtilities.GetReadStream(tempFile))
                {
                    readBackIndex = new JasixIndex(stream);
                }
            }
            finally
            {
                // clean up on failure too; File.Delete does not throw when the file is absent
                File.Delete(tempFile);
            }

            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 103, 104));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 120, 124));
            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 158, 160));
            Assert.Equal(100150, readBackIndex.GetFirstVariantPosition("chr2", 103, 105));

            //checking large variants
            Assert.Null(readBackIndex.LargeVariantPositions("chr1", 100, 149));
            var largeVariants = readBackIndex.LargeVariantPositions("chr1", 100, 201);

            Assert.NotNull(largeVariants);
            Assert.Single(largeVariants);
            Assert.Equal(100075, largeVariants[0]);
        }