/// <summary>
/// Reads the positions section line by line from <c>_reader</c>, registering the section
/// boundaries and one entry per position line in <paramref name="index"/>.
/// </summary>
/// <param name="index">Index that receives the section markers and the position entries.</param>
/// <returns>
/// The line starting with ']' that closed the section, or <c>null</c> when the reader ran out
/// of lines before such a line was seen (in which case the section is never ended in the index).
/// </returns>
private string IndexPositions(JasixIndex index)
{
    var sectionTag = JasixCommons.PositionsSectionTag;

    // The reader position has to be captured before the line itself is read.
    long lineStart = _reader.Position;
    index.BeginSection(sectionTag, lineStart);
    Console.WriteLine($"section:{sectionTag} starts at {lineStart}");

    var lastChr = "";
    var lastPos = 0;

    while (true)
    {
        string line = _reader.ReadLine();
        if (line == null)
        {
            return null;
        }

        if (line.OptimizedStartsWith(']'))
        {
            // lineStart still holds the position recorded before this closing line was read
            index.EndSection(sectionTag, lineStart);
            Console.WriteLine($"section:{sectionTag} ends at {lineStart}");
            return line;
        }

        // entries are separated by a trailing comma that is dropped before parsing
        (string chr, int position, int end) = GetChromPosition(line.TrimEnd(','));
        CheckSorting(chr, position, lastChr, lastPos);
        index.Add(chr, position, end, lineStart);

        lineStart = _reader.Position;
        lastChr = chr;
        lastPos = position;
    }
}
/// <summary>
/// Builds an index for the section named by <c>SectionToIndex</c>, writes it to
/// <c>_writeStream</c> and prints peak memory usage and elapsed time to the console.
/// </summary>
/// <remarks>
/// Lines are read from <c>_reader</c>. A line starting with the header tag supplies the
/// index header; indexing starts after the first line that ends with the section tag and
/// stops at the first line starting with ']' (or at the end of the input).
/// </remarks>
public void CreateIndex()
{
    var searchTag = $"\"{SectionToIndex}\":[";
    const string headerTag = "{\"header\":";
    var index = new JasixIndex();
    string line;

    // Skip everything before the section to index, picking up the header on the way.
    // The tags are JSON syntax, so they are matched ordinally instead of with the
    // culture-sensitive (linguistic) rules the single-argument overloads apply.
    while ((line = _reader.ReadLine()) != null)
    {
        if (line.StartsWith(headerTag, StringComparison.Ordinal))
        {
            index.HeaderLine = ExtractHeader(line);
        }

        if (line.EndsWith(searchTag, StringComparison.Ordinal))
        {
            break;
        }
    }

    // we need the location before accessing the line
    var fileLoc = _reader.Position;
    string previousChr = "";
    int previousPos = 0;

    while ((line = _reader.ReadLine()) != null)
    {
        // a line starting with ']' closes the section
        if (line.StartsWith("]", StringComparison.Ordinal))
        {
            break;
        }

        line = line.TrimEnd(',');
        var chrPos = GetChromPosition(line);

        CheckFileSorted(chrPos.chr, chrPos.position, previousChr, previousPos);
        index.Add(chrPos.chr, chrPos.position, chrPos.end, fileLoc);

        fileLoc = _reader.Position;
        previousChr = chrPos.chr;
        previousPos = chrPos.position;
    }

    index.Write(_writeStream);

    Console.WriteLine();

    var peakMemoryUsageBytes = MemoryUtilities.GetPeakMemoryUsage();
    var wallTimeSpan = _benchmark.GetElapsedTime();

    Console.WriteLine();
    if (peakMemoryUsageBytes > 0)
    {
        Console.WriteLine("Peak memory usage: {0}", MemoryUtilities.ToHumanReadable(peakMemoryUsageBytes));
    }

    Console.WriteLine("Time: {0}", Benchmark.ToHumanReadable(wallTimeSpan));
}
/// <summary>
/// A chromosome-only query must resolve to the file location of the first entry
/// that was added for that chromosome.
/// </summary>
public void GetFirstVariantPosition_multi_chrom_index()
{
    // three entries on chr1 followed by two on chr2
    var jasixIndex = new JasixIndex();
    jasixIndex.Add("chr1", 100, 101, 100000);
    jasixIndex.Add("chr1", 105, 109, 100050);
    jasixIndex.Add("chr1", 160, 166, 100100);
    jasixIndex.Add("chr2", 100, 100, 100150);
    jasixIndex.Add("chr2", 102, 105, 100200);
    jasixIndex.Flush();

    var chr1Query = Utilities.ParseQuery("chr1");
    var chr1Location = jasixIndex.GetFirstVariantPosition(chr1Query.Item1, chr1Query.Item2, chr1Query.Item3);
    Assert.Equal(100000, chr1Location);

    var chr2Query = Utilities.ParseQuery("chr2");
    var chr2Location = jasixIndex.GetFirstVariantPosition(chr2Query.Item1, chr2Query.Item2, chr2Query.Item3);
    Assert.Equal(100150, chr2Location);
}
/// <summary>
/// Adds 2 * <c>JasixCommons.PreferredNodeCount</c> consecutive chr1 entries plus a few
/// trailing ones, then checks the file locations returned for queries across that range.
/// </summary>
public void Add_fill_node_and_start_another()
{
    var nodeSize = JasixCommons.PreferredNodeCount;
    var filledCount = 2 * nodeSize;

    var jasixIndex = new JasixIndex();

    // consecutive entries; presumably enough to fill two nodes of the index
    for (var offset = 0; offset < filledCount; offset++)
    {
        jasixIndex.Add("chr1", 100 + offset, 101 + offset, 100_000 + offset);
    }

    // one more chr1 entry past the filled range, then two on chr2
    jasixIndex.Add("chr1", 160 + filledCount, 166 + filledCount, 200_100);
    jasixIndex.Add("chr2", 100, 100, 200_150);
    jasixIndex.Add("chr2", 102, 105, 200_200);
    jasixIndex.Flush();

    var atStart = jasixIndex.GetFirstVariantPosition("chr1", 100, 102);
    Assert.Equal(100_000, atStart);

    var insideFilledRange = jasixIndex.GetFirstVariantPosition("chr1", filledCount + 55, filledCount + 55);
    Assert.Equal(100_000 + nodeSize, insideFilledRange);

    // nothing was added in this range, so no location is expected
    var inGap = jasixIndex.GetFirstVariantPosition("chr1", filledCount + 120, filledCount + 124);
    Assert.Equal(-1, inGap);

    var atTrailingEntry = jasixIndex.GetFirstVariantPosition("chr1", filledCount + 158, filledCount + 160);
    Assert.Equal(200_100, atTrailingEntry);

    var onChr2 = jasixIndex.GetFirstVariantPosition("chr2", 103, 105);
    Assert.Equal(200_150, onChr2);
}
public void Combination_of_large_and_small_variants() { var index = new JasixIndex(); //query range 10,000- 10,020 index.Add("chr1", 8_000, 9_900, 90_000); //SV not overlapping the query index.Add("chr1", 9_000, 10_005, 90_100); // partially overlapping index.Add("chr1", 9_500, 10_050, 90_200); //completely overlapping index.Add("chr1", 10_000, 10_001, 100_000); index.Add("chr1", 10_004, 10_006, 100_100); index.Add("chr1", 10_009, 10_550, 100_200);//SV starting from the middle of the range index.Add("chr1", 10_008, 10_010, 100_300); index.Add("chr1", 10_011, 10_020, 100_400); index.Add("chr1", 10_039, 10_550, 100_200);//SV past the range index.Flush(); var firstSmallVarLocation = index.GetFirstVariantPosition("chr1", 10_000, 10_020); var largeVariantLocations = index.LargeVariantPositions("chr1", 10_000, 10_020); Assert.Equal(90_000, firstSmallVarLocation); Assert.True(largeVariantLocations.SequenceEqual(new List <long> { 90_100, 90_200, 100_200 }));
/// <summary>
/// Adds <paramref name="position"/> to the underlying index after verifying that positions
/// arrive in non-decreasing start order within a chromosome.
/// </summary>
/// <param name="position">The position to index.</param>
/// <param name="fileLocation">File location that is stored for the position.</param>
/// <exception cref="UserErrorException">
/// Thrown when the position is on the same chromosome as the previous one but starts before it.
/// </exception>
public void Add(IPosition position, long fileLocation)
{
    string chromName = position.Chromosome.EnsemblName;
    int start = position.Start;
    int? end = position.InfoData?.End;

    bool isOutOfOrder = chromName == _lastChromName && start < _lastPosition;
    if (isOutOfOrder)
    {
        throw new UserErrorException($"The Json file is not sorted at {position.Chromosome.UcscName}: {start}");
    }

    _lastPosition = start;
    _lastChromName = chromName;

    if (!end.HasValue)
    {
        // No explicit end available: derive one from the allele lengths.
        string[] altAlleles = position.AltAlleles;

        // The offset is 1 only when every alt allele passes Utilities.IsNucleotideAllele
        // and at least one of them is longer than a single character.
        bool hasMultiBaseAlt = altAlleles != null
                               && altAlleles.All(Utilities.IsNucleotideAllele)
                               && altAlleles.Any(allele => allele.Length > 1);
        int altAlleleOffset = 0;
        if (hasMultiBaseAlt)
        {
            altAlleleOffset = 1;
        }

        int span = Math.Max(position.RefAllele.Length - 1, altAlleleOffset);
        end = span + start;
    }

    _jasixIndex.Add(position.Chromosome.EnsemblName, start, end.Value, fileLocation, position.Chromosome.UcscName);
}
/// <summary>
/// Serializes an index into memory, deserializes it again and checks that the
/// read-back index answers position and large-variant queries as expected.
/// </summary>
public void Write_and_read_back()
{
    var original = new JasixIndex();
    original.Add("chr1", 100, 101, 100000, "1");
    original.Add("chr1", 105, 109, 100050, "1");
    original.Add("chr1", 150, 1000, 100075, "1"); // large variant
    original.Add("chr1", 160, 166, 100100, "1");
    original.Add("chr2", 100, 100, 100150, "2");
    original.Add("chr2", 102, 105, 100200, "2");

    var buffer = new MemoryStream();
    using (buffer)
    {
        original.Write(buffer);
    }

    // MemoryStream.ToArray can still be called once the stream has been closed
    var serialized = new MemoryStream(buffer.ToArray());
    serialized.Seek(0, SeekOrigin.Begin);

    JasixIndex reloaded;
    using (serialized)
    {
        reloaded = new JasixIndex(serialized);
    }

    // position queries
    Assert.Equal(100000, reloaded.GetFirstVariantPosition("chr1", 100, 102));
    Assert.Equal(100000, reloaded.GetFirstVariantPosition("chr1", 103, 104));
    Assert.Equal(100000, reloaded.GetFirstVariantPosition("chr1", 120, 124));
    Assert.Equal(100000, reloaded.GetFirstVariantPosition("chr1", 158, 160));
    Assert.Equal(100150, reloaded.GetFirstVariantPosition("chr2", 103, 105));

    // large variants: the query ending at 149 stops short of the one starting at 150
    var beforeLargeVariant = reloaded.LargeVariantPositions("chr1", 100, 149);
    Assert.Null(beforeLargeVariant);

    var overlapping = reloaded.LargeVariantPositions("chr1", 100, 201);
    Assert.NotNull(overlapping);
    Assert.Single(overlapping);
    Assert.Equal(100075, overlapping[0]);
}
/// <summary>
/// <c>LargeVariantPositions</c> must report only the single long entry (200-1000) and
/// none of the short entries added alongside it.
/// </summary>
public void FindLargeVaritants_method_does_not_return_small_variants()
{
    var jasixIndex = new JasixIndex();
    jasixIndex.Add("chr1", 100, 101, 100_000);
    jasixIndex.Add("chr1", 105, 109, 100_050);
    jasixIndex.Add("chr1", 160, 166, 100_100);
    jasixIndex.Add("chr1", 200, 1000, 100_075); // large variant
    jasixIndex.Add("chr2", 100, 100, 100_150);
    jasixIndex.Add("chr2", 102, 105, 100_200);
    jasixIndex.Flush();

    // the range 100-199 contains short entries only, so nothing is reported
    var smallOnly = jasixIndex.LargeVariantPositions("chr1", 100, 199);
    Assert.Null(smallOnly);

    // extending the range to 201 reaches the large variant starting at 200
    var withLargeVariant = jasixIndex.LargeVariantPositions("chr1", 100, 201);
    Assert.NotNull(withLargeVariant);
    Assert.Single(withLargeVariant);
    Assert.Equal(100075, withLargeVariant[0]);
}
/// <summary>
/// Writes an index to a temporary file, reads it back and checks that the read-back
/// index answers position and large-variant queries as expected.
/// </summary>
public void IndexWriteRead()
{
    var index = new JasixIndex();

    index.Add("chr1", 100, 101, 100000);
    index.Add("chr1", 105, 109, 100050);
    index.Add("chr1", 150, 1000, 100075); // large variant
    index.Add("chr1", 160, 166, 100100);
    index.Add("chr2", 100, 100, 100150);
    index.Add("chr2", 102, 105, 100200);

    var tempFile = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    JasixIndex readBackIndex;
    try
    {
        using (var writer = FileUtilities.GetCreateStream(tempFile))
        {
            index.Write(writer);
        }

        using (var stream = FileUtilities.GetReadStream(tempFile))
        {
            readBackIndex = new JasixIndex(stream);
        }
    }
    finally
    {
        // Clean up even when writing or reading throws, so failed runs do not leave
        // files behind in the temp directory. File.Delete does not throw when the
        // file does not exist.
        File.Delete(tempFile);
    }

    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 100, 102));
    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 103, 104));
    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 120, 124));
    Assert.Equal(100000, readBackIndex.GetFirstVariantPosition("chr1", 158, 160));
    Assert.Equal(100150, readBackIndex.GetFirstVariantPosition("chr2", 103, 105));

    // checking large variants
    Assert.Null(readBackIndex.LargeVariantPositions("chr1", 100, 149));

    var largeVariants = readBackIndex.LargeVariantPositions("chr1", 100, 201);
    Assert.NotNull(largeVariants);
    Assert.Single(largeVariants);
    Assert.Equal(100075, largeVariants[0]);
}