/// <summary>
/// Checks whether the numbers in <paramref name="nums"/> are overlapped by the intervals in <paramref name="cover"/>.
/// </summary>
/// <param name="nums">The numbers to check. They are binary-searched below, so they must be sorted in ascending order.</param>
/// <param name="cover">The candidate covering intervals. The list is copied before sorting, so the caller's list is not reordered.</param>
/// <returns><c>false</c> as soon as an examined number has no overlapping interval; otherwise <c>true</c>.</returns>
public static bool IsValidCover(int[] nums, IList<Interval<int>> cover)
{
    var intervals = new List<Interval<int>>(cover);
    intervals.Sort();
    var iArray = new IntervalArray<int>(intervals.ToArray());

    int i = 0;
    while (i < nums.Length)
    {
        var x = nums[i];
        var overlappers = iArray.GetAllOverlappingIntervals(x, x).ToList();
        if (overlappers.Count == 0)
        {
            return false;
        }

        var rangeEnd = GetRangeEnd(overlappers);

        // No int can lie past int.MaxValue. Without this guard, rangeEnd + 1 wraps around to
        // int.MinValue, the search lands on index 0 and the loop never terminates.
        if (rangeEnd == int.MaxValue)
        {
            return true;
        }

        // get the first number that is past the range of overlappers
        var j = Array.BinarySearch(nums, rangeEnd + 1);
        if (j < 0)
        {
            // a negative result is the bitwise complement of the insertion point
            j = ~j;
        }

        i = j;
    }

    return true;
}
/// <summary>
/// Positions the stream at the index entry for <paramref name="chromosome"/>, reads its reference block
/// and hands the pieces to <c>Sequence.Set</c>. When the chromosome is empty or has no index entry,
/// <c>Sequence.EnableNSequence()</c> is called instead and nothing is read.
/// </summary>
/// <param name="chromosome">The chromosome whose sequence should be loaded.</param>
/// <exception cref="InvalidDataException">Thrown when the tag read at the entry's file offset is not the reference start tag.</exception>
public void GetCompressedSequence(IChromosome chromosome)
{
    int index = 0;
    bool hasIndexEntry = !chromosome.IsEmpty() && _refIndexToIndex.TryGetValue(chromosome.Index, out index);

    if (!hasIndexEntry)
    {
        Sequence.EnableNSequence();
        return;
    }

    _stream.Position = _indexEntries[index].FileOffset;

    uint tag = _reader.ReadUInt32();
    if (tag != ReferenceSequenceCommon.ReferenceStartTag)
    {
        throw new InvalidDataException($"The reference start tag does not match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.ReferenceStartTag}");
    }

    // The remaining pieces are read sequentially from the same reader, so the order matters.
    (int offset, int baseCount) = GetMetadata(_reader);
    byte[] buffer = GetTwoBitBuffer(_reader);
    IntervalArray<MaskedEntry> maskedEntries = GetMaskedEntries(_reader);
    Band[] bands = GetCytogeneticBands(_reader);

    Sequence.Set(baseCount, offset, buffer, maskedEntries, bands);
}
/// <summary>
/// Groups the genes by reference index and wraps each group in an interval array.
/// </summary>
/// <param name="genes">The genes to index; <c>null</c> yields a <c>NullIntervalSearch</c>.</param>
/// <param name="numRefSeqs">The number of reference sequences, i.e. the number of interval arrays created.</param>
/// <returns>An interval forest holding one interval array per reference sequence.</returns>
private static IIntervalForest<MutableGene> CreateGeneForest(List<MutableGene> genes, int numRefSeqs)
{
    if (genes == null)
    {
        return new NullIntervalSearch<MutableGene>();
    }

    // one bucket of intervals per reference sequence
    var buckets = new List<IntervalArray<MutableGene>.Interval>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        buckets[refIndex] = new List<IntervalArray<MutableGene>.Interval>();
    }

    foreach (var gene in genes)
    {
        var bucket = buckets[gene.ReferenceIndex];
        bucket.Add(new IntervalArray<MutableGene>.Interval(gene.Start, gene.End, gene));
    }

    // freeze each bucket into an interval array (intervals keep their insertion order)
    var intervalArrays = new IntervalArray<MutableGene>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        var intervals = buckets[refIndex].ToArray();
        intervalArrays[refIndex] = new IntervalArray<MutableGene>(intervals);
    }

    return new IntervalForest<MutableGene>(intervalArrays);
}
/// <summary>
/// Rebuilds <c>_commonCodingBlockArray</c> when the chromosome index changes. The array is left
/// <c>null</c> when the index is beyond the transcript interval arrays or when that chromosome has none.
/// </summary>
/// <param name="chrIndex">The index of the chromosome that is about to be processed.</param>
private void UpdateCodingBlockArray(int chrIndex)
{
    if (chrIndex == _currentChrIndex)
    {
        return;
    }

    _currentChrIndex = chrIndex;
    _commonCodingBlockArray = null;

    var transcripts = chrIndex < _transcriptIntervalArrays.Length
        ? _transcriptIntervalArrays[chrIndex]
        : null;

    if (transcripts == null)
    {
        return;
    }

    var codingBlockIntervals = new List<Interval<ICodingBlock>>();

    foreach (var (gene, phasedCdsIntervals) in GetPhasedCdsIntervals(transcripts))
    {
        var codingBlocksByTranscript = GetTranscriptToCodingBlocks(phasedCdsIntervals, gene.OnReverseStrand);
        var phasedIntervals = GetIntervalsWithPhase(codingBlocksByTranscript);
        codingBlockIntervals.AddRange(phasedIntervals);
    }

    // the interval array is built from the intervals ordered by their begin position
    var sortedIntervals = codingBlockIntervals.OrderBy(interval => interval.Begin).ToArray();
    _commonCodingBlockArray = new IntervalArray<ICodingBlock>(sortedIntervals);
}
/// <summary>
/// Creates an empty index for one reference sequence: no nodes, no large variants and no interval array yet.
/// </summary>
/// <param name="refName">The name stored in <c>ReferenceSequence</c>.</param>
public JasixChrIndex(string refName)
{
    _intervalArray    = null;
    _largeVariants    = new List<Interval<long>>();
    _nodes            = new List<JasixNode>();
    ReferenceSequence = refName;
}
/// <summary>
/// Builds a fixed gene forest with 25 slots. Every slot first receives a single placeholder interval
/// spanning 1..int.MaxValue labelled "chr" + index; slots 0, 9 and 20 are then replaced with
/// hand-written gene intervals.
/// </summary>
/// <returns>The interval forest wrapping the 25 interval arrays.</returns>
private IntervalForest<string> GetGeneForest()
{
    // 1-22, X,Y,MT
    var forestArrays = new IntervalArray<string>[25];

    // creating dummy interval trees for all the chromosomes
    for (var slot = 0; slot < forestArrays.Length; slot++)
    {
        var placeholder = new Interval<string>(1, int.MaxValue, "chr" + slot);
        forestArrays[slot] = new IntervalArray<string>(new[] { placeholder });
    }

    // NOTE(review): the intervals in slots 0 and 20 are not listed in ascending begin order — confirm IntervalArray accepts that.
    forestArrays[0] = new IntervalArray<string>(new[]
    {
        new Interval<string>(1570603, 1590558, "CDK11B"),
        new Interval<string>(1567060, 1570639, "MMP23B"),
    });

    forestArrays[9] = new IntervalArray<string>(new[]
    {
        new Interval<string>(92828, 95178, "TUBB8"),
    });

    forestArrays[20] = new IntervalArray<string>(new[]
    {
        // NOTE(review): begin (31863782) is larger than end (30491464) here — confirm this is intentional.
        new Interval<string>(31863782, 30491464, "KRTAP19-3"),
        new Interval<string>(31859362, 31859755, "KRTAP19-2"),
    });

    return new IntervalForest<string>(forestArrays);
}
/// <summary>
/// Builds, for every chromosome, an interval array of the unique codon blocks of each gene.
/// Codon blocks from all transcripts of a gene (as identified by <c>GeneComparer</c>) are pooled
/// before <c>GetUniqueCodonBlocks</c> is applied.
/// </summary>
/// <param name="transcriptIntervalArrays">The transcript interval arrays, indexed by chromosome; entries may be <c>null</c>.</param>
/// <returns>
/// An array with the same length as the input. Entries for chromosomes without transcripts are left <c>null</c>.
/// </returns>
private static IntervalArray<ICodonBlock>[] GetCodonBlockIntervalArrays(
    IntervalArray<ITranscript>[] transcriptIntervalArrays)
{
    int numChromosomes = transcriptIntervalArrays.Length;
    var codonBlockIntervalArrays = new IntervalArray<ICodonBlock>[numChromosomes];

    for (int chrIndex = 0; chrIndex < numChromosomes; chrIndex++)
    {
        var transcriptIntervalArray = transcriptIntervalArrays[chrIndex];
        if (transcriptIntervalArray == null)
        {
            continue; //TODO: assign an empty IntervalArray to this chr
        }

        // keeps the order of genes, as the intervals are already sorted at the transcript level
        var geneList = new List<IGene>();
        var geneToCodonBlocks = new Dictionary<IGene, List<ICodonBlock>>(new GeneComparer());

        foreach (var transcriptInterval in transcriptIntervalArray.Array)
        {
            var transcript = transcriptInterval.Value;
            var gene = transcript.Gene;
            var codonBlocks = ConstructCodonBlocksFromTranscript(transcript);

            // single lookup instead of ContainsKey followed by Add / indexer
            if (geneToCodonBlocks.TryGetValue(gene, out var existingCodonBlocks))
            {
                existingCodonBlocks.AddRange(codonBlocks);
                continue;
            }

            geneToCodonBlocks.Add(gene, codonBlocks);
            geneList.Add(gene);
        }

        var allUniqueCodonBlocks = new List<ICodonBlock>();
        foreach (var gene in geneList)
        {
            allUniqueCodonBlocks.AddRange(GetUniqueCodonBlocks(geneToCodonBlocks[gene]));
        }

        codonBlockIntervalArrays[chrIndex] = new IntervalArray<ICodonBlock>(
            allUniqueCodonBlocks.Select(GetCodonBlockInterval).ToArray<Interval<ICodonBlock>>());
    }

    return codonBlockIntervalArrays;
}
/// <summary>
/// Distributes the supplied chromosome intervals over one interval array per reference sequence.
/// </summary>
/// <typeparam name="T">The chromosome interval type being indexed.</typeparam>
/// <param name="refIntervals">The intervals to index; <c>null</c> yields a <c>NullIntervalSearch</c>.</param>
/// <param name="numRefSeqs">The number of reference sequences, i.e. the number of interval arrays created.</param>
/// <returns>An interval forest holding one interval array per reference sequence.</returns>
public static IIntervalForest<T> CreateIntervalForest<T>(T[] refIntervals, int numRefSeqs) where T : IChromosomeInterval
{
    if (refIntervals == null)
    {
        return new NullIntervalSearch<T>();
    }

    // one bucket of intervals per reference sequence
    var buckets = new List<Interval<T>>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        buckets[refIndex] = new List<Interval<T>>();
    }

    foreach (var item in refIntervals)
    {
        var bucket = buckets[item.Chromosome.Index];
        bucket.Add(new Interval<T>(item.Start, item.End, item));
    }

    // freeze each bucket into an interval array (intervals keep their insertion order)
    var intervalArrays = new IntervalArray<T>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        var intervals = buckets[refIndex].ToArray();
        intervalArrays[refIndex] = new IntervalArray<T>(intervals);
    }

    return new IntervalForest<T>(intervalArrays);
}
/// <summary>
/// Builds a gene interval forest and a gene-to-transcripts lookup from the per-chromosome transcript interval arrays.
/// Genes are identified with <c>GeneComparer</c>, and the lookup is shared across all chromosomes.
/// </summary>
/// <param name="transcriptIntervalArrays">The transcript interval arrays, indexed by chromosome; entries may be <c>null</c>.</param>
/// <returns>
/// The gene interval forest (chromosomes without transcripts get an empty interval array) and the dictionary
/// that maps every gene to all of its transcripts.
/// </returns>
public static (IntervalForest<IGene>, Dictionary<IGene, List<ITranscript>>) GetIntervalAndTranscriptsForeachGene(IntervalArray<ITranscript>[] transcriptIntervalArrays)
{
    int numChromosomes = transcriptIntervalArrays.Length;
    var geneIntervalArrays = new IntervalArray<IGene>[numChromosomes];
    var geneComparer = new GeneComparer();
    var geneToTranscripts = new Dictionary<IGene, List<ITranscript>>(geneComparer);

    for (int chrIndex = 0; chrIndex < numChromosomes; chrIndex++)
    {
        var transcriptIntervalArray = transcriptIntervalArrays[chrIndex];
        if (transcriptIntervalArray == null)
        {
            // no transcripts on this chromosome: use an empty interval array
            geneIntervalArrays[chrIndex] = new IntervalArray<IGene>(new Interval<IGene>[0]);
            continue;
        }

        // keeps the order of genes, as the intervals are already sorted at the transcript level
        var geneList = new List<IGene>();

        foreach (var transcriptInterval in transcriptIntervalArray.Array)
        {
            var transcript = transcriptInterval.Value;
            var gene = transcript.Gene;

            if (geneToTranscripts.TryGetValue(gene, out var transcripts))
            {
                // List.Add mutates the stored list. The previous Enumerable.Append call returned a
                // new sequence that was discarded, so every transcript after the first was lost.
                transcripts.Add(transcript);
                continue;
            }

            geneToTranscripts.Add(gene, new List<ITranscript> { transcript });
            geneList.Add(gene);
        }

        geneIntervalArrays[chrIndex] = new IntervalArray<IGene>(geneList.Select(GetGeneInterval).ToArray());
    }

    return (new IntervalForest<IGene>(geneIntervalArrays), geneToTranscripts);
}
/// <summary>
/// Builds an interval forest of genes using the coordinates that belong to the requested genome assembly.
/// Genes whose start and end are both -1 for that assembly are skipped.
/// </summary>
/// <param name="genes">The genes to index.</param>
/// <param name="numRefSeqs">The number of reference sequences, i.e. the number of interval arrays created.</param>
/// <param name="genomeAssembly">GRCh37 selects <c>gene.GRCh37</c>; any other value selects <c>gene.GRCh38</c>.</param>
/// <returns>An interval forest whose arrays are sorted by begin and then by end.</returns>
private static IIntervalForest<UgaGene> CreateGeneForest(IEnumerable<UgaGene> genes, int numRefSeqs, GenomeAssembly genomeAssembly)
{
    bool useGrch37 = genomeAssembly == GenomeAssembly.GRCh37;

    var buckets = new List<Interval<UgaGene>>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        buckets[refIndex] = new List<Interval<UgaGene>>();
    }

    foreach (var gene in genes)
    {
        var coords = useGrch37 ? gene.GRCh37 : gene.GRCh38;

        // (-1, -1) marks a gene without coordinates on this assembly
        bool hasCoordinates = coords.Start != -1 || coords.End != -1;
        if (!hasCoordinates)
        {
            continue;
        }

        var bucket = buckets[gene.Chromosome.Index];
        bucket.Add(new Interval<UgaGene>(coords.Start, coords.End, gene));
    }

    var intervalArrays = new IntervalArray<UgaGene>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        var sortedIntervals = buckets[refIndex]
            .OrderBy(interval => interval.Begin)
            .ThenBy(interval => interval.End)
            .ToArray();

        intervalArrays[refIndex] = new IntervalArray<UgaGene>(sortedIntervals);
    }

    return new IntervalForest<UgaGene>(intervalArrays);
}
/// <summary>
/// Builds a gene interval forest from the per-chromosome transcript interval arrays. Within a chromosome
/// each gene (as identified by <c>GeneComparer</c>) is included once, in order of first appearance.
/// </summary>
/// <param name="transcriptIntervalArrays">The transcript interval arrays, indexed by chromosome; entries may be <c>null</c>.</param>
/// <returns>The gene interval forest; chromosomes without transcripts use <c>EmptyIntervalArray</c>.</returns>
public static IntervalForest<IGene> GetGeneForest(IntervalArray<ITranscript>[] transcriptIntervalArrays)
{
    int numChromosomes = transcriptIntervalArrays.Length;
    var geneIntervalArrays = new IntervalArray<IGene>[numChromosomes];
    var geneComparer = new GeneComparer();

    for (var chrIndex = 0; chrIndex < numChromosomes; chrIndex++)
    {
        var transcriptIntervalArray = transcriptIntervalArrays[chrIndex];

        if (transcriptIntervalArray == null)
        {
            // assign an empty IntervalArray to this chr
            geneIntervalArrays[chrIndex] = EmptyIntervalArray;
            continue;
        }

        // the list keeps the order of genes, the set only answers "seen before?"
        var orderedGenes = new List<IGene>();
        var seenGenes = new HashSet<IGene>(geneComparer);

        foreach (var transcriptInterval in transcriptIntervalArray.Array)
        {
            var gene = transcriptInterval.Value.Gene;

            // HashSet.Add returns false when the gene is already present
            if (seenGenes.Add(gene))
            {
                orderedGenes.Add(gene);
            }
        }

        var geneIntervals = orderedGenes.Select(GetGeneInterval).ToArray();
        geneIntervalArrays[chrIndex] = new IntervalArray<IGene>(geneIntervals);
    }

    return new IntervalForest<IGene>(geneIntervalArrays);
}
/// <summary>
/// Reads the per-reference-sequence interval arrays from the reader: first the number of reference
/// sequences, then for each one an item count followed by that many items, and finally a guard.
/// </summary>
/// <typeparam name="T">The interval type produced by <paramref name="readMethod"/>.</typeparam>
/// <param name="reader">The reader the counts and the guard are read from.</param>
/// <param name="readMethod">Invoked once per item to produce the next item.</param>
/// <returns>One entry per reference sequence; entries with an item count of zero are left <c>null</c>.</returns>
private static IntervalArray<T>[] ReadIntervals<T>(IExtendedBinaryReader reader, Func<T> readMethod) where T : IInterval
{
    var refSeqCount = reader.ReadOptInt32();
    var result = new IntervalArray<T>[refSeqCount];

    for (int refIndex = 0; refIndex < refSeqCount; refIndex++)
    {
        var itemCount = reader.ReadOptInt32();

        if (itemCount != 0)
        {
            var intervals = new Interval<T>[itemCount];

            for (int itemIndex = 0; itemIndex < itemCount; itemIndex++)
            {
                T item = readMethod();
                intervals[itemIndex] = new Interval<T>(item.Start, item.End, item);
            }

            result[refIndex] = new IntervalArray<T>(intervals);
        }
    }

    CheckGuard(reader);
    return result;
}
/// <summary>
/// Builds an interval forest from tuples of (reference index, begin, end, value).
/// </summary>
/// <param name="items">The tuples to index; <c>Item1</c> selects the reference sequence.</param>
/// <returns>An interval forest with <c>NumRefSeqs</c> interval arrays, each sorted by begin and then by end.</returns>
public static IIntervalForest<int> CreateIntervalArray(List<Tuple<ushort, int, int, int>> items)
{
    var buckets = new List<IntervalArray<int>.Interval>[NumRefSeqs];
    for (int refIndex = 0; refIndex < NumRefSeqs; refIndex++)
    {
        buckets[refIndex] = new List<IntervalArray<int>.Interval>();
    }

    foreach (var item in items)
    {
        var bucket = buckets[item.Item1];
        bucket.Add(new IntervalArray<int>.Interval(item.Item2, item.Item3, item.Item4));
    }

    // create the interval arrays
    var intervalArrays = new IntervalArray<int>[NumRefSeqs];
    for (int refIndex = 0; refIndex < NumRefSeqs; refIndex++)
    {
        intervalArrays[refIndex] = new IntervalArray<int>(
            buckets[refIndex]
                .OrderBy(interval => interval.Begin)
                .ThenBy(interval => interval.End)
                .ToArray());
    }

    return new IntervalForest<int>(intervalArrays);
}
/// <summary>
/// Collects, per gene, the phased CDS intervals of its transcripts. Predicted transcripts and
/// transcripts for which no CDS intervals can be constructed are skipped.
/// </summary>
/// <param name="transcriptIntervalArray">The transcript intervals of one chromosome.</param>
/// <returns>A dictionary keyed by gene (compared with <c>GeneComparer</c>) holding one entry per contributing transcript.</returns>
private static Dictionary<IGene, List<PhasedIntervalArray>> GetPhasedCdsIntervals(
    IntervalArray<ITranscript> transcriptIntervalArray)
{
    var phasedIntervalsByGene = new Dictionary<IGene, List<PhasedIntervalArray>>(new GeneComparer());

    foreach (var transcriptInterval in transcriptIntervalArray.Array)
    {
        var transcript = transcriptInterval.Value;
        if (transcript.Id.IsPredictedTranscript())
        {
            continue;
        }

        var gene = transcript.Gene;
        byte startPhase = transcript.StartExonPhase;

        var cdsIntervals = ConstructCdsIntervalsFromTranscript(transcript);
        if (cdsIntervals == null)
        {
            continue;
        }

        // fetch the gene's list, creating it on first use
        if (!phasedIntervalsByGene.TryGetValue(gene, out var geneIntervals))
        {
            geneIntervals = new List<PhasedIntervalArray>();
            phasedIntervalsByGene.Add(gene, geneIntervals);
        }

        geneIntervals.Add(new PhasedIntervalArray(startPhase, cdsIntervals));
    }

    return phasedIntervalsByGene;
}
/// <summary>
/// Combines two transcript caches together with their SIFT and PolyPhen prediction caches and writes
/// the combined predictions and transcripts to the output prefix.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once all outputs have been written.</returns>
/// <exception cref="InvalidDataException">Thrown when the two transcript caches contain a different number of reference sequences.</exception>
private static ExitCodes ProgramExecution()
{
    var sequenceData = SequenceHelper.GetDictionaries(_refSequencePath);
    var logger = new ConsoleLogger();

    var caches = LoadTranscriptCaches(logger, CacheConstants.TranscriptPath(_inputPrefix),
        CacheConstants.TranscriptPath(_inputPrefix2), sequenceData.refIndexToChromosome);

    int numRefSeqs = caches.Cache.TranscriptIntervalArrays.Length;
    int numRefSeqs2 = caches.Cache2.TranscriptIntervalArrays.Length;

    if (numRefSeqs != numRefSeqs2)
    {
        throw new InvalidDataException($"Expected the number of reference sequences in cache 1 ({numRefSeqs}) and cache 2 ({numRefSeqs2}) to be the same.");
    }

    var mergedTranscripts = new IntervalArray<ITranscript>[numRefSeqs];
    var mergedSift = new Prediction[numRefSeqs][];
    var mergedPolyphen = new Prediction[numRefSeqs][];

    PredictionHeader siftHeader;
    PredictionHeader polyphenHeader;

    using (var siftReader = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix)), PredictionCacheReader.SiftDescriptions))
    using (var siftReader2 = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix2)), PredictionCacheReader.SiftDescriptions))
    using (var polyphenReader = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix)), PredictionCacheReader.PolyphenDescriptions))
    using (var polyphenReader2 = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix2)), PredictionCacheReader.PolyphenDescriptions))
    {
        // the output headers are taken from the first SIFT / PolyPhen cache
        siftHeader = siftReader.Header;
        polyphenHeader = polyphenReader.Header;

        for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)
        {
            var chromosome = sequenceData.refIndexToChromosome[refIndex];

            // chromosome banner in yellow
            Console.ForegroundColor = ConsoleColor.Yellow;
            logger.WriteLine($"\n{chromosome.UcscName}:");
            Console.ResetColor();

            var sift = CombinePredictions(logger, chromosome, "SIFT", siftReader, siftReader2);
            mergedSift[refIndex] = sift.Predictions;

            var polyphen = CombinePredictions(logger, chromosome, "PolyPhen", polyphenReader, polyphenReader2);
            mergedPolyphen[refIndex] = polyphen.Predictions;

            // the prediction offsets are handed to the transcript merge
            mergedTranscripts[refIndex] = CombineTranscripts(
                logger,
                caches.Cache.TranscriptIntervalArrays[refIndex],
                caches.Cache2.TranscriptIntervalArrays[refIndex],
                sift.Offset,
                polyphen.Offset);
        }
    }

    logger.WriteLine();

    WritePredictions(logger, "SIFT", CacheConstants.SiftPath(_outputPrefix), siftHeader, mergedSift);
    WritePredictions(logger, "PolyPhen", CacheConstants.PolyPhenPath(_outputPrefix), polyphenHeader, mergedPolyphen);
    WriteTranscripts(logger, CloneHeader(caches.Cache.Header), mergedTranscripts, caches.Cache.RegulatoryRegionIntervalArrays);

    return ExitCodes.Success;
}
// Verifies that LastIndex reports the upper bound passed to the constructor.
public void IntervalArray_CheckLastIndex(int last)
{
    var intervalArray = new IntervalArray<int>(-30, last);

    int actual = intervalArray.LastIndex;

    Assert.Equal(last, actual);
}
// Verifies that Contains returns false for -40 on a freshly constructed array with bounds -2..10.
public void IntervalArray_CheckIfContainsValue_ReturnsFalse()
{
    var intervalArray = new IntervalArray<int>(-2, 10);

    bool found = intervalArray.Contains(-40);

    Assert.False(found);
}
// Verifies that FirstIndex reports the lower bound passed to the constructor.
public void IntervalArray_CheckFirstIndex(int first)
{
    var intervalArray = new IntervalArray<int>(first, 30);

    int actual = intervalArray.FirstIndex;

    Assert.Equal(first, actual);
}
/// <summary>
/// Returns the total number of intervals held by the two interval arrays; a <c>null</c> array counts as zero.
/// </summary>
/// <typeparam name="T">The value type of the intervals.</typeparam>
/// <param name="intervalArray">The first interval array, or <c>null</c>.</param>
/// <param name="intervalArray2">The second interval array, or <c>null</c>.</param>
/// <returns>The sum of both array lengths.</returns>
private static int GetNumCombinedTranscripts<T>(IntervalArray<T> intervalArray, IntervalArray<T> intervalArray2)
{
    int total = 0;

    if (!(intervalArray is null))
    {
        total += intervalArray.Array.Length;
    }

    if (!(intervalArray2 is null))
    {
        total += intervalArray2.Array.Length;
    }

    return total;
}
/// <summary>
/// Builds the interval forest shared by the tests from three per-reference interval arrays
/// (reference indices 0, 1 and 2).
/// </summary>
public IntervalForestTests()
{
    var intervalArraysByRefIndex = new IntervalArray<string>[]
    {
        GetIntervalArrayRefIndex0(),
        GetIntervalArrayRefIndex1(),
        GetIntervalArrayRefIndex2()
    };

    _intervalForest = new IntervalForest<string>(intervalArraysByRefIndex);
}
// Verifies that a value written through the indexer can be read back from the same index.
public void IntervalArray_CheckIndexator_ReturnsCorrectValue(int index, int value)
{
    var intervalArray = new IntervalArray<int>(-40, 20);

    intervalArray[index] = value;
    int actual = intervalArray[index];

    Assert.Equal(value, actual);
}
/// <summary>
/// Replaces the sequence's state with the supplied values and switches off the N-sequence mode.
/// </summary>
/// <param name="length">The value stored in <c>Length</c>.</param>
/// <param name="sequenceOffset">The value stored in <c>_sequenceOffset</c>.</param>
/// <param name="twoBitBuffer">The buffer stored in <c>_buffer</c>.</param>
/// <param name="maskedEntryIntervalArray">The masked entries stored in <c>_maskedIntervalSearch</c>.</param>
/// <param name="cytogeneticBands">The bands stored in <c>CytogeneticBands</c>.</param>
internal void Set(int length, int sequenceOffset, byte[] twoBitBuffer,
    IntervalArray<MaskedEntry> maskedEntryIntervalArray, Band[] cytogeneticBands)
{
    _useNSequence         = false;

    _buffer               = twoBitBuffer;
    _sequenceOffset       = sequenceOffset;
    _maskedIntervalSearch = maskedEntryIntervalArray;

    Length                = length;
    CytogeneticBands      = cytogeneticBands;
}
/// <summary>
/// Reads repeat expansion phenotypes line by line from the stream and groups them into one interval array
/// per reference sequence. The stream is disposed before the method returns.
/// </summary>
/// <param name="stream">The input stream; the header is validated with <c>CheckHeader</c>.</param>
/// <param name="desiredGenomeAssembly">The genome assembly passed to the header check.</param>
/// <param name="refNameToChromosome">The lookup used to resolve chromosome names while parsing a line.</param>
/// <param name="numRefSeqs">The number of reference sequences, i.e. the number of interval arrays created.</param>
/// <returns>An interval forest holding the phenotype intervals in file order.</returns>
/// <exception cref="InvalidDataException">Thrown when a line resolves to the reference index <c>ushort.MaxValue</c>.</exception>
public static IIntervalForest<RepeatExpansionPhenotype> Load(Stream stream, GenomeAssembly desiredGenomeAssembly,
    IDictionary<string, IChromosome> refNameToChromosome, int numRefSeqs)
{
    var buckets = new List<Interval<RepeatExpansionPhenotype>>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        buckets[refIndex] = new List<Interval<RepeatExpansionPhenotype>>();
    }

    using (stream)
    using (var reader = new StreamReader(stream))
    {
        CheckHeader(reader, desiredGenomeAssembly);

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // blank lines are skipped
            if (line.Length == 0)
            {
                continue;
            }

            try
            {
                (ushort refIndex, Interval<RepeatExpansionPhenotype> phenotypeInterval) = GetPhenotype(line, refNameToChromosome);

                if (refIndex == ushort.MaxValue)
                {
                    throw new InvalidDataException("Unknown chromosome encountered in STR file.");
                }

                buckets[refIndex].Add(phenotypeInterval);
            }
            catch (Exception e)
            {
                // attach the offending line to the exception before letting it propagate
                e.Data[ExitCodeUtilities.Line] = line;
                throw;
            }
        }
    }

    var intervalArrays = new IntervalArray<RepeatExpansionPhenotype>[numRefSeqs];
    for (var refIndex = 0; refIndex < numRefSeqs; refIndex++)
    {
        var intervals = buckets[refIndex].ToArray();
        intervalArrays[refIndex] = new IntervalArray<RepeatExpansionPhenotype>(intervals);
    }

    return new IntervalForest<RepeatExpansionPhenotype>(intervalArrays);
}
/// <summary>
/// Builds, per chromosome, an interval array of windows of <c>SpliceFlankLength</c> bases on either side of
/// the exon boundaries of all non-predicted transcripts. For each transcript the start of its first exon
/// and the end of its last exon are excluded.
/// </summary>
/// <param name="sequenceProvider">Supplies the chromosome indices to iterate over.</param>
/// <param name="transcriptData">The transcript cache data whose transcript interval forest is queried.</param>
/// <returns>
/// A dictionary from chromosome index to its splice intervals, sorted by begin and then by end.
/// Chromosomes for which the forest returns <c>null</c> have no entry.
/// </returns>
public static Dictionary<ushort, IntervalArray<byte>> GetSpliceIntervals(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
{
    var cache = transcriptData.GetCache();
    var spliceIntervalDict = new Dictionary<ushort, IntervalArray<byte>>(sequenceProvider.RefIndexToChromosome.Count);

    foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)
    {
        var overlappingTranscripts = cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);
        if (overlappingTranscripts == null)
        {
            continue;
        }

        // allocated only after the null check so skipped chromosomes cost nothing
        var spliceIntervals = new List<Interval<byte>>(8 * 1024);

        foreach (var transcript in overlappingTranscripts)
        {
            if (transcript.Id.IsPredictedTranscript())
            {
                continue;
            }

            // remember where this transcript's intervals begin in the shared list
            int countBeforeTranscript = spliceIntervals.Count;
            bool isFirstExon = true;

            foreach (var transcriptRegion in transcript.TranscriptRegions)
            {
                if (transcriptRegion.Type != TranscriptRegionType.Exon)
                {
                    continue;
                }

                var firstSplicePosition = transcriptRegion.Start;
                var secondSplicePosition = transcriptRegion.End;

                // the start of the first exon is not a splice site
                if (!isFirstExon)
                {
                    spliceIntervals.Add(new Interval<byte>(firstSplicePosition - SpliceFlankLength, firstSplicePosition + SpliceFlankLength, 0));
                }

                spliceIntervals.Add(new Interval<byte>(secondSplicePosition - SpliceFlankLength, secondSplicePosition + SpliceFlankLength, 0));
                isFirstExon = false;
            }

            // remove the last added interval since this is the tail of the last exon - which is not a splice site.
            // Only do so when this transcript actually added something; otherwise an interval that belongs
            // to a previous transcript would be removed.
            if (spliceIntervals.Count > countBeforeTranscript)
            {
                spliceIntervals.RemoveAt(spliceIntervals.Count - 1);
            }
        }

        spliceIntervalDict[chromIndex] = new IntervalArray<byte>(spliceIntervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());
    }

    return spliceIntervalDict;
}
// Verifies that Remove returns false for a value (1) that was never added; only 2 is added.
public void IntervalArray_TryToRemoveNonexistentValue_ReturnsFalse()
{
    var intervalArray = new IntervalArray<int>(-2, 10);
    intervalArray.Add(2);

    bool removed = intervalArray.Remove(1);

    Assert.False(removed);
}
// Verifies that Remove returns true for a value (1) that was added beforehand.
public void IntervalArray_RemoveElement_ReturnsTrue()
{
    var intervalArray = new IntervalArray<int>(-2, 10);
    intervalArray.Add(1);

    bool removed = intervalArray.Remove(1);

    Assert.True(removed);
}
// Verifies that an added value can be read back at LastIndex.
public void IntervalArray_AddElement_CheckIfSaidElementIsTheLastOne()
{
    var intervalArray = new IntervalArray<int>(-50, 3);
    intervalArray.Add(1);

    int lastValue = intervalArray[intervalArray.LastIndex];

    Assert.Equal(1, lastValue);
}
// Verifies that Count is 58 after adding four values to an array constructed with bounds -50..3.
public void IntervalArray_AddElements_ReturnsCorrectSize()
{
    var intervalArray = new IntervalArray<int>(-50, 3);
    intervalArray.Add(1);
    intervalArray.Add(5);
    intervalArray.Add(6);
    intervalArray.Add(7);

    int count = intervalArray.Count;

    Assert.Equal(58, count);
}
/// <summary>
/// Appends the current node (if any) to the node list and, when large variants have been collected,
/// builds the interval array from them.
/// </summary>
public void Flush()
{
    bool hasCurrentNode = _currentNode != null;
    if (hasCurrentNode)
    {
        _nodes.Add(_currentNode);
    }

    // without large variants the interval array is left untouched
    if (_largeVariants.Count == 0)
    {
        return;
    }

    var largeVariantIntervals = _largeVariants.ToArray();
    _intervalArray = new IntervalArray<long>(largeVariantIntervals);
}
// Verifies that reading through the indexer after Clear throws a NullReferenceException with the
// expected message text.
public void IntervalArray_ClearTheArray_ShouldThrowNullReferenceException()
{
    var array = new IntervalArray<int>(1, 5);
    array.Add(1);
    array.Add(2);
    array.Add(3);
    array.Add(4);

    array.Clear();

    Exception thrown = Assert.Throws<NullReferenceException>(() => array[array.FirstIndex]);
    Assert.Equal("Object reference not set to an instance of an object.", thrown.Message);
}