/// <summary>
/// Loads the data bundle for the requested reference sequence, collects the transcripts,
/// SIFT/PolyPhen predictions, reference bases and regulatory regions for the configured
/// interval, and writes the transcript, SIFT and PolyPhen caches plus the reference bases
/// to files derived from the output stub.
/// </summary>
/// <returns><see cref="ExitCodes.Success"/> once every output has been written.</returns>
private static ExitCodes ProgramExecution()
{
    var consoleLogger = new ConsoleLogger();
    var dataBundle    = DataBundle.GetDataBundle(_inputReferencePath, _inputPrefix);
    int refSeqCount   = dataBundle.SequenceReader.NumRefSeqs;

    // resolve the chromosome and load its data before anything else touches the bundle
    var targetChromosome = ReferenceNameUtilities.GetChromosome(dataBundle.SequenceReader.RefNameToChromosome, _referenceName);
    dataBundle.Load(targetChromosome);

    string stub        = GetOutputStub(targetChromosome, dataBundle.Source);
    var targetInterval = new ChromosomeInterval(targetChromosome, _referencePosition, _referenceEndPosition);

    // gather everything that falls within the target interval
    var overlappingTranscripts = GetTranscripts(consoleLogger, dataBundle, targetInterval);

    var siftResult = GetPredictionStaging(consoleLogger, "SIFT", overlappingTranscripts, targetChromosome,
        dataBundle.SiftPredictions, dataBundle.SiftReader, t => t.SiftIndex, refSeqCount);

    var polyPhenResult = GetPredictionStaging(consoleLogger, "PolyPhen", overlappingTranscripts, targetChromosome,
        dataBundle.PolyPhenPredictions, dataBundle.PolyPhenReader, t => t.PolyPhenIndex, refSeqCount);

    string bases = GetReferenceBases(consoleLogger, dataBundle.SequenceReader, targetInterval);

    var regulatoryArrays = GetRegulatoryRegionIntervalArrays(consoleLogger, dataBundle.TranscriptCache, targetInterval, refSeqCount);

    var transcriptArrays = PredictionUtilities.UpdateTranscripts(overlappingTranscripts,
        dataBundle.SiftPredictions, siftResult.Predictions,
        dataBundle.PolyPhenPredictions, polyPhenResult.Predictions, refSeqCount);

    var transcriptStaging = GetTranscriptStaging(dataBundle.TranscriptCacheData.Header, transcriptArrays, regulatoryArrays);

    // each output stream is created immediately before the cache that is written to it
    var transcriptStream = FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(stub));
    WriteCache(consoleLogger, transcriptStream, transcriptStaging, "transcript");

    var siftStream = FileUtilities.GetCreateStream(CacheConstants.SiftPath(stub));
    WriteCache(consoleLogger, siftStream, siftResult.Staging, "SIFT");

    var polyPhenStream = FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(stub));
    WriteCache(consoleLogger, polyPhenStream, polyPhenResult.Staging, "PolyPhen");

    WriteReference(consoleLogger, CacheConstants.BasesPath(stub), dataBundle.SequenceReader, targetChromosome, bases, targetInterval.Start);

    return ExitCodes.Success;
}
/// <summary>
/// A tandem duplication over chr1:37820921-38404543 must yield two adjacencies:
/// end -> (start - 1) on the forward strand, and its mirror image on the reverse strand.
/// </summary>
public void CreateFromSymbolicAllele_Duplication()
{
    var expectedName = ChromosomeUtilities.Chr1.EnsemblName;
    var duplication  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 37820921, 38404543);

    BreakEndAdjacency[] breakEnds = BreakEndUtilities.CreateFromSymbolicAllele(duplication, VariantType.tandem_duplication);

    Assert.NotNull(breakEnds);
    Assert.Equal(2, breakEnds.Length);

    // first adjacency: both ends on the forward strand
    var forward = breakEnds[0];
    Assert.Equal(expectedName, forward.Origin.Chromosome.EnsemblName);
    Assert.Equal(38404543, forward.Origin.Position);
    Assert.False(forward.Origin.OnReverseStrand);
    Assert.Equal(expectedName, forward.Partner.Chromosome.EnsemblName);
    Assert.Equal(37820920, forward.Partner.Position);
    Assert.False(forward.Partner.OnReverseStrand);

    // second adjacency: origin and partner swapped, both on the reverse strand
    var reverse = breakEnds[1];
    Assert.Equal(expectedName, reverse.Origin.Chromosome.EnsemblName);
    Assert.Equal(37820920, reverse.Origin.Position);
    Assert.True(reverse.Origin.OnReverseStrand);
    Assert.Equal(expectedName, reverse.Partner.Chromosome.EnsemblName);
    Assert.Equal(38404543, reverse.Partner.Position);
    Assert.True(reverse.Partner.OnReverseStrand);
}
/// <summary>
/// A deletion over chr1:1594584-1660503 must yield two adjacencies:
/// (start - 1) -> (end + 1) on the forward strand, and its mirror image on the reverse strand.
/// </summary>
public void CreateFromSymbolicAllele_Deletion()
{
    var expectedName = ChromosomeUtilities.Chr1.EnsemblName;
    var deletion     = new ChromosomeInterval(ChromosomeUtilities.Chr1, 1594584, 1660503);

    BreakEndAdjacency[] breakEnds = BreakEndUtilities.CreateFromSymbolicAllele(deletion, VariantType.deletion);

    Assert.NotNull(breakEnds);
    Assert.Equal(2, breakEnds.Length);

    // first adjacency: both ends on the forward strand
    var forward = breakEnds[0];
    Assert.Equal(expectedName, forward.Origin.Chromosome.EnsemblName);
    Assert.Equal(1594583, forward.Origin.Position);
    Assert.False(forward.Origin.OnReverseStrand);
    Assert.Equal(expectedName, forward.Partner.Chromosome.EnsemblName);
    Assert.Equal(1660504, forward.Partner.Position);
    Assert.False(forward.Partner.OnReverseStrand);

    // second adjacency: origin and partner swapped, both on the reverse strand
    var reverse = breakEnds[1];
    Assert.Equal(expectedName, reverse.Origin.Chromosome.EnsemblName);
    Assert.Equal(1660504, reverse.Origin.Position);
    Assert.True(reverse.Origin.OnReverseStrand);
    Assert.Equal(expectedName, reverse.Partner.Chromosome.EnsemblName);
    Assert.Equal(1594583, reverse.Partner.Position);
    Assert.True(reverse.Partner.OnReverseStrand);
}
/// <summary>
/// An inversion over chr1:63989116-64291267 must yield two adjacencies whose
/// origin and partner lie on opposite strands.
/// </summary>
public void CreateFromSymbolicAllele_Inversion()
{
    var expectedName = ChromosomeUtilities.Chr1.EnsemblName;
    var inversion    = new ChromosomeInterval(ChromosomeUtilities.Chr1, 63989116, 64291267);

    BreakEndAdjacency[] breakEnds = BreakEndUtilities.CreateFromSymbolicAllele(inversion, VariantType.inversion);

    Assert.NotNull(breakEnds);
    Assert.Equal(2, breakEnds.Length);

    // first adjacency: (start - 1) on the forward strand joined to end on the reverse strand
    var leading = breakEnds[0];
    Assert.Equal(expectedName, leading.Origin.Chromosome.EnsemblName);
    Assert.Equal(63989115, leading.Origin.Position);
    Assert.False(leading.Origin.OnReverseStrand);
    Assert.Equal(expectedName, leading.Partner.Chromosome.EnsemblName);
    Assert.Equal(64291267, leading.Partner.Position);
    Assert.True(leading.Partner.OnReverseStrand);

    // second adjacency: (end + 1) on the reverse strand joined to start on the forward strand
    var trailing = breakEnds[1];
    Assert.Equal(expectedName, trailing.Origin.Chromosome.EnsemblName);
    Assert.Equal(64291268, trailing.Origin.Position);
    Assert.True(trailing.Origin.OnReverseStrand);
    Assert.Equal(expectedName, trailing.Partner.Chromosome.EnsemblName);
    Assert.Equal(63989116, trailing.Partner.Position);
    Assert.False(trailing.Partner.OnReverseStrand);
}
/// <summary>
/// Querying the transcript interval forest with chr11:5000-5001 must return null.
/// </summary>
public void GetOverlappingFlankingTranscripts_NoOverlaps()
{
    var query = new ChromosomeInterval(ChromosomeUtilities.Chr11, 5000, 5001);

    ITranscript[] flankingTranscripts = _cache.TranscriptIntervalForest.GetAllFlankingValues(query);

    Assert.Null(flankingTranscripts);
}
/// <summary>
/// A complex structural alteration must produce no adjacencies: the result is null.
/// </summary>
public void CreateFromSymbolicAllele_UnhandledVariantType_ReturnNull()
{
    var region = new ChromosomeInterval(ChromosomeUtilities.Chr1, 63989116, 64291267);

    BreakEndAdjacency[] breakEnds = BreakEndUtilities.CreateFromSymbolicAllele(region, VariantType.complex_structural_alteration);

    Assert.Null(breakEnds);
}
/// <summary>
/// Querying the transcript interval forest with chr1:100-200 must return exactly two transcripts.
/// </summary>
public void GetOverlappingFlankingTranscripts_TwoOverlaps()
{
    var query = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);

    ITranscript[] flankingTranscripts = _cache.TranscriptIntervalForest.GetAllFlankingValues(query);

    Assert.NotNull(flankingTranscripts);
    Assert.Equal(2, flankingTranscripts.Length);
}
/// <summary>
/// For a translocation breakend variant, both overlap fractions must be null.
/// </summary>
public void GetOverlapFractions_ReturnNulls_BreakEnd()
{
    var breakEnd       = new SimpleVariant(ChromosomeUtilities.Chr1, 1, 2, null, null, VariantType.translocation_breakend);
    var suppAnnotation = new ChromosomeInterval(ChromosomeUtilities.Chr1, 2, 1);

    var (reciprocal, annotation) = SuppIntervalUtilities.GetOverlapFractions(suppAnnotation, breakEnd);

    Assert.Null(reciprocal);
    Assert.Null(annotation);
}
/// <summary>
/// When the interval is on chr1 and the variant is on chr2, both overlap fractions must be null.
/// </summary>
public void GetOverlapFractions_ReturnNulls_DifferentChroms()
{
    var chr2Deletion   = new SimpleVariant(ChromosomeUtilities.Chr2, 1, 2, null, null, VariantType.deletion);
    var suppAnnotation = new ChromosomeInterval(ChromosomeUtilities.Chr1, 1, 2);

    var (reciprocal, annotation) = SuppIntervalUtilities.GetOverlapFractions(suppAnnotation, chr2Deletion);

    Assert.Null(reciprocal);
    Assert.Null(annotation);
}
/// <summary>
/// The constructor must expose the chromosome, start and end it was given.
/// </summary>
public void ChromosomeInterval_Setup()
{
    const int expectedStart = 100;
    const int expectedEnd   = 200;

    // feed the expected values straight into the constructor so the two cannot drift apart
    var interval = new ChromosomeInterval(ChromosomeUtilities.Chr1, expectedStart, expectedEnd);

    Assert.Equal(ChromosomeUtilities.Chr1, interval.Chromosome);
    Assert.Equal(expectedStart, interval.Start);
    Assert.Equal(expectedEnd, interval.End);
}
/// <summary>
/// The constructor must expose the chromosome, start and end it was given.
/// </summary>
public void ChromosomeInterval_Setup()
{
    const int expectedStart = 100;
    const int expectedEnd   = 200;
    var chr1                = new Chromosome("chr1", "1", 0);

    // feed the expected values straight into the constructor so the two cannot drift apart
    var interval = new ChromosomeInterval(chr1, expectedStart, expectedEnd);

    Assert.Equal(chr1, interval.Chromosome);
    Assert.Equal(expectedStart, interval.Start);
    Assert.Equal(expectedEnd, interval.End);
}
/// <summary>
/// For a chr1 deletion and a chr1 interval, both overlap fractions must be non-null
/// and equal the expected values to five decimal places.
/// </summary>
public void GetOverlapFractions_NotNull_AsExpected(int varStart, int varEnd, int saStart, int saEnd, double expectedReciprocalOverlap, double expecedAnnotationOverlap)
{
    var deletion       = new SimpleVariant(ChromosomeUtilities.Chr1, varStart, varEnd, null, null, VariantType.deletion);
    var suppAnnotation = new ChromosomeInterval(ChromosomeUtilities.Chr1, saStart, saEnd);

    var (reciprocal, annotation) = SuppIntervalUtilities.GetOverlapFractions(suppAnnotation, deletion);

    Assert.NotNull(reciprocal);
    Assert.NotNull(annotation);

    // compare with a precision of 5 decimal places
    Assert.Equal(expectedReciprocalOverlap, reciprocal.Value, 5);
    Assert.Equal(expecedAnnotationOverlap, annotation.Value, 5);
}
/// <summary>
/// Parses one tab-delimited line of the STR data file into a repeat expansion phenotype
/// wrapped in an interval spanning the parsed start and end positions.
/// </summary>
/// <param name="line">A single tab-delimited record.</param>
/// <param name="refNameToChromosome">Lookup used to resolve the chromosome column.</param>
/// <returns>The index of the resolved chromosome and the interval carrying the phenotype.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the line has fewer than <c>MinNumberOfColumns</c> columns, when the number of
/// repeat numbers differs from the number of allele counts, or when the number of
/// classifications differs from the number of classification ranges.
/// </exception>
private static (ushort RefIndex, Interval<RepeatExpansionPhenotype> Interval) GetPhenotype(string line, IDictionary<string, IChromosome> refNameToChromosome)
{
    string[] cols = line.OptimizedSplit('\t');

    if (cols.Length < MinNumberOfColumns)
    {
        throw new InvalidDataException($"Expected at least {MinNumberOfColumns} columns in the STR data file, but found only {cols.Length}.");
    }

    // the data file is machine-readable, so numbers are parsed independently of the current culture
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    string chromosomeString = cols[ChromIndex];
    int start               = int.Parse(cols[StartIndex], invariant);
    int end                 = int.Parse(cols[EndIndex], invariant);
    string phenotype        = cols[PhenotypeIndex];
    string omimId           = cols[OmimIndex];

    int[] repeatNumbers = cols[RepeatNumbersIndex].Split(',').Select(x => int.Parse(x, invariant)).ToArray();
    int[] alleleCounts  = cols[AlleleCountsIndex].Split(',').Select(x => int.Parse(x, invariant)).ToArray();

    // Split already returns a fresh string[], so no additional copy is needed
    string[] classifications        = cols[CategoriesIndex].Split(',');
    Interval[] classificationRanges = cols[CategoryRangesIndex].Split(',').Select(GetInterval).ToArray();

    if (repeatNumbers.Length != alleleCounts.Length)
    {
        throw new InvalidDataException($"Inconsistent number of repeat numbers ({repeatNumbers.Length}) vs. allele counts ({alleleCounts.Length})");
    }

    if (classifications.Length != classificationRanges.Length)
    {
        throw new InvalidDataException($"Inconsistent number of values of classifications ({classifications.Length}) vs. classification ranges ({classificationRanges.Length})");
    }

    var chromosome         = ReferenceNameUtilities.GetChromosome(refNameToChromosome, chromosomeString);
    var chromosomeInterval = new ChromosomeInterval(chromosome, start, end);

    double[] percentiles = PercentileUtilities.ComputePercentiles(repeatNumbers.Length, alleleCounts);

    var rePhenotype = new RepeatExpansionPhenotype(chromosomeInterval, phenotype, omimId, repeatNumbers, percentiles, classifications, classificationRanges);

    return (chromosome.Index, new Interval<RepeatExpansionPhenotype>(start, end, rePhenotype));
}
/// <summary>
/// Builds the shared <c>_matcher</c> over a forest that holds a single phenotype "A"
/// located at chr1:100-200.
/// </summary>
public MatcherTests()
{
    var location = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);

    // phenotype payload
    int[] repeats     = { 7, 8, 9 };
    var percentiles   = new[] { 0, 1, 1.5 };
    string[] labels   = { "Normal" };
    Interval[] ranges = { new Interval(0, 27) };

    var phenotypeA = new RepeatExpansionPhenotype(location, "A", null, repeats, percentiles, labels, ranges);

    // one phenotype interval, stored in the slot addressed by chr1's index
    var chr1Intervals = new[]
    {
        new Interval<RepeatExpansionPhenotype>(location.Start, location.End, phenotypeA)
    };

    var arraysByRefIndex = new IntervalArray<RepeatExpansionPhenotype>[1];
    arraysByRefIndex[ChromosomeUtilities.Chr1.Index] = new IntervalArray<RepeatExpansionPhenotype>(chr1Intervals);

    _matcher = new Matcher(new IntervalForest<RepeatExpansionPhenotype>(arraysByRefIndex));
}