/// <summary>
/// Initializes a variant factory by storing the supplied collaborators in the
/// corresponding private fields. No other work is performed here.
/// </summary>
/// <param name="refNameToChromosome">Lookup from reference name to chromosome object.</param>
/// <param name="refMinorProvider">Provider stored for later ref-minor lookups.</param>
/// <param name="enableVerboseTranscript">Flag stored as-is in <c>_enableVerboseTranscript</c>.</param>
public VariantFactory(
    IDictionary<string, IChromosome> refNameToChromosome,
    IRefMinorProvider refMinorProvider,
    bool enableVerboseTranscript)
{
    _enableVerboseTranscript = enableVerboseTranscript;
    _refMinorProvider        = refMinorProvider;
    _refNameToChromosome     = refNameToChromosome;
}
/// <summary>
/// Reports whether the provider returns a non-empty global major allele for the
/// given chromosome and position.
/// </summary>
/// <param name="refMinorProvider">Provider to query; may be <c>null</c>.</param>
/// <param name="chrom">Chromosome passed through to the provider.</param>
/// <param name="position">Position passed through to the provider.</param>
/// <returns>
/// <c>false</c> when <paramref name="refMinorProvider"/> is <c>null</c> or the provider
/// returns a null/empty string; otherwise <c>true</c>.
/// </returns>
private static bool IsRefMinor(IRefMinorProvider refMinorProvider, IChromosome chrom, int position)
    => refMinorProvider != null
       && !string.IsNullOrEmpty(refMinorProvider.GetGlobalMajorAllele(chrom, position));
/// <summary>
/// Builds a <see cref="VcfReader"/> from the supplied readers and providers, parses the
/// header, and then attaches the recomposer.
/// </summary>
/// <param name="headerReader">Reader forwarded to the constructor as the header source.</param>
/// <param name="vcfLineReader">Reader forwarded to the constructor as the VCF line source.</param>
/// <param name="sequenceProvider">Sequence provider forwarded to the constructor.</param>
/// <param name="refMinorProvider">Ref-minor provider forwarded to the constructor.</param>
/// <param name="recomposer">Recomposer attached via <c>SetRecomposer</c> after the header is parsed.</param>
/// <param name="vcfFilter">Filter forwarded to the constructor.</param>
/// <param name="vidCreator">Variant ID creator forwarded to the constructor.</param>
/// <param name="mitoHeteroplasmyProvider">Mitochondrial heteroplasmy provider forwarded to the constructor.</param>
/// <param name="enableDq">Flag forwarded to the constructor; defaults to <c>false</c>.</param>
/// <returns>The newly created reader.</returns>
public static VcfReader Create(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IRecomposer recomposer,
    IVcfFilter vcfFilter,
    IVariantIdCreator vidCreator,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
    bool enableDq = false)
{
    var reader = new VcfReader(
        headerReader,
        vcfLineReader,
        sequenceProvider,
        refMinorProvider,
        vcfFilter,
        vidCreator,
        mitoHeteroplasmyProvider,
        enableDq);

    // The header is parsed first; the recomposer is only attached afterwards.
    reader.ParseHeader();
    reader.SetRecomposer(recomposer);

    return reader;
}
/// <summary>
/// Builds a <see cref="VcfReader"/> from the supplied readers and providers, parses the
/// header, and then attaches the recomposer.
/// </summary>
/// <param name="headerReader">Reader forwarded to the constructor as the header source.</param>
/// <param name="vcfLineReader">Reader forwarded to the constructor as the VCF line source.</param>
/// <param name="sequenceProvider">Sequence provider forwarded to the constructor.</param>
/// <param name="refMinorProvider">Ref-minor provider forwarded to the constructor.</param>
/// <param name="recomposer">Recomposer attached via <c>SetRecomposer</c> after the header is parsed.</param>
/// <param name="vcfFilter">Filter forwarded to the constructor.</param>
/// <param name="vidCreator">Variant ID creator forwarded to the constructor.</param>
/// <returns>The newly created reader.</returns>
public static VcfReader Create(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IRecomposer recomposer,
    IVcfFilter vcfFilter,
    IVariantIdCreator vidCreator)
{
    var reader = new VcfReader(
        headerReader,
        vcfLineReader,
        sequenceProvider,
        refMinorProvider,
        vcfFilter,
        vidCreator);

    // The header is parsed first; the recomposer is only attached afterwards.
    reader.ParseHeader();
    reader.SetRecomposer(recomposer);

    return reader;
}
/// <summary>
/// Creates a reader over <paramref name="stream"/>, builds its variant factory, parses the
/// header, and selects the recomposer based on the header parsing result.
/// </summary>
/// <param name="stream">Input stream; wrapped in a <see cref="StreamReader"/>.</param>
/// <param name="refNameToChromosome">Lookup from reference name to chromosome object.</param>
/// <param name="refMinorProvider">Ref-minor provider handed to the variant factory.</param>
/// <param name="enableVerboseTranscript">Flag handed to the variant factory.</param>
/// <param name="recomposer">
/// Recomposer used when <c>ParseHeader</c> returns <c>true</c>; otherwise a
/// <c>NullRecomposer</c> is used instead.
/// </param>
public VcfReader(
    Stream stream,
    IDictionary<string, IChromosome> refNameToChromosome,
    IRefMinorProvider refMinorProvider,
    bool enableVerboseTranscript,
    IRecomposer recomposer)
{
    // All fields are populated before the header is parsed.
    _reader              = new StreamReader(stream);
    _variantFactory      = new VariantFactory(refNameToChromosome, refMinorProvider, enableVerboseTranscript);
    _refNameToChromosome = refNameToChromosome;

    // ParseHeader's result is treated as "the VCF has a sample column".
    if (ParseHeader())
    {
        _recomposer = recomposer;
    }
    else
    {
        _recomposer = new NullRecomposer();
    }
}
/// <summary>
/// Stores the supplied readers and providers and creates the variant factory from the
/// sequence provider. The header is not parsed here.
/// </summary>
/// <param name="headerReader">Reader stored in <c>_headerReader</c>.</param>
/// <param name="vcfLineReader">Reader stored in <c>_reader</c>.</param>
/// <param name="sequenceProvider">
/// Source of the variant factory and of the reference-name-to-chromosome lookup.
/// </param>
/// <param name="refMinorProvider">Ref-minor provider stored for later use.</param>
/// <param name="vcfFilter">Filter stored for later use.</param>
private VcfReader(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IVcfFilter vcfFilter)
{
    // input readers
    _headerReader = headerReader;
    _reader       = vcfLineReader;

    // state derived from the sequence provider
    _variantFactory      = new VariantFactory(sequenceProvider);
    _refNameToChromosome = sequenceProvider.RefNameToChromosome;

    // remaining collaborators
    _refMinorProvider = refMinorProvider;
    _vcfFilter        = vcfFilter;
}
/// <summary>
/// Opens the VCF input and wraps it in a <see cref="VcfReader"/>.
/// </summary>
/// <param name="vcfPath">
/// Path to the VCF file. The literal value <c>"-"</c> selects standard input; any other
/// value is opened through <c>GZipUtilities.GetAppropriateReadStream</c>.
/// </param>
/// <param name="chromosomeDictionary">Lookup from reference name to chromosome object.</param>
/// <param name="refMinorProvider">Ref-minor provider forwarded to the reader.</param>
/// <param name="verboseTranscript">Flag forwarded to the reader.</param>
/// <param name="recomposer">Recomposer forwarded to the reader.</param>
/// <returns>A reader over the selected input, wrapped in a <c>PeekStream</c>.</returns>
public static IVcfReader GetVcfReader(
    string vcfPath,
    IDictionary<string, IChromosome> chromosomeDictionary,
    IRefMinorProvider refMinorProvider,
    bool verboseTranscript,
    IRecomposer recomposer)
{
    bool readFromStdIn = vcfPath == "-";

    var inputStream = readFromStdIn
        ? Console.OpenStandardInput()
        : GZipUtilities.GetAppropriateReadStream(vcfPath);

    var peekableStream = new PeekStream(inputStream);

    return new VcfReader(peekableStream, chromosomeDictionary, refMinorProvider, verboseTranscript, recomposer);
}
/// <summary>
/// Stores the supplied readers and providers and creates the variant factory from the
/// sequence provider's sequence and the variant ID creator. The header is not parsed here.
/// </summary>
/// <param name="headerReader">Reader stored in <c>_headerReader</c>.</param>
/// <param name="vcfLineReader">Reader stored in <c>_reader</c>.</param>
/// <param name="sequenceProvider">
/// Stored for later use; also supplies the sequence for the variant factory and the
/// reference-name-to-chromosome lookup.
/// </param>
/// <param name="refMinorProvider">Ref-minor provider stored for later use.</param>
/// <param name="vcfFilter">Filter stored for later use.</param>
/// <param name="vidCreator">Variant ID creator handed to the variant factory.</param>
/// <param name="mitoHeteroplasmyProvider">Mitochondrial heteroplasmy provider stored for later use.</param>
private VcfReader(
    StreamReader headerReader,
    StreamReader vcfLineReader,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider,
    IVcfFilter vcfFilter,
    IVariantIdCreator vidCreator,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider)
{
    // input readers
    _headerReader = headerReader;
    _reader       = vcfLineReader;

    // state derived from the sequence provider
    _variantFactory      = new VariantFactory(sequenceProvider.Sequence, vidCreator);
    _refNameToChromosome = sequenceProvider.RefNameToChromosome;
    _sequenceProvider    = sequenceProvider;

    // remaining collaborators
    _mitoHeteroplasmyProvider = mitoHeteroplasmyProvider;
    _refMinorProvider         = refMinorProvider;
    _vcfFilter                = vcfFilter;
}
/// <summary>
/// Converts a simple position into a full <see cref="IPosition"/>.
/// </summary>
/// <remarks>
/// The chromosome of the position is loaded into <paramref name="sequenceProvider"/> first.
/// A position counts as a reference call when it has exactly one ALT allele and that allele
/// is contained in <c>VcfCommon.ReferenceAltAllele</c>. For reference calls the global major
/// allele is looked up; when none is available (no provider, or the provider returns
/// <c>null</c>) the result of <c>GetReferencePosition</c> is returned. In every other case
/// the INFO, QUAL, FILTER and sample columns are parsed and the variants are created.
/// </remarks>
/// <param name="simplePosition">Position to convert; may be <c>null</c>.</param>
/// <param name="refMinorProvider">Source of global major alleles; may be <c>null</c>.</param>
/// <param name="sequenceProvider">Provider on which the position's chromosome is loaded.</param>
/// <param name="mitoHeteroplasmyProvider">Forwarded to the sample parser.</param>
/// <param name="variantFactory">Supplies the format indices and creates the variants.</param>
/// <param name="enableDq">Forwarded to the sample parser; defaults to <c>false</c>.</param>
/// <returns><c>null</c> when <paramref name="simplePosition"/> is <c>null</c>; otherwise the converted position.</returns>
public static IPosition ToPosition(
    ISimplePosition simplePosition,
    IRefMinorProvider refMinorProvider,
    ISequenceProvider sequenceProvider,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,
    VariantFactory variantFactory,
    bool enableDq = false)
{
    if (simplePosition == null)
    {
        return null;
    }

    var chromosome = simplePosition.Chromosome;
    sequenceProvider.LoadChromosome(chromosome);

    string[] fields     = simplePosition.VcfFields;
    string[] altAlleles = fields[VcfCommon.AltIndex].OptimizedSplit(',');

    // The global major allele is only looked up for reference calls; it stays null otherwise.
    string globalMajorAllele = null;
    bool isReference = altAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(altAlleles[0]);

    if (isReference)
    {
        globalMajorAllele = refMinorProvider?.GetGlobalMajorAllele(chromosome, simplePosition.Start);

        // A reference call without a global major allele is not a ref-minor site.
        if (globalMajorAllele == null)
        {
            return GetReferencePosition(simplePosition);
        }
    }

    var start     = simplePosition.Start;
    var refAllele = simplePosition.RefAllele;

    var infoData      = VcfInfoParser.Parse(fields[VcfCommon.InfoIndex]);
    int end           = ExtractEnd(infoData, start, refAllele.Length);
    double? quality   = fields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);
    string[] filters  = fields[VcfCommon.FilterIndex].OptimizedSplit(';');
    ISample[] samples = fields.ToSamples(variantFactory.FormatIndices, simplePosition, mitoHeteroplasmyProvider, enableDq);

    IVariant[] variants = variantFactory.CreateVariants(
        chromosome,
        start,
        end,
        refAllele,
        altAlleles,
        infoData,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed,
        simplePosition.LinkedVids,
        globalMajorAllele);

    return new Position(
        chromosome,
        start,
        end,
        refAllele,
        altAlleles,
        quality,
        filters,
        variants,
        samples,
        infoData,
        fields,
        simplePosition.IsDecomposed,
        simplePosition.IsRecomposed);
}
/// <summary>
/// Converts the variant described by <paramref name="config"/> into a position and returns
/// it together with the variant's sample names.
/// </summary>
/// <param name="config">Configuration whose <c>variant</c> supplies the VCF fields and sample names.</param>
/// <param name="sequenceProvider">Sequence provider forwarded to <c>ToPosition</c>.</param>
/// <param name="refMinorProvider">Ref-minor provider forwarded to <c>ToPosition</c>.</param>
/// <returns>A tuple of the converted position and <c>config.variant.sampleNames</c>.</returns>
public static (IPosition, string[]) GetPositionAndSampleNames(
    this SingleConfig config,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider)
{
    var position = ToPosition(config.variant.GetVcfFields(), sequenceProvider, refMinorProvider);
    return (position, config.variant.sampleNames);
}
/// <summary>
/// Parses a single VCF line into a simple position and converts that into an
/// <see cref="IPosition"/> via <c>Position.ToPosition</c>.
/// </summary>
/// <param name="vcfLine">Raw VCF line to parse.</param>
/// <param name="refMinorProvider">Ref-minor provider forwarded to <c>Position.ToPosition</c>.</param>
/// <param name="variantFactory">Variant factory forwarded to <c>Position.ToPosition</c>.</param>
/// <param name="refNameToChromosome">Lookup from reference name to chromosome, used while parsing the line.</param>
/// <returns>The value returned by <c>Position.ToPosition</c>.</returns>
internal static IPosition ParseVcfLine(
    string vcfLine,
    IRefMinorProvider refMinorProvider,
    VariantFactory variantFactory,
    IDictionary<string, IChromosome> refNameToChromosome)
    => Position.ToPosition(
        GetSimplePosition(vcfLine, refNameToChromosome),
        refMinorProvider,
        variantFactory);
/// <summary>
/// Scans a VCF stream and collects positions, grouped by chromosome.
/// </summary>
/// <remarks>
/// Lines starting with <c>#</c> and lines whose reference name is not found in the sequence
/// provider's chromosome lookup are skipped. Reading stops at the first position that the
/// range checker reports as out of range. Lines whose ALT column is <c>"."</c> are skipped
/// unless <c>IsRefMinor</c> returns <c>true</c> for that position. For every remaining line
/// the chromosome is loaded on the sequence provider and the position is handed to
/// <c>TryAddPosition</c>. The stream is wrapped in a <see cref="StreamReader"/> that is
/// disposed once scanning ends.
/// </remarks>
/// <param name="vcfStream">Stream containing the VCF text.</param>
/// <param name="genomicRange">Range used to construct the range checker.</param>
/// <param name="sequenceProvider">Supplies the chromosome lookup and the reference sequence.</param>
/// <param name="refMinorProvider">Forwarded to <c>IsRefMinor</c> for lines whose ALT is <c>"."</c>.</param>
/// <returns>
/// The collected positions as an immutable dictionary, and the count returned by
/// <c>SortPositionsAndGetCount</c>.
/// </returns>
/// <exception cref="InvalidDataException">The POS column could not be parsed as an integer.</exception>
public static (ImmutableDictionary<IChromosome, List<int>> PositionsByChromosome, int Count) GetPositions(
    Stream vcfStream,
    GenomicRange genomicRange,
    ISequenceProvider sequenceProvider,
    IRefMinorProvider refMinorProvider)
{
    var positionsByChromosome = new Dictionary<IChromosome, List<int>>();
    var rangeChecker          = new GenomicRangeChecker(genomicRange);
    var refNameToChrom        = sequenceProvider.RefNameToChromosome;

    using (var vcfReader = new StreamReader(vcfStream))
    {
        // Cache of the most recently resolved reference name and its chromosome.
        string lastReferenceName = "";
        IChromosome chromosome   = null;

        for (string line = vcfReader.ReadLine(); line != null; line = vcfReader.ReadLine())
        {
            if (line.StartsWith('#'))
            {
                continue;
            }

            string[] columns     = line.OptimizedSplit('\t');
            string referenceName = columns[VcfCommon.ChromIndex];

            // Only consult the lookup when the reference name differs from the cached one.
            if (referenceName != lastReferenceName)
            {
                bool isKnownReference = refNameToChrom.TryGetValue(referenceName, out chromosome);
                if (!isKnownReference)
                {
                    continue;
                }

                lastReferenceName = referenceName;
            }

            var (position, foundError) = columns[VcfCommon.PosIndex].OptimizedParseInt32();
            if (foundError)
            {
                throw new InvalidDataException($"Unable to convert the VCF position to an integer: {columns[VcfCommon.PosIndex]}");
            }

            // Nothing after the first out-of-range position is examined.
            if (rangeChecker.OutOfRange(chromosome, position))
            {
                break;
            }

            string refAllele = columns[VcfCommon.RefIndex];
            string altAllele = columns[VcfCommon.AltIndex];

            // A "." ALT is only kept when the site is flagged by IsRefMinor.
            bool hasNoAlt = altAllele == ".";
            if (hasNoAlt && !IsRefMinor(refMinorProvider, chromosome, position))
            {
                continue;
            }

            sequenceProvider.LoadChromosome(chromosome);
            TryAddPosition(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);
        }
    }

    int count = SortPositionsAndGetCount(positionsByChromosome);
    return (positionsByChromosome.ToImmutableDictionary(), count);
}
/// <summary>
/// Parses a single VCF line into a simple position, using the sequence provider's
/// chromosome lookup, and converts that into an <see cref="IPosition"/> via
/// <c>Position.ToPosition</c>.
/// </summary>
/// <param name="vcfLine">Raw VCF line to parse.</param>
/// <param name="refMinorProvider">Ref-minor provider forwarded to <c>Position.ToPosition</c>.</param>
/// <param name="sequenceProvider">
/// Supplies <c>RefNameToChromosome</c> for parsing and is forwarded to <c>Position.ToPosition</c>.
/// </param>
/// <param name="variantFactory">Variant factory forwarded to <c>Position.ToPosition</c>.</param>
/// <returns>The value returned by <c>Position.ToPosition</c>.</returns>
internal static IPosition ParseVcfLine(
    string vcfLine,
    IRefMinorProvider refMinorProvider,
    ISequenceProvider sequenceProvider,
    VariantFactory variantFactory)
    => Position.ToPosition(
        GetSimplePosition(vcfLine, sequenceProvider.RefNameToChromosome),
        refMinorProvider,
        sequenceProvider,
        variantFactory);