// Writes a single-variant "positions" section through OnTheFlyIndexCreator, then reads the
// produced bytes back with JasixIndex and checks the section start and the variant's file offset.
public void Add_one_chrom()
{
    // Arrange: a mocked variant at position 100 on Chr1 (A -> C)
    var variant = new Mock<IPosition>();
    variant.SetupGet(p => p.Chromosome).Returns(ChromosomeUtilities.Chr1);
    variant.SetupGet(p => p.Start).Returns(100);
    variant.SetupGet(p => p.RefAllele).Returns("A");
    variant.SetupGet(p => p.AltAlleles).Returns(new[] { "C" });

    // Act: build the index into an in-memory buffer
    var indexBuffer = new MemoryStream();
    using (var creator = new OnTheFlyIndexCreator(indexBuffer))
    {
        creator.BeginSection("positions", 100);
        creator.Add(variant.Object, 2588);
        creator.EndSection("positions", 2699);
    }

    // copy the written bytes into a fresh stream and read the index back from its start
    byte[] indexBytes = indexBuffer.ToArray();
    var indexInput = new MemoryStream(indexBytes);
    indexInput.Seek(0, SeekOrigin.Begin);
    var jasixIndex = new JasixIndex(indexInput);

    // Assert
    Assert.Equal(100, jasixIndex.GetSectionBegin("positions"));
    Assert.Equal(2588, jasixIndex.GetFirstVariantPosition("chr1", 100, 102));
}
/// <summary>
/// Adds the position to the Jasix index, writes its JSON entry and, unless VCF output is
/// disabled or the position is recomposed, writes the converted VCF line to the VCF/gVCF writers.
/// </summary>
/// <param name="annotatedPosition">the annotated position being written</param>
/// <param name="textWriterPosition">the offset passed to the index for this position</param>
/// <param name="jasixIndexCreator">the index creator that records the position</param>
/// <param name="jsonWriter">the writer receiving the JSON entry</param>
/// <param name="vcfWriter">the VCF writer (may be null)</param>
/// <param name="gvcfWriter">the gVCF writer (may be null)</param>
/// <param name="jsonOutput">the JSON string for this position</param>
private void WriteOutput(IAnnotatedPosition annotatedPosition, long textWriterPosition, OnTheFlyIndexCreator jasixIndexCreator, IJsonWriter jsonWriter, LiteVcfWriter vcfWriter, LiteVcfWriter gvcfWriter, string jsonOutput)
{
    jasixIndexCreator.Add(annotatedPosition.Position, textWriterPosition);
    jsonWriter.WriteJsonEntry(jsonOutput);

    // the VCF line is only produced when at least one VCF-style writer exists
    // and the position is not a recomposed one
    bool hasVcfOutput = vcfWriter != null || gvcfWriter != null;

    if (hasVcfOutput && !annotatedPosition.Position.IsRecomposed)
    {
        string convertedLine = _conversion.Convert(annotatedPosition);

        if (vcfWriter != null)
        {
            vcfWriter.Write(convertedLine);
        }

        if (gvcfWriter != null)
        {
            gvcfWriter.Write(convertedLine);
        }
    }
}
/// <summary>
/// Initializes the JSON writer state, forces "\n" line endings on the underlying writer,
/// creates an on-the-fly index creator when the writer is a <see cref="BgzipTextWriter"/>,
/// and writes the header.
/// </summary>
/// <param name="writer">the stream writer receiving the JSON output</param>
/// <param name="indexStream">the stream handed to the index creator (only used for bgzip writers)</param>
/// <param name="annotator">annotator description passed to the header</param>
/// <param name="creationTime">creation time passed to the header</param>
/// <param name="vepDataVersion">VEP data version passed to the header</param>
/// <param name="dataSourceVersions">data source versions passed to the header</param>
/// <param name="genomeAssembly">genome assembly passed to the header</param>
/// <param name="sampleNames">sample names passed to the header</param>
/// <param name="leaveOpen">stored in the leave-open flag of this writer</param>
public JsonWriter(StreamWriter writer, Stream indexStream, string annotator, string creationTime, string vepDataVersion, List<IDataSourceVersion> dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen)
{
    _writer         = writer;
    _writer.NewLine = "\n";

    _firstEntry          = true;
    _positionFieldClosed = false;
    _leaveOpen           = leaveOpen;

    // indexing is only available when the output goes through a bgzip text writer
    _bgzipTextWriter = writer as BgzipTextWriter;

    if (_bgzipTextWriter != null)
    {
        _jasixIndexCreator = new OnTheFlyIndexCreator(indexStream);
    }
    else
    {
        _jasixIndexCreator = null;
    }

    WriteHeader(annotator, creationTime, genomeAssembly, JsonCommon.SchemaVersion, vepDataVersion,
        dataSourceVersions, sampleNames);
}
/// <summary>
/// Creates the providers and the annotator, annotates every position returned by the VCF reader,
/// and writes the JSON output together with its Jasix index and the optional VCF/gVCF outputs.
/// </summary>
/// <returns><see cref="ExitCodes.Success"/> once all positions and gene annotations have been written</returns>
private ExitCodes ProgramExecution()
{
    // providers & annotator
    var refProvider        = ProviderUtilities.GetSequenceProvider(_refSequencePath);
    var transcriptProvider = ProviderUtilities.GetTranscriptAnnotationProvider(_inputCachePrefix, refProvider);
    var suppProvider       = ProviderUtilities.GetSaProvider(SupplementaryAnnotationDirectories);
    var consProvider       = ProviderUtilities.GetConservationProvider(SupplementaryAnnotationDirectories);
    var minorProvider      = ProviderUtilities.GetRefMinorProvider(SupplementaryAnnotationDirectories);
    var geneProvider       = ProviderUtilities.GetGeneAnnotationProvider(SupplementaryAnnotationDirectories);
    var loadedPlugins      = PluginUtilities.LoadPlugins(_pluginDirectory);

    var annotator = ProviderUtilities.GetAnnotator(transcriptProvider, refProvider, suppProvider,
        consProvider, geneProvider, loadedPlugins);

    var recomposer = _disableRecomposition
        ? new NullRecomposer()
        : Recomposer.Create(refProvider, _inputCachePrefix);

    // no console logging when the output file name is "-"
    ILogger logger;
    if (_outputFileName == "-")
    {
        logger = new NullLogger();
    }
    else
    {
        logger = new ConsoleLogger();
    }

    var metrics = new PerformanceMetrics(logger);

    var versions = GetDataSourceVersions(loadedPlugins, transcriptProvider, suppProvider, geneProvider,
        consProvider);

    var vepDataVersion = transcriptProvider.VepVersion + "." + CacheConstants.DataVersion + "." +
                         SaDataBaseCommon.DataVersion;

    var indexPath = _outputFileName + ".json.gz" + JasixCommons.FileExt;

    using (var outputWriter = ReadWriteUtilities.GetOutputWriter(_outputFileName))
    using (var vcfReader = ReadWriteUtilities.GetVcfReader(_vcfPath, refProvider.RefNameToChromosome,
        minorProvider, _reportAllSvOverlappingTranscripts, recomposer))
    using (var jsonWriter = new JsonWriter(outputWriter, _annotatorVersionTag, Date.CurrentTimeStamp,
        vepDataVersion, versions, refProvider.GenomeAssembly.ToString(), vcfReader.GetSampleNames()))
    using (var vcfWriter = _vcf
        ? new LiteVcfWriter(ReadWriteUtilities.GetVcfOutputWriter(_outputFileName),
            vcfReader.GetHeaderLines(), _annotatorVersionTag, vepDataVersion, versions)
        : null)
    using (var gvcfWriter = _gvcf
        ? new LiteVcfWriter(ReadWriteUtilities.GetGvcfOutputWriter(_outputFileName),
            vcfReader.GetHeaderLines(), _annotatorVersionTag, vepDataVersion, versions)
        : null)
    using (var indexCreator = new OnTheFlyIndexCreator(FileUtilities.GetCreateStream(indexPath)))
    {
        // the writer's Position property is needed for indexing, so a bgzip writer is mandatory
        if (!(outputWriter is BgzipTextWriter bgzipWriter))
        {
            throw new NullReferenceException("Unable to create the bgzip text writer.");
        }

        try
        {
            indexCreator.SetHeader(jsonWriter.Header);

            // enabled for GRCh37 only when the reader reports rCRS, always for GRCh38,
            // and whenever the force flag is set
            bool enableMito =
                (vcfReader.IsRcrsMitochondrion && annotator.GenomeAssembly == GenomeAssembly.GRCh37)
                || annotator.GenomeAssembly == GenomeAssembly.GRCh38
                || _forceMitochondrialAnnotation;

            if (enableMito)
            {
                annotator.EnableMitochondrialAnnotation();
            }

            int lastChromIndex = -1;
            var orderChecker   = new SortedVcfChecker();

            for (IPosition position = vcfReader.GetNextPosition();
                position != null;
                position = vcfReader.GetNextPosition())
            {
                orderChecker.CheckVcfOrder(position.Chromosome.UcscName);
                lastChromIndex = UpdatePerformanceMetrics(lastChromIndex, position.Chromosome, metrics);

                var annotated     = annotator.Annotate(position);
                string jsonString = annotated.GetJsonString();

                if (json​String == null)
                {
                    // nothing to report in JSON: pass the original VCF fields through to the gVCF
                    gvcfWriter?.Write(string.Join("\t", position.VcfFields));
                }
                else
                {
                    WriteOutput(annotated, bgzipWriter.Position, indexCreator, jsonWriter, vcfWriter,
                        gvcfWriter, jsonString);
                }

                metrics.Increment();
            }

            WriteGeneAnnotations(annotator.GetAnnotatedGenes(), jsonWriter);
        }
        catch (Exception e)
        {
            // attach the current VCF line to the exception before it propagates
            e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
            throw;
        }
    }

    metrics.ShowAnnotationTime();

    return ExitCodes.Success;
}