예제 #1
0
        /// <summary>
        /// Creates a merger over the supplied interim TSV inputs: opens the compressed reference,
        /// builds the readers for every input kind, gathers their headers and reference names,
        /// prints the data sources and finally runs the assembly consistency check.
        /// </summary>
        /// <param name="annotationFiles">paths handed to <c>ReaderUtilities.GetSaTsvReaders</c></param>
        /// <param name="intervalFiles">paths handed to <c>ReaderUtilities.GetIntervalReaders</c></param>
        /// <param name="miscFile">path handed to <c>ReaderUtilities.GetMiscTsvReader</c>; the resulting reader may be null</param>
        /// <param name="geneFiles">paths handed to <c>ReaderUtilities.GetGeneReaders</c></param>
        /// <param name="compressedReference">path of the compressed reference opened for reading</param>
        /// <param name="outputDirectory">directory remembered for later output</param>
        public InterimTsvsMerger(IEnumerable<string> annotationFiles, IEnumerable<string> intervalFiles, string miscFile, IEnumerable<string> geneFiles, string compressedReference, string outputDirectory)
        {
            _outputDirectory = outputDirectory;

            // the reference supplies both the genome assembly and the name -> chromosome lookup
            var referenceStream   = FileUtilities.GetReadStream(compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);
            _genomeAssembly      = referenceProvider.GenomeAssembly;
            _refNameToChromosome = referenceProvider.RefNameToChromosome;

            // readers are created in the same order as before: annotations, misc, genes, intervals
            _tsvReaders      = ReaderUtilities.GetSaTsvReaders(annotationFiles);
            _miscReader      = ReaderUtilities.GetMiscTsvReader(miscFile);
            _geneReaders     = ReaderUtilities.GetGeneReaders(geneFiles);
            _intervalReaders = ReaderUtilities.GetIntervalReaders(intervalFiles);

            // supplementary annotation headers: small-variant readers first, interval readers second
            var collectedHeaders = new List<SaHeader>();
            collectedHeaders.AddRange(ReaderUtilities.GetTsvHeaders(_tsvReaders));
            collectedHeaders.AddRange(ReaderUtilities.GetTsvHeaders(_intervalReaders));
            _saHeaders = collectedHeaders;

            // gene headers stay null when the helper hands back null
            _geneHeaders = ReaderUtilities.GetTsvHeaders(_geneReaders)?.ToList();

            // union of the reference names reported by every reader that is present
            var collectedRefNames = new HashSet<string>();
            collectedRefNames.UnionWith(ReaderUtilities.GetRefNames(_tsvReaders));
            collectedRefNames.UnionWith(ReaderUtilities.GetRefNames(_intervalReaders));
            if (_miscReader != null)
            {
                collectedRefNames.UnionWith(_miscReader.RefNames);
            }
            _refNames = collectedRefNames;

            DisplayDataSources(_saHeaders, _geneHeaders);

            MergeUtilities.CheckAssemblyConsistancy(_saHeaders);
        }
예제 #2
0
        /// <summary>
        /// Merges the interim annotations of one reference sequence. The merged positions are
        /// written to "{UcscName}.nsa" in the output directory (with a companion ".idx" stream),
        /// after which a one-line summary is printed to the console.
        /// </summary>
        /// <param name="refName">reference name; used as a key into <c>_refNameToChromosome</c></param>
        private void MergeChrom(string refName)
        {
            var benchmark = new Benchmark();

            var enumerators          = GetSaEnumerators(refName);
            var refMinorMajorAlleles = GetGlobalMajorAlleleForRefMinors(refName);
            var sourceVersions       = MergeUtilities.GetDataSourceVersions(_saHeaders);
            var ucscName             = _refNameToChromosome[refName].UcscName;

            var saHeader = new SupplementaryAnnotationHeader(
                ucscName,
                DateTime.Now.Ticks,
                SaDataBaseCommon.DataVersion,
                sourceVersions,
                _genomeAssembly);

            // materialised as a list because it is enumerated several times below
            var sortedIntervals = MergeUtilities.GetIntervals(_intervalReaders, refName)
                                  .OrderBy(interval => interval.Start)
                                  .ThenBy(interval => interval.End)
                                  .ToList();

            var structuralVariantIntervals = MergeUtilities.GetSpecificIntervals(ReportFor.StructuralVariants, sortedIntervals);
            var allVariantsIntervals       = MergeUtilities.GetSpecificIntervals(ReportFor.AllVariants, sortedIntervals);
            var smallVariantIntervals      = MergeUtilities.GetSpecificIntervals(ReportFor.SmallVariants, sortedIntervals);

            var nsaPath   = Path.Combine(_outputDirectory, $"{ucscName}.nsa");
            var indexPath = nsaPath + ".idx";

            var writtenCount = 0;
            int refMinorCount;

            using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
            using (var indexStream = FileUtilities.GetCreateStream(indexPath))
            using (var writer = new SaWriter(nsaStream, indexStream, saHeader, smallVariantIntervals, structuralVariantIntervals, allVariantsIntervals, refMinorMajorAlleles))
            {
                // keep pulling merged positions until the helper reports a null position
                while (true)
                {
                    var (position, saPosition) = GetNextInterimPosition(enumerators);
                    if (saPosition == null)
                    {
                        break;
                    }

                    writer.Write(saPosition, position);
                    writtenCount++;
                }

                // read the counter while the writer is still open
                refMinorCount = writer.RefMinorCount;
            }

            var elapsed = benchmark.GetElapsedIterationTime(writtenCount, "variants", out double _);
            Console.WriteLine($"{ucscName,-23}  {writtenCount,10:n0}   {sortedIntervals.Count,6:n0}    {refMinorCount,6:n0}   {elapsed}");
        }
예제 #3
0
        /// <summary>
        /// Asks <c>MergeUtilities.GetMinItems</c> for the next group of items from the supplied
        /// enumerators and converts that group with <c>MergeUtilities.GetSaPosition</c>.
        /// </summary>
        /// <param name="iSaEnumerators">enumerators over the interim items being merged</param>
        /// <returns>
        /// the (position, supplementary annotation position) tuple produced by
        /// <c>MergeUtilities.GetSaPosition</c>; callers treat a null second element as end of data
        /// </returns>
        private static (int, ISaPosition) GetNextInterimPosition(List<IEnumerator<IInterimSaItem>> iSaEnumerators)
            => MergeUtilities.GetSaPosition(MergeUtilities.GetMinItems(iSaEnumerators));