Beispiel #1
0
        /// <summary>
        /// Reads the cytogenetic bands from <c>_cytogeneticBandPath</c>, picks the band list
        /// stored at <paramref name="refIndex"/>, and keeps only the bands that
        /// <c>Intervals.Utilities.Overlaps</c> reports as overlapping the configured region.
        /// </summary>
        /// <param name="refIndex">Index into the per-reference array returned by the band reader.</param>
        /// <param name="numRefSeqs">Number of reference sequences, forwarded to the band reader.</param>
        /// <param name="refNameToChromosome">Reference-name lookup, forwarded to the band reader.</param>
        /// <returns>A new list holding the overlapping bands of the selected reference.</returns>
        private static List<Band> GetCytogeneticBands(ushort refIndex, int numRefSeqs, IDictionary<string, IChromosome> refNameToChromosome)
        {
            // NOTE(review): the stream is not disposed here; presumably the reader takes ownership - confirm.
            var bandStream = FileUtilities.GetReadStream(_cytogeneticBandPath);
            List<Band>[] bandsByRef = CytogeneticBandsReader.GetCytogeneticBands(bandStream, numRefSeqs, refNameToChromosome);
            List<Band> bandsOnRef = bandsByRef[refIndex];

            // NOTE(review): the end is derived as begin + _endPosition - 1, which reads as if
            // _endPosition holds a length rather than a coordinate - confirm against the caller.
            int regionBegin = _beginPosition;
            int regionEnd = _beginPosition + _endPosition - 1;

            return (from band in bandsOnRef
                    where Intervals.Utilities.Overlaps(regionBegin, regionEnd, band.Begin, band.End)
                    select band).ToList();
        }
Beispiel #2
0
        /// <summary>
        /// Runs the reference-creation pipeline step by step, reporting progress on the console:
        /// loads the previous reference names, reads the genome assembly report, checks reference
        /// index contiguity, reads cytogenetic bands and FASTA files, checks chrY, compresses the
        /// sequences, and writes the reference sequence file.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once every step has completed.</returns>
        private static ExitCodes ProgramExecution()
        {
            var genomeAssembly = GenomeAssemblyHelper.Convert(_genomeAssembly);

            // Step 1: reference names from the previous build.
            Console.Write("- loading previous reference names... ");
            var previousNamesStream = FileUtilities.GetReadStream(_referenceNamesPath);
            List<IChromosome> previousChromosomes = ReferenceNamesReader.GetReferenceNames(previousNamesStream);
            Console.WriteLine("finished.");

            IDictionary<string, IChromosome> previousNameLookup = ReferenceDictionaryUtils.GetRefNameToChromosome(previousChromosomes);

            // Step 2: chromosomes listed in the genome assembly report.
            Console.Write("- reading the genome assembly report... ");
            var assemblyReportStream = FileUtilities.GetReadStream(_genomeAssemblyReportPath);
            List<IChromosome> currentChromosomes = AssemblyReader.GetChromosomes(assemblyReportStream, previousNameLookup, previousChromosomes.Count);
            int numRefSeqs = currentChromosomes.Count;
            Console.WriteLine($"{numRefSeqs} references found.");

            // Step 3: compare the new chromosome list against the previous one.
            Console.Write("- checking reference index contiguity... ");
            CheckReferenceIndexContiguity(currentChromosomes, previousChromosomes);
            Console.WriteLine("contiguous.");

            IDictionary<string, IChromosome> currentNameLookup = ReferenceDictionaryUtils.GetRefNameToChromosome(currentChromosomes);

            // Step 4: cytogenetic bands, one list per reference.
            Console.Write("- reading cytogenetic bands... ");
            var bandStream = FileUtilities.GetReadStream(_cytogeneticBandPath);
            List<Band>[] bandsByRef = CytogeneticBandsReader.GetCytogeneticBands(bandStream, numRefSeqs, currentNameLookup);
            Console.WriteLine("finished.");

            // Step 5: FASTA sequences and the total genome length.
            Console.WriteLine("- reading FASTA files:");
            List<FastaSequence> sequences = GetFastaSequences(_fastaPrefix, currentNameLookup);
            long genomeLength = GetGenomeLength(sequences);
            Console.WriteLine($"- genome length: {genomeLength:N0}");

            // Step 6: chrY check; "unmasked." is printed only when the check returns normally.
            Console.Write("- check if chrY has PAR masking... ");
            CheckChrYPadding(sequences);
            Console.WriteLine("unmasked.");

            // Step 7: build the compressed reference sequences.
            Console.Write("- applying 2-bit compression... ");
            List<Creation.ReferenceSequence> compressedSequences = CreateReferenceSequences(sequences, bandsByRef);
            Console.WriteLine("finished.");

            // Step 8: write the output file and report its size on disk.
            Console.Write("- creating reference sequence file... ");
            CreateReferenceSequenceFile(genomeAssembly, _patchLevel, currentChromosomes, compressedSequences);
            long fileSize = new FileInfo(_outputCompressedPath).Length;
            Console.WriteLine($"{fileSize:N0} bytes");

            return ExitCodes.Success;
        }