/// <summary>
/// Runs the multi-FASTA reference creation steps in order: loads the previous reference
/// names, reads the genome assembly report, checks reference index contiguity, reads the
/// cytogenetic bands and FASTA sequences, compresses them, and creates the reference
/// sequence file. A progress message is written to the console for every step.
/// </summary>
/// <returns><see cref="ExitCodes.Success"/> once every step has run to completion.</returns>
private static ExitCodes ProgramExecution()
{
    var genomeAssembly = GenomeAssemblyHelper.Convert(_genomeAssembly);

    Console.Write("- loading previous reference names... ");
    List<IChromosome> oldChromosomes;
    // This method opens the stream, so it also releases it. Disposing here is safe even
    // if the reader disposes the stream itself (NOTE(review): reader ownership not visible here).
    using (var referenceNamesStream = FileUtilities.GetReadStream(_referenceNamesPath))
    {
        oldChromosomes = ReferenceNamesReader.GetReferenceNames(referenceNamesStream);
    }
    Console.WriteLine("finished.");

    IDictionary<string, IChromosome> oldRefNameToChromosome =
        ReferenceDictionaryUtils.GetRefNameToChromosome(oldChromosomes);

    Console.Write("- reading the genome assembly report... ");
    List<IChromosome> chromosomes;
    using (var assemblyReportStream = FileUtilities.GetReadStream(_genomeAssemblyReportPath))
    {
        // The previous chromosomes and their count are handed to the reader
        // (presumably so existing reference indices are preserved - confirm in AssemblyReader).
        chromosomes = AssemblyReader.GetChromosomes(assemblyReportStream, oldRefNameToChromosome,
            oldChromosomes.Count);
    }
    int numRefSeqs = chromosomes.Count;
    Console.WriteLine($"{numRefSeqs} references found.");

    Console.Write("- checking reference index contiguity... ");
    CheckReferenceIndexContiguity(chromosomes, oldChromosomes);
    Console.WriteLine("contiguous.");

    IDictionary<string, IChromosome> refNameToChromosome =
        ReferenceDictionaryUtils.GetRefNameToChromosome(chromosomes);

    Console.Write("- reading cytogenetic bands... ");
    List<Band>[] cytogeneticBandsByRef;
    using (var cytogeneticBandStream = FileUtilities.GetReadStream(_cytogeneticBandPath))
    {
        cytogeneticBandsByRef = CytogeneticBandsReader.GetCytogeneticBands(cytogeneticBandStream,
            numRefSeqs, refNameToChromosome);
    }
    Console.WriteLine("finished.");

    Console.WriteLine("- reading FASTA files:");
    List<FastaSequence> fastaSequences = GetFastaSequences(_fastaPrefix, refNameToChromosome);

    long genomeLength = GetGenomeLength(fastaSequences);
    Console.WriteLine($"- genome length: {genomeLength:N0}");

    Console.Write("- check if chrY has PAR masking... ");
    CheckChrYPadding(fastaSequences);
    Console.WriteLine("unmasked.");

    Console.Write("- applying 2-bit compression... ");
    List<Creation.ReferenceSequence> referenceSequences =
        CreateReferenceSequences(fastaSequences, cytogeneticBandsByRef);
    Console.WriteLine("finished.");

    // The literal must stay on a single line: a regular C# string cannot contain a raw newline.
    Console.Write("- creating reference sequence file... ");
    CreateReferenceSequenceFile(genomeAssembly, _patchLevel, chromosomes, referenceSequences);

    // Report the size of the file found at the output path after creation.
    long fileSize = new FileInfo(_outputCompressedPath).Length;
    Console.WriteLine($"{fileSize:N0} bytes");

    return ExitCodes.Success;
}
/// <summary>
/// Runs the single-FASTA reference creation steps in order: reads the genome assembly
/// report, reads one FASTA sequence, looks up its cytogenetic bands, compresses it, and
/// creates the reference sequence file. A progress message is written to the console for
/// every step.
/// </summary>
/// <returns><see cref="ExitCodes.Success"/> once every step has run to completion.</returns>
private static ExitCodes ProgramExecution()
{
    var genomeAssembly = GenomeAssemblyHelper.Convert(_genomeAssembly);

    Console.Write("- reading the genome assembly report... ");
    // No previous reference names exist in this mode, so the reader gets an empty
    // lookup and a count of zero.
    var dummyRefNameToChromosome = new Dictionary<string, IChromosome>();
    List<IChromosome> chromosomes;
    // This method opens the stream, so it also releases it. Disposing here is safe even
    // if the reader disposes the stream itself (NOTE(review): reader ownership not visible here).
    using (var assemblyReportStream = FileUtilities.GetReadStream(_genomeAssemblyReportPath))
    {
        chromosomes = AssemblyReader.GetChromosomes(assemblyReportStream, dummyRefNameToChromosome, 0);
    }
    int numRefSeqs = chromosomes.Count;
    Console.WriteLine($"{numRefSeqs} references found.");

    IDictionary<string, IChromosome> refNameToChromosome =
        ReferenceDictionaryUtils.GetRefNameToChromosome(chromosomes);

    Console.Write("- reading FASTA file... ");
    var fastaSequence = GetFastaSequence(_fastaPath, refNameToChromosome);
    Console.WriteLine($"- sequence length: {fastaSequence.Bases.Length:N0}");

    Console.Write("- reading cytogenetic bands... ");
    List<Band> cytogeneticBands =
        GetCytogeneticBands(fastaSequence.Chromosome.Index, numRefSeqs, refNameToChromosome);
    Console.WriteLine("finished.");

    Console.Write("- applying 2-bit compression... ");
    var referenceSequence = CreateReferenceSequence(fastaSequence, cytogeneticBands);
    Console.WriteLine("finished.");

    Console.Write("- creating reference sequence file... ");
    // Only the chromosome of the FASTA sequence that was read is passed on for the output.
    var minimalChromosomes = new List<IChromosome> { fastaSequence.Chromosome };
    CreateReferenceSequenceFile(genomeAssembly, minimalChromosomes, referenceSequence);

    // Report the size of the file found at the output path after creation.
    long fileSize = new FileInfo(_outputCompressedPath).Length;
    Console.WriteLine($"{fileSize:N0} bytes");

    return ExitCodes.Success;
}