/// <summary>
/// Writes, for every reference sequence that has a bounding interval, the bases inside
/// that interval to a compressed sequence file. The file path is
/// <paramref name="outputStub"/> followed by ".bases" and TempCacheExt.
/// </summary>
/// <param name="bundle">supplies the sequence reader metadata, the sequence (cytogenetic bands, renamer, bases) and the genome assembly from the cache header</param>
/// <param name="packets">transcript packets passed to GetBoundingInterval for each reference index</param>
/// <param name="outputStub">prefix of the output file path</param>
/// <param name="outputFiles">list that receives the output file path; the path is added before the file is written</param>
public static void WriteBases(DataBundle bundle, List<TranscriptPacket> packets, string outputStub, List<string> outputFiles)
{
    // flanking length handed to GetBoundingInterval
    // NOTE(review): presumably padding added around the packets on each reference - confirm in GetBoundingInterval
    const int flankingLength = 500;

    var basesPath = string.Concat(outputStub, ".bases", TempCacheExt);
    outputFiles.Add(basesPath);

    using (var writer = new CompressedSequenceWriter(
        basesPath,
        bundle.SequenceReader.Metadata,
        bundle.Sequence.CytogeneticBands,
        bundle.Cache.Header.GenomeAssembly))
    {
        for (ushort index = 0; index < bundle.Sequence.Renamer.NumRefSeqs; index++)
        {
            var bounds = GetBoundingInterval(packets, index, flankingLength);

            // references without a bounding interval are skipped
            if (bounds == null)
            {
                continue;
            }

            // the sequence is loaded only for references that will actually be written
            LoadSequence(bundle, index);

            // both interval ends are counted, hence the + 1
            var length = bounds.End - bounds.Start + 1;
            var intervalBases = bundle.Sequence.Substring(bounds.Start, length);
            var referenceName = bundle.Sequence.Renamer.EnsemblReferenceNames[index];

            // the interval start is stored alongside the extracted bases
            writer.Write(referenceName, intervalBases, bounds.Start);
        }
    }
}
/// <summary>
/// Writes the reference bases of one chromosome to a compressed sequence file created at
/// <paramref name="outputPath"/>. The reference metadata, cytogenetic bands and assembly
/// are taken from <paramref name="reader"/>.
/// </summary>
/// <param name="logger">receives the progress messages</param>
/// <param name="outputPath">path handed to FileUtilities.GetCreateStream to obtain the output stream</param>
/// <param name="reader">source of the reference metadata list, cytogenetic bands and assembly</param>
/// <param name="chromosome">chromosome whose Ensembl name labels the written sequence</param>
/// <param name="referenceBases">bases to write</param>
/// <param name="offset">offset passed to the writer together with the bases</param>
private static void WriteReference(ILogger logger, string outputPath, CompressedSequenceReader reader, IChromosome chromosome, string referenceBases, int offset)
{
    logger.Write("- writing reference bases... ");

    // wrap the reader's bands in a new CytogeneticBands instance for the writer
    var bands = new CytogeneticBands(reader.CytogeneticBands);

    using (var sequenceWriter = new CompressedSequenceWriter(
        FileUtilities.GetCreateStream(outputPath),
        reader.ReferenceMetadataList,
        bands,
        reader.Assembly))
    {
        sequenceWriter.Write(chromosome.EnsemblName, referenceBases, offset);
    }

    logger.WriteLine("finished.");
}
/// <summary>
/// Writes a placeholder compressed sequence file (a single "A" per requested reference).
/// Used when only the chromosome renamer and the cytogenetic bands need to be populated.
/// The file path is <paramref name="outputStub"/> followed by ".bases" and TempCacheExt.
/// </summary>
/// <param name="bundle">supplies the sequence reader metadata, cytogenetic bands, renamer and the genome assembly from the cache header</param>
/// <param name="refIndices">reference indices to write; they are processed in ascending order</param>
/// <param name="outputStub">prefix of the output file path</param>
/// <param name="outputFiles">list that receives the output file path; the path is added before the file is written</param>
public static void WriteEmptyBases(DataBundle bundle, HashSet<ushort> refIndices, string outputStub, List<string> outputFiles)
{
    var basesPath = string.Concat(outputStub, ".bases", TempCacheExt);
    outputFiles.Add(basesPath);

    using (var writer = new CompressedSequenceWriter(
        basesPath,
        bundle.SequenceReader.Metadata,
        bundle.Sequence.CytogeneticBands,
        bundle.Cache.Header.GenomeAssembly))
    {
        // a HashSet has no defined order, so sort to get ascending reference order
        var sortedIndices = refIndices.OrderBy(index => index);

        foreach (var index in sortedIndices)
        {
            var referenceName = bundle.Sequence.Renamer.EnsemblReferenceNames[index];

            // a single base stands in for the real sequence
            writer.Write(referenceName, "A");
        }
    }
}
/// <summary>
/// Converts a FASTA file into a compressed reference file, reporting progress on the console.
/// </summary>
/// <param name="inputFastaPath">path of the FASTA file to read</param>
/// <param name="inputCytogeneticBandpath">path passed to GetCytogeneticBands</param>
/// <param name="inputChromosomeNamesPath">path passed to GetReferenceMetadata</param>
/// <param name="outputCompressedPath">path of the compressed reference file to create</param>
/// <param name="genomeAssembly">genome assembly handed to the compressed sequence writer</param>
public void Convert(string inputFastaPath, string inputCytogeneticBandpath, string inputChromosomeNamesPath, string outputCompressedPath, GenomeAssembly genomeAssembly)
{
    Console.Write("- getting reference metadata... ");
    var metadataList = GetReferenceMetadata(inputChromosomeNamesPath);
    Console.WriteLine("{0} references found.", metadataList.Count);

    // the renamer is built from the reference metadata and is needed to load the bands
    var chromosomeRenamer = new ChromosomeRenamer();
    chromosomeRenamer.AddReferenceMetadata(metadataList);

    Console.Write("- getting cytogenetic bands... ");
    var bands = GetCytogeneticBands(inputCytogeneticBandpath, chromosomeRenamer);
    Console.WriteLine("finished.\n");

    // stream every sequence from the FASTA file into the compressed writer
    using (var fastaReader = new FastaReader(inputFastaPath))
    using (var writer = new CompressedSequenceWriter(outputCompressedPath, metadataList, bands, genomeAssembly))
    {
        Console.WriteLine("Converting the following reference sequences:");

        // a null result from the reader ends the loop
        var sequence = fastaReader.GetReferenceSequence();

        while (sequence != null)
        {
            Console.WriteLine("- {0} ({1:n0} bytes)", sequence.Name, sequence.Bases.Length);
            writer.Write(sequence.Name, sequence.Bases);

            sequence = fastaReader.GetReferenceSequence();
        }
    }

    // the size is read only after the writer has been disposed
    var fileSize = new FileInfo(outputCompressedPath).Length;
    Console.WriteLine("\nFile size: {0}", fileSize);
}