Esempio n. 1
0
        /// <summary>
        /// writes the reference bases around the supplied packets to a temporary ".bases" file.
        /// The path of that file is appended to <paramref name="outputFiles"/>.
        /// </summary>
        /// <param name="bundle">source of the sequence, its renamer, and the cache header</param>
        /// <param name="packets">packets passed to GetBoundingInterval for each reference index</param>
        /// <param name="outputStub">path prefix; ".bases" and TempCacheExt are appended to it</param>
        /// <param name="outputFiles">list that receives the path of the file being written</param>
        public static void WriteBases(DataBundle bundle, List <TranscriptPacket> packets, string outputStub,
                                      List <string> outputFiles)
        {
            // number of flanking bases handed to GetBoundingInterval
            const int flankingLength = 500;

            var basesPath = outputStub + ".bases" + TempCacheExt;
            outputFiles.Add(basesPath);

            var metadata         = bundle.SequenceReader.Metadata;
            var cytogeneticBands = bundle.Sequence.CytogeneticBands;
            var genomeAssembly   = bundle.Cache.Header.GenomeAssembly;

            using (var basesWriter = new CompressedSequenceWriter(basesPath, metadata, cytogeneticBands, genomeAssembly))
            {
                for (ushort index = 0; index < bundle.Sequence.Renamer.NumRefSeqs; index++)
                {
                    var boundingInterval = GetBoundingInterval(packets, index, flankingLength);

                    // only reference sequences that yield a bounding interval are written
                    if (boundingInterval != null)
                    {
                        LoadSequence(bundle, index);

                        // the interval is inclusive on both ends
                        var numBases       = boundingInterval.End - boundingInterval.Start + 1;
                        var referenceBases = bundle.Sequence.Substring(boundingInterval.Start, numBases);

                        basesWriter.Write(bundle.Sequence.Renamer.EnsemblReferenceNames[index], referenceBases,
                                          boundingInterval.Start);
                    }
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// writes the supplied reference bases for one chromosome to a compressed sequence file
        /// at <paramref name="outputPath"/>, logging progress before and after
        /// </summary>
        /// <param name="logger">receives the progress messages</param>
        /// <param name="outputPath">path handed to FileUtilities.GetCreateStream</param>
        /// <param name="reader">supplies the cytogenetic bands, reference metadata, and assembly</param>
        /// <param name="chromosome">its Ensembl name is used as the sequence name</param>
        /// <param name="referenceBases">the bases to write</param>
        /// <param name="offset">offset forwarded to the writer alongside the bases</param>
        private static void WriteReference(ILogger logger, string outputPath, CompressedSequenceReader reader,
                                           IChromosome chromosome, string referenceBases, int offset)
        {
            logger.Write("- writing reference bases... ");

            var bands        = new CytogeneticBands(reader.CytogeneticBands);
            var outputStream = FileUtilities.GetCreateStream(outputPath);

            using (var sequenceWriter = new CompressedSequenceWriter(outputStream, reader.ReferenceMetadataList,
                                                                     bands, reader.Assembly))
            {
                var sequenceName = chromosome.EnsemblName;
                sequenceWriter.Write(sequenceName, referenceBases, offset);
            }

            logger.WriteLine("finished.");
        }
Esempio n. 3
0
        /// <summary>
        /// writes a placeholder ".bases" file containing a single "A" for each requested reference
        /// index. Used when only the chromosome renamer and the cytogenetic bands need to be populated.
        /// The path of the file is appended to <paramref name="outputFiles"/>.
        /// </summary>
        /// <param name="bundle">source of the sequence metadata, renamer, and cache header</param>
        /// <param name="refIndices">reference indices to emit; processed in ascending order</param>
        /// <param name="outputStub">path prefix; ".bases" and TempCacheExt are appended to it</param>
        /// <param name="outputFiles">list that receives the path of the file being written</param>
        public static void WriteEmptyBases(DataBundle bundle, HashSet <ushort> refIndices, string outputStub,
                                           List <string> outputFiles)
        {
            var basesPath = outputStub + ".bases" + TempCacheExt;
            outputFiles.Add(basesPath);

            var metadata         = bundle.SequenceReader.Metadata;
            var cytogeneticBands = bundle.Sequence.CytogeneticBands;
            var genomeAssembly   = bundle.Cache.Header.GenomeAssembly;

            using (var basesWriter = new CompressedSequenceWriter(basesPath, metadata, cytogeneticBands, genomeAssembly))
            {
                var ascendingIndices = refIndices.OrderBy(index => index);

                foreach (var index in ascendingIndices)
                {
                    // the name is looked up per entry, so nothing is read when the set is empty
                    var referenceName = bundle.Sequence.Renamer.EnsemblReferenceNames[index];
                    basesWriter.Write(referenceName, "A");
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// reads every reference sequence from a FASTA file and writes it to a compressed reference
        /// file, reporting progress and the final file size on the console
        /// </summary>
        /// <param name="inputFastaPath">path to the FASTA file to read</param>
        /// <param name="inputCytogeneticBandpath">path handed to GetCytogeneticBands</param>
        /// <param name="inputChromosomeNamesPath">path handed to GetReferenceMetadata</param>
        /// <param name="outputCompressedPath">path of the compressed reference file to create</param>
        /// <param name="genomeAssembly">genome assembly forwarded to the writer</param>
        public void Convert(string inputFastaPath, string inputCytogeneticBandpath, string inputChromosomeNamesPath,
                            string outputCompressedPath, GenomeAssembly genomeAssembly)
        {
            // reference metadata
            Console.Write("- getting reference metadata... ");
            var metadataList = GetReferenceMetadata(inputChromosomeNamesPath);
            Console.WriteLine("{0} references found.", metadataList.Count);

            var chromosomeRenamer = new ChromosomeRenamer();
            chromosomeRenamer.AddReferenceMetadata(metadataList);

            // cytogenetic bands
            Console.Write("- getting cytogenetic bands... ");
            var bands = GetCytogeneticBands(inputCytogeneticBandpath, chromosomeRenamer);
            Console.WriteLine("finished.\n");

            // stream each FASTA entry into the compressed file
            using (var fasta = new FastaReader(inputFastaPath))
            using (var compressedWriter = new CompressedSequenceWriter(outputCompressedPath, metadataList, bands, genomeAssembly))
            {
                Console.WriteLine("Converting the following reference sequences:");

                // GetReferenceSequence returning null ends the loop
                for (var sequence = fasta.GetReferenceSequence();
                     sequence != null;
                     sequence = fasta.GetReferenceSequence())
                {
                    Console.WriteLine("- {0} ({1:n0} bytes)", sequence.Name, sequence.Bases.Length);
                    compressedWriter.Write(sequence.Name, sequence.Bases);
                }
            }

            // the size is read after the writer has been disposed
            var outputInfo = new FileInfo(outputCompressedPath);
            Console.WriteLine("\nFile size: {0}", outputInfo.Length);
        }