/// <summary>
        /// constructor: registers a fixed set of reference metadata entries with the shared renamer
        /// </summary>
public ChromosomeRenamerTests()
        {
            // NOTE(review): the first argument looks like the UCSC-style name and the second
            // like the Ensembl-style name - confirm against the ReferenceMetadata constructor.
            _renamer.AddReferenceMetadata(new List<ReferenceMetadata>
            {
                new ReferenceMetadata("chr1", "1", true),
                new ReferenceMetadata("chrX", "X", true),
                new ReferenceMetadata("chrM", "MT", true),

                // same values as the first entry (presumably exercises duplicate handling)
                new ReferenceMetadata("chr1", "1", true),

                // both names are null (presumably exercises null handling)
                new ReferenceMetadata(null, null, true),

                // the only entry whose third argument is false
                new ReferenceMetadata("chrEBV", "EBV", false)
            });
        }
        /// <summary>
        /// registers a single metadata entry whose first name argument is null on a fresh
        /// renamer, then checks that the Ensembl lookup of "chr1" returns "chr1" and that
        /// the UCSC lookup of null returns null
        /// </summary>
        public void AddReferenceNameEnsemblEmpty()
        {
            const string ucscReferenceName = "chr1";

            // start from a renamer that has no metadata registered yet
            var freshRenamer = new ChromosomeRenamer();
            var singleEntry  = new List<ReferenceMetadata>
            {
                new ReferenceMetadata(null, ucscReferenceName, true)
            };
            freshRenamer.AddReferenceMetadata(singleEntry);

            // perform both lookups before asserting
            var ucscResult    = freshRenamer.GetUcscReferenceName(null);
            var ensemblResult = freshRenamer.GetEnsemblReferenceName(ucscReferenceName);

            Assert.Equal(ucscReferenceName, ensemblResult);
            Assert.Null(ucscResult);
        }
Exemple #3
0
        /// <summary>
        /// converts the FASTA file to a compressed reference file, reporting progress on the console
        /// </summary>
        /// <param name="inputFastaPath">path handed to the FASTA reader</param>
        /// <param name="inputCytogeneticBandpath">path from which the cytogenetic bands are loaded</param>
        /// <param name="inputChromosomeNamesPath">path from which the reference metadata is loaded</param>
        /// <param name="outputCompressedPath">path of the compressed file that is written</param>
        /// <param name="genomeAssembly">genome assembly passed through to the compressed sequence writer</param>
        public void Convert(string inputFastaPath, string inputCytogeneticBandpath, string inputChromosomeNamesPath,
                            string outputCompressedPath, GenomeAssembly genomeAssembly)
        {
            // load the reference metadata and report how many entries were found
            Console.Write("- getting reference metadata... ");
            var metadataEntries = GetReferenceMetadata(inputChromosomeNamesPath);
            Console.WriteLine("{0} references found.", metadataEntries.Count);

            // the renamer is needed while loading the cytogenetic bands
            var chromosomeRenamer = new ChromosomeRenamer();
            chromosomeRenamer.AddReferenceMetadata(metadataEntries);

            Console.Write("- getting cytogenetic bands... ");
            var bands = GetCytogeneticBands(inputCytogeneticBandpath, chromosomeRenamer);
            Console.WriteLine("finished.\n");

            // stream every sequence from the FASTA reader into the compressed writer
            using (var fastaReader = new FastaReader(inputFastaPath))
            using (var compressedWriter = new CompressedSequenceWriter(outputCompressedPath, metadataEntries, bands, genomeAssembly))
            {
                Console.WriteLine("Converting the following reference sequences:");

                // a null sequence from the reader ends the loop
                for (var sequence = fastaReader.GetReferenceSequence();
                     sequence != null;
                     sequence = fastaReader.GetReferenceSequence())
                {
                    Console.WriteLine("- {0} ({1:n0} bytes)", sequence.Name, sequence.Bases.Length);
                    compressedWriter.Write(sequence.Name, sequence.Bases);
                }
            }

            // the size is read only after the writer has been disposed
            var outputInfo = new FileInfo(outputCompressedPath);
            Console.WriteLine("\nFile size: {0}", outputInfo.Length);
        }