Beispiel #1
0
        /// <summary>
        /// Reads a gzip-compressed FASTA file that must yield exactly one reference and returns
        /// the portion of that reference bounded by <c>_beginPosition</c> and <c>_endPosition</c>.
        /// </summary>
        /// <param name="fastaPath">Path to the gzip-compressed FASTA file.</param>
        /// <param name="refNameToChromosome">
        /// Lookup from reference name to chromosome; passed through unchanged to
        /// <c>FastaReader.AddReferenceSequences</c>.
        /// </param>
        /// <returns>
        /// A new <c>FastaSequence</c> on the same chromosome as the parsed reference, containing
        /// only the selected bases.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when parsing the file does not produce exactly one reference.
        /// </exception>
        private static FastaSequence GetFastaSequence(string fastaPath, IDictionary<string, IChromosome> refNameToChromosome)
        {
            var references = new List<FastaSequence>();

            // Release the file handle deterministically, including when parsing throws.
            // Disposing the GZipStream also disposes the underlying read stream; a second
            // Dispose (should the reader close the stream itself) is a harmless no-op.
            using (var gzipStream = new GZipStream(FileUtilities.GetReadStream(fastaPath), CompressionMode.Decompress))
            {
                FastaReader.AddReferenceSequences(gzipStream, refNameToChromosome, references);
            }

            if (references.Count != 1)
            {
                throw new InvalidDataException($"Expected 1 reference, but found {references.Count} references.");
            }

            var reference = references[0];

            // The positions are treated as 1-based and inclusive: shift the start to a 0-based
            // offset and add one to the length so the end position is included.
            // Substring throws ArgumentOutOfRangeException if the range falls outside the bases.
            int    length    = _endPosition - _beginPosition + 1;
            string substring = reference.Bases.Substring(_beginPosition - 1, length);

            return new FastaSequence(reference.Chromosome, substring);
        }
Beispiel #2
0
        /// <summary>
        /// Parses every gzip-compressed FASTA file whose name matches <c>{prefix}*.fa.gz</c> in the
        /// directory of <paramref name="fastaPrefix"/> and returns all resulting sequences ordered
        /// by chromosome index. Progress is written to the console for each file.
        /// </summary>
        /// <param name="fastaPrefix">
        /// Directory plus file-name prefix of the FASTA files. A prefix without a directory
        /// component is resolved against the current working directory.
        /// </param>
        /// <param name="refNameToChromosome">
        /// Lookup from reference name to chromosome; passed through unchanged to
        /// <c>FastaReader.AddReferenceSequences</c>.
        /// </param>
        /// <returns>The parsed sequences, sorted ascending by <c>Chromosome.Index</c>.</returns>
        private static List<FastaSequence> GetFastaSequences(string fastaPrefix, IDictionary<string, IChromosome> refNameToChromosome)
        {
            string directory = Path.GetDirectoryName(fastaPrefix);
            string prefix    = Path.GetFileName(fastaPrefix);

            // Path.GetDirectoryName returns an empty string for a bare prefix such as "chr",
            // and Directory.GetFiles rejects an empty path. Search the current directory instead.
            // A null result (e.g. a root path) is deliberately left alone so it still fails loudly.
            if (directory != null && directory.Length == 0)
            {
                directory = ".";
            }

            string[] fastaFiles = Directory.GetFiles(directory, $"{prefix}*.fa.gz");

            var references = new List<FastaSequence>();

            foreach (string filePath in fastaFiles)
            {
                Console.Write($"  - parsing {Path.GetFileName(filePath)}... ");

                // Release each file handle before moving on to the next file, including when
                // parsing throws. Disposing the GZipStream also disposes the underlying stream.
                using (var gzipStream = new GZipStream(FileUtilities.GetReadStream(filePath), CompressionMode.Decompress))
                {
                    FastaReader.AddReferenceSequences(gzipStream, refNameToChromosome, references);
                }

                // The count is cumulative across all files parsed so far.
                Console.WriteLine($"total: {references.Count} sequences");
            }

            return references.OrderBy(x => x.Chromosome.Index).ToList();
        }