/// <summary>
/// Reads a gzip-compressed FASTA file that must contain exactly one reference and
/// returns the slice of its bases delimited by <c>_beginPosition</c> and <c>_endPosition</c>.
/// </summary>
/// <param name="fastaPath">Path to the gzip-compressed FASTA file.</param>
/// <param name="refNameToChromosome">Reference-name-to-chromosome lookup handed to the FASTA reader.</param>
/// <returns>
/// A new <see cref="FastaSequence"/> carrying the chromosome of the parsed reference and the
/// extracted bases.
/// </returns>
/// <exception cref="InvalidDataException">The file did not yield exactly one reference.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The requested range falls outside the parsed bases (raised by <see cref="string.Substring(int, int)"/>).
/// </exception>
private static FastaSequence GetFastaSequence(string fastaPath, IDictionary<string, IChromosome> refNameToChromosome)
{
    var references = new List<FastaSequence>();

    // Own the stream here so the file handle is released even when parsing throws.
    // Disposing a stream a second time is harmless should the reader close it as well.
    using (var stream = new GZipStream(FileUtilities.GetReadStream(fastaPath), CompressionMode.Decompress))
    {
        FastaReader.AddReferenceSequences(stream, refNameToChromosome, references);
    }

    if (references.Count != 1)
    {
        throw new InvalidDataException($"Expected 1 reference, but found {references.Count} references.");
    }

    var reference = references[0];

    // The positions are treated as 1-based and inclusive: the start index is shifted down
    // by one and the length includes both end points.
    int length = _endPosition - _beginPosition + 1;
    string substring = reference.Bases.Substring(_beginPosition - 1, length);

    return new FastaSequence(reference.Chromosome, substring);
}
/// <summary>
/// Parses every gzip-compressed FASTA file whose name starts with the file-name part of
/// <paramref name="fastaPrefix"/> and ends with <c>.fa.gz</c>, writing a progress line to the
/// console for each file.
/// </summary>
/// <param name="fastaPrefix">
/// Directory plus file-name prefix of the FASTA files. When no directory component is present,
/// the current directory is searched.
/// </param>
/// <param name="refNameToChromosome">Reference-name-to-chromosome lookup handed to the FASTA reader.</param>
/// <returns>All parsed sequences, ordered by <c>Chromosome.Index</c>.</returns>
private static List<FastaSequence> GetFastaSequences(string fastaPrefix, IDictionary<string, IChromosome> refNameToChromosome)
{
    string directory = Path.GetDirectoryName(fastaPrefix);
    string prefix = Path.GetFileName(fastaPrefix);

    // GetDirectoryName yields an empty string (or null) for a bare prefix such as "chr",
    // which Directory.GetFiles rejects; search the current directory in that case.
    if (string.IsNullOrEmpty(directory))
    {
        directory = ".";
    }

    string[] fastaFiles = Directory.GetFiles(directory, $"{prefix}*.fa.gz");
    var references = new List<FastaSequence>();

    foreach (string filePath in fastaFiles)
    {
        Console.Write($" - parsing {Path.GetFileName(filePath)}... ");

        // Own the stream here so each file handle is released even when parsing throws.
        // Disposing a stream a second time is harmless should the reader close it as well.
        using (var stream = new GZipStream(FileUtilities.GetReadStream(filePath), CompressionMode.Decompress))
        {
            FastaReader.AddReferenceSequences(stream, refNameToChromosome, references);
        }

        // The count is cumulative across all files parsed so far.
        Console.WriteLine($"total: {references.Count} sequences");
    }

    return references.OrderBy(x => x.Chromosome.Index).ToList();
}