// Integration test: runs the whole pipeline on a FASTA file (parse reads, build the
// error-correction histogram, build and clean up the De Bruijn graph) and writes a
// ".dot" file into a "graphs" directory and a ".contigs.fasta" file into a "contigs"
// directory, both created next to the input file.
//
// fastaPath: path of the input file, relative to the part of the current directory
//            that ends with the first occurrence of "IntegrationTests".
// Throws InvalidOperationException when the current directory does not contain
// "IntegrationTests", instead of silently continuing with a truncated path.
public void BuildGraph_GenerateDotFile_WriteContigs(string fastaPath)
{
    var assemblyName = "IntegrationTests";
    var currentDirectory = Environment.CurrentDirectory;

    // Ordinal search: this is a path fragment, not linguistic text.
    var assemblyNameIndex = currentDirectory.IndexOf(assemblyName, StringComparison.Ordinal);
    if (assemblyNameIndex < 0)
    {
        throw new InvalidOperationException(
            "Current directory '" + currentDirectory + "' does not contain '" + assemblyName + "'.");
    }

    var projectPath = currentDirectory.Substring(0, assemblyNameIndex + assemblyName.Length);
    fastaPath = Path.Combine(projectPath, fastaPath);

    // Both outputs are placed relative to the input file and share its base name.
    var inputDirectory = Path.GetDirectoryName(fastaPath);
    var inputBaseName = Path.GetFileNameWithoutExtension(fastaPath);

    var fileService = new FileService();
    var kmerLength = 19;
    var errorCorrector = new ErrorCorrector(kmerLength);
    var fastaReader = new FastaReader(fileService);

    // NOTE(review): reads is consumed by both BuildHistogram and Build; if
    // ParseFastaFile is lazily evaluated the file is parsed twice - confirm.
    var reads = fastaReader.ParseFastaFile(fastaPath);
    errorCorrector.BuildHistogram(reads);

    var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
    var graph = graphBuilder.Build(reads);
    errorCorrector.PrintResult();
    graph.CleanUp();

    var dotFileDirectory = Path.Combine(inputDirectory, "graphs");
    Directory.CreateDirectory(dotFileDirectory);
    graphBuilder.ToDot(fileService, Path.Combine(dotFileDirectory, inputBaseName + ".dot"), graph);

    var contigs = graph.GetContigs();
    var contigsDirectory = Path.Combine(inputDirectory, "contigs");
    Directory.CreateDirectory(contigsDirectory);
    fastaReader.WriteFastaFile(Path.Combine(contigsDirectory, inputBaseName + ".contigs.fasta"), contigs);
}
// Runs the assembly pipeline: parses the reads from ReadsPath, builds and cleans up
// the De Bruijn graph, writes a DOT file when DotFilePath is set, and writes the
// contigs to ContigsPath.
//
// Output directories are created only when the target path actually has a directory
// component: Path.GetDirectoryName returns an empty string for a bare file name, and
// Directory.CreateDirectory rejects an empty path with an ArgumentException.
private void Run()
{
    var fileService = new FileService();
    var kmerLength = K ?? 19; // falls back to 19 when K is null
    var errorCorrector = new ErrorCorrector(kmerLength, true);
    var fastaReader = new FastaReader(fileService);

    var reads = fastaReader.ParseFastaFile(ReadsPath);
    // Histogram pre-pass is currently disabled:
    // errorCorrector.BuildHistogram(reads);

    var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
    var graph = graphBuilder.Build(reads);
    errorCorrector.PrintResult();
    graph.CleanUp();

    if (!string.IsNullOrWhiteSpace(DotFilePath))
    {
        var dotFileDirectory = Path.GetDirectoryName(DotFilePath);
        if (!string.IsNullOrEmpty(dotFileDirectory))
        {
            Directory.CreateDirectory(dotFileDirectory);
        }

        graphBuilder.ToDot(fileService, DotFilePath, graph);
    }

    var contigs = graph.GetContigs();

    var contigsDirectory = Path.GetDirectoryName(ContigsPath);
    if (!string.IsNullOrEmpty(contigsDirectory))
    {
        Directory.CreateDirectory(contigsDirectory);
    }

    fastaReader.WriteFastaFile(ContigsPath, contigs);
}
// A file service that yields no lines at all must make the parser fail with
// ArgumentException.
public void ParseFastaFile_EmptyFile_ThrowsArgumentException()
{
    var emptyFileService = new TestFileService
    {
        fileLines = new List<string>()
    };
    var reader = new FastaReader(emptyFileService);

    // ToList() forces the result to be enumerated inside the asserted delegate.
    Assert.Throws<ArgumentException>(() => reader.ParseFastaFile("path").ToList());
}
// Parses whatever lines TestFileService supplies when fileLines is not set by the
// test, and expects exactly 10 sequences, each 80 characters long.
public void ParseFastaFile_ReturnsCorrectSequences()
{
    var reader = new FastaReader(new TestFileService());

    var parsedSequences = reader.ParseFastaFile("path").ToList();

    Assert.Equal(10, parsedSequences.Count);
    Assert.All(parsedSequences, sequence => Assert.Equal(80, sequence.Length));
}
// Feeds the parser three lines of non-FASTA text and expects ArgumentException.
public void ParseFastaFile_NoSequencesInFile_ThrowsArgumentException()
{
    var garbageLines = new List<string>
    {
        "this is definitely not a fasta file",
        "ATCGCTGJGJGNVBCACABJP",
        "dvdfvdf fsdfs dfsdfsgdf"
    };
    var garbageFileService = new TestFileService();
    garbageFileService.fileLines = garbageLines;
    var reader = new FastaReader(garbageFileService);

    // ToList() forces the result to be enumerated inside the asserted delegate.
    Assert.Throws<ArgumentException>(() => reader.ParseFastaFile("path").ToList());
}
// Integration test: builds a k-mer histogram of the reads in a FASTA file, corrects
// the reads, rebuilds the histogram from the corrected k-mers, and prints the
// low-count distinct k-mer figures before and after correction.
//
// fastaPath: path of the input file, relative to the part of the current directory
//            that ends with the first occurrence of "IntegrationTests".
// Throws InvalidOperationException when the current directory does not contain
// "IntegrationTests", instead of silently continuing with a truncated path.
public void Correct_RealData(string fastaPath)
{
    var assemblyName = "IntegrationTests";
    var currentDirectory = Environment.CurrentDirectory;

    // Ordinal search: this is a path fragment, not linguistic text.
    var assemblyNameIndex = currentDirectory.IndexOf(assemblyName, StringComparison.Ordinal);
    if (assemblyNameIndex < 0)
    {
        throw new InvalidOperationException(
            "Current directory '" + currentDirectory + "' does not contain '" + assemblyName + "'.");
    }

    var projectPath = currentDirectory.Substring(0, assemblyNameIndex + assemblyName.Length);
    fastaPath = Path.Combine(projectPath, fastaPath);

    var kmerLength = 19;
    var corrector = new ErrorCorrector(kmerLength);
    var fastaReader = new FastaReader(new FileService());

    // NOTE(review): reads is consumed by both BuildHistogram and
    // CorrectReadsAndSplitToKmers; if ParseFastaFile is lazily evaluated the file
    // is parsed twice - confirm.
    var reads = fastaReader.ParseFastaFile(fastaPath);

    var histogram = corrector.BuildHistogram(reads);
    Console.WriteLine("Before correction");
    WriteLowCountDistinctKmersCount(histogram, 12);

    var kmersCorrected = corrector.CorrectReadsAndSplitToKmers(reads);
    var histogramCorrected = corrector.BuildHistogram(kmersCorrected.Select(k => k.ToString()));
    Console.WriteLine("After correction");
    WriteLowCountDistinctKmersCount(histogramCorrected, 12);

    corrector.PrintResult();
}