/// <summary>
/// End-to-end run over a FASTA file: parses the reads, builds the k-mer histogram and the
/// De Bruijn graph, then writes the graph as a <c>.dot</c> file into a "graphs" directory and
/// the contigs as a <c>.contigs.fasta</c> file into a "contigs" directory, both created next
/// to the input file.
/// </summary>
/// <param name="fastaPath">
/// Path of the FASTA file; it is combined with the path of the "IntegrationTests" directory
/// found in the current working directory.
/// </param>
/// <exception cref="InvalidOperationException">
/// The current working directory does not contain an "IntegrationTests" segment.
/// </exception>
public void BuildGraph_GenerateDotFile_WriteContigs(string fastaPath)
{
    const string assemblyName = "IntegrationTests";
    const int kmerLength = 19;

    // Locate the project directory. Ordinal comparison keeps the lookup culture-independent,
    // and the explicit check avoids silently truncating the path when the marker is absent.
    var currentDirectory = Environment.CurrentDirectory;
    var assemblyNameIndex = currentDirectory.IndexOf(assemblyName, StringComparison.Ordinal);
    if (assemblyNameIndex < 0)
    {
        throw new InvalidOperationException(
            "Cannot locate the '" + assemblyName + "' directory in '" + currentDirectory + "'.");
    }

    var projectPath = currentDirectory.Substring(0, assemblyNameIndex + assemblyName.Length);
    fastaPath = Path.Combine(projectPath, fastaPath);

    var fileService = new FileService();
    var errorCorrector = new ErrorCorrector(kmerLength);
    var fastaReader = new FastaReader(fileService);

    // The histogram is built before the graph because the graph builder receives the corrector.
    var reads = fastaReader.ParseFastaFile(fastaPath);
    errorCorrector.BuildHistogram(reads);

    var graphBuilder = new DeBruijnGraphBuilder(kmerLength, errorCorrector);
    var graph = graphBuilder.Build(reads);
    errorCorrector.PrintResult();
    graph.CleanUp();

    // Both outputs live beside the input file and share its base name.
    var fastaDirectory = Path.GetDirectoryName(fastaPath);
    var fastaName = Path.GetFileNameWithoutExtension(fastaPath);

    var dotFileDirectory = Path.Combine(fastaDirectory, "graphs");
    Directory.CreateDirectory(dotFileDirectory);
    graphBuilder.ToDot(fileService, Path.Combine(dotFileDirectory, fastaName + ".dot"), graph);

    var contigs = graph.GetContigs();
    var contigsDirectory = Path.Combine(fastaDirectory, "contigs");
    Directory.CreateDirectory(contigsDirectory);
    fastaReader.WriteFastaFile(Path.Combine(contigsDirectory, fastaName + ".contigs.fasta"), contigs);
}
/// <summary>
/// Runs the error corrector over a real FASTA file and prints the low-count distinct k-mer
/// statistics before and after correction, followed by the corrector's own result summary.
/// </summary>
/// <param name="fastaPath">
/// Path of the FASTA file; it is combined with the path of the "IntegrationTests" directory
/// found in the current working directory.
/// </param>
/// <exception cref="InvalidOperationException">
/// The current working directory does not contain an "IntegrationTests" segment.
/// </exception>
public void Correct_RealData(string fastaPath)
{
    const string assemblyName = "IntegrationTests";
    const int kmerLength = 19;

    // Locate the project directory. Ordinal comparison keeps the lookup culture-independent,
    // and the explicit check avoids silently truncating the path when the marker is absent.
    var currentDirectory = Environment.CurrentDirectory;
    var assemblyNameIndex = currentDirectory.IndexOf(assemblyName, StringComparison.Ordinal);
    if (assemblyNameIndex < 0)
    {
        throw new InvalidOperationException(
            "Cannot locate the '" + assemblyName + "' directory in '" + currentDirectory + "'.");
    }

    var projectPath = currentDirectory.Substring(0, assemblyNameIndex + assemblyName.Length);
    fastaPath = Path.Combine(projectPath, fastaPath);

    var corrector = new ErrorCorrector(kmerLength);
    var fastaReader = new FastaReader(new FileService());
    var reads = fastaReader.ParseFastaFile(fastaPath);

    var histogram = corrector.BuildHistogram(reads);
    Console.WriteLine("Before correction");
    WriteLowCountDistinctKmersCount(histogram, 12);

    // Rebuild the histogram from the corrected k-mers so the two reports can be compared.
    var kmersCorrected = corrector.CorrectReadsAndSplitToKmers(reads);
    var histogramCorrected = corrector.BuildHistogram(kmersCorrected.Select(k => k.ToString()));
    Console.WriteLine("After correction");
    WriteLowCountDistinctKmersCount(histogramCorrected, 12);

    corrector.PrintResult();
}
/// <summary>
/// Checks that correcting <paramref name="read"/> with a 3-mer <c>ErrorCorrector</c> yields the
/// same k-mers that a <c>TestErrorCorrector</c> produces for <paramref name="expectedRead"/>,
/// and that the corrector reports the expected number of corrected k-mers.
/// </summary>
/// <param name="read">Input read used both to build the histogram and to be corrected.</param>
/// <param name="expectedRead">Read whose k-mers form the expected result.</param>
/// <param name="numberOfErrors">Expected value of <c>CorrectedKmersCount</c> after correction.</param>
public void Correct(string read, string expectedRead, int numberOfErrors)
{
    const int kmerLength = 3;

    var inputReads = new List<string> { read };
    var targetReads = new List<string> { expectedRead };

    var corrector = new ErrorCorrector(kmerLength);
    corrector.BuildHistogram(inputReads);

    // The reference k-mers come from a separate TestErrorCorrector that has no histogram built.
    var referenceCorrector = new TestErrorCorrector(kmerLength);
    var expectedKmers = referenceCorrector.CorrectReadsAndSplitToKmers(targetReads).ToList();
    var actualKmers = corrector.CorrectReadsAndSplitToKmers(inputReads).ToList();

    Assert.Equal(expectedKmers, actualKmers);
    Assert.Equal(numberOfErrors, corrector.CorrectedKmersCount);
}
/// <summary>
/// Checks the 3-mer histogram built from a single read made of "CAT" repeated ten times:
/// "CAT" is expected 10 times, "ATC" and "TCA" 9 times each.
/// </summary>
public void Histogram()
{
    const int kmerLength = 3;

    // One 30-character read: CATCAT...CAT
    var repeatedRead = string.Concat(Enumerable.Repeat("CAT", 10));
    var reads = new List<string> { repeatedRead };

    var expected = new Dictionary<string, int>
    {
        ["ATC"] = 9,
        ["CAT"] = 10,
        ["TCA"] = 9,
    };

    var corrector = new ErrorCorrector(kmerLength);
    var histogram = corrector.BuildHistogram(reads);

    Assert.Equal(expected, histogram);
}