/// <summary>
/// Drains the collection of filtered-out ambiguous reads into a FASTA file.
/// </summary>
/// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
/// <param name="ambiguousFilename">File to write.</param>
private static void WriteAmbiguousReads(BlockingCollection<ISequence> ambiguousReads, string ambiguousFilename)
{
    FastAFormatter writer = new FastAFormatter { AutoFlush = true };

    using (writer.Open(ambiguousFilename))
    {
        // Waits for each next read and stops once the collection has been
        // marked as complete and every queued read has been taken.
        foreach (ISequence read in ambiguousReads.GetConsumingEnumerable())
        {
            writer.Format(read);
        }
    }
}
/// <summary>
/// Writes the sequences as FASTA to the configured output file, or, when no
/// output file is set, prints each sequence ID and its symbols to the output log.
/// </summary>
/// <param name="sequences">Sequences to write; enumerated exactly once.</param>
private void WriteSequences(IEnumerable<ISequence> sequences)
{
    // No output file configured: dump the results to the log instead.
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        Output.WriteLine(OutputLevel.Information, "Results:");
        foreach (ISequence item in sequences)
        {
            Output.WriteLine(OutputLevel.Results, item.ID);

            char[] symbols = item.Select(b => (char)b).ToArray();
            Output.WriteLine(OutputLevel.Results, new string(symbols));
        }

        return;
    }

    int written = 0;
    var fastaWriter = new FastAFormatter { AutoFlush = true };
    using (fastaWriter.Open(this.OutputFile))
    {
        foreach (ISequence item in sequences)
        {
            written++;
            fastaWriter.Format(item);
        }
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
}
/// <summary>
/// Copies the reads of a FASTA file to an output FASTA file, leaving out every
/// read that contains too long a run of one repeated symbol.
/// </summary>
/// <param name="inputFile">
/// FASTA file to read. When it does not exist a message is printed and nothing is written.
/// </param>
/// <param name="outputFile">FASTA file that receives the reads that pass the filter.</param>
/// <param name="repeatLength">
/// A read is skipped as soon as a symbol has matched its predecessor this many
/// times in a row (that is, a run of <c>repeatLength + 1</c> identical symbols).
/// </param>
static void FilterTestData(string inputFile, string outputFile, int repeatLength)
{
    if (!File.Exists(inputFile))
    {
        Console.WriteLine("Enter Valid File Path.");
        return;
    }

    Console.WriteLine("Processing the file '{0}'.", inputFile);

    FastAParser parserObj = new FastAParser();
    FastAFormatter outputWriter = new FastAFormatter();

    using (parserObj.Open(inputFile))
    using (outputWriter.Open(outputFile))
    {
        // Go through the input file read by read.
        foreach (ISequence seq in parserObj.Parse())
        {
            byte[] actualRead = seq.ToArray();
            int repeatLenCount = 0;
            bool ignoreRead = false;

            // Each symbol is compared with the one before it, starting at index 1.
            // An empty or single-symbol read therefore never enters the loop and
            // is passed through, instead of failing on actualRead[0].
            for (int j = 1; j < actualRead.Length; j++)
            {
                if (actualRead[j] != actualRead[j - 1])
                {
                    // Run broken; start counting again.
                    repeatLenCount = 0;
                    continue;
                }

                repeatLenCount++;

                // Run reached the limit: skip this read and move on to the next one.
                if (repeatLenCount == repeatLength)
                {
                    Console.WriteLine("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped", (char)actualRead[j], repeatLength, seq.ID);
                    ignoreRead = true;
                    break;
                }
            }

            Console.WriteLine("Read '{0}' Processed.", seq.ID);

            // Only reads without an over-long repeat run reach the output file.
            if (!ignoreRead)
            {
                outputWriter.Format(seq);
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Filtering Completed!!");
}
/// <summary>
/// Writes the contigs to the configured output file as FASTA, or prints them
/// to the output log when no output file is set.
/// </summary>
/// <param name="assembly">
/// Contigs to write. A contig whose ID is null or empty is given a generated ID
/// based on its 1-based position before it is written.
/// </param>
protected void WriteContigs(IEnumerable<ISequence> assembly)
{
    int position = 1;

    // No output file configured: print ID and symbols of every contig.
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");
        foreach (ISequence contig in assembly)
        {
            if (string.IsNullOrEmpty(contig.ID))
            {
                contig.ID = GenerateSequenceId(position);
            }

            Output.WriteLine(OutputLevel.Results, contig.ID);

            char[] symbols = contig.Select(b => (char)b).ToArray();
            Output.WriteLine(OutputLevel.Results, new string(symbols));

            position++;
        }

        return;
    }

    FastAFormatter fastaWriter = new FastAFormatter { AutoFlush = true };
    using (fastaWriter.Open(this.OutputFile))
    {
        foreach (ISequence contig in assembly)
        {
            if (string.IsNullOrEmpty(contig.ID))
            {
                contig.ID = GenerateSequenceId(position);
            }

            fastaWriter.Format(contig);
            position++;
        }
    }

    Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
}
// Writes one sequence built from the XML test data to the temp FASTA file,
// parses the file back through a stream and checks that the parsed symbols
// equal the expected sequence text.
public void FastAFormatterValidateWriteWithStream()
{
    string expectedSymbols = string.Empty;
    var fastaWriter = new FastAFormatter();

    using (fastaWriter.Open(Constants.FastaTempFileName))
    {
        // Expected sequence text and alphabet name come from the XML test data.
        expectedSymbols = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
        string alphabetName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

        ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", expectedSymbols, alphabetName));

        var original = new Sequence(Utility.GetAlphabet(alphabetName), expectedSymbols) { ID = "" };
        Assert.IsNotNull(original);

        // Write the original sequence to the temp file.
        ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        fastaWriter.Format(original);
        fastaWriter.Close();
    }

    using (var reader = File.OpenRead(Constants.FastaTempFileName))
    {
        // Read the new file back and compare the first parsed sequence.
        var parser = new FastAParser { Alphabet = Alphabets.Protein };
        List<ISequence> parsed = parser.Parse(reader).ToList();
        Assert.IsNotNull(parsed);

        char[] parsedSymbols = parsed[0].Select(b => (char)b).ToArray();
        Assert.AreEqual(expectedSymbols, new string(parsedSymbols));
    }

    // Every check passed, so remove the temp file. When an assert fails the
    // file is left behind for debugging.
    File.Delete(Constants.FastaTempFileName);
    ApplicationLog.WriteLine("Deleted the temp file created.");
}
// Formats three copies of one sequence (built from the XML test data) to the
// temp FASTA file, parses the file back and validates the sequence count, the
// ID of the first sequence and that its symbols equal the expected text.
public void FastAFormatterValidateWrite1()
{
    var formatter = new FastAFormatter();
    using (formatter.Open(Constants.FastaTempFileName))
    {
        // Gets the actual sequence and the alphabet from the Xml
        string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
        string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

        // Logs information to the log file
        ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alpName));
        var seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence) { ID = "" };
        Assert.IsNotNull(seqOriginal);

        // Use the formatter to write the original sequences to a temp file
        ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        var seqList = new List<ISequence> { seqOriginal, seqOriginal, seqOriginal };
        formatter.Format(seqList);

        // Closed explicitly before the file is parsed back; the enclosing
        // using block stays open until the end of the test.
        formatter.Close();

        // Read the new file once into a list so the parse result is not
        // enumerated again by every later lookup.
        var parser = new FastAParser { Alphabet = Alphabets.Protein };
        List<ISequence> seqsNew = parser.Parse(Constants.FastaTempFileName).ToList();
        Assert.IsNotNull(seqsNew);

        // Check the count first so a short file is reported as a count
        // mismatch rather than as an out-of-range lookup.
        Assert.AreEqual(3, seqsNew.Count);
        ApplicationLog.WriteLine("The Number of sequences are matching.");

        var newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
        ApplicationLog.WriteLine(string.Format(null, "FastA Formatter BVT: New Sequence is '{0}'.", newSequence));

        // Now compare the sequences. The symbols are compared with the
        // expected text; comparing the parsed sequence with itself could
        // never fail.
        Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);
        Assert.AreEqual(actualSequence, newSequence);
        ApplicationLog.WriteLine(string.Format(null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.", newSequence));

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Generates the simulated subsequences and writes them to numbered FASTA
/// files named after <paramref name="outputFileName"/>: its extension is
/// removed and "_1.fa", "_2.fa", ... is appended, one file per batch of
/// Settings.OutputSequenceCount sequences.
/// </summary>
/// <param name="outputFileName">Base path of the output files; its directory must exist.</param>
/// <param name="updateSimulationStats">
/// Invoked once, before any sequence is generated, with the total number of
/// sequences and the number of files that will be written.
/// </param>
/// <param name="simulationComplete">
/// Invoked once at the end with the Name of the last formatter that was used,
/// or an empty string when no file was written.
/// </param>
/// <exception cref="ArgumentException">
/// The output directory does not exist, or Settings.OutputSequenceCount or
/// Settings.SequenceLength is not greater than zero.
/// </exception>
public void DoSimulation(string outputFileName, Action<long,long> updateSimulationStats, Action<string> simulationComplete)
{
    const string filePostfix = "_{0}.fa";

    FileInfo file = new FileInfo(outputFileName);
    if (file.Directory == null || !file.Directory.Exists)
    {
        throw new ArgumentException("Could not write to the output directory for " + outputFileName);
    }

    if (Settings.OutputSequenceCount <= 0)
    {
        throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
    }

    if (Settings.SequenceLength <= 0)
    {
        throw new ArgumentException("'Mean Output Length' should be greater than zero.");
    }

    // Remove the extension as a suffix. Searching for its first occurrence
    // would cut the path short whenever the same text also appears earlier in
    // it (for example in a directory name such as "runs.fa").
    string fullName = file.FullName;
    string extension = file.Extension;
    string filePrefix = fullName;
    if (!string.IsNullOrEmpty(extension) && fullName.EndsWith(extension, StringComparison.Ordinal))
    {
        filePrefix = fullName.Substring(0, fullName.Length - extension.Length);
    }

    long seqCount = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
    long fileCount = seqCount / Settings.OutputSequenceCount;
    if (seqCount % Settings.OutputSequenceCount != 0)
    {
        // A partially filled last file still counts as a file.
        fileCount++;
    }

    // Update the UI
    updateSimulationStats(seqCount, fileCount);

    int fileIndex = 1;
    FastAFormatter formatter = null;
    List<ISequence> generatedSequenceList = new List<ISequence>();

    for (long i = 0; i < seqCount; i++)
    {
        generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

        // A full batch is written out immediately and the buffer reused.
        if (generatedSequenceList.Count >= Settings.OutputSequenceCount)
        {
            FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
            formatter = new FastAFormatter();
            using (formatter.Open(outFile.FullName))
            {
                formatter.Format(generatedSequenceList);
            }

            generatedSequenceList.Clear();
        }
    }

    // Pick off any remaining sequences into the final file.
    if (generatedSequenceList.Count > 0)
    {
        FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
        formatter = new FastAFormatter();
        using (formatter.Open(outFile.FullName))
        {
            formatter.Format(generatedSequenceList);
        }
    }

    // formatter is still null only when no file was written at all.
    simulationComplete(formatter != null ? formatter.Name : string.Empty);
}
/// <summary>
/// Writes the assembled contigs to the configured output file as FASTA, or to
/// standard output when no output file is set.
/// </summary>
/// <param name="assembly">
/// Assembly result whose AssembledSequences are written. When it holds no
/// sequences only a "No sequences assembled." message is emitted.
/// </param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    if (assembly.AssembledSequences.Count == 0)
    {
        Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
        return;
    }

    EnsureContigNames(assembly.AssembledSequences);

    if (!string.IsNullOrEmpty(this.OutputFile))
    {
        FastAFormatter fileFormatter = new FastAFormatter { AutoFlush = true };
        using (fileFormatter.Open(this.OutputFile))
        {
            foreach (ISequence seq in assembly.AssembledSequences)
            {
                fileFormatter.Format(seq);
            }
        }

        Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", assembly.AssembledSequences.Count, this.OutputFile);
        return;
    }

    Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", assembly.AssembledSequences.Count);

    // Fit the lines to the console, capped at 80 symbols. Console.WindowWidth
    // can throw IOException when no console is attached, and a very narrow
    // reported width would leave a non-positive line length; both cases fall
    // back to 80.
    int symbolsPerLine = 80;
    try
    {
        int usableWidth = Console.WindowWidth - 2;
        if (usableWidth > 0)
        {
            symbolsPerLine = Math.Min(80, usableWidth);
        }
    }
    catch (System.IO.IOException)
    {
        // No usable console; keep the default line length.
    }

    FastAFormatter consoleFormatter = new FastAFormatter
    {
        AutoFlush = true,
        MaxSymbolsAllowedPerLine = symbolsPerLine
    };

    foreach (ISequence seq in assembly.AssembledSequences)
    {
        consoleFormatter.Format(Console.OpenStandardOutput(), seq);
    }
}
/// <summary>
/// Writes the scaffolds to the configured output file as FASTA, or to
/// standard output when no output file is set.
/// </summary>
/// <param name="scaffolds">
/// The list of scaffold sequences. When it is empty only a
/// "No Scaffolds generated." message is emitted.
/// </param>
private void WriteContigs(IList<ISequence> scaffolds)
{
    if (scaffolds.Count == 0)
    {
        Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
        return;
    }

    EnsureContigNames(scaffolds);

    if (!string.IsNullOrEmpty(this.OutputFile))
    {
        FastAFormatter fileFormatter = new FastAFormatter { AutoFlush = true };
        using (fileFormatter.Open(this.OutputFile))
        {
            fileFormatter.Format(scaffolds);
        }

        Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
        return;
    }

    Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);

    // Fit the lines to the console, capped at 80 symbols. Console.WindowWidth
    // can throw IOException when no console is attached, and a very narrow
    // reported width would leave a non-positive line length; both cases fall
    // back to 80.
    int symbolsPerLine = 80;
    try
    {
        int usableWidth = Console.WindowWidth - 2;
        if (usableWidth > 0)
        {
            symbolsPerLine = Math.Min(80, usableWidth);
        }
    }
    catch (System.IO.IOException)
    {
        // No usable console; keep the default line length.
    }

    FastAFormatter consoleFormatter = new FastAFormatter
    {
        MaxSymbolsAllowedPerLine = symbolsPerLine,
        AutoFlush = true
    };

    consoleFormatter.Format(Console.OpenStandardOutput(), scaffolds);
}