/// <summary>
/// Emits the supplied scaffolds in FastA format: to <c>OutputFile</c> when that
/// property is set, otherwise to the standard output stream.
/// </summary>
/// <param name="scaffolds">The list of scaffold sequences to write.</param>
private void WriteContigs(IList<ISequence> scaffolds)
{
    // Nothing to write: report it and bail out before touching any output.
    if (scaffolds.Count == 0)
    {
        Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
        return;
    }

    EnsureContigNames(scaffolds);

    if (string.IsNullOrEmpty(this.OutputFile))
    {
        // No output file configured: stream the scaffolds to the console.
        Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);

        FastAFormatter consoleFormatter = new FastAFormatter();
        // Wrap at the console width (minus a 2-column margin), capped at 80 symbols.
        consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
        consoleFormatter.AutoFlush = true;
        consoleFormatter.Format(Console.OpenStandardOutput(), scaffolds);
        return;
    }

    FastAFormatter fileFormatter = new FastAFormatter();
    fileFormatter.AutoFlush = true;
    using (fileFormatter.Open(this.OutputFile))
    {
        fileFormatter.Format(scaffolds);
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
}
/// <summary>
/// Writes the sequences to <c>OutputFile</c> in FastA format when that property
/// is set; otherwise prints each sequence's ID and symbols through <c>Output</c>.
/// </summary>
/// <param name="sequences">The sequences to write.</param>
private void WriteSequences(IEnumerable<ISequence> sequences)
{
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        // No output file configured: dump ID and symbols as plain result lines.
        Output.WriteLine(OutputLevel.Information, "Results:");
        foreach (ISequence current in sequences)
        {
            Output.WriteLine(OutputLevel.Results, current.ID);

            // Each element of the sequence is cast to a char to build the text.
            char[] symbols = current.Select(a => (char)a).ToArray();
            Output.WriteLine(OutputLevel.Results, new string(symbols));
        }

        return;
    }

    // The input is only enumerated once, so count the sequences while writing.
    int written = 0;
    var fastaWriter = new FastAFormatter();
    fastaWriter.AutoFlush = true;
    using (fastaWriter.Open(this.OutputFile))
    {
        foreach (ISequence current in sequences)
        {
            written++;
            fastaWriter.Format(current);
        }
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
}
/// <summary>
/// Round-trip validation for the FastA parser and formatter: parses the file
/// referenced by the given xml node, formats the parsed sequences to a temporary
/// file, parses that file again and asserts that both passes produce the same
/// sequences in the same order.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateParseFormatGeneralTestCases(string nodeName)
{
    // Input file path and alphabet name come from the test configuration xml.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode).TestDir();
    string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(filePath));

    string filepathTmp = Path.Combine(Path.GetTempPath(), "temp.fasta");

    // Ensure a stale output file from an earlier run is gone.
    if (File.Exists(filepathTmp))
    {
        File.Delete(filepathTmp);
    }

    // Read the original file.
    var parserObj = new FastAParser();
    parserObj.Alphabet = Utility.GetAlphabet(alphabet);
    List<ISequence> seqsOriginal = parserObj.Parse(filePath).ToList();
    Assert.IsFalse(seqsOriginal.Count == 0);

    try
    {
        // Write to a new file. This happens inside the try block so that a
        // partially written temp file is still removed if formatting fails.
        var formatter = new FastAFormatter();
        formatter.Format(seqsOriginal, filepathTmp);

        // Read the new file, then compare the sequences pairwise.
        var parserObjNew = new FastAParser();
        parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
        IEnumerable<ISequence> seqsNew = parserObjNew.Parse(filepathTmp);
        Assert.IsNotNull(seqsNew);

        int count = 0;
        foreach (ISequence newSequence in seqsNew)
        {
            // Fail with a readable message (rather than an index exception)
            // when the new file holds more sequences than the original.
            Assert.IsTrue(count < seqsOriginal.Count, "Number of sequences is different.");

            string s1 = seqsOriginal[count].ConvertToString();
            string s2 = newSequence.ConvertToString();
            Assert.AreEqual(s1, s2);
            count++;
        }

        // Expected value first, actual value second.
        Assert.AreEqual(seqsOriginal.Count, count, "Number of sequences is different.");
    }
    finally
    {
        // Delete the new file whether or not the comparison succeeded.
        File.Delete(filepathTmp);
    }
}
/// <summary>
/// Formats a single sequence described by the given xml node to the FastA temp
/// file, parses that file back and asserts that symbols, ID and sequence count
/// survive the round trip.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateFormatterGeneralTestCases(string nodeName)
{
    // Expected sequence text and alphabet name come from the test configuration xml.
    string rawExpected = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

    // Strip the carriage returns, line feeds and spaces introduced by the xml layout.
    string withoutReturns = rawExpected.Replace("\r", "");
    string withoutLineBreaks = withoutReturns.Replace("\n", "");
    string cleanedExpected = withoutLineBreaks.Replace(" ", "");

    string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

    ApplicationLog.WriteLine(string.Format(null,
        "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.",
        rawExpected, alphabetName));

    ISequence sourceSequence = new Sequence(Utility.GetAlphabet(alphabetName), cleanedExpected)
    {
        ID = "test"
    };
    Assert.IsNotNull(sourceSequence);

    // Write the sequence to the temp file.
    var fastaWriter = new FastAFormatter();
    ApplicationLog.WriteLine(string.Format("FastA Formatter : Creating the Temp file '{0}'.",
        Constants.FastaTempFileName));
    fastaWriter.Format(sourceSequence, Constants.FastaTempFileName);

    // Read the temp file back and compare against what was written.
    var fastaReader = new FastAParser();
    fastaReader.Alphabet = Utility.GetAlphabet(alphabetName);
    IEnumerable<ISequence> parsedSequences = fastaReader.Parse(Constants.FastaTempFileName);

    ISequence firstParsed = parsedSequences.FirstOrDefault();
    Assert.IsNotNull(firstParsed);

    string parsedText = firstParsed.ConvertToString();
    ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : New Sequence is '{0}'.", parsedText));

    Assert.AreEqual(cleanedExpected, parsedText);
    Assert.AreEqual(sourceSequence.ID, firstParsed.ID);

    // The file must contain exactly the one sequence that was written.
    Assert.AreEqual(1, parsedSequences.Count());

    // Only reached when every assert passed. On failure the temp file is
    // intentionally left on disk so it can be inspected while debugging.
    File.Delete(Constants.FastaTempFileName);
    ApplicationLog.WriteLine("Deleted the temp file created.");
}
/// <summary>
/// Formats a single sequence to the FastA temp file with
/// <c>Format(sequence, fileName)</c>, parses the file back and asserts that
/// exactly one sequence with the same ID and the same symbols is read.
/// </summary>
public void FastAFormatterValidateWrite()
{
    // Gets the actual sequence and the alphabet from the Xml
    string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
    string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alpName));

    var seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence);
    seqOriginal.ID = "";
    Assert.IsNotNull(seqOriginal);

    // Use the formatter to write the original sequence to a temp file
    ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
    var formatter = new FastAFormatter();
    formatter.Format(seqOriginal, Constants.FastaTempFileName);

    // Read the new file. The parse result is materialized once so the file is
    // not parsed again for every Count()/ElementAt() call below.
    var parser = new FastAParser();
    parser.Alphabet = Alphabets.Protein;
    IEnumerable<ISequence> parsed = parser.Parse(Constants.FastaTempFileName);
    Assert.IsNotNull(parsed);
    List<ISequence> seqsNew = parsed.ToList();

    // Check the count before touching element 0 so that an empty file produces
    // an assertion failure instead of an index exception.
    Assert.AreEqual(1, seqsNew.Count);

    var newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
    ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: New Sequence is '{0}'.", newSequence));
    ApplicationLog.WriteLine("The Number of sequences are matching.");

    Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);

    // Compare against the ORIGINAL sequence. The earlier version built both
    // strings from the re-parsed file, so the assertion could never fail.
    var orgSeq = new string(seqOriginal.Select(a => (char)a).ToArray());
    Assert.AreEqual(orgSeq, newSequence);
    ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.", newSequence));

    // Passed all the tests, delete the tmp file. If we failed an Assert,
    // the tmp file will still be there in case we need it for debugging.
    File.Delete(Constants.FastaTempFileName);
    ApplicationLog.WriteLine("Deleted the temp file created.");
}
/// <summary>
/// Writes a single sequence through an opened formatter, reads the temp file
/// back from a file stream and asserts that the parsed symbols equal the
/// sequence text taken from the xml configuration.
/// </summary>
public void FastAFormatterValidateWriteWithStream()
{
    string expectedText = string.Empty;
    var fastaWriter = new FastAFormatter();

    using (fastaWriter.Open(Constants.FastaTempFileName))
    {
        // Sequence text and alphabet name come from the test configuration xml.
        expectedText = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
        string alphabetName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

        ApplicationLog.WriteLine(string.Format(
            "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
            expectedText, alphabetName));

        var sourceSequence = new Sequence(Utility.GetAlphabet(alphabetName), expectedText) { ID = "" };
        Assert.IsNotNull(sourceSequence);

        // Write the sequence through the already opened formatter.
        ApplicationLog.WriteLine(string.Format(
            "FastA Formatter BVT: Creating the Temp file '{0}'.",
            Constants.FastaTempFileName));
        fastaWriter.Format(sourceSequence);
        fastaWriter.Close();
    }

    IEnumerable<ISequence> parsed = null;
    using (var input = File.OpenRead(Constants.FastaTempFileName))
    {
        // Read the new file, then compare the first sequence with the expected text.
        var fastaReader = new FastAParser();
        fastaReader.Alphabet = Alphabets.Protein;
        parsed = fastaReader.Parse(input);

        List<ISequence> parsedList = parsed.ToList();
        Assert.IsNotNull(parsedList);

        char[] parsedSymbols = parsedList[0].Select(a => (char)a).ToArray();
        Assert.AreEqual(expectedText, new string(parsedSymbols));
    }

    // Only reached when every assert passed. On failure the temp file is
    // intentionally left on disk so it can be inspected while debugging.
    File.Delete(Constants.FastaTempFileName);
    ApplicationLog.WriteLine("Deleted the temp file created.");
}
/// <summary>
/// Parses the FASTQ file at <paramref name="fastqFileName"/>, assembles the
/// reads with <c>OverlapDeNovoAssembler</c> and writes the assembled sequences
/// to "assembled_sequences.fasta" (a relative path).
/// </summary>
/// <param name="fastqFileName">Path of the FASTQ file to read.</param>
/// <remarks>
/// The method is synchronous. It was previously declared <c>async void</c>
/// without awaiting anything, which only had the effect of hiding exceptions
/// from the caller; they now propagate normally.
/// </remarks>
public static void AssemblySequences(string fastqFileName)
{
    var parser = new FastQParser();

    // Materialize the reads while the stream is still open. The file is only
    // read, so it is opened with read access (works for read-only files too).
    List<IQualitativeSequence> sequences;
    using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read))
    {
        sequences = parser.Parse(fileStream).ToList();
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    IDeNovoAssembly assembly = assembler.Assemble(sequences);

    FastAFormatter outputFormatter = new FastAFormatter();
    outputFormatter.Open("assembled_sequences.fasta");
    try
    {
        outputFormatter.Format(assembly.AssembledSequences);
    }
    finally
    {
        // Always release the output file, even when formatting fails.
        outputFormatter.Close();
    }
}
/// <summary>
/// Drains the collection of filtered-out ambiguous reads into a FastA file.
/// Blocks while the collection is empty and returns once it is marked complete
/// and every queued read has been written.
/// </summary>
/// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
/// <param name="ambiguousFilename">File to write.</param>
private static void WriteAmbiguousReads(BlockingCollection<ISequence> ambiguousReads, string ambiguousFilename)
{
    FastAFormatter writer = new FastAFormatter();
    writer.AutoFlush = true;

    using (writer.Open(ambiguousFilename))
    {
        // The consuming enumerable removes items as they arrive, waits when
        // none are available and ends when the collection is completed.
        foreach (ISequence read in ambiguousReads.GetConsumingEnumerable())
        {
            writer.Format(read);
        }
    }
}
/// <summary>
/// Writes the sequences to <paramref name="filePath"/> in FastA format and then
/// rewrites the file so that every line ends with a single '\n'.
/// </summary>
/// <param name="sequences">Sequences to write.</param>
/// <param name="filePath">Destination file; a sibling file with the ".tmp" suffix is used as scratch space.</param>
public static void WriteFasta(IEnumerable<ISequence> sequences, string filePath)
{
    var fastaWriter = new FastAFormatter();

    // Pass 1: let the formatter produce the file.
    using (FileStream output = File.Create(filePath))
    {
        fastaWriter.Format(output, sequences);
    }

    // Pass 2: copy line by line into the scratch file, terminating each line with '\n'.
    string scratchPath = filePath + ".tmp";
    using (var source = new StreamReader(filePath))
    using (var target = new StreamWriter(scratchPath))
    {
        string current;
        while ((current = source.ReadLine()) != null)
        {
            target.Write(current + '\n');
        }
    }

    // Swap the normalized copy into place.
    File.Delete(filePath);
    File.Move(scratchPath, filePath);
}
/// <summary>
/// Writes the assembled sequences to <c>OutputFile</c> in FastA format when that
/// property is set; otherwise prints each sequence's ID and symbols through
/// <c>Output</c>. A sequence without an ID receives one from
/// <c>GenerateSequenceId</c>, based on its 1-based position in the enumeration.
/// </summary>
/// <param name="assembly">The assembled sequences to write.</param>
protected void WriteContigs(IEnumerable<ISequence> assembly)
{
    // 1-based position of the current sequence; advances for every sequence,
    // whether or not it needed a generated ID.
    int position = 1;

    if (string.IsNullOrEmpty(this.OutputFile))
    {
        Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");

        foreach (ISequence contig in assembly)
        {
            if (string.IsNullOrEmpty(contig.ID))
            {
                contig.ID = GenerateSequenceId(position);
            }

            Output.WriteLine(OutputLevel.Results, contig.ID);

            // Each element of the sequence is cast to a char to build the text.
            char[] symbols = contig.Select(a => (char)a).ToArray();
            Output.WriteLine(OutputLevel.Results, new string(symbols));

            position++;
        }

        return;
    }

    FastAFormatter fileFormatter = new FastAFormatter();
    fileFormatter.AutoFlush = true;
    using (fileFormatter.Open(this.OutputFile))
    {
        foreach (ISequence contig in assembly)
        {
            if (string.IsNullOrEmpty(contig.ID))
            {
                contig.ID = GenerateSequenceId(position);
            }

            fileFormatter.Format(contig);
            position++;
        }
    }

    Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
}
/// <summary>
/// Creates a remote alignment job for the selected subsequences and returns
/// its id. The request is posted to the EBI "clustalo" REST endpoint.
/// </summary>
/// <param name="subsequencesIds">
/// Ids of subsequences selected for alignment.
/// </param>
/// <returns>
/// JSON containing the result status (Success / Error) and either the
/// service response (remote job id) or the error message.
/// </returns>
public string CreateAlignmentTask(long[] subsequencesIds)
{
    try
    {
        ISequence[] bioSequences;
        using (var db = new LibiadaWebEntities())
        {
            var subsequenceExtractor = new SubsequenceExtractor(db);
            bioSequences = subsequenceExtractor.GetBioSequencesForFastaConverter(subsequencesIds);
        }

        // Render the sequences as FASTA text in memory.
        string fasta;
        FastAFormatter formatter = new FastAFormatter();
        using (MemoryStream stream = new MemoryStream())
        {
            formatter.Format(stream, bioSequences);
            fasta = Encoding.ASCII.GetString(stream.ToArray());
        }

        // The body is sent as application/x-www-form-urlencoded, so the FASTA
        // text must be form-encoded: raw '&', '+', '=', '%' or line breaks
        // (all possible in FASTA headers/content) would otherwise corrupt or
        // truncate the "sequence" field.
        string encodedFasta = WebUtility.UrlEncode(fasta);

        string result;
        using (var webClient = new WebClient())
        {
            webClient.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
            Uri url = new Uri("https://www.ebi.ac.uk/Tools/services/rest/clustalo/run");

            // TODO: make email global parameter
            // NOTE(review): the first form field below looks like a redacted
            // e-mail placeholder and has no "email=" key - confirm the intended
            // value before relying on this request.
            result = webClient.UploadString(url, $"[email protected]&sequence={encodedFasta}");
        }

        return JsonConvert.SerializeObject(new { Status = "Success", Result = result });
    }
    catch (Exception ex)
    {
        // Every failure is reported to the caller as an "Error" JSON payload
        // instead of being thrown.
        return JsonConvert.SerializeObject(new { Status = "Error", ex.Message });
    }
}
/// <summary>
/// Writes the assembled sequences of a de novo assembly in FastA format: to
/// <c>OutputFile</c> when that property is set, otherwise to standard output.
/// </summary>
/// <param name="assembly">Assembly result whose <c>AssembledSequences</c> are written.</param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    var contigs = assembly.AssembledSequences;

    // Nothing was assembled: report it and bail out before touching any output.
    if (contigs.Count == 0)
    {
        Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
        return;
    }

    EnsureContigNames(contigs);

    if (string.IsNullOrEmpty(this.OutputFile))
    {
        // No output file configured: stream the contigs to the console.
        Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", contigs.Count);

        FastAFormatter consoleFormatter = new FastAFormatter();
        consoleFormatter.AutoFlush = true;
        // Wrap at the console width (minus a 2-column margin), capped at 80 symbols.
        consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);

        foreach (ISequence contig in contigs)
        {
            consoleFormatter.Format(Console.OpenStandardOutput(), contig);
        }

        return;
    }

    FastAFormatter fileFormatter = new FastAFormatter();
    fileFormatter.AutoFlush = true;
    using (fileFormatter.Open(this.OutputFile))
    {
        foreach (ISequence contig in contigs)
        {
            fileFormatter.Format(contig);
        }
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", contigs.Count, this.OutputFile);
}
/// <summary>
/// Copies the reads of a FastA file to a new FastA file, dropping every read in
/// which some symbol is immediately repeated <paramref name="repeatLength"/>
/// times in a row (a run of <paramref name="repeatLength"/> + 1 equal symbols).
/// Progress is reported on the console.
/// </summary>
/// <param name="inputFile">Input File</param>
/// <param name="outputFile">Output File</param>
/// <param name="repeatLength">Number of consecutive repeats of one symbol at which a read is dropped.</param>
static void FilterTestData(string inputFile, string outputFile, int repeatLength)
{
    if (!File.Exists(inputFile))
    {
        Console.WriteLine("Enter Valid File Path.");
        return;
    }

    Console.WriteLine("Processing the file '{0}'.", inputFile);

    FastAParser reader = new FastAParser();
    FastAFormatter writer = new FastAFormatter();

    using (reader.Open(inputFile))
    using (writer.Open(outputFile))
    {
        foreach (ISequence read in reader.Parse())
        {
            byte[] symbols = read.ToArray();

            // Symbol of the run currently being tracked and the number of
            // times it has been repeated since the run started.
            byte runSymbol = symbols[0];
            int repeats = 0;
            bool keepRead = true;

            // The scan stops as soon as the read is rejected.
            for (int index = 1; index < symbols.Length && keepRead; index++)
            {
                if (symbols[index] != runSymbol)
                {
                    // A different symbol starts a new run.
                    repeats = 0;
                    runSymbol = symbols[index];
                }
                else if (++repeats == repeatLength)
                {
                    Console.WriteLine("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped", (char)runSymbol, repeatLength, read.ID);
                    keepRead = false;
                }
            }

            Console.WriteLine("Read '{0}' Processed.", read.ID);

            // Only reads that passed the repeat check reach the output file.
            if (keepRead)
            {
                writer.Format(read);
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Filtering Completed!!");
}
/// <summary>
/// Runs the sequence simulation: generates subsequences of
/// <c>SequenceToSplit</c> and writes them to numbered FastA files
/// ("&lt;name&gt;_1.fa", "&lt;name&gt;_2.fa", ...) next to
/// <paramref name="outputFileName"/>, with at most
/// <c>Settings.OutputSequenceCount</c> sequences per file.
/// </summary>
/// <param name="outputFileName">Output file name; its extension is replaced by the "_{index}.fa" postfix.</param>
/// <param name="updateSimulationStats">Invoked once, before generation starts, with the total sequence count and the file count.</param>
/// <param name="simulationComplete">Invoked once at the end with the <c>Name</c> of the last formatter used, or an empty string when no file was written.</param>
/// <exception cref="ArgumentException">
/// The output directory does not exist, or <c>Settings.OutputSequenceCount</c> /
/// <c>Settings.SequenceLength</c> is not greater than zero.
/// </exception>
public void DoSimulation(string outputFileName, Action<long, long> updateSimulationStats, Action<string> simulationComplete)
{
    const string filePostfix = "_{0}.fa";

    FileInfo file = new FileInfo(outputFileName);
    if (file.Directory == null || !file.Directory.Exists)
    {
        throw new ArgumentException("Could not write to the output directory for " + outputFileName);
    }

    if (Settings.OutputSequenceCount <= 0)
    {
        throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
    }

    if (Settings.SequenceLength <= 0)
    {
        throw new ArgumentException("'Mean Output Length' should be greater than zero.");
    }

    // Strip the extension from the END of the full path. Searching for the
    // extension text with IndexOf would cut the path at its first occurrence,
    // which may lie inside a directory name (e.g. "C:\runs.fa\out.fa").
    // When there is no extension its length is 0 and the full name is kept.
    string filePrefix = file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

    long seqCount = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
    long fileCount = seqCount / Settings.OutputSequenceCount;
    if (seqCount % Settings.OutputSequenceCount != 0)
    {
        // A partially filled last file still counts as a file.
        fileCount++;
    }

    // Update the UI
    updateSimulationStats(seqCount, fileCount);

    int fileIndex = 1;
    FastAFormatter formatter = null;
    List<ISequence> generatedSequenceList = new List<ISequence>();

    for (long i = 0; i < seqCount; i++)
    {
        generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

        // Flush a full batch to its own file and start collecting the next one.
        if (generatedSequenceList.Count >= Settings.OutputSequenceCount)
        {
            FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
            formatter = new FastAFormatter();
            using (formatter.Open(outFile.FullName))
            {
                formatter.Format(generatedSequenceList);
            }

            generatedSequenceList.Clear();
        }
    }

    // Pick off any remaining sequences into the final file.
    if (generatedSequenceList.Count > 0)
    {
        FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
        formatter = new FastAFormatter();
        using (formatter.Open(outFile.FullName))
        {
            formatter.Format(generatedSequenceList);
        }

        simulationComplete(formatter.Name);
    }
    else
    {
        // Either we ended exactly on the boundary with no additional sequences
        // generated, OR we never generated any files.
        simulationComplete(formatter != null ? formatter.Name : string.Empty);
    }
}