/// <summary>
/// The execution method for the activity. Writes the configured sequence(s) to
/// <c>OutputFile</c> using a <c>FastAFormatter</c>.
/// </summary>
/// <remarks>
/// Every entry of <c>SequenceList</c> (when set) is written first, followed by
/// <c>Sequence</c> (when set). When both are null the file is opened and closed
/// without any sequence being written.
/// </remarks>
/// <param name="executionContext">The execution context.</param>
/// <returns>The execution status.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    FastAFormatter formatter = new FastAFormatter();
    formatter.Open(OutputFile);
    try
    {
        if (SequenceList != null)
        {
            foreach (ISequence sequence in SequenceList)
            {
                formatter.Write(sequence);
            }
        }

        if (Sequence != null)
        {
            formatter.Write(Sequence);
        }
    }
    finally
    {
        // Close even when a write fails so the output file handle is not leaked.
        formatter.Close();
    }

    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Emits the given sequences: into the FastA file named by <c>OutputFile</c> when one
/// is configured, otherwise as ID / symbol lines on the <c>Output</c> channel.
/// </summary>
/// <param name="sequences">Sequences to emit.</param>
private void WriteSequences(IEnumerable<ISequence> sequences)
{
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        // No file configured: dump each sequence as two result lines.
        Output.WriteLine(OutputLevel.Information, "Results:");
        foreach (ISequence item in sequences)
        {
            Output.WriteLine(OutputLevel.Results, item.ID);

            char[] symbols = item.Select(b => (char)b).ToArray();
            Output.WriteLine(OutputLevel.Results, new string(symbols));
        }

        return;
    }

    int written = 0;
    using (var fastaWriter = new FastAFormatter(this.OutputFile))
    {
        fastaWriter.AutoFlush = true;
        foreach (ISequence item in sequences)
        {
            written++;
            fastaWriter.Write(item);
        }
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
}
/// <summary>
/// Convert a list of ISequences to FASTA format and write to file. In order to reduce the amount of compute time required
/// by BLAST, we limit the number of sequences being fed to BLAST.
/// </summary>
/// <remarks>
/// The limit is checked after each write, so when <paramref name="sequences"/> is non-empty at
/// least one sequence is written even if <paramref name="maxSequences"/> is zero or negative.
/// </remarks>
/// <param name="sequences">IEnumerable list of Sequence objects</param>
/// <param name="output">Name of the output FASTA file</param>
/// <param name="maxSequences">Maximum number of sequences to convert</param>
/// <param name="overwrite">If true, any existing file with the same name will be overwritten. Otherwise, a non-empty existing file is left untouched and conversion is skipped.</param>
/// <returns>True if a Fasta file was written, false if a non-empty file already exists and <paramref name="overwrite"/> is false</returns>
public static bool ConvertToFASTA(IEnumerable<ISequence> sequences, string output, int maxSequences, bool overwrite = false)
{
    // Skip only when a non-empty file is already present and the caller did not ask to overwrite it.
    bool existingFileHasContent = File.Exists(output) && new FileInfo(output).Length != 0;
    if (existingFileHasContent && !overwrite)
    {
        return false;
    }

    FastAFormatter fa = new FastAFormatter(output);
    try
    {
        int count = 0;
        foreach (var seqObj in sequences)
        {
            fa.Write(seqObj);
            ++count;
            if (count >= maxSequences)
            {
                break;
            }
        }
    }
    finally
    {
        // Release the file even if enumeration or a write throws.
        fa.Close();
    }

    return true;
}
/// <summary>
/// Writes the greedy-path assembly to "&lt;prefix&gt;BestGreedyAssembly.fna" and the lines
/// produced by the haplotype searcher to "&lt;prefix&gt;Report.txt".
/// </summary>
/// <param name="fileNamePrefix">Prefix prepended to both output file names.</param>
/// <returns>
/// The report returned by the haplotype searcher, or null when <c>SuccessfulAssembly</c>
/// is false (in which case no file is written).
/// </returns>
public HaploGrepSharp.NewSearchMethods.HaploTypeReport OutputAssembly(string fileNamePrefix)
{
    if (!SuccessfulAssembly)
    {
        return null;
    }

    Bio.Sequence s;
    FastAFormatter fa = new FastAFormatter(fileNamePrefix + "BestGreedyAssembly.fna");
    try
    {
        var bestAssembly = GreedyPathAssembly;
        bestAssembly.FinalizeAndOrientToReference();
        s = new Bio.Sequence(bestAssembly.Sequence);
        s.ID = "GreedyAssembly - length=" + AssemblyLength.ToString();
        fa.Write(s);
    }
    finally
    {
        // Close the FASTA file even if finalizing or writing the assembly fails.
        fa.Close();
    }

    // Now report all differences as well. The using block guarantees the report
    // file is flushed and released even when the searcher throws.
    using (StreamWriter sw = new StreamWriter(fileNamePrefix + "Report.txt"))
    {
        var searcher = new HaploGrepSharp.NewSearchMethods.HaplotypeSearcher();
        List<string> linesToWrite = new List<string>();
        var report = searcher.GetHaplotypeReport(s, linesToWrite, fileNamePrefix);
        foreach (var l in linesToWrite)
        {
            sw.WriteLine(l);
        }

        return report;
    }
}
/// <summary>
/// Writes a single sequence (taken from the test XML) to a temp FastA file, parses the
/// file back and verifies that exactly one sequence with the same ID and symbols is read.
/// </summary>
public void FastAFormatterValidateWrite()
{
    using (FastAFormatter formatter = new FastAFormatter(Constants.FastaTempFileName))
    {
        // Gets the actual sequence and the alphabet from the Xml
        string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
        string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

        // Logs information to the log file
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alpName));

        Sequence seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence);
        seqOriginal.ID = "";
        Assert.IsNotNull(seqOriginal);

        // Use the formatter to write the original sequences to a temp file
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        formatter.Write(seqOriginal);

        // Close before re-opening the same file with the parser.
        formatter.Close();

        // Read the new file, then compare the sequences
        using (FastAParser parser = new FastAParser(Constants.FastaTempFileName))
        {
            parser.Alphabet = Alphabets.Protein;

            // Materialize once: the parse result would otherwise be re-evaluated
            // by every ElementAt()/Count() call.
            List<ISequence> seqsNew = parser.Parse().ToList();
            Assert.IsNotNull(seqsNew);
            Assert.AreEqual(1, seqsNew.Count);

            string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: New Sequence is '{0}'.", newSequence));
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            // Now compare the sequences. The parsed symbols must equal what was
            // written; the earlier check compared the parsed sequence with itself.
            Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);
            Assert.AreEqual(actualSequence, newSequence);

            Console.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method and is as expected.", newSequence));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.", newSequence));
        }

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Emits the assembled contigs: to <c>ContigFileName</c> when a diagnostic file prefix
/// is configured, otherwise to standard output in FastA format.
/// </summary>
/// <param name="assembly">Assembly result whose <c>AssembledSequences</c> are written.</param>
protected void writeContigs(PadenaAssembly assembly)
{
    var contigs = assembly.AssembledSequences;
    if (contigs.Count == 0)
    {
        Output.WriteLine(OutputLevel.Results, "\tNo sequences assembled.");
        return;
    }

    ensureContigNames(contigs);

    bool writeToConsole = string.IsNullOrEmpty(this.DiagnosticFilePrefix);
    if (writeToConsole)
    {
        Output.WriteLine(OutputLevel.Information, "\tAssembled Sequence Results: {0} sequences", contigs.Count);
        using (var consoleFormatter = new FastAFormatter())
        {
            consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));
            consoleFormatter.MaxSymbolsAllowedPerLine = decideOutputWidth();
            consoleFormatter.AutoFlush = true;
            foreach (ISequence contig in contigs)
            {
                consoleFormatter.Write(contig);
            }
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(ContigFileName))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence contig in contigs)
        {
            fileFormatter.Write(contig);
        }
    }

    Output.WriteLine(OutputLevel.Information, "\tWrote {0} sequences to {1}", contigs.Count, ContigFileName);
}
/// <summary>
/// Round-trip check: parses the FastA file configured under the given xml node, writes
/// the sequences to a temp file, parses that file again and compares the sequences.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateParseFormatGeneralTestCases(string nodeName)
{
    // Look up the input file and alphabet in the test configuration.
    string sourcePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(sourcePath));

    // Start from a clean slate for the round-trip file.
    string roundTripPath = Path.Combine(Path.GetTempPath(), "temp.fasta");
    if (File.Exists(roundTripPath))
    {
        File.Delete(roundTripPath);
    }

    // Load the reference sequences.
    List<ISequence> expected;
    using (FastAParser sourceParser = new FastAParser(sourcePath))
    {
        sourceParser.Alphabet = Utility.GetAlphabet(alphabetName);
        expected = sourceParser.Parse().ToList();
        Assert.IsFalse(expected.Count == 0);
    }

    // Write them out again.
    using (FastAFormatter writer = new FastAFormatter(roundTripPath))
    {
        writer.Write(expected);
    }

    try
    {
        using (FastAParser roundTripParser = new FastAParser(roundTripPath))
        {
            roundTripParser.Alphabet = Utility.GetAlphabet(alphabetName);
            IEnumerable<ISequence> actual = roundTripParser.Parse();
            Assert.IsNotNull(actual);

            // Walk the re-parsed sequences in step with the reference list.
            int index = 0;
            foreach (ISequence reparsed in actual)
            {
                string expectedText = expected[index].ConvertToString();
                string actualText = reparsed.ConvertToString();
                Assert.AreEqual(expectedText, actualText);
                index++;
            }

            Assert.AreEqual(index, expected.Count, "Number of sequences is different.");
        }
    }
    finally
    {
        // The temp file is removed whether or not the comparison passed.
        File.Delete(roundTripPath);
    }
}
/// <summary>
/// Exports a given list of sequences to a file in FastA format
/// </summary>
/// <param name="sequences">List of Sequences to be exported.</param>
/// <param name="filename">Target filename.</param>
static void ExportFastA(ICollection<ISequence> sequences, string filename)
{
    // Dispose the formatter when done: without it the output was never
    // explicitly closed, so the file handle stayed open and buffered data
    // might not reach disk.
    using (FastAFormatter formatter = new FastAFormatter(filename))
    {
        // Exports the sequences to a file
        formatter.Write(sequences);
    }
}
/// <summary>
/// Exports a given sequence to a file in FastA format
/// </summary>
/// <param name="sequence">Sequence to be exported.</param>
/// <param name="filename">Target filename.</param>
static void ExportFastA(ISequence sequence, string filename)
{
    // Dispose the formatter when done: without it the output was never
    // explicitly closed, so the file handle stayed open and buffered data
    // might not reach disk.
    using (FastAFormatter formatter = new FastAFormatter(filename))
    {
        // Exports the sequence to a file
        formatter.Write(sequence);
    }
}
/// <summary>
/// Writes one sequence through a formatter opened on a StreamWriter, then parses the
/// temp file back through a StreamReader and checks the symbols match.
/// </summary>
public void FastAFormatterValidateWriteWithStream()
{
    string actualSequence = string.Empty;

    using (FastAFormatter streamFormatter = new FastAFormatter())
    {
        using (StreamWriter tempWriter = new StreamWriter(Constants.FastaTempFileName))
        {
            streamFormatter.Open(tempWriter);

            // Expected symbols and alphabet come from the test configuration xml.
            actualSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleFastaNodeName,
                Constants.ExpectedSequenceNode);
            string alphabetName = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleFastaNodeName,
                Constants.AlphabetNameNode);

            ApplicationLog.WriteLine(string.Format(null, "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alphabetName));

            Sequence source = new Sequence(Utility.GetAlphabet(alphabetName), actualSequence);
            source.ID = "";
            Assert.IsNotNull(source);

            // Push the sequence through the formatter into the stream.
            ApplicationLog.WriteLine(string.Format(null, "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
            streamFormatter.Write(source);
            streamFormatter.Close();
        }

        using (StreamReader tempReader = new StreamReader(Constants.FastaTempFileName))
        {
            // Parse what was just written and compare the symbols.
            using (FastAParser streamParser = new FastAParser())
            {
                streamParser.Alphabet = Alphabets.Protein;

                List<ISequence> parsed = streamParser.Parse(tempReader).ToList();
                Assert.IsNotNull(parsed);

                char[] symbols = parsed[0].Select(b => (char)b).ToArray();
                Assert.AreEqual(actualSequence, new string(symbols));
            }
        }

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Writes every given sequence, in enumeration order, to the FastA file named by <c>OutputFile</c>.
/// </summary>
/// <param name="sequences">Sequences to write.</param>
private void WriteSequences(IEnumerable<ISequence> sequences)
{
    using (var fastaWriter = new FastAFormatter(this.OutputFile))
    {
        foreach (ISequence item in sequences)
        {
            fastaWriter.Write(item);
        }
    }
}
/// <summary>
/// Writes the sequence configured under the given xml node to a temp FastA file,
/// parses it back and checks symbols, ID and sequence count.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateFormatterGeneralTestCases(string nodeName)
{
    // Pull the expected sequence and alphabet name from the test configuration.
    string rawSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

    // Strip the line breaks and blanks that come from the xml layout.
    string compactSequence = rawSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
    string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

    ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.", rawSequence, alphabetName));

    Sequence built = new Sequence(Utility.GetAlphabet(alphabetName), compactSequence);
    built.ID = "test";
    ISequence source = built;
    Assert.IsNotNull(source);

    // Write the sequence to the temp file.
    using (FastAFormatter tempWriter = new FastAFormatter(Constants.FastaTempFileName))
    {
        ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        tempWriter.Write(source);
    }

    // Parse the temp file and compare against what was written.
    using (FastAParser tempParser = new FastAParser(Constants.FastaTempFileName))
    {
        tempParser.Alphabet = Utility.GetAlphabet(alphabetName);
        IEnumerable<ISequence> parsed = tempParser.Parse();

        ISequence first = parsed.FirstOrDefault();
        Assert.IsNotNull(first);

        string parsedText = first.ConvertToString();
        ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : New Sequence is '{0}'.", parsedText));

        Assert.AreEqual(compactSequence, parsedText);
        Assert.AreEqual(source.ID, first.ID);

        // Exactly one sequence is expected in the file.
        Assert.AreEqual(1, parsed.Count());
    }

    // Passed all the tests, delete the tmp file. If we failed an Assert,
    // the tmp file will still be there in case we need it for debugging.
    File.Delete(Constants.FastaTempFileName);
    ApplicationLog.WriteLine("Deleted the temp file created.");
}
/// <summary>
/// Writes ambiguous reads that are filtered out to the specified file.
/// Blocks while the collection is empty and returns once it has been marked
/// complete and fully drained.
/// </summary>
/// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
/// <param name="ambiguousFilename">File to write.</param>
private static void WriteAmbiguousReads(BlockingCollection<ISequence> ambiguousReads, string ambiguousFilename)
{
    FastAFormatter formatter = new FastAFormatter(ambiguousFilename);
    try
    {
        // GetConsumingEnumerable takes items until the collection is completed,
        // which is what the manual IsCompleted / TryTake(-1) loop did by hand.
        foreach (ISequence seq in ambiguousReads.GetConsumingEnumerable())
        {
            formatter.Write(seq);

            // Flush per read so already-consumed reads reach the file promptly.
            formatter.Flush();
        }
    }
    finally
    {
        // Release the file even if a write fails or the producer faults.
        formatter.Close();
    }
}
/// <summary>
/// For every data row of "FileLocations.csv" (file name, patient id, date), exports the
/// mitochondrial reads of the file to "&lt;HOME&gt;&lt;patientid&gt;.fa" and appends a
/// "patientid,count,size,date" row to "&lt;HOME&gt;CountsByDate.csv".
/// </summary>
/// <param name="args">Command line arguments; when more than two are supplied only the first row is processed.</param>
static void OldMain(string[] args)
{
    using (StreamWriter countsWriter = new StreamWriter(HOME + "CountsByDate.csv"))
    {
        string[] lines;
        using (StreamReader locationsReader = new StreamReader("FileLocations.csv"))
        {
            // The first line is skipped (header row).
            lines = locationsReader.ReadToEnd().Split('\n').Skip(1).ToArray();
        }

        Console.WriteLine("Starting");
        foreach (string rawLine in lines)
        {
            // Splitting on '\n' leaves a trailing '\r' on CRLF files and an empty
            // entry after the final newline; neither is real data.
            string line = rawLine.TrimEnd('\r');
            if (line.Length == 0)
            {
                continue;
            }

            Console.WriteLine(line);
            try
            {
                string[] split = line.Split(',');
                string fname = split[0];
                string patientid = split[1];
                string date = split[2];

                var mtReads = MitoDataGrabber.OutputMitoReadsFromBamFile(fname);
                long count = 0;
                FastAFormatter fao = new FastAFormatter(HOME + patientid + ".fa");
                try
                {
                    foreach (var seq in mtReads)
                    {
                        count++;
                        fao.Write(seq);
                    }
                }
                finally
                {
                    // Release the per-patient file even when reading the BAM fails midway.
                    fao.Close();
                }

                FileInfo inputInfo = new FileInfo(fname);
                string size = inputInfo.Length.ToString();

                // The lock is kept from the original; it has no effect in this sequential loop.
                lock (countsWriter)
                {
                    countsWriter.WriteLine(String.Join(",", patientid, count.ToString(), size, date));
                    Console.WriteLine(patientid + " has " + count.ToString() + " reads");
                }

                if (args.Length > 2)
                {
                    break;
                }
            }
            catch (Exception thrown)
            {
                // Best effort: report the failure and carry on with the next row.
                Console.WriteLine(thrown.Message);
            }
        }
    }
}
/// <summary>
/// Writes the sequences to the FastA file named by <c>OutputFile</c>, or, when no file
/// is configured, prints each sequence's symbols as one console line.
/// </summary>
/// <param name="sequences">Sequences to write.</param>
private void WriteSequences(IEnumerable<ISequence> sequences)
{
    bool toConsole = string.IsNullOrEmpty(this.OutputFile);
    if (toConsole)
    {
        foreach (ISequence item in sequences)
        {
            char[] symbols = item.Select(b => (char)b).ToArray();
            Console.WriteLine(new string(symbols));
        }

        return;
    }

    using (var fastaWriter = new FastAFormatter(this.OutputFile))
    {
        foreach (ISequence item in sequences)
        {
            fastaWriter.Write(item);
        }
    }
}
/// <summary>
/// Emits the scaffolds: to the FastA file named by <c>OutputFile</c> when one is
/// configured, otherwise to standard output in FastA format.
/// </summary>
/// <param name="scaffolds">The list of scaffolds sequence.</param>
private void WriteContigs(IList<ISequence> scaffolds)
{
    if (scaffolds.Count == 0)
    {
        Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
        return;
    }

    EnsureContigNames(scaffolds);

    bool toConsole = string.IsNullOrEmpty(this.OutputFile);
    if (toConsole)
    {
        Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);
        using (var consoleFormatter = new FastAFormatter())
        {
            consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));

            // Line width is capped at 80 symbols, or the window width minus two if smaller.
            consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
            consoleFormatter.AutoFlush = true;
            foreach (ISequence scaffold in scaffolds)
            {
                consoleFormatter.Write(scaffold);
            }
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(this.OutputFile))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence scaffold in scaffolds)
        {
            fileFormatter.Write(scaffold);
        }
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
}
/// <summary>
/// Emits the assembled contigs: to the FastA file named by <c>OutputFile</c> when one
/// is configured, otherwise to standard output in FastA format.
/// </summary>
/// <param name="assembly">Assembly result whose <c>AssembledSequences</c> are written.</param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    var contigs = assembly.AssembledSequences;
    if (contigs.Count == 0)
    {
        Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
        return;
    }

    EnsureContigNames(contigs);

    bool toConsole = string.IsNullOrEmpty(this.OutputFile);
    if (toConsole)
    {
        Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", contigs.Count);
        using (var consoleFormatter = new FastAFormatter())
        {
            consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));

            // Line width is capped at 80 symbols, or the window width minus two if smaller.
            consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
            consoleFormatter.AutoFlush = true;
            foreach (ISequence contig in contigs)
            {
                consoleFormatter.Write(contig);
            }
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(this.OutputFile))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence contig in contigs)
        {
            fileFormatter.Write(contig);
        }
    }

    Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", contigs.Count, this.OutputFile);
}
/// <summary>
/// Exports reads from the BAM file named by <c>Filename</c> to <c>OutputFile</c> in FastA
/// format and prints the number of reads and the size of the output file.
/// </summary>
/// <remarks>
/// When <c>OutputFile</c> is not set it is derived from <c>Filename</c> by replacing the
/// BAM suffix with the default export suffix. When <c>CRSAlignedOnly</c> is set, the
/// CRS-aligned-only reader is used with <c>pfractionToOutput</c>.
/// </remarks>
/// <exception cref="ArgumentNullException">
/// <c>Filename</c> is null or empty, or does not end with the BAM suffix. The same exception
/// type is used for both cases to stay compatible with existing callers.
/// </exception>
public void OutputMTReads()
{
    // The (paramName, message) overload is used so the text ends up as the
    // exception message rather than being reported as a parameter name.
    if (String.IsNullOrEmpty(Filename))
    {
        throw new ArgumentNullException("Filename", "No input file specified");
    }

    // Ordinal comparison: the suffix is removed below by character count, so the
    // match must be an exact character match rather than a culture-aware one.
    if (!Filename.EndsWith(BAM_FILE_SUFFIX, StringComparison.Ordinal))
    {
        throw new ArgumentNullException("Filename", "Input file must be a .BAM file");
    }

    if (string.IsNullOrEmpty(OutputFile))
    {
        OutputFile = Filename.Remove(Filename.Length - BAM_FILE_SUFFIX.Length) + DEFAULT_EXPORT_SUFFIX;
    }

    IEnumerable<ISequence> mtReads;
    if (CRSAlignedOnly)
    {
        mtReads = MitoDataGrabber.OutputMitoReadsFromBamFileAlignedToCRSOnly(Filename, pfractionToOutput);
    }
    else
    {
        mtReads = MitoDataGrabber.OutputMitoReadsFromBamFile(Filename);
    }

    long count = 0;
    FastAFormatter fao = new FastAFormatter(OutputFile);
    try
    {
        foreach (var seq in mtReads)
        {
            count++;
            fao.Write(seq);
        }
    }
    finally
    {
        // Release the output file even when reading the BAM fails midway.
        fao.Close();
    }

    FileInfo FI = new FileInfo(OutputFile);
    Console.WriteLine("Wrote " + count.ToString() + " reads to output file.");
    Console.WriteLine("Of Size: " + GetMTDataFromBAM.Program.FormatMemorySize(FI.Length));
}
/// <summary>
/// Emits the sequences: to the FastA file named by <c>OutputFile</c> when one is
/// configured, otherwise as ID / symbol line pairs on the supplied writer.
/// </summary>
/// <param name="assembly">Sequences to emit.</param>
/// <param name="outputWriter">A TextWriter used only when no output file is configured.</param>
protected void WriteContigs(IEnumerable<ISequence> assembly, TextWriter outputWriter)
{
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        foreach (ISequence contig in assembly)
        {
            outputWriter.WriteLine(contig.ID);

            char[] symbols = contig.Select(b => (char)b).ToArray();
            outputWriter.WriteLine(new string(symbols));
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(this.OutputFile))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence contig in assembly)
        {
            fileFormatter.Write(contig);
        }
    }
}
/// <summary>
/// Emits the scaffolds: to the FastA file named by <c>OutputFile</c> when one is
/// configured, otherwise as ID / symbol line pairs on the console.
/// </summary>
/// <param name="scaffolds">The list of scaffolds sequence.</param>
private void WriteContigs(IEnumerable<ISequence> scaffolds)
{
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        foreach (ISequence scaffold in scaffolds)
        {
            Console.WriteLine(scaffold.ID);

            char[] symbols = scaffold.Select(b => (char)b).ToArray();
            Console.WriteLine(new string(symbols));
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(this.OutputFile))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence scaffold in scaffolds)
        {
            fileFormatter.Write(scaffold);
        }
    }
}
/// <summary>
/// Emits the assembled contigs: to the FastA file named by <c>OutputFile</c> when one
/// is configured, otherwise as ID / symbol line pairs on the console.
/// </summary>
/// <param name="assembly">Assembly result whose <c>AssembledSequences</c> are written.</param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    bool toConsole = string.IsNullOrEmpty(this.OutputFile);
    if (toConsole)
    {
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            Console.WriteLine(contig.ID);

            char[] symbols = contig.Select(b => (char)b).ToArray();
            Console.WriteLine(new string(symbols));
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(this.OutputFile))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            fileFormatter.Write(contig);
        }
    }
}
/// <summary>
/// Emits the sequences, assigning a generated ID to any sequence that has none:
/// to the FastA file named by <c>OutputFile</c> when one is configured, otherwise
/// as ID / symbol line pairs on the <c>Output</c> channel.
/// </summary>
/// <param name="assembly">Sequences to emit.</param>
protected void WriteContigs(IEnumerable<ISequence> assembly)
{
    // 1-based position of the current sequence; feeds the generated IDs.
    int position = 1;

    if (string.IsNullOrEmpty(this.OutputFile))
    {
        Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");
        foreach (ISequence contig in assembly)
        {
            if (string.IsNullOrEmpty(contig.ID))
            {
                contig.ID = GenerateSequenceId(position);
            }

            Output.WriteLine(OutputLevel.Results, contig.ID);

            char[] symbols = contig.Select(b => (char)b).ToArray();
            Output.WriteLine(OutputLevel.Results, new string(symbols));
            position++;
        }

        return;
    }

    using (var fileFormatter = new FastAFormatter(this.OutputFile))
    {
        fileFormatter.AutoFlush = true;
        foreach (ISequence contig in assembly)
        {
            if (string.IsNullOrEmpty(contig.ID))
            {
                contig.ID = GenerateSequenceId(position);
            }

            fileFormatter.Write(contig);
            position++;
        }
    }

    Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
}
/// <summary>
/// Round-trip check: parses the FastA file configured under the given xml node, writes
/// the sequences to "tmp.ffn", parses that file again and compares IDs and symbols of
/// every sequence.
/// </summary>
/// <param name="nodeName">xml node name.</param>
void ValidateParseFormatGeneralTestCases(string nodeName)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(filePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : File Exists in the Path '{0}'.", filePath));

    string filepathTmp = "tmp.ffn";
    using (FastAParser parserObj = new FastAParser(filePath))
    {
        using (FastAFormatter formatter = new FastAFormatter(filepathTmp))
        {
            // Read the original file. The result is materialized once: every
            // Count()/ElementAt() call on the parser's own enumerable would
            // otherwise evaluate the parse again, which made the comparison
            // loop below quadratic.
            parserObj.Alphabet = Utility.GetAlphabet(alphabet);
            List<ISequence> seqsOriginal = parserObj.Parse().ToList();
            Assert.IsNotNull(seqsOriginal);

            // Use the formatter to write the original sequences to a temp file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : Creating the Temp file '{0}'.", filepathTmp));
            foreach (Sequence s in seqsOriginal)
            {
                formatter.Write(s);
            }

            // Close before re-opening the same file with the second parser.
            formatter.Close();

            using (FastAParser parserObjNew = new FastAParser(filepathTmp))
            {
                // Read the new file, then compare the sequences
                parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
                List<ISequence> seqsNew = parserObjNew.Parse().ToList();
                Assert.IsNotNull(seqsNew);

                string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : New Sequence is '{0}'.", newSequence));

                // Now compare the sequences.
                int countOriginal = seqsOriginal.Count;
                int countNew = seqsNew.Count;
                Assert.AreEqual(countOriginal, countNew);
                ApplicationLog.WriteLine("FastA Formatter :The Number of sequences are matching.");

                for (int i = 0; i < countOriginal; i++)
                {
                    ISequence original = seqsOriginal[i];
                    ISequence reparsed = seqsNew[i];

                    Assert.AreEqual(original.ID, reparsed.ID);
                    string orgSeq = new string(original.Select(a => (char)a).ToArray());
                    string newSeq = new string(reparsed.Select(a => (char)a).ToArray());
                    Assert.AreEqual(orgSeq, newSeq);

                    Console.WriteLine(
                        string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method and is as expected.", reparsed.ID));
                    ApplicationLog.WriteLine(
                        string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method.", reparsed.ID));
                }

                parserObjNew.Close();
            }

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(filepathTmp);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
    }
}
/// <summary>
/// Filters the test data for the input file: copies every read of the input FastA file to
/// the output file, except reads in which a symbol is immediately repeated
/// <paramref name="repeatLength"/> times in a row.
/// </summary>
/// <remarks>
/// Empty reads contain no repeats and are copied through; previously they caused an
/// index-out-of-range failure. When the input file does not exist a message is printed
/// and nothing is written.
/// </remarks>
/// <param name="inputFile">Input File</param>
/// <param name="outputFile">Output File</param>
/// <param name="repeatLength">Repeat Length</param>
static void FilterTestData(string inputFile, string outputFile, int repeatLength)
{
    if (!File.Exists(inputFile))
    {
        Console.WriteLine("Enter Valid File Path.");
        return;
    }

    Console.WriteLine(string.Format("Processing the file '{0}'.", inputFile));

    // Read the inputfile with the help of FastA Parser
    using (FastAParser parserObj = new FastAParser(inputFile))
    {
        using (FastAFormatter outputWriter = new FastAFormatter(outputFile))
        {
            // Going through read by read in a given file
            foreach (ISequence seq in parserObj.Parse())
            {
                byte[] actualRead = seq.ToArray();

                byte repeatedSymbol;
                bool ignoreRead = HasRepeatedRun(actualRead, repeatLength, out repeatedSymbol);
                if (ignoreRead)
                {
                    Console.WriteLine(string.Format("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped", (char)repeatedSymbol, repeatLength, seq.ID));
                }

                Console.WriteLine(string.Format("Read '{0}' Processed.", seq.ID));

                // Only reads without an over-long repeat reach the output file.
                if (!ignoreRead)
                {
                    outputWriter.Write(seq);
                }
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Filtering Completed!!");
}

/// <summary>
/// Reports whether some symbol in the read is followed by exactly-reached
/// <paramref name="repeatLength"/> consecutive copies of itself.
/// </summary>
/// <param name="read">Symbols of the read; may be empty.</param>
/// <param name="repeatLength">Number of consecutive repeats that disqualifies a read.</param>
/// <param name="repeatedSymbol">The repeated symbol when the method returns true; otherwise 0.</param>
/// <returns>True when such a run exists.</returns>
private static bool HasRepeatedRun(byte[] read, int repeatLength, out byte repeatedSymbol)
{
    repeatedSymbol = 0;
    if (read.Length == 0)
    {
        return false;
    }

    byte previousChar = read[0];
    int repeatLenCount = 0;
    for (int j = 1; j < read.Length; j++)
    {
        if (previousChar == read[j])
        {
            repeatLenCount++;
            if (repeatLenCount == repeatLength)
            {
                repeatedSymbol = previousChar;
                return true;
            }
        }
        else
        {
            // A different symbol starts a new run.
            repeatLenCount = 0;
            previousChar = read[j];
        }
    }

    return false;
}
/// <summary>
/// Does the logic behind the sequence simulation: generates subsequences and writes them
/// to numbered FastA files ("&lt;prefix&gt;_1.fa", "&lt;prefix&gt;_2.fa", …), each holding at most
/// <c>settings.OutputSequenceCount</c> sequences.
/// </summary>
/// <param name="window">Window that is told the expected sequence/file counts and notified on completion.</param>
/// <param name="outputFileName">Output path; its extension is stripped to build the file prefix.</param>
/// <param name="settings">Simulation settings (coverage, sequence length, sequences per file).</param>
/// <exception cref="ArgumentException">
/// The output directory does not exist, or the sequence count / length settings are not positive.
/// </exception>
internal void DoSimulation(SimulatorWindow window, string outputFileName, SimulatorSettings settings)
{
    FileInfo file = new FileInfo(outputFileName);
    if (file.Directory == null || !file.Directory.Exists)
    {
        throw new ArgumentException("Could not write to the output directory for " + outputFileName);
    }

    if (settings.OutputSequenceCount <= 0)
    {
        throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
    }

    if (settings.SequenceLength <= 0)
    {
        throw new ArgumentException("'Mean Output Length' should be greater than zero.");
    }

    // Strip the extension from the END of the path. Searching for the first
    // occurrence of the extension truncated paths whose directory part
    // already contains the same text (e.g. "C:\runs.fa\out.fa").
    string filePrefix = file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);
    string filePostfix = "_{0}.fa";

    long seqCount = (settings.DepthOfCoverage * SequenceToSplit.Count) / settings.SequenceLength;
    long fileCount = seqCount / settings.OutputSequenceCount;
    if (seqCount % settings.OutputSequenceCount != 0)
    {
        fileCount++;
    }

    window.UpdateSimulationStats(seqCount, fileCount);

    if (generatedSequenceList == null)
    {
        generatedSequenceList = new List<ISequence>();
    }
    else
    {
        generatedSequenceList.Clear();
    }

    int fileIndex = 1;
    FastAFormatter formatter = null;

    for (long i = 0; i < seqCount; i++)
    {
        generatedSequenceList.Add(CreateSubsequence(settings, i));

        // A full batch is flushed to its own file.
        if (generatedSequenceList.Count >= settings.OutputSequenceCount)
        {
            FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
            formatter = new FastAFormatter(outFile.FullName);
            try
            {
                foreach (ISequence seq in generatedSequenceList)
                {
                    formatter.Write(seq);
                }
            }
            finally
            {
                // Release the file even if a write fails.
                formatter.Close();
            }

            generatedSequenceList.Clear();
        }
    }

    // Any remaining sequences go into one last, partially filled file.
    if (generatedSequenceList.Count > 0)
    {
        FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
        formatter = new FastAFormatter(outFile.FullName);
        try
        {
            foreach (ISequence seq in generatedSequenceList)
            {
                formatter.Write(seq);
            }
        }
        finally
        {
            formatter.Close();
        }

        // NOTE(review): confirm whether formatter.Name is the file name or the format name.
        window.NotifySimulationComplete(formatter.Name);
    }
    else
    {
        window.NotifySimulationComplete(string.Empty);
    }
}
/// <summary>
/// Does the logic behind the sequence simulation: generates subsequences and writes them
/// to numbered FastA files ("&lt;prefix&gt;_1.fa", "&lt;prefix&gt;_2.fa", …), each holding at most
/// <c>Settings.OutputSequenceCount</c> sequences.
/// </summary>
/// <param name="outputFileName">Output path; its extension is stripped to build the file prefix.</param>
/// <param name="updateSimulationStats">Callback receiving the expected sequence count and file count before generation starts.</param>
/// <param name="simulationComplete">Callback invoked once at the end with the last formatter's <c>Name</c>, or an empty string when no file was written.</param>
/// <exception cref="ArgumentException">
/// The output directory does not exist, or the sequence count / length settings are not positive.
/// </exception>
public void DoSimulation(string outputFileName, Action<long, long> updateSimulationStats, Action<string> simulationComplete)
{
    const string filePostfix = "_{0}.fa";

    FileInfo file = new FileInfo(outputFileName);
    if (file.Directory == null || !file.Directory.Exists)
    {
        throw new ArgumentException("Could not write to the output directory for " + outputFileName);
    }

    if (Settings.OutputSequenceCount <= 0)
    {
        throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
    }

    if (Settings.SequenceLength <= 0)
    {
        throw new ArgumentException("'Mean Output Length' should be greater than zero.");
    }

    // Strip the extension from the END of the path. Searching for the first
    // occurrence of the extension truncated paths whose directory part
    // already contains the same text (e.g. "C:\runs.fa\out.fa").
    string filePrefix = file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

    long seqCount = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
    long fileCount = seqCount / Settings.OutputSequenceCount;
    if (seqCount % Settings.OutputSequenceCount != 0)
    {
        fileCount++;
    }

    // Update the UI
    updateSimulationStats(seqCount, fileCount);

    int fileIndex = 1;
    FastAFormatter formatter = null;
    List<ISequence> generatedSequenceList = new List<ISequence>();

    for (long i = 0; i < seqCount; i++)
    {
        generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

        // A full batch is flushed to its own file.
        if (generatedSequenceList.Count >= Settings.OutputSequenceCount)
        {
            FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
            formatter = new FastAFormatter(outFile.FullName);
            try
            {
                foreach (ISequence seq in generatedSequenceList)
                {
                    formatter.Write(seq);
                }
            }
            finally
            {
                // Release the file even if a write fails.
                formatter.Close();
            }

            generatedSequenceList.Clear();
        }
    }

    // Pick off any remaining sequences into the final file.
    if (generatedSequenceList.Count > 0)
    {
        FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
        formatter = new FastAFormatter(outFile.FullName);
        try
        {
            foreach (ISequence seq in generatedSequenceList)
            {
                formatter.Write(seq);
            }
        }
        finally
        {
            formatter.Close();
        }

        // NOTE(review): confirm whether formatter.Name is the file name or the format name.
        simulationComplete(formatter.Name);
    }
    else
    {
        // Either we ended exactly on the boundary with no additional sequences
        // generated, OR we never generated any files.
        simulationComplete(formatter != null ? formatter.Name : string.Empty);
    }
}
/// <summary>
/// Save to disk a list of sequences in FASTA format.
/// </summary>
/// <param name="sequences">Sequences to save; must not be null.</param>
/// <param name="saveFilename">Target file name; must not be null, empty or whitespace.</param>
/// <param name="appendSequenceCountToFilename">When true, the file name is first passed through <c>AddSequenceAndProteinCountToFilename</c>.</param>
/// <param name="fileExistsOptions">What to do when the target file already exists: pick the next free file name, overwrite, or skip.</param>
/// <param name="progressActionSet">Optional progress sink; when not null a one-line status message is reported.</param>
/// <returns>The full path of the file that was written, or null when the file already existed and the skip option was selected.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="sequences"/> is null, or <paramref name="saveFilename"/> is null, empty or whitespace.
/// </exception>
public static string SaveSequencesAsFasta(List<ISequence> sequences, string saveFilename, bool appendSequenceCountToFilename = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename, ProgressActionSet progressActionSet = null)
{
    if (sequences == null)
    {
        throw new ArgumentOutOfRangeException(nameof(sequences));
    }

    if (string.IsNullOrWhiteSpace(saveFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(saveFilename));
    }

    if (appendSequenceCountToFilename)
    {
        saveFilename = AddSequenceAndProteinCountToFilename(sequences, saveFilename);
    }

    var fileInfo = new FileInfo(saveFilename);

    if (fileInfo.Exists)
    {
        if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
        {
            fileInfo = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(fileInfo.FullName));

            if (progressActionSet != null)
            {
                ProgressActionSet.Report("Save sequence: already exists, appended number: " + fileInfo.FullName, progressActionSet);
            }
        }
        else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
        {
            if (progressActionSet != null)
            {
                ProgressActionSet.Report("Save sequence: overwriting file: " + fileInfo.FullName, progressActionSet);
            }
        }
        else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
        {
            if (progressActionSet != null)
            {
                ProgressActionSet.Report("Save sequence: skipped file, already exists: " + fileInfo.FullName, progressActionSet);
            }

            // Nothing is written when the existing file is to be kept.
            return null;
        }
    }
    else
    {
        if (progressActionSet != null)
        {
            ProgressActionSet.Report("Save sequence: new file: " + fileInfo.FullName, progressActionSet);
        }
    }

    // make sure directory exists
    if (fileInfo.Directory != null)
    {
        fileInfo.Directory.Create();
    }

    var formatter = new FastAFormatter(fileInfo.FullName);
    try
    {
        formatter.Write(sequences);
    }
    finally
    {
        // Release the file even if writing fails.
        formatter.Close();
    }

    return fileInfo.FullName;
}
/// <summary>
/// Writes the sequence configured under the given xml node to the temp FastA file,
/// parses it back and checks the sequence count, ID and symbols.
/// </summary>
/// <param name="nodeName">xml node name.</param>
void ValidateFormatterGeneralTestCases(string nodeName)
{
    using (var tempWriter = new FastAFormatter(Constants.FastaTempFileName))
    {
        // Expected sequence text and alphabet name come from the test configuration.
        string rawSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

        // Strip the line breaks and blanks that come from the xml layout.
        string compactSequence = rawSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.", rawSequence, alphabetName));

        // Build the sequence from the stripped text.
        Sequence source = new Sequence(
            Utility.GetAlphabet(alphabetName),
            encodingObj.GetBytes(compactSequence));
        source.ID = "";
        Assert.IsNotNull(source);

        // Write it out, then close so the parser can open the same file.
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        tempWriter.Write(source);
        tempWriter.Close();

        using (var tempParser = new FastAParser(Constants.FastaTempFileName))
        {
            tempParser.Alphabet = Utility.GetAlphabet(alphabetName);
            IEnumerable<ISequence> parsed = tempParser.Parse();

            string parsedText = new string(parsed.ElementAt(0).Select(b => (char)b).ToArray());
            Assert.IsNotNull(parsed);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : New Sequence is '{0}'.", parsedText));

            // Exactly one sequence, with matching ID and symbols, is expected.
            int parsedCount = parsed.Count();
            Assert.AreEqual(1, parsedCount);
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            Assert.AreEqual(source.ID, parsed.ElementAt(0).ID);
            Assert.AreEqual(compactSequence, parsedText);

            Console.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method and is as expected.", parsedText));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method.", parsedText));
        }

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}