/// <summary>
/// Writes one sequence to a temp file with <see cref="FastAFormatter"/>, parses the
/// file back with <see cref="FastAParser"/> and verifies that exactly one sequence
/// comes back with the same ID and the same residues as the sequence that was written.
/// </summary>
public void FastAFormatterValidateWrite()
{
    using (FastAFormatter formatter = new FastAFormatter(Constants.FastaTempFileName))
    {
        // Gets the actual sequence and the alphabet from the Xml
        string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
        string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

        // Logs information to the log file
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alpName));

        Sequence seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence);
        seqOriginal.ID = "";

        // Use the formatter to write the original sequences to a temp file
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        formatter.Write(seqOriginal);
        formatter.Close();

        // Read the new file, then compare the sequences
        using (FastAParser parser = new FastAParser(Constants.FastaTempFileName))
        {
            parser.Alphabet = Alphabets.Protein;
            IEnumerable<ISequence> parsed = parser.Parse();
            Assert.IsNotNull(parsed);

            // Materialise once so the file is parsed a single time instead of once
            // per ElementAt()/Count() call.
            List<ISequence> seqsNew = parsed.ToList();

            // Check the count before indexing so an empty result fails with a clear assertion.
            Assert.AreEqual(1, seqsNew.Count);
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: New Sequence is '{0}'.", newSequence));

            // Now compare the sequences. The expected value is the text that was written,
            // not a second copy of the parsed sequence (which could never differ).
            Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);
            Assert.AreEqual(actualSequence, newSequence);

            Console.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method and is as expected.", newSequence));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.", newSequence));
        }

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Convert a list of ISequences to FASTA format and write to file. In order to reduce the amount of compute time required
/// by BLAST, we limit the number of sequences being fed to BLAST.
/// </summary>
/// <param name="sequences">IEnumerable list of Sequence objects</param>
/// <param name="output">Name of the output FASTA file</param>
/// <param name="maxSequences">
/// Maximum number of sequences to convert. The limit is checked after each write, so
/// the first sequence is written even when this value is zero or negative.
/// </param>
/// <param name="overwrite">If true, any existing file with the same name will be overwritten. Otherwise, the file will not be overwritten and conversion will be skipped.</param>
/// <returns>True if a Fasta file was written, false if a non-empty file already exists and <paramref name="overwrite"/> is false</returns>
public static bool ConvertToFASTA(IEnumerable<ISequence> sequences, string output, int maxSequences, bool overwrite = false)
{
    // Conversion is skipped only when a non-empty file is already present and the
    // caller did not ask for it to be overwritten.
    bool hasExistingContent = File.Exists(output) && new FileInfo(output).Length != 0;
    if (hasExistingContent && !overwrite)
    {
        return false;
    }

    FastAFormatter fa = new FastAFormatter(output);
    try
    {
        int count = 0;
        foreach (ISequence seqObj in sequences)
        {
            fa.Write(seqObj);
            if (++count >= maxSequences)
            {
                break;
            }
        }
    }
    finally
    {
        // Close the formatter even if enumeration or writing throws.
        fa.Close();
    }

    return true;
}
/// <summary>
/// The execution method for the activity. Opens <c>OutputFile</c> with a FastA formatter,
/// writes every entry of <c>SequenceList</c> (when set) followed by <c>Sequence</c>
/// (when set), and closes the formatter.
/// </summary>
/// <param name="executionContext">The execution context.</param>
/// <returns>The execution status; always <c>ActivityExecutionStatus.Closed</c>.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    FastAFormatter formatter = new FastAFormatter();
    formatter.Open(OutputFile);
    try
    {
        // The list goes first and the single sequence last; either may be absent.
        if (SequenceList != null)
        {
            foreach (ISequence sequence in SequenceList)
            {
                formatter.Write(sequence);
            }
        }

        if (Sequence != null)
        {
            formatter.Write(Sequence);
        }
    }
    finally
    {
        // Close the formatter even when a write fails.
        formatter.Close();
    }

    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Writes the greedy assembly to "<paramref name="fileNamePrefix"/>BestGreedyAssembly.fna" and the
/// lines collected by the haplotype searcher to "<paramref name="fileNamePrefix"/>Report.txt".
/// </summary>
/// <param name="fileNamePrefix">Prefix prepended to both output file names.</param>
/// <returns>The haplotype report for the assembly, or null when <c>SuccessfulAssembly</c> is false.</returns>
public HaploGrepSharp.NewSearchMethods.HaploTypeReport OutputAssembly(string fileNamePrefix)
{
    if (!SuccessfulAssembly)
    {
        return null;
    }

    Bio.Sequence s;
    FastAFormatter fa = new FastAFormatter(fileNamePrefix + "BestGreedyAssembly.fna");
    try
    {
        var bestAssembly = GreedyPathAssembly;
        bestAssembly.FinalizeAndOrientToReference();
        s = new Bio.Sequence(bestAssembly.Sequence);
        s.ID = "GreedyAssembly - length=" + AssemblyLength.ToString();
        fa.Write(s);
    }
    finally
    {
        // Close the FASTA file even if finalising or writing the assembly fails.
        fa.Close();
    }

    // Now report all differences as well
    using (StreamWriter sw = new StreamWriter(fileNamePrefix + "Report.txt"))
    {
        var searcher = new HaploGrepSharp.NewSearchMethods.HaplotypeSearcher();
        List<string> linesToWrite = new List<string>();
        var report = searcher.GetHaplotypeReport(s, linesToWrite, fileNamePrefix);
        foreach (string l in linesToWrite)
        {
            sw.WriteLine(l);
        }

        return report;
    }
}
/// <summary>
/// Round-trips a single sequence through a <see cref="FastAFormatter"/> opened on a
/// <see cref="StreamWriter"/> and a <see cref="FastAParser"/> fed from a
/// <see cref="StreamReader"/>, then checks the parsed residues equal the expected text.
/// </summary>
public void FastAFormatterValidateWriteWithStream()
{
    string actualSequence = string.Empty;

    using (FastAFormatter formatter = new FastAFormatter())
    {
        using (StreamWriter writer = new StreamWriter(Constants.FastaTempFileName))
        {
            formatter.Open(writer);

            // Expected sequence text and alphabet name come from the Xml configuration.
            actualSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleFastaNodeName,
                Constants.ExpectedSequenceNode);
            string alpName = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleFastaNodeName,
                Constants.AlphabetNameNode);

            // Record what is being validated.
            ApplicationLog.WriteLine(string.Format(null, "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alpName));

            Sequence seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence);
            seqOriginal.ID = "";
            Assert.IsNotNull(seqOriginal);

            // Push the sequence through the formatter into the stream.
            ApplicationLog.WriteLine(string.Format(null, "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
            formatter.Write(seqOriginal);
            formatter.Close();
        }

        // Parse the file that was just written and compare the residues.
        using (StreamReader reader = new StreamReader(Constants.FastaTempFileName))
        using (FastAParser parser = new FastAParser())
        {
            parser.Alphabet = Alphabets.Protein;
            IEnumerable<ISequence> seq = parser.Parse(reader);

            // Create a list of sequences.
            List<ISequence> seqsList = seq.ToList();
            Assert.IsNotNull(seqsList);

            char[] residues = seqsList[0].Select(a => (char)a).ToArray();
            string seqString = new string(residues);
            Assert.AreEqual(actualSequence, seqString);
        }

        // Every check passed, so the temp file can go. After a failed Assert it is
        // left on disk for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Verifies that closing a <see cref="FastAFormatter"/> that was never opened raises
/// an <see cref="InvalidOperationException"/>; the test fails if no such exception occurs.
/// </summary>
public void FastAFormatterInvalidateClose()
{
    InvalidOperationException caught = null;

    try
    {
        FastAFormatter unopened = new FastAFormatter();
        unopened.Close();
    }
    catch (InvalidOperationException expected)
    {
        caught = expected;
    }

    // Reaching this point without an exception means Close() wrongly succeeded.
    if (caught == null)
    {
        Assert.Fail();
    }

    ApplicationLog.Write("Fasta P2 : InvalidOperationException caught successfully. " + caught.Message);
}
/// <summary>
/// Writes ambiguous reads that are filtered out to the specified file. Blocks while
/// waiting for reads and returns once the collection is marked complete and drained.
/// </summary>
/// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
/// <param name="ambiguousFilename">File to write.</param>
private static void WriteAmbiguousReads(BlockingCollection<ISequence> ambiguousReads, string ambiguousFilename)
{
    FastAFormatter formatter = new FastAFormatter(ambiguousFilename);
    try
    {
        // GetConsumingEnumerable waits for each item and ends when adding is
        // completed and the collection is empty.
        foreach (ISequence seq in ambiguousReads.GetConsumingEnumerable())
        {
            formatter.Write(seq);

            // Flush per read, as before, so written reads reach the file promptly.
            formatter.Flush();
        }
    }
    finally
    {
        // Close the file even if a write fails part-way through.
        formatter.Close();
    }
}
/// <summary>
/// Reads "FileLocations.csv" (header row skipped; columns: BAM file name, patient id,
/// date), exports the mitochondrial reads of each listed file to
/// HOME + patientid + ".fa", and appends "patientid,readCount,fileSize,date" to
/// HOME + "CountsByDate.csv". A failure on one row is printed and the next row is processed.
/// </summary>
/// <param name="args">Command-line arguments; when more than two are supplied only the first successful row is processed.</param>
static void OldMain(string[] args)
{
    using (StreamWriter countsWriter = new StreamWriter(HOME + "CountsByDate.csv"))
    {
        // ReadAllLines handles both LF and CRLF endings, so no stray '\r' ends up
        // in the last column, and the input file is closed as soon as it is read.
        string[] lines = File.ReadAllLines("FileLocations.csv");

        Console.WriteLine("Starting");
        foreach (string line in lines.Skip(1))
        {
            // Blank rows (typically a trailing newline) carry no data.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            Console.WriteLine(line);
            try
            {
                string[] split = line.Split(',');
                string fname = split[0];
                string patientid = split[1];
                string date = split[2];

                var mtReads = MitoDataGrabber.OutputMitoReadsFromBamFile(fname);

                long count = 0;
                FastAFormatter fao = new FastAFormatter(HOME + patientid + ".fa");
                try
                {
                    foreach (var seq in mtReads)
                    {
                        count++;
                        fao.Write(seq);
                    }
                }
                finally
                {
                    fao.Close();
                }

                string size = new FileInfo(fname).Length.ToString();
                countsWriter.WriteLine(String.Join(",", patientid, count.ToString(), size, date));
                Console.WriteLine(patientid + " has " + count.ToString() + " reads");

                if (args.Length > 2)
                {
                    break;
                }
            }
            catch (Exception thrown)
            {
                // Best effort: report the problem and carry on with the next row.
                Console.WriteLine(thrown.Message);
            }
        }
    }
}
/// <summary>
/// Parses the FASTQ file, assembles its reads with <see cref="OverlapDeNovoAssembler"/>
/// and writes the assembled sequences to "assembled_sequences.fasta".
/// </summary>
/// <remarks>
/// The method contains no awaits, so it is a plain synchronous method; as an
/// <c>async void</c> its exceptions could not be observed by the caller.
/// </remarks>
/// <param name="fastqFileName">Path of the FASTQ file to read.</param>
public static void AssemblySequences(string fastqFileName)
{
    var parser = new FastQParser();

    List<IQualitativeSequence> sequences;
    // The input is only read, so open it read-only.
    using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read))
    {
        // Materialise while the stream is still open.
        sequences = parser.Parse(fileStream).ToList();
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    IDeNovoAssembly assembly = assembler.Assemble(sequences);

    FastAFormatter outputFormatter = new FastAFormatter();
    outputFormatter.Open("assembled_sequences.fasta");
    try
    {
        outputFormatter.Format(assembly.AssembledSequences);
    }
    finally
    {
        // Close the output even if formatting fails.
        outputFormatter.Close();
    }
}
/// <summary>
/// Exports the mitochondrial reads of the BAM file named by <c>Filename</c> to
/// <c>OutputFile</c> in FASTA format and prints the read count and output size.
/// When <c>OutputFile</c> is not set it is derived from <c>Filename</c> by replacing
/// the BAM suffix with <c>DEFAULT_EXPORT_SUFFIX</c>.
/// </summary>
/// <exception cref="ArgumentNullException">
/// <c>Filename</c> is null or empty, or does not end with <c>BAM_FILE_SUFFIX</c>.
/// </exception>
public void OutputMTReads()
{
    // The (paramName, message) overload is used so the text below becomes the
    // exception message; the single-string overload treats its argument as the
    // parameter name.
    if (String.IsNullOrEmpty(Filename))
    {
        throw new ArgumentNullException("Filename", "No input file specified");
    }

    if (!Filename.EndsWith(BAM_FILE_SUFFIX, StringComparison.Ordinal))
    {
        // Exception type kept as-is so existing catch blocks keep matching.
        throw new ArgumentNullException("Filename", "Input file must be a .BAM file");
    }

    if (string.IsNullOrEmpty(OutputFile))
    {
        OutputFile = Filename.Remove(Filename.Length - BAM_FILE_SUFFIX.Length) + DEFAULT_EXPORT_SUFFIX;
    }

    IEnumerable<ISequence> mtReads = CRSAlignedOnly
        ? MitoDataGrabber.OutputMitoReadsFromBamFileAlignedToCRSOnly(Filename, pfractionToOutput)
        : MitoDataGrabber.OutputMitoReadsFromBamFile(Filename);

    long count = 0;
    FastAFormatter fao = new FastAFormatter(OutputFile);
    try
    {
        foreach (ISequence seq in mtReads)
        {
            count++;
            fao.Write(seq);
        }
    }
    finally
    {
        // Close the output even if reading or writing a sequence fails.
        fao.Close();
    }

    FileInfo outputInfo = new FileInfo(OutputFile);
    Console.WriteLine("Wrote " + count.ToString() + " reads to output file.");
    Console.WriteLine("Of Size: " + GetMTDataFromBAM.Program.FormatMemorySize(outputInfo.Length));
}
/// <summary>
/// Does the logic behind the sequence simulation: generates subsequences of
/// <c>SequenceToSplit</c> and writes them to numbered FASTA files named
/// "&lt;output path without extension&gt;_&lt;index&gt;.fa", each holding at most
/// <c>settings.OutputSequenceCount</c> sequences.
/// </summary>
/// <param name="window">Window that receives the statistics and the completion notification.</param>
/// <param name="outputFileName">Path from which the output directory and file name prefix are taken.</param>
/// <param name="settings">Simulation settings (coverage, sequence length, sequences per file).</param>
/// <exception cref="ArgumentException">
/// The output directory does not exist, or the sequence count / sequence length setting is not positive.
/// </exception>
internal void DoSimulation(SimulatorWindow window, string outputFileName, SimulatorSettings settings)
{
    const string filePostfix = "_{0}.fa";

    FileInfo file = new FileInfo(outputFileName);
    if (file.Directory == null || !file.Directory.Exists)
    {
        throw new ArgumentException("Could not write to the output directory for " + outputFileName);
    }

    if (settings.OutputSequenceCount <= 0)
    {
        throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
    }

    if (settings.SequenceLength <= 0)
    {
        throw new ArgumentException("'Mean Output Length' should be greater than zero.");
    }

    // Strip the extension from the END of the path. Searching for the extension
    // text with IndexOf would cut the path short whenever the same text appears
    // earlier in it (for example in a directory name).
    string filePrefix = String.IsNullOrEmpty(file.Extension)
        ? file.FullName
        : file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

    long seqCount = (settings.DepthOfCoverage * SequenceToSplit.Count) / settings.SequenceLength;
    long fileCount = seqCount / settings.OutputSequenceCount;
    if (seqCount % settings.OutputSequenceCount != 0)
    {
        fileCount++;
    }

    window.UpdateSimulationStats(seqCount, fileCount);

    if (generatedSequenceList == null)
    {
        generatedSequenceList = new List<ISequence>();
    }
    else
    {
        generatedSequenceList.Clear();
    }

    int fileIndex = 1;
    for (long i = 0; i < seqCount; i++)
    {
        generatedSequenceList.Add(CreateSubsequence(settings, i));

        // A full batch goes to its own file.
        if (generatedSequenceList.Count >= settings.OutputSequenceCount)
        {
            WriteSimulatedSequences(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
            generatedSequenceList.Clear();
        }
    }

    if (generatedSequenceList.Count > 0)
    {
        // Remaining sequences that did not fill a whole batch.
        FastAFormatter lastFormatter = WriteSimulatedSequences(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
        window.NotifySimulationComplete(lastFormatter.Name);
    }
    else
    {
        // NOTE(review): an empty string is reported here even when full batches
        // were written above - confirm this is what the window expects.
        window.NotifySimulationComplete(string.Empty);
    }
}

/// <summary>
/// Writes the sequences to a new FASTA file at the given path and returns the (closed) formatter.
/// </summary>
private static FastAFormatter WriteSimulatedSequences(string path, IEnumerable<ISequence> sequences)
{
    FileInfo outFile = new FileInfo(path);
    FastAFormatter formatter = new FastAFormatter(outFile.FullName);
    try
    {
        foreach (ISequence seq in sequences)
        {
            formatter.Write(seq);
        }
    }
    finally
    {
        // Close the file even if a write fails.
        formatter.Close();
    }

    return formatter;
}
/// <summary>
/// Does the logic behind the sequence simulation: generates subsequences of
/// <c>SequenceToSplit</c> and writes them to numbered FASTA files named
/// "&lt;output path without extension&gt;_&lt;index&gt;.fa", each holding at most
/// <c>Settings.OutputSequenceCount</c> sequences.
/// </summary>
/// <param name="outputFileName">Path from which the output directory and file name prefix are taken.</param>
/// <param name="updateSimulationStats">Invoked once with the total sequence count and the file count.</param>
/// <param name="simulationComplete">
/// Invoked once at the end with the Name of the last formatter used, or an empty
/// string when no file was written.
/// </param>
/// <exception cref="ArgumentException">
/// The output directory does not exist, or the sequence count / sequence length setting is not positive.
/// </exception>
public void DoSimulation(string outputFileName, Action<long, long> updateSimulationStats, Action<string> simulationComplete)
{
    const string filePostfix = "_{0}.fa";

    FileInfo file = new FileInfo(outputFileName);
    if (file.Directory == null || !file.Directory.Exists)
    {
        throw new ArgumentException("Could not write to the output directory for " + outputFileName);
    }

    if (Settings.OutputSequenceCount <= 0)
    {
        throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
    }

    if (Settings.SequenceLength <= 0)
    {
        throw new ArgumentException("'Mean Output Length' should be greater than zero.");
    }

    // Strip the extension from the END of the path. Searching for the extension
    // text with IndexOf would cut the path short whenever the same text appears
    // earlier in it (for example in a directory name).
    string filePrefix = String.IsNullOrEmpty(file.Extension)
        ? file.FullName
        : file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

    long seqCount = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
    long fileCount = seqCount / Settings.OutputSequenceCount;
    if (seqCount % Settings.OutputSequenceCount != 0)
    {
        fileCount++;
    }

    // Update the UI
    updateSimulationStats(seqCount, fileCount);

    int fileIndex = 1;
    FastAFormatter formatter = null;
    List<ISequence> generatedSequenceList = new List<ISequence>();

    for (long i = 0; i < seqCount; i++)
    {
        generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

        // A full batch goes to its own file.
        if (generatedSequenceList.Count >= Settings.OutputSequenceCount)
        {
            formatter = WriteFastaBatch(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
            generatedSequenceList.Clear();
        }
    }

    // Pick off any remaining sequences into the final file.
    if (generatedSequenceList.Count > 0)
    {
        formatter = WriteFastaBatch(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
        simulationComplete(formatter.Name);
    }
    // Either we ended exactly on the boundary with no additional sequences
    // generated, OR we never generated any files.
    else
    {
        simulationComplete(formatter != null ? formatter.Name : string.Empty);
    }
}

/// <summary>
/// Writes the sequences to a new FASTA file at the given path and returns the (closed) formatter.
/// </summary>
private static FastAFormatter WriteFastaBatch(string path, IEnumerable<ISequence> sequences)
{
    FileInfo outFile = new FileInfo(path);
    FastAFormatter formatter = new FastAFormatter(outFile.FullName);
    try
    {
        foreach (ISequence seq in sequences)
        {
            formatter.Write(seq);
        }
    }
    finally
    {
        // Close the file even if a write fails.
        formatter.Close();
    }

    return formatter;
}
/// <summary>
/// Parses the FASTA file configured under <paramref name="nodeName"/>, writes every
/// sequence to a temp file with <see cref="FastAFormatter"/>, parses the temp file
/// again and verifies that the sequence count, IDs and residues are unchanged.
/// </summary>
/// <param name="nodeName">Xml node that supplies the file path and the alphabet name.</param>
void ValidateParseFormatGeneralTestCases(string nodeName)
{
    // Gets the expected sequence from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(filePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : File Exists in the Path '{0}'.", filePath));

    string filepathTmp = "tmp.ffn";
    using (FastAParser parserObj = new FastAParser(filePath))
    {
        using (FastAFormatter formatter = new FastAFormatter(filepathTmp))
        {
            // Read the original file. The result is materialised once so the
            // comparison loop below indexes a list instead of re-running the
            // parse for every ElementAt()/Count() call.
            parserObj.Alphabet = Utility.GetAlphabet(alphabet);
            IEnumerable<ISequence> parsedOriginal = parserObj.Parse();
            Assert.IsNotNull(parsedOriginal);
            List<ISequence> seqsOriginal = parsedOriginal.ToList();

            // Use the formatter to write the original sequences to a temp file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : Creating the Temp file '{0}'.", filepathTmp));
            foreach (ISequence s in seqsOriginal)
            {
                formatter.Write(s);
            }

            formatter.Close();

            using (FastAParser parserObjNew = new FastAParser(filepathTmp))
            {
                // Read the new file, then compare the sequences
                parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
                IEnumerable<ISequence> parsedNew = parserObjNew.Parse();
                Assert.IsNotNull(parsedNew);
                List<ISequence> seqsNew = parsedNew.ToList();

                string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : New Sequence is '{0}'.", newSequence));

                // Now compare the sequences.
                int countOriginal = seqsOriginal.Count;
                int countNew = seqsNew.Count;
                Assert.AreEqual(countOriginal, countNew);
                ApplicationLog.WriteLine("FastA Formatter :The Number of sequences are matching.");

                for (int i = 0; i < countOriginal; i++)
                {
                    ISequence original = seqsOriginal[i];
                    ISequence roundTripped = seqsNew[i];

                    Assert.AreEqual(original.ID, roundTripped.ID);
                    string orgSeq = new string(original.Select(a => (char)a).ToArray());
                    string newSeq = new string(roundTripped.Select(a => (char)a).ToArray());
                    Assert.AreEqual(orgSeq, newSeq);

                    Console.WriteLine(
                        string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method and is as expected.", roundTripped.ID));
                    ApplicationLog.WriteLine(
                        string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method.", roundTripped.ID));
                }

                parserObjNew.Close();
            }

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(filepathTmp);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
    }
}
/// <summary>
/// Builds a sequence from the text configured under <paramref name="nodeName"/>
/// (with carriage returns, line feeds and spaces removed), writes it to the temp
/// FASTA file, parses the file back and checks that one sequence with the same ID
/// and residues is returned.
/// </summary>
/// <param name="nodeName">Xml node that supplies the expected sequence and the alphabet name.</param>
void ValidateFormatterGeneralTestCases(string nodeName)
{
    using (FastAFormatter fastaWriter = new FastAFormatter(Constants.FastaTempFileName))
    {
        // Expected sequence text and alphabet name come from the Xml configuration.
        string actualSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

        // The xml formatting adds line breaks and spaces; strip them once and use
        // the cleaned text both to build the sequence and as the expected value.
        string formattedActualSequence = actualSequence
            .Replace("\r", "")
            .Replace("\n", "")
            .Replace(" ", "");
        string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

        // Record what is being validated.
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alphabet));

        Sequence seqOriginal = new Sequence(
            Utility.GetAlphabet(alphabet),
            encodingObj.GetBytes(formattedActualSequence));
        seqOriginal.ID = "";
        Assert.IsNotNull(seqOriginal);

        // Write the sequence out through the formatter.
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        fastaWriter.Write(seqOriginal);
        fastaWriter.Close();

        // Parse the temp file and compare against what was written.
        using (FastAParser fastaReader = new FastAParser(Constants.FastaTempFileName))
        {
            fastaReader.Alphabet = Utility.GetAlphabet(alphabet);
            IEnumerable<ISequence> seqsNew = fastaReader.Parse();

            string newSequence = new string(seqsNew.ElementAt(0).Select(a => (char)a).ToArray());
            Assert.IsNotNull(seqsNew);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : New Sequence is '{0}'.", newSequence));

            // Exactly one sequence is expected back.
            Assert.AreEqual(1, seqsNew.Count());
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            Assert.AreEqual(seqOriginal.ID, seqsNew.ElementAt(0).ID);
            Assert.AreEqual(formattedActualSequence, newSequence);

            Console.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method and is as expected.", newSequence));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method.", newSequence));
        }

        // Every check passed, so the temp file can go. After a failed Assert it is
        // left on disk for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Calls Close() on a <see cref="FastAFormatter"/> that was created without a file
/// or stream and never opened.
/// </summary>
public void FastAFormatterInvalidateClose()
{
    // No Open() call precedes the Close() on purpose.
    new FastAFormatter().Close();
}
/// <summary>
/// Save to disk a list of sequences in FASTA format.
/// </summary>
/// <param name="sequences">Sequences to write; must not be null.</param>
/// <param name="saveFilename">Target file name; must not be null, empty or whitespace.</param>
/// <param name="appendSequenceCountToFilename">
/// When true, the file name is first passed through <c>AddSequenceAndProteinCountToFilename</c>.
/// </param>
/// <param name="fileExistsOptions">
/// What to do when the target file already exists: pick the next free file name,
/// overwrite the file, or skip saving.
/// </param>
/// <param name="progressActionSet">Optional progress sink; when null nothing is reported.</param>
/// <returns>The full path of the file that was written, or null when saving was skipped.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="sequences"/> is null, or <paramref name="saveFilename"/> is null, empty or whitespace.
/// </exception>
public static string SaveSequencesAsFasta(List<ISequence> sequences, string saveFilename, bool appendSequenceCountToFilename = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename, ProgressActionSet progressActionSet = null)
{
    if (sequences == null)
    {
        throw new ArgumentOutOfRangeException(nameof(sequences));
    }

    if (string.IsNullOrWhiteSpace(saveFilename))
    {
        throw new ArgumentOutOfRangeException(nameof(saveFilename));
    }

    if (appendSequenceCountToFilename)
    {
        saveFilename = AddSequenceAndProteinCountToFilename(sequences, saveFilename);
    }

    var fileInfo = new FileInfo(saveFilename);

    // Decide what to do about an existing file, and which progress message applies.
    string progressMessage = null;
    bool skip = false;
    if (!fileInfo.Exists)
    {
        progressMessage = "Save sequence: new file: " + fileInfo.FullName;
    }
    else
    {
        switch (fileExistsOptions)
        {
            case FileExistsHandler.FileExistsOptions.AppendNumberToFilename:
                fileInfo = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(fileInfo.FullName));
                progressMessage = "Save sequence: already exists, appended number: " + fileInfo.FullName;
                break;

            case FileExistsHandler.FileExistsOptions.OverwriteFile:
                progressMessage = "Save sequence: overwriting file: " + fileInfo.FullName;
                break;

            case FileExistsHandler.FileExistsOptions.SkipFile:
                progressMessage = "Save sequence: skipped file, already exists: " + fileInfo.FullName;
                skip = true;
                break;
        }
    }

    if (progressMessage != null && progressActionSet != null)
    {
        ProgressActionSet.Report(progressMessage, progressActionSet);
    }

    if (skip)
    {
        return null;
    }

    // make sure directory exists
    if (fileInfo.Directory != null)
    {
        fileInfo.Directory.Create();
    }

    var formatter = new FastAFormatter(fileInfo.FullName);
    try
    {
        formatter.Write(sequences);
    }
    finally
    {
        // Close the file even if writing fails.
        formatter.Close();
    }

    return fileInfo.FullName;
}