Пример #1
0
        /// <summary>
        /// The execution method for the activity: writes the configured sequence data to
        /// <c>OutputFile</c> through a <c>FastAFormatter</c>.
        /// </summary>
        /// <remarks>
        /// Every entry of <c>SequenceList</c> (when it is not null) is written first, followed by
        /// <c>Sequence</c> (when it is not null). When both are null the file is opened and closed
        /// without any sequence being written.
        /// </remarks>
        /// <param name="executionContext">The execution context (not read by this method).</param>
        /// <returns>Always <c>ActivityExecutionStatus.Closed</c>.</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            FastAFormatter formatter = new FastAFormatter();

            formatter.Open(OutputFile);

            try
            {
                // The list always precedes the single sequence in the output.
                if (SequenceList != null)
                {
                    foreach (ISequence sequence in SequenceList)
                    {
                        formatter.Write(sequence);
                    }
                }

                if (Sequence != null)
                {
                    formatter.Write(Sequence);
                }
            }
            finally
            {
                // Close even when a write throws so the output file is not left open.
                formatter.Close();
            }

            return ActivityExecutionStatus.Closed;
        }
Пример #2
0
        /// <summary>
        /// Emits the given sequences either to <c>OutputFile</c> through a
        /// <c>FastAFormatter</c> or, when no output file is configured, to the
        /// <c>Output</c> channel as an ID line followed by a symbol-string line.
        /// </summary>
        /// <param name="sequences">Sequences to emit; enumerated once.</param>
        private void WriteSequences(IEnumerable <ISequence> sequences)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                // No file configured: list every sequence on the results channel.
                Output.WriteLine(OutputLevel.Information, "Results:");

                foreach (ISequence current in sequences)
                {
                    Output.WriteLine(OutputLevel.Results, current.ID);

                    // Each sequence item is cast to a char to build the printable string.
                    char[] symbols = current.Select(symbol => (char)symbol).ToArray();
                    Output.WriteLine(OutputLevel.Results, new string(symbols));
                }

                return;
            }

            int written = 0;
            using (FastAFormatter fastaWriter = new FastAFormatter(this.OutputFile))
            {
                fastaWriter.AutoFlush = true;
                foreach (ISequence current in sequences)
                {
                    fastaWriter.Write(current);
                    written++;
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
        }
Пример #3
0
        /// <summary>
        /// Convert a list of ISequences to FASTA format and write to file. In order to reduce the amount of compute time required
        /// by BLAST, we limit the number of sequences being fed to BLAST.
        /// </summary>
        /// <param name="sequences">IEnumerable list of Sequence objects</param>
        /// <param name="output">Name of the output FASTA file</param>
        /// <param name="maxSequences">
        /// Maximum number of sequences to convert. The limit is checked after each write, so a
        /// value below 1 still results in one sequence being written (when one is available).
        /// </param>
        /// <param name="overwrite">If true, any existing file with the same name will be overwritten. Otherwise, a non-empty existing file is left untouched and conversion is skipped.</param>
        /// <returns>True if a Fasta file was written, false if a non-empty file already exists and <paramref name="overwrite"/> is false</returns>
        public static bool ConvertToFASTA(IEnumerable <ISequence> sequences, string output, int maxSequences, bool overwrite = false)
        {
            // Skip conversion only when a non-empty file is already present and the
            // caller did not ask for it to be overwritten.
            bool usableFileExists = File.Exists(output) && new FileInfo(output).Length != 0;
            if (usableFileExists && !overwrite)
            {
                return false;
            }

            // The using block releases the file handle even if enumeration or a write throws.
            using (FastAFormatter fa = new FastAFormatter(output))
            {
                int count = 0;
                foreach (var seqObj in sequences)
                {
                    fa.Write(seqObj);
                    ++count;

                    if (count >= maxSequences)
                    {
                        break;
                    }
                }
            }

            return true;
        }
 /// <summary>
 /// Writes the greedy-path assembly to "&lt;prefix&gt;BestGreedyAssembly.fna" and the lines
 /// produced by the haplotype search to "&lt;prefix&gt;Report.txt".
 /// </summary>
 /// <param name="fileNamePrefix">Prefix prepended to both output file names.</param>
 /// <returns>
 /// The report returned by <c>HaplotypeSearcher.GetHaplotypeReport</c>, or null when
 /// <c>SuccessfulAssembly</c> is false (in which case nothing is written).
 /// </returns>
 public HaploGrepSharp.NewSearchMethods.HaploTypeReport OutputAssembly(string fileNamePrefix)
 {
     if (!SuccessfulAssembly)
     {
         return null;
     }

     var bestAssembly = GreedyPathAssembly;
     bestAssembly.FinalizeAndOrientToReference();
     Bio.Sequence s = new Bio.Sequence(bestAssembly.Sequence);
     s.ID = "GreedyAssembly - length=" + AssemblyLength.ToString();

     // Dispose the formatter even if the write fails so the .fna handle is released.
     using (FastAFormatter fa = new FastAFormatter(fileNamePrefix + "BestGreedyAssembly.fna"))
     {
         fa.Write(s);
     }

     // Now report all differences as well. The writer is opened before the search
     // runs and is disposed even if the search throws.
     using (StreamWriter sw = new StreamWriter(fileNamePrefix + "Report.txt"))
     {
         var           searcher     = new HaploGrepSharp.NewSearchMethods.HaplotypeSearcher();
         List <string> linesToWrite = new List <string> ();
         var           report       = searcher.GetHaplotypeReport(s, linesToWrite, fileNamePrefix);
         foreach (var l in linesToWrite)
         {
             sw.WriteLine(l);
         }

         return report;
     }
 }
Пример #5
0
        /// <summary>
        /// Writes a single sequence (taken from the test XML) to a temp file with
        /// FastAFormatter, parses the file back with FastAParser, and checks that exactly
        /// one sequence with the same ID and the same symbols comes back.
        /// </summary>
        public void FastAFormatterValidateWrite()
        {
            using (FastAFormatter formatter = new FastAFormatter(Constants.FastaTempFileName))
            {
                // Gets the actual sequence and the alphabet from the Xml
                string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                        Constants.ExpectedSequenceNode);
                string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                 Constants.AlphabetNameNode);
                // Logs information to the log file
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                       actualSequence, alpName));
                Sequence seqOriginal = new Sequence(Utility.GetAlphabet(alpName),
                                                    actualSequence);
                seqOriginal.ID = "";
                Assert.IsNotNull(seqOriginal);
                // Use the formatter to write the original sequences to a temp file
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
                formatter.Write(seqOriginal);

                // Close before reading so the parser sees the complete file.
                formatter.Close();

                // Read the new file, then compare the sequences
                using (FastAParser parser = new FastAParser(Constants.FastaTempFileName))
                {
                    parser.Alphabet = Alphabets.Protein;

                    IEnumerable <ISequence> parsed = parser.Parse();
                    Assert.IsNotNull(parsed);

                    // Materialize once so the parse result is enumerated a single time
                    // instead of once per ElementAt/Count call.
                    List <ISequence> seqsNew = parsed.ToList();

                    // Check the count before indexing so an empty result is reported
                    // as an assertion failure.
                    Assert.AreEqual(1, seqsNew.Count);
                    ApplicationLog.WriteLine("The Number of sequences are matching.");

                    string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "FastA Formatter BVT: New Sequence is '{0}'.",
                                                           newSequence));

                    Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);

                    // Compare against the sequence that was written, not against a
                    // second copy of the parsed data.
                    Assert.AreEqual(actualSequence, newSequence);
                    Console.WriteLine(string.Format((IFormatProvider)null,
                                                    "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method and is as expected.",
                                                    newSequence));

                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.",
                                                           newSequence));
                }

                // Passed all the tests, delete the tmp file. If we failed an Assert,
                // the tmp file will still be there in case we need it for debugging.
                File.Delete(Constants.FastaTempFileName);
                ApplicationLog.WriteLine("Deleted the temp file created.");
            }
        }
Пример #6
0
        /// <summary>
        /// Writes the assembled contigs either to <c>ContigFileName</c> (when
        /// <c>DiagnosticFilePrefix</c> is set) or to standard output.
        /// </summary>
        /// <param name="assembly">Assembly whose <c>AssembledSequences</c> are written.</param>
        protected void writeContigs(PadenaAssembly assembly)
        {
            if (assembly.AssembledSequences.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "\tNo sequences assembled.");
                return;
            }

            ensureContigNames(assembly.AssembledSequences);

            if (string.IsNullOrEmpty(this.DiagnosticFilePrefix))
            {
                // No diagnostic prefix: stream the contigs to the console.
                Output.WriteLine(OutputLevel.Information, "\tAssembled Sequence Results: {0} sequences", assembly.AssembledSequences.Count);
                using (var consoleFormatter = new FastAFormatter())
                {
                    consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));
                    consoleFormatter.MaxSymbolsAllowedPerLine = decideOutputWidth();
                    consoleFormatter.AutoFlush = true;
                    foreach (ISequence contig in assembly.AssembledSequences)
                    {
                        consoleFormatter.Write(contig);
                    }
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(ContigFileName))
            {
                fileFormatter.AutoFlush = true;
                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    fileFormatter.Write(contig);
                }
            }

            Output.WriteLine(OutputLevel.Information, "\tWrote {0} sequences to {1}", assembly.AssembledSequences.Count, ContigFileName);
        }
Пример #7
0
        /// <summary>
        ///     Validates general FastA Parser test cases which are further Formatted
        ///     with the xml node name specified: the file named in the node is parsed,
        ///     written to a temp file with FastAFormatter, parsed again, and the two
        ///     sequence lists are compared.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateParseFormatGeneralTestCases(string nodeName)
        {
            // Gets the expected sequence from the Xml
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

            Assert.IsTrue(File.Exists(filePath));
            string filepathTmp = Path.Combine(Path.GetTempPath(), "temp.fasta");

            // Ensure output is deleted
            if (File.Exists(filepathTmp))
            {
                File.Delete(filepathTmp);
            }

            List <ISequence> seqsOriginal;

            using (var parserObj = new FastAParser(filePath))
            {
                // Read the original file
                parserObj.Alphabet = Utility.GetAlphabet(alphabet);
                seqsOriginal       = parserObj.Parse().ToList();
                Assert.IsFalse(seqsOriginal.Count == 0);
            }

            try
            {
                // Write to a new file. This happens inside the try so the temp file is
                // removed even when the write itself fails part-way.
                using (var formatter = new FastAFormatter(filepathTmp))
                {
                    formatter.Write(seqsOriginal);
                }

                // Compare original with new file
                using (var parserObjNew = new FastAParser(filepathTmp))
                {
                    // Read the new file, then compare the sequences
                    parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
                    IEnumerable <ISequence> parsed = parserObjNew.Parse();
                    Assert.IsNotNull(parsed);

                    List <ISequence> seqsNew = parsed.ToList();

                    // Check the count first so a surplus or missing sequence is reported
                    // as an assertion failure rather than an out-of-range index.
                    Assert.AreEqual(seqsOriginal.Count, seqsNew.Count, "Number of sequences is different.");

                    for (int i = 0; i < seqsOriginal.Count; i++)
                    {
                        string s1 = seqsOriginal[i].ConvertToString();
                        string s2 = seqsNew[i].ConvertToString();
                        Assert.AreEqual(s1, s2);
                    }
                }
            }
            finally
            {
                // Delete new file
                File.Delete(filepathTmp);
            }
        }
        /// <summary>
        /// Exports a given list of sequences to a file in FastA format
        /// </summary>
        /// <param name="sequences">List of Sequences to be exported.</param>
        /// <param name="filename">Target filename.</param>
        static void ExportFastA(ICollection <ISequence> sequences, string filename)
        {
            // Dispose the formatter when done so the file handle is released,
            // including when the write throws.
            using (FastAFormatter formatter = new FastAFormatter(filename))
            {
                // Exports the sequences to a file
                formatter.Write(sequences);
            }
        }
        /// <summary>
        /// Exports a given sequence to a file in FastA format
        /// </summary>
        /// <param name="sequence">Sequence to be exported.</param>
        /// <param name="filename">Target filename.</param>
        static void ExportFastA(ISequence sequence, string filename)
        {
            // Dispose the formatter when done so the file handle is released,
            // including when the write throws.
            using (FastAFormatter formatter = new FastAFormatter(filename))
            {
                // Exports the sequence to a file
                formatter.Write(sequence);
            }
        }
Пример #10
0
        /// <summary>
        /// Writes one sequence (taken from the test XML) through a FastAFormatter opened
        /// on a StreamWriter, parses the temp file back from a StreamReader, and checks
        /// that the first parsed sequence has the same symbols as the one written.
        /// </summary>
        public void FastAFormatterValidateWriteWithStream()
        {
            string expectedSymbols = string.Empty;

            using (FastAFormatter fastaFormatter = new FastAFormatter())
            {
                using (StreamWriter tempWriter = new StreamWriter(Constants.FastaTempFileName))
                {
                    fastaFormatter.Open(tempWriter);

                    // Pull the sequence text and its alphabet name from the test XML.
                    expectedSymbols = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                      Constants.ExpectedSequenceNode);
                    string alphabetName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                          Constants.AlphabetNameNode);

                    ApplicationLog.WriteLine(string.Format(null,
                                                           "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                           expectedSymbols, alphabetName));

                    Sequence written = new Sequence(Utility.GetAlphabet(alphabetName), expectedSymbols)
                    {
                        ID = ""
                    };
                    Assert.IsNotNull(written);

                    // Send the sequence through the formatter into the stream.
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                           Constants.FastaTempFileName));
                    fastaFormatter.Write(written);
                    fastaFormatter.Close();
                }

                // Read the temp file back and compare with what was written.
                using (StreamReader tempReader = new StreamReader(Constants.FastaTempFileName))
                using (FastAParser fastaParser = new FastAParser())
                {
                    fastaParser.Alphabet = Alphabets.Protein;

                    List <ISequence> parsedSequences = fastaParser.Parse(tempReader).ToList();
                    Assert.IsNotNull(parsedSequences);

                    char[] parsedChars = parsedSequences[0].Select(symbol => (char)symbol).ToArray();
                    Assert.AreEqual(expectedSymbols, new string(parsedChars));
                }

                // All checks passed, so remove the temp file. After a failed Assert the
                // file stays on disk, which helps with debugging.
                File.Delete(Constants.FastaTempFileName);
                ApplicationLog.WriteLine("Deleted the temp file created.");
            }
        }
Пример #11
0
 /// <summary>
 /// Writes every given sequence to <c>OutputFile</c> through a FastAFormatter.
 /// </summary>
 /// <param name="sequences">Sequences to write; enumerated once.</param>
 private void WriteSequences(IEnumerable <ISequence> sequences)
 {
     using (var fastaWriter = new FastAFormatter(this.OutputFile))
     {
         foreach (ISequence item in sequences)
         {
             fastaWriter.Write(item);
         }
     }
 }
Пример #12
0
        /// <summary>
        /// Writes the sequence described by the given XML node to a temp file with
        /// FastAFormatter, parses it back, and checks that exactly one sequence with the
        /// same ID and symbols is returned.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateFormatterGeneralTestCases(string nodeName)
        {
            // Gets the actual sequence and the alphabet from the Xml
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

            // Strip the carriage returns, line feeds and spaces from the XML text.
            string formattedSequence = expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");

            string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                   expectedSequence, alphabet));

            ISequence seqOriginal = new Sequence(Utility.GetAlphabet(alphabet), formattedSequence)
            {
                ID = "test"
            };

            Assert.IsNotNull(seqOriginal);

            // Write it to a file
            using (var formatter = new FastAFormatter(Constants.FastaTempFileName))
            {
                // Use the formatter to write the original sequences to a temp file
                ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : Creating the Temp file '{0}'.",
                                                       Constants.FastaTempFileName));
                formatter.Write(seqOriginal);
            }

            // Read the new file, then compare the sequences
            using (var parserObj = new FastAParser(Constants.FastaTempFileName))
            {
                parserObj.Alphabet = Utility.GetAlphabet(alphabet);

                // Materialize once so the parse result is enumerated a single time
                // for both the first-element lookup and the count check.
                List <ISequence> seqsNew = parserObj.Parse().ToList();

                // Get a single sequence
                ISequence seqNew = seqsNew.FirstOrDefault();
                Assert.IsNotNull(seqNew);

                string newSequence = seqNew.ConvertToString();
                ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : New Sequence is '{0}'.", newSequence));
                Assert.AreEqual(formattedSequence, newSequence);
                Assert.AreEqual(seqOriginal.ID, seqNew.ID);

                // Verify only one sequence exists.
                Assert.AreEqual(1, seqsNew.Count);
            }

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
Пример #13
0
        /// <summary>
        /// Writes ambiguous reads that are filtered out to the specified file.
        /// Consumes items from the collection until it is marked complete and drained,
        /// flushing after every read.
        /// </summary>
        /// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
        /// <param name="ambiguousFilename">File to write.</param>
        private static void WriteAmbiguousReads(BlockingCollection <ISequence> ambiguousReads, string ambiguousFilename)
        {
            // The using block releases the file handle even if a write throws.
            using (FastAFormatter formatter = new FastAFormatter(ambiguousFilename))
            {
                // GetConsumingEnumerable blocks while the collection is empty and ends
                // once adding is complete and every item has been taken.
                foreach (ISequence seq in ambiguousReads.GetConsumingEnumerable())
                {
                    formatter.Write(seq);
                    formatter.Flush();
                }
            }
        }
Пример #14
0
        /// <summary>
        /// For every data row of "FileLocations.csv" (file name, patient id, date), writes the
        /// reads returned by <c>MitoDataGrabber.OutputMitoReadsFromBamFile</c> to
        /// "&lt;HOME&gt;&lt;patientid&gt;.fa" and appends a "patientid,count,size,date" row to
        /// "&lt;HOME&gt;CountsByDate.csv". A failure on one row is printed and the loop continues.
        /// </summary>
        /// <param name="args">When more than two arguments are given, only the first successfully processed row is handled.</param>
        static void OldMain(string[] args)
        {
            using (StreamWriter countsWriter = new StreamWriter(HOME + "CountsByDate.csv"))
            {
                // ReadAllLines strips both "\n" and "\r\n" terminators, so no stray '\r'
                // ends up in the last field. The first line is the header.
                string[] lines = File.ReadAllLines("FileLocations.csv").Skip(1).ToArray();

                Console.WriteLine("Starting");
                foreach (string line in lines)
                {
                    // Blank lines carry no record; skip them instead of failing on split[1].
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    Console.WriteLine(line);

                    try
                    {
                        string[] split     = line.Split(',');
                        string   fname     = split[0];
                        string   patientid = split[1];
                        string   date      = split[2];
                        var      mtReads   = MitoDataGrabber.OutputMitoReadsFromBamFile(fname);
                        long     count     = 0;

                        // Dispose the formatter even if reading or writing a sequence throws.
                        using (FastAFormatter fao = new FastAFormatter(HOME + patientid + ".fa"))
                        {
                            foreach (var seq in mtReads)
                            {
                                count++;
                                fao.Write(seq);
                            }
                        }

                        FileInfo inputInfo = new FileInfo(fname);
                        string   size      = inputInfo.Length.ToString();

                        // The loop is sequential, so no lock is needed around the writer.
                        countsWriter.WriteLine(String.Join(",", patientid, count.ToString(), size, date));
                        Console.WriteLine(patientid + " has " + count.ToString() + " reads");

                        if (args.Length > 2)
                        {
                            break;
                        }
                    }
                    catch (Exception thrown)
                    {
                        // Best effort: report the failure for this row and keep going.
                        Console.WriteLine(thrown.Message);
                    }
                }
            }
        }
Пример #15
0
 /// <summary>
 /// Writes the sequences to <c>OutputFile</c> through a FastAFormatter, or prints each
 /// sequence's symbols on its own console line when no output file is set.
 /// </summary>
 /// <param name="sequences">Sequences to write; enumerated once.</param>
 private void WriteSequences(IEnumerable <ISequence> sequences)
 {
     if (string.IsNullOrEmpty(this.OutputFile))
     {
         foreach (ISequence item in sequences)
         {
             // Each sequence item is cast to a char to build the printable string.
             char[] symbols = item.Select(symbol => (char)symbol).ToArray();
             Console.WriteLine(new string(symbols));
         }

         return;
     }

     using (var fastaWriter = new FastAFormatter(this.OutputFile))
     {
         foreach (ISequence item in sequences)
         {
             fastaWriter.Write(item);
         }
     }
 }
Пример #16
0
        /// <summary>
        /// Writes the scaffolds to <c>OutputFile</c>, or to standard output when no
        /// output file is set. Reports and returns early when the list is empty.
        /// </summary>
        /// <param name="scaffolds">The list of scaffolds sequence.</param>
        private void WriteContigs(IList <ISequence> scaffolds)
        {
            if (scaffolds.Count == 0)
            {
                Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
                return;
            }

            EnsureContigNames(scaffolds);

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                // No file configured: stream the scaffolds to the console.
                Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);
                using (var consoleFormatter = new FastAFormatter())
                {
                    consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));
                    consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
                    consoleFormatter.AutoFlush = true;

                    foreach (ISequence scaffold in scaffolds)
                    {
                        consoleFormatter.Write(scaffold);
                    }
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence scaffold in scaffolds)
                {
                    fileFormatter.Write(scaffold);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
        }
Пример #17
0
        /// <summary>
        /// Writes the assembled sequences to <c>OutputFile</c>, or to standard output
        /// when no output file is set. Reports and returns early when nothing was assembled.
        /// </summary>
        /// <param name="assembly">Assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            if (assembly.AssembledSequences.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
                return;
            }

            EnsureContigNames(assembly.AssembledSequences);

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                // No file configured: stream the contigs to the console.
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", assembly.AssembledSequences.Count);
                using (var consoleFormatter = new FastAFormatter())
                {
                    consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));
                    consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
                    consoleFormatter.AutoFlush = true;

                    foreach (ISequence contig in assembly.AssembledSequences)
                    {
                        consoleFormatter.Write(contig);
                    }
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    fileFormatter.Write(contig);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", assembly.AssembledSequences.Count, this.OutputFile);
        }
        /// <summary>
        /// Writes the reads that <c>MitoDataGrabber</c> returns for the BAM file named by
        /// <c>Filename</c> to <c>OutputFile</c>, then prints the read count and output size.
        /// When <c>OutputFile</c> is not set it is derived from <c>Filename</c> by replacing
        /// the BAM suffix with <c>DEFAULT_EXPORT_SUFFIX</c>.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// <c>Filename</c> is null or empty, or does not end with <c>BAM_FILE_SUFFIX</c>.
        /// </exception>
        public void OutputMTReads()
        {
            // The two-argument constructor is (paramName, message); the single-argument
            // form treats its argument as the parameter name, not as the message.
            if (String.IsNullOrEmpty(Filename))
            {
                throw new ArgumentNullException("Filename", "No input file specified");
            }
            if (!Filename.EndsWith(BAM_FILE_SUFFIX))
            {
                // Exception type kept as ArgumentNullException so existing catch blocks still match.
                throw new ArgumentNullException("Filename", "Input file must be a .BAM file");
            }
            if (string.IsNullOrEmpty(OutputFile))
            {
                OutputFile = Filename.Remove(Filename.Length - BAM_FILE_SUFFIX.Length) + DEFAULT_EXPORT_SUFFIX;
            }
            IEnumerable <ISequence> mtReads;

            if (CRSAlignedOnly)
            {
                mtReads = MitoDataGrabber.OutputMitoReadsFromBamFileAlignedToCRSOnly(Filename, pfractionToOutput);
            }
            else
            {
                mtReads = MitoDataGrabber.OutputMitoReadsFromBamFile(Filename);
            }

            long count = 0;

            // Dispose the formatter even if reading or writing a sequence throws.
            using (FastAFormatter fao = new FastAFormatter(OutputFile))
            {
                foreach (var seq in mtReads)
                {
                    count++;
                    fao.Write(seq);
                }
            }

            FileInfo FI = new FileInfo(OutputFile);

            Console.WriteLine("Wrote " + count.ToString() + " reads to output file.");
            Console.WriteLine("Of Size: " + GetMTDataFromBAM.Program.FormatMemorySize(FI.Length));
        }
Пример #19
0
        /// <summary>
        /// Writes the sequences to <c>OutputFile</c> through a FastAFormatter, or, when no
        /// output file is set, to the supplied writer as an ID line followed by a
        /// symbol-string line per sequence.
        /// </summary>
        /// <param name="assembly">Sequences to write; enumerated once.</param>
        /// <param name="outputWriter">A TextWriter that receives the output when no output file is set.</param>
        protected void WriteContigs(IEnumerable <ISequence> assembly, TextWriter outputWriter)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                foreach (ISequence contig in assembly)
                {
                    outputWriter.WriteLine(contig.ID);

                    // Each sequence item is cast to a char to build the printable string.
                    char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
                    outputWriter.WriteLine(new string(symbols));
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence contig in assembly)
                {
                    fileFormatter.Write(contig);
                }
            }
        }
Пример #20
0
        /// <summary>
        /// Writes the scaffolds to <c>OutputFile</c> through a FastAFormatter, or, when no
        /// output file is set, to the console as an ID line followed by a symbol-string
        /// line per sequence.
        /// </summary>
        /// <param name="scaffolds">Scaffold sequences to write; enumerated once.</param>
        private void WriteContigs(IEnumerable <ISequence> scaffolds)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                foreach (ISequence scaffold in scaffolds)
                {
                    Console.WriteLine(scaffold.ID);

                    // Each sequence item is cast to a char to build the printable string.
                    char[] symbols = scaffold.Select(symbol => (char)symbol).ToArray();
                    Console.WriteLine(new string(symbols));
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence scaffold in scaffolds)
                {
                    fileFormatter.Write(scaffold);
                }
            }
        }
Пример #21
0
        /// <summary>
        /// Writes the assembled sequences to <c>OutputFile</c> through a FastAFormatter, or,
        /// when no output file is set, to the console as an ID line followed by a
        /// symbol-string line per sequence.
        /// </summary>
        /// <param name="assembly">Assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    Console.WriteLine(contig.ID);

                    // Each sequence item is cast to a char to build the printable string.
                    char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
                    Console.WriteLine(new string(symbols));
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    fileFormatter.Write(contig);
                }
            }
        }
Пример #22
0
        /// <summary>
        /// Writes the sequences to <c>OutputFile</c> through a FastAFormatter, or to the
        /// <c>Output</c> channel when no output file is set. A sequence whose ID is null or
        /// empty first receives an ID from <c>GenerateSequenceId</c>, called with the
        /// sequence's 1-based position in the enumeration.
        /// </summary>
        /// <param name="assembly">Sequences to write; enumerated once. IDs may be assigned in place.</param>
        protected void WriteContigs(IEnumerable <ISequence> assembly)
        {
            int position = 1;

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");
                foreach (ISequence contig in assembly)
                {
                    if (string.IsNullOrEmpty(contig.ID))
                    {
                        contig.ID = GenerateSequenceId(position);
                    }

                    Output.WriteLine(OutputLevel.Results, contig.ID);

                    // Each sequence item is cast to a char to build the printable string.
                    char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
                    Output.WriteLine(OutputLevel.Results, new string(symbols));
                    position++;
                }

                return;
            }

            using (var fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence contig in assembly)
                {
                    if (string.IsNullOrEmpty(contig.ID))
                    {
                        contig.ID = GenerateSequenceId(position);
                    }

                    fileFormatter.Write(contig);
                    position++;
                }
            }

            Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
        }
Пример #23
0
        /// <summary>
        /// Round-trips a FastA file through the formatter: parses the file named by the
        /// test node, writes every sequence to a temporary file, parses that file back and
        /// asserts that the number of sequences, their IDs and their symbols all match.
        /// </summary>
        /// <param name="nodeName">Name of the xml node that holds the file path and alphabet name.</param>
        void ValidateParseFormatGeneralTestCases(string nodeName)
        {
            // Gets the expected sequence from the Xml
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                              Constants.FilePathNode);
            string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                              Constants.AlphabetNameNode);

            Assert.IsTrue(File.Exists(filePath));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "FastA Formatter : File Exists in the Path '{0}'.",
                                                   filePath));
            string filepathTmp = "tmp.ffn";

            using (FastAParser parserObj = new FastAParser(filePath))
            {
                using (FastAFormatter formatter = new FastAFormatter(filepathTmp))
                {
                    // Read the original file. The parsed sequences are materialized once so
                    // that the indexed comparisons below do not re-enumerate the parser
                    // output for every element.
                    parserObj.Alphabet = Utility.GetAlphabet(alphabet);
                    IEnumerable <ISequence> parsedOriginal = parserObj.Parse();
                    Assert.IsNotNull(parsedOriginal);
                    List <ISequence> seqsOriginal = parsedOriginal.ToList();

                    // Use the formatter to write the original sequences to a temp file
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "FastA Formatter : Creating the Temp file '{0}'.",
                                                           filepathTmp));
                    foreach (Sequence s in seqsOriginal)
                    {
                        formatter.Write(s);
                    }

                    // Close the writer before the temp file is parsed back.
                    formatter.Close();

                    using (FastAParser parserObjNew = new FastAParser(filepathTmp))
                    {
                        // Read the new file, then compare the sequences
                        parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
                        IEnumerable <ISequence> parsedNew = parserObjNew.Parse();
                        Assert.IsNotNull(parsedNew);
                        List <ISequence> seqsNew = parsedNew.ToList();

                        // Like the original ElementAt(0), this throws when the temp file is empty.
                        string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());

                        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                               "FastA Formatter : New Sequence is '{0}'.",
                                                               newSequence));

                        // Now compare the sequences.
                        int countOriginal = seqsOriginal.Count;
                        int countNew      = seqsNew.Count;
                        Assert.AreEqual(countOriginal, countNew);
                        ApplicationLog.WriteLine("FastA Formatter :The Number of sequences are matching.");

                        for (int i = 0; i < countOriginal; i++)
                        {
                            ISequence original  = seqsOriginal[i];
                            ISequence roundTrip = seqsNew[i];

                            Assert.AreEqual(original.ID, roundTrip.ID);
                            string orgSeq = new string(original.Select(a => (char)a).ToArray());
                            string newSeq = new string(roundTrip.Select(a => (char)a).ToArray());
                            Assert.AreEqual(orgSeq, newSeq);
                            Console.WriteLine(
                                string.Format((IFormatProvider)null,
                                              "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method and is as expected.",
                                              roundTrip.ID));
                            ApplicationLog.WriteLine(
                                string.Format((IFormatProvider)null, "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method.",
                                              roundTrip.ID));
                        }

                        parserObjNew.Close();
                    }

                    // Passed all the tests, delete the tmp file. If we failed an Assert,
                    // the tmp file will still be there in case we need it for debugging.
                    File.Delete(filepathTmp);
                    ApplicationLog.WriteLine("Deleted the temp file created.");
                }
            }
        }
Пример #24
0
        /// <summary>
        /// Copies the reads of a FastA file to an output file, skipping every read in which
        /// a symbol occurs more than <paramref name="repeatLength"/> times in a row.
        /// Progress is reported on the console.
        /// </summary>
        /// <param name="inputFile">Path of the FastA file to read. If it does not exist, a message is printed and nothing is written.</param>
        /// <param name="outputFile">Path of the FastA file that receives the reads that pass the filter.</param>
        /// <param name="repeatLength">Maximum allowed number of consecutive occurrences of one symbol. With a value of zero or less no read is ever skipped.</param>
        static void FilterTestData(string inputFile, string outputFile,
                                   int repeatLength)
        {
            if (!File.Exists(inputFile))
            {
                Console.WriteLine("Enter Valid File Path.");
                return;
            }

            Console.WriteLine(string.Format("Processing the file '{0}'.", inputFile));

            // Read the inputfile with the help of FastA Parser
            using (FastAParser parserObj = new FastAParser(inputFile))
            {
                using (FastAFormatter outputWriter = new FastAFormatter(outputFile))
                {
                    IEnumerable <ISequence> inputReads = parserObj.Parse();

                    // Going through read by read in a given file
                    foreach (ISequence seq in inputReads)
                    {
                        byte[] actualRead = seq.ToArray();

                        // Number of times the current symbol has repeated its predecessor.
                        int  repeatLenCount = 0;
                        bool ignoreRead     = false;

                        // Compare every symbol with the one before it. Starting at index 1
                        // means an empty or single-symbol read is never indexed out of range
                        // and simply passes the filter.
                        for (int j = 1; j < actualRead.Length; j++)
                        {
                            if (actualRead[j] != actualRead[j - 1])
                            {
                                // A different symbol starts a new run.
                                repeatLenCount = 0;
                                continue;
                            }

                            repeatLenCount++;

                            // The run is now longer than allowed: skip this read.
                            if (repeatLenCount == repeatLength)
                            {
                                Console.WriteLine(string.Format("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped",
                                                                (char)actualRead[j], repeatLength, seq.ID));
                                ignoreRead = true;
                                break;
                            }
                        }

                        Console.WriteLine(string.Format("Read '{0}' Processed.", seq.ID));

                        // Only reads without an over-long run are written to the output file.
                        if (!ignoreRead)
                        {
                            outputWriter.Write(seq);
                        }
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Filtering Completed!!");
        }
Пример #25
0
        /// <summary>
        /// Runs the sequence simulation: creates the subsequences and writes them in batches
        /// of at most <c>settings.OutputSequenceCount</c> to numbered FastA files named
        /// "&lt;output file name without extension&gt;_&lt;index&gt;.fa", with the index starting at 1.
        /// </summary>
        /// <param name="window">Window that is told the sequence and file counts before writing starts and is notified when the simulation has finished.</param>
        /// <param name="outputFileName">Path whose directory and extension-less name are used to build the output file names.</param>
        /// <param name="settings">Simulation settings (depth of coverage, sequence length, sequences per file).</param>
        /// <exception cref="ArgumentException">
        /// The output directory does not exist, or the sequence count per file or the
        /// sequence length is not greater than zero.
        /// </exception>
        internal void DoSimulation(SimulatorWindow window, string outputFileName, SimulatorSettings settings)
        {
            FileInfo file = new FileInfo(outputFileName);

            // Directory is null for a root path; treat that like a missing directory.
            if (file.Directory == null || !file.Directory.Exists)
            {
                throw new ArgumentException("Could not write to the output directory for " + outputFileName);
            }

            if (settings.OutputSequenceCount <= 0)
            {
                throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
            }

            if (settings.SequenceLength <= 0)
            {
                throw new ArgumentException("'Mean Output Length' should be greater than zero.");
            }

            // Strip the extension from the END of the path. Searching for the extension
            // text with IndexOf would cut the path at an earlier occurrence of the same
            // text (for example in a directory name).
            string filePrefix = String.IsNullOrEmpty(file.Extension)
                ? file.FullName
                : file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

            string filePostfix = "_{0}.fa";

            long seqCount  = (settings.DepthOfCoverage * SequenceToSplit.Count) / settings.SequenceLength;
            long fileCount = seqCount / settings.OutputSequenceCount;

            // A partially filled last file still counts as a file.
            if (seqCount % settings.OutputSequenceCount != 0)
            {
                fileCount++;
            }

            window.UpdateSimulationStats(seqCount, fileCount);

            if (generatedSequenceList == null)
            {
                generatedSequenceList = new List <ISequence>();
            }
            else
            {
                generatedSequenceList.Clear();
            }

            int    fileIndex         = 1;
            string lastFormatterName = null;

            for (long i = 0; i < seqCount; i++)
            {
                generatedSequenceList.Add(CreateSubsequence(settings, i));

                bool batchFull    = generatedSequenceList.Count >= settings.OutputSequenceCount;
                bool lastSequence = i == seqCount - 1;

                // Write a file when the batch is full, and once more for whatever is
                // left over after the last sequence.
                if (!batchFull && !lastSequence)
                {
                    continue;
                }

                FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));

                // using releases the file even when writing a sequence throws.
                using (FastAFormatter formatter = new FastAFormatter(outFile.FullName))
                {
                    foreach (ISequence seq in generatedSequenceList)
                    {
                        formatter.Write(seq);
                    }

                    formatter.Close();
                    lastFormatterName = formatter.Name;
                }

                // As before, only a full batch is cleared; a final partial batch stays
                // in generatedSequenceList.
                if (batchFull)
                {
                    generatedSequenceList.Clear();
                }
            }

            // Report the last formatter used. This is also done when the final batch ended
            // exactly on a file boundary; only when no file was written is it empty.
            window.NotifySimulationComplete(lastFormatterName ?? string.Empty);
        }
Пример #26
0
        /// <summary>
        /// Runs the sequence simulation: creates the subsequences and writes them in batches
        /// of at most <c>Settings.OutputSequenceCount</c> to numbered FastA files named
        /// "&lt;output file name without extension&gt;_&lt;index&gt;.fa", with the index starting at 1.
        /// </summary>
        /// <param name="outputFileName">Path whose directory and extension-less name are used to build the output file names.</param>
        /// <param name="updateSimulationStats">Callback invoked once, before any file is written, with the sequence count and the file count.</param>
        /// <param name="simulationComplete">Callback invoked once at the end with the <c>Name</c> of the last formatter used, or an empty string when no file was written.</param>
        /// <exception cref="ArgumentException">
        /// The output directory does not exist, or the sequence count per file or the
        /// sequence length is not greater than zero.
        /// </exception>
        public void DoSimulation(string outputFileName, Action <long, long> updateSimulationStats, Action <string> simulationComplete)
        {
            const string filePostfix = "_{0}.fa";

            FileInfo file = new FileInfo(outputFileName);

            if (file.Directory == null || !file.Directory.Exists)
            {
                throw new ArgumentException("Could not write to the output directory for " + outputFileName);
            }

            if (Settings.OutputSequenceCount <= 0)
            {
                throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
            }

            if (Settings.SequenceLength <= 0)
            {
                throw new ArgumentException("'Mean Output Length' should be greater than zero.");
            }

            // Strip the extension from the END of the path. Searching for the extension
            // text with IndexOf would cut the path at an earlier occurrence of the same
            // text (for example in a directory name).
            string filePrefix = String.IsNullOrEmpty(file.Extension)
                ? file.FullName
                : file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

            long seqCount  = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
            long fileCount = seqCount / Settings.OutputSequenceCount;

            // A partially filled last file still counts as a file.
            if (seqCount % Settings.OutputSequenceCount != 0)
            {
                fileCount++;
            }

            // Update the UI
            updateSimulationStats(seqCount, fileCount);

            int              fileIndex             = 1;
            string           lastFormatterName     = null;
            List <ISequence> generatedSequenceList = new List <ISequence>();

            for (long i = 0; i < seqCount; i++)
            {
                generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

                bool batchFull    = generatedSequenceList.Count >= Settings.OutputSequenceCount;
                bool lastSequence = i == seqCount - 1;

                // Write a file when the batch is full, and once more to pick off any
                // remaining sequences after the last one was generated.
                if (!batchFull && !lastSequence)
                {
                    continue;
                }

                FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));

                // using releases the file even when writing a sequence throws.
                using (FastAFormatter formatter = new FastAFormatter(outFile.FullName))
                {
                    foreach (ISequence seq in generatedSequenceList)
                    {
                        formatter.Write(seq);
                    }

                    formatter.Close();
                    lastFormatterName = formatter.Name;
                }

                generatedSequenceList.Clear();
            }

            // Either at least one file was written, or we never generated any files.
            simulationComplete(lastFormatterName ?? string.Empty);
        }
        /// <summary>
        ///     Save to disk a list of sequences in FASTA format.
        /// </summary>
        /// <param name="sequences">The sequences to save. Must not be null; an empty list is accepted.</param>
        /// <param name="saveFilename">Path of the file to write. Must not be null, empty or white space.</param>
        /// <param name="appendSequenceCountToFilename">When true, the file name is first passed through <c>AddSequenceAndProteinCountToFilename</c>.</param>
        /// <param name="fileExistsOptions">What to do when the target file already exists: pick the next free file name, overwrite the file, or skip saving.</param>
        /// <param name="progressActionSet">Optional receiver of progress messages; nothing is reported when null.</param>
        /// <returns>The full path of the file that was written, or null when saving was skipped because the file already exists.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="sequences"/> is null, or <paramref name="saveFilename"/> is null, empty or white space.</exception>
        public static string SaveSequencesAsFasta(List <ISequence> sequences, string saveFilename, bool appendSequenceCountToFilename = true, FileExistsHandler.FileExistsOptions fileExistsOptions = FileExistsHandler.FileExistsOptions.AppendNumberToFilename, ProgressActionSet progressActionSet = null)
        {
            // NOTE: the exception type is kept for callers that already catch it.
            if (sequences == null)
            {
                throw new ArgumentOutOfRangeException(nameof(sequences));
            }

            if (string.IsNullOrWhiteSpace(saveFilename))
            {
                throw new ArgumentOutOfRangeException(nameof(saveFilename));
            }

            if (appendSequenceCountToFilename)
            {
                saveFilename = AddSequenceAndProteinCountToFilename(sequences, saveFilename);
            }

            var fileInfo = new FileInfo(saveFilename);

            if (fileInfo.Exists)
            {
                if (fileExistsOptions == FileExistsHandler.FileExistsOptions.AppendNumberToFilename)
                {
                    // Switch to the next unused file name instead of touching the existing file.
                    fileInfo = new FileInfo(FileExistsHandler.FindNextFreeOutputFilename(fileInfo.FullName));

                    if (progressActionSet != null)
                    {
                        ProgressActionSet.Report("Save sequence: already exists, appended number: " + fileInfo.FullName, progressActionSet);
                    }
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.OverwriteFile)
                {
                    if (progressActionSet != null)
                    {
                        ProgressActionSet.Report("Save sequence: overwriting file: " + fileInfo.FullName, progressActionSet);
                    }
                }
                else if (fileExistsOptions == FileExistsHandler.FileExistsOptions.SkipFile)
                {
                    if (progressActionSet != null)
                    {
                        ProgressActionSet.Report("Save sequence: skipped file, already exists: " + fileInfo.FullName, progressActionSet);
                    }

                    // Nothing was written.
                    return(null);
                }
            }
            else
            {
                if (progressActionSet != null)
                {
                    ProgressActionSet.Report("Save sequence: new file: " + fileInfo.FullName, progressActionSet);
                }
            }

            // make sure directory exists
            if (fileInfo.Directory != null)
            {
                fileInfo.Directory.Create();
            }

            // using releases the file handle even when writing the sequences throws.
            using (var formatter = new FastAFormatter(fileInfo.FullName))
            {
                formatter.Write(sequences);
                formatter.Close();
            }

            return(fileInfo.FullName);
        }
Пример #28
0
        /// <summary>
        /// Writes the expected sequence of the given test node to a temporary FastA file,
        /// parses that file back and asserts that exactly one sequence with the same ID and
        /// the same symbols is read.
        /// </summary>
        /// <param name="nodeName">Name of the xml node that holds the expected sequence and alphabet name.</param>
        void ValidateFormatterGeneralTestCases(string nodeName)
        {
            using (FastAFormatter formatter = new FastAFormatter(Constants.FastaTempFileName))
            {
                // Gets the actual sequence and the alphabet from the Xml
                string actualSequence = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                        Constants.ExpectedSequenceNode);

                // Remove the line breaks and spaces added while formatting the xml.
                string formattedActualSequence = actualSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");

                string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                  Constants.AlphabetNameNode);

                // Logs information to the log file
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                       actualSequence, alphabet));

                Sequence seqOriginal = new Sequence(Utility.GetAlphabet(alphabet),
                                                    encodingObj.GetBytes(formattedActualSequence));
                Assert.IsNotNull(seqOriginal);
                seqOriginal.ID = "";

                // Use the formatter to write the original sequences to a temp file
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "FastA Formatter : Creating the Temp file '{0}'.",
                                                       Constants.FastaTempFileName));
                formatter.Write(seqOriginal);

                // Close the writer before the temp file is parsed back.
                formatter.Close();

                // Read the new file, then compare the sequences
                using (FastAParser parserObj = new FastAParser(Constants.FastaTempFileName))
                {
                    parserObj.Alphabet = Utility.GetAlphabet(alphabet);
                    IEnumerable <ISequence> parsed = parserObj.Parse();

                    // Check for null BEFORE the result is used, then read the file once.
                    Assert.IsNotNull(parsed);
                    List <ISequence> seqsNew = parsed.ToList();

                    // Verify the count first so a wrong number of sequences fails with a
                    // clear assertion instead of an index error.
                    Assert.AreEqual(1, seqsNew.Count);

                    string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());

                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "FastA Formatter : New Sequence is '{0}'.",
                                                           newSequence));

                    // Now compare the sequences.
                    ApplicationLog.WriteLine("The Number of sequences are matching.");
                    Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);
                    Assert.AreEqual(formattedActualSequence, newSequence);

                    Console.WriteLine(string.Format((IFormatProvider)null,
                                                    "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method and is as expected.",
                                                    newSequence));
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                           "FastA Formatter : The FASTA sequences '{0}' are matching with Format() method.",
                                                           newSequence));
                }

                // Passed all the tests, delete the tmp file. If we failed an Assert,
                // the tmp file will still be there in case we need it for debugging.
                File.Delete(Constants.FastaTempFileName);
                ApplicationLog.WriteLine("Deleted the temp file created.");
            }
        }