Esempio n. 1
0
 /// <summary>
 /// Writes the given sequences with a FastA formatter to <c>OutputFile</c>, or,
 /// when no output file is configured, writes each ID and its symbols through <c>Output</c>.
 /// A sequence without an ID is given one by <c>GenerateSequenceId</c>, using
 /// its 1-based position in <paramref name="assembly"/>.
 /// </summary>
 /// <param name="assembly">Sequences to write; IDs of unnamed sequences are assigned in place.</param>
 protected void WriteContigs(IEnumerable<ISequence> assembly)
 {
     // 1-based position of the sequence currently being written.
     int position = 1;

     if (string.IsNullOrEmpty(this.OutputFile))
     {
         // No file configured: emit ID and symbols through the Output channel.
         Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");
         foreach (ISequence contig in assembly)
         {
             if (string.IsNullOrEmpty(contig.ID))
             {
                 contig.ID = GenerateSequenceId(position);
             }

             Output.WriteLine(OutputLevel.Results, contig.ID);
             char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
             Output.WriteLine(OutputLevel.Results, new string(symbols));
             position++;
         }

         return;
     }

     var fastaWriter = new FastAFormatter { AutoFlush = true };
     using (fastaWriter.Open(this.OutputFile))
     {
         foreach (ISequence contig in assembly)
         {
             if (string.IsNullOrEmpty(contig.ID))
             {
                 contig.ID = GenerateSequenceId(position);
             }

             fastaWriter.Format(contig);
             position++;
         }
     }

     Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
 }
Esempio n. 2
0
 /// <summary>
 /// Prints each read to the console as the string produced by
 /// <c>FastAFormatter.FormatString</c>.
 /// </summary>
 /// <param name="reads">Reads to write.</param>
 private static void WriteOutput(IEnumerable<ISequence> reads)
 {
     var fastaWriter = new FastAFormatter();
     foreach (ISequence read in reads)
     {
         var formattedRead = fastaWriter.FormatString(read);
         Console.WriteLine(formattedRead);
     }
 }
Esempio n. 3
0
        /// <summary>
        ///     Round-trips the sequence described by the given xml node through a
        ///     temporary FastA file: the sequence is formatted to the file, parsed
        ///     back, and its symbols and ID are compared with the original. Also
        ///     verifies that the file contains exactly one sequence.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateFormatterGeneralTestCases(string nodeName)
        {
            // Gets the actual sequence and the alphabet from the Xml
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

            // Strip the line breaks and spaces added while formatting the xml.
            string formattedSequence = expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");

            string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                   expectedSequence, alphabet));

            ISequence seqOriginal = new Sequence(Utility.GetAlphabet(alphabet), formattedSequence) {ID = "test"};
            Assert.IsNotNull(seqOriginal);

            // Use the formatter to write the original sequence to a temp file
            var formatter = new FastAFormatter();
            ApplicationLog.WriteLine(string.Format("FastA Formatter : Creating the Temp file '{0}'.",
                                                   Constants.FastaTempFileName));
            formatter.Format(seqOriginal, Constants.FastaTempFileName);

            // Read the new file, then compare the sequences
            var parserObj = new FastAParser();
            parserObj.Alphabet = Utility.GetAlphabet(alphabet);

            // Materialize the parse result once. The parser returns a deferred
            // sequence, so calling FirstOrDefault() and Count() on it directly
            // would parse the temp file twice.
            List<ISequence> seqsNew = parserObj.Parse(Constants.FastaTempFileName).ToList();

            // Get a single sequence
            ISequence seqNew = seqsNew.FirstOrDefault();
            Assert.IsNotNull(seqNew);

            string newSequence = seqNew.ConvertToString();
            ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : New Sequence is '{0}'.", newSequence));
            Assert.AreEqual(formattedSequence, newSequence);
            Assert.AreEqual(seqOriginal.ID, seqNew.ID);

            // Verify only one sequence exists.
            Assert.AreEqual(1, seqsNew.Count);

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
Esempio n. 4
0
        /// <summary>
        ///     Validates general FastA Parser test cases which are further Formatted
        ///     with the xml node name specified: the input file is parsed, the
        ///     sequences are written to a temporary FastA file, and that file is
        ///     parsed again and compared sequence by sequence with the original.
        ///     The temporary file is always removed, even when a step fails.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateParseFormatGeneralTestCases(string nodeName)
        {
            // Gets the expected sequence from the Xml
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
            Assert.IsTrue(File.Exists(filePath));
            string filepathTmp = Path.Combine(Path.GetTempPath(), "temp.fasta");

            // Ensure output is deleted
            if (File.Exists(filepathTmp))
            {
                File.Delete(filepathTmp);
            }

            // Read the original file
            var parserObj = new FastAParser();
            parserObj.Alphabet = Utility.GetAlphabet(alphabet);
            List<ISequence> seqsOriginal = parserObj.Parse(filePath).ToList();
            Assert.IsFalse(seqsOriginal.Count == 0);

            try
            {
                // Write to a new file. This happens inside the try block so a
                // partially written temp file is still cleaned up if formatting fails.
                var formatter = new FastAFormatter();
                formatter.Format(seqsOriginal, filepathTmp);

                // Read the new file, then compare the sequences
                var parserObjNew = new FastAParser();
                parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
                IEnumerable<ISequence> seqsNew = parserObjNew.Parse(filepathTmp);
                Assert.IsNotNull(seqsNew);

                int count = 0;
                foreach (ISequence newSequence in seqsNew)
                {
                    // Fail with a meaningful message, rather than an
                    // ArgumentOutOfRangeException, if the new file holds more
                    // sequences than the original.
                    Assert.IsTrue(count < seqsOriginal.Count, "Number of sequences is different.");

                    string s1 = seqsOriginal[count].ConvertToString();
                    string s2 = newSequence.ConvertToString();
                    Assert.AreEqual(s1, s2);
                    count++;
                }

                // Expected value first so a failure message reads correctly.
                Assert.AreEqual(seqsOriginal.Count, count, "Number of sequences is different.");
            }
            finally
            {
                // Delete new file
                File.Delete(filepathTmp);
            }
        }
        /// <summary>
        /// Exports a given list of sequences to a file in FastA format.
        /// The formatter is disposed before returning so the target file is
        /// released even if writing throws.
        /// </summary>
        /// <param name="sequences">List of Sequences to be exported.</param>
        /// <param name="filename">Target filename.</param>
        public static void ExportFastA(ICollection<ISequence> sequences, string filename)
        {
            // The formatter is bound to the target file on construction; the
            // original code never closed it, leaving the file handle open.
            using (FastAFormatter formatter = new FastAFormatter(filename))
            {
                // Exports the sequences to a file
                formatter.Write(sequences);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Copies the reads of a FastA input file to an output file, dropping
        /// every read in which some symbol is immediately followed by
        /// <paramref name="repeatLength"/> further copies of itself.
        /// Progress and skipped reads are reported on the console. If the input
        /// file does not exist, a message is printed and nothing is written.
        /// </summary>
        /// <param name="inputFile">Input File</param>
        /// <param name="outputFile">Output File</param>
        /// <param name="repeatLength">Number of consecutive repeats of a symbol that causes a read to be skipped.</param>
        static void FilterTestData(string inputFile, string outputFile, int repeatLength)
        {
            if (!File.Exists(inputFile))
            {
                Console.WriteLine("Enter Valid File Path.");
                return;
            }

            Console.WriteLine("Processing the file '{0}'.", inputFile);

            // Read the input file with the help of FastA Parser
            FastAParser parserObj = new FastAParser();
            FastAFormatter outputWriter = new FastAFormatter();

            using (parserObj.Open(inputFile))
            using (outputWriter.Open(outputFile))
            {
                IEnumerable<ISequence> inputReads = parserObj.Parse();

                // Going through read by read in a given file
                foreach (ISequence seq in inputReads)
                {
                    // Symbols of the current read
                    byte[] actualRead = seq.ToArray();

                    int repeatLenCount = 0;
                    bool ignoreRead = false;

                    // Compare each symbol with its predecessor. Starting at index 1
                    // means empty and single-symbol reads never index the array,
                    // so they pass through unfiltered instead of throwing.
                    for (int j = 1; j < actualRead.Length; j++)
                    {
                        if (actualRead[j] != actualRead[j - 1])
                        {
                            // Run broken: start counting again
                            repeatLenCount = 0;
                            continue;
                        }

                        repeatLenCount++;

                        // if repeat length is reached, skip this read and continue with other read
                        if (repeatLenCount == repeatLength)
                        {
                            Console.WriteLine("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped",
                                (char)actualRead[j], repeatLength, seq.ID);
                            ignoreRead = true;
                            break;
                        }
                    }

                    Console.WriteLine("Read '{0}' Processed.", seq.ID);

                    // Only reads without an over-long repeat are written to the output file
                    if (!ignoreRead)
                    {
                        outputWriter.Format(seq);
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Filtering Completed!!");
        }
Esempio n. 7
0
        /// <summary>
        ///     Writes the same sequence three times to a temporary FastA file with
        ///     Format(), parses the file back and verifies the number of sequences,
        ///     the ID and the symbols of the first parsed sequence.
        /// </summary>
        public void FastAFormatterValidateWrite1()
        {
            // Gets the actual sequence and the alphabet from the Xml
            string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                    Constants.ExpectedSequenceNode);
            string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                             Constants.AlphabetNameNode);

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                   actualSequence, alpName));
            var seqOriginal = new Sequence(Utility.GetAlphabet(alpName),
                                           actualSequence) { ID = "" };
            Assert.IsNotNull(seqOriginal);

            // Use the formatter to write the original sequences to a temp file
            ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                   Constants.FastaTempFileName));
            var seqList = new List<ISequence> { seqOriginal, seqOriginal, seqOriginal };

            var formatter = new FastAFormatter();
            using (formatter.Open(Constants.FastaTempFileName))
            {
                formatter.Format(seqList);
            }

            // Read the new file once. The parse result is deferred, so it is
            // materialized here instead of being re-enumerated for every check.
            var parser = new FastAParser();
            parser.Alphabet = Alphabets.Protein;
            List<ISequence> seqsNew = parser.Parse(Constants.FastaTempFileName).ToList();
            Assert.IsNotNull(seqsNew);

            // Now compare the sequences.
            Assert.AreEqual(3, seqsNew.Count);
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            ISequence firstSequence = seqsNew[0];
            var newSequence = new string(firstSequence.Select(a => (char) a).ToArray());
            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter BVT: New Sequence is '{0}'.",
                                                   newSequence));

            Assert.AreEqual(seqOriginal.ID, firstSequence.ID);

            // Compare against the symbols that were written. The previous check
            // compared the parsed sequence with itself and could never fail.
            var originalSequence = new string(seqOriginal.Select(a => (char) a).ToArray());
            Assert.AreEqual(originalSequence, newSequence);

            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.",
                                                   newSequence));

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
Esempio n. 8
0
 /// <summary>
 /// Calls Close() on a FastAFormatter that has not been opened.
 /// </summary>
 public void FastAFormatterInvalidateClose()
 {
     new FastAFormatter().Close();
 }
Esempio n. 9
0
 /// <summary>
 /// Checks that SupportedFileTypes is not null and mentions every expected
 /// FastA file extension.
 /// </summary>
 public void FastAFormatterValidateGetSupportedFileTypes()
 {
     string supportedFileType = new FastAFormatter().SupportedFileTypes;
     Assert.IsNotNull(supportedFileType);

     // Extensions are checked in this order; the first missing one fails the test.
     string[] expectedExtensions = { ".fa", ".mpfa", ".fna", ".faa", ".fsa", ".fas", ".fasta" };
     foreach (string extension in expectedExtensions)
     {
         Assert.IsTrue(supportedFileType.Contains(extension));
     }
 }
Esempio n. 10
0
 /// <summary>
 /// Checks that a new FastAFormatter exposes a non-null Description.
 /// </summary>
 public void FastAFormatterValidateGetDescription()
 {
     Assert.IsNotNull(new FastAFormatter().Description);
 }
Esempio n. 11
0
 /// <summary>
 /// Checks that a new FastAFormatter reports "FastA" as its Name.
 /// </summary>
 public void FastAFormatterValidateGetName()
 {
     var formatter = new FastAFormatter();
     string name = formatter.Name;
     Assert.IsNotNull(name);

     // Expected value goes first so a failure reports expected/actual correctly.
     Assert.AreEqual("FastA", name);
 }
Esempio n. 12
0
        /// <summary>
        /// Does the logic behind the sequence simulation: creates subsequences of
        /// <c>SequenceToSplit</c> and writes them to numbered FastA files named
        /// "&lt;output name without extension&gt;_&lt;index&gt;.fa", with at most
        /// <c>Settings.OutputSequenceCount</c> sequences per file.
        /// </summary>
        /// <param name="outputFileName">Path used to derive the directory and base name of the generated files.</param>
        /// <param name="updateSimulationStats">Invoked once, before any file is written, with the number of sequences and the number of files that will be produced.</param>
        /// <param name="simulationComplete">Invoked at the end with the name of the last formatter used, or an empty string when no file was written.</param>
        /// <exception cref="ArgumentException">
        /// The output directory does not exist, or <c>Settings.OutputSequenceCount</c> or
        /// <c>Settings.SequenceLength</c> is not greater than zero.
        /// </exception>
        public void DoSimulation(string outputFileName, Action<long,long> updateSimulationStats, Action<string> simulationComplete)
        {
            const string filePostfix = "_{0}.fa";

            FileInfo file = new FileInfo(outputFileName);
            if (file.Directory == null || !file.Directory.Exists)
            {
                throw new ArgumentException("Could not write to the output directory for " + outputFileName);
            }

            if (Settings.OutputSequenceCount <= 0)
            {
                throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
            }

            if (Settings.SequenceLength <= 0)
            {
                throw new ArgumentException("'Mean Output Length' should be greater than zero.");
            }

            // Strip the extension from the END of the path. Searching for it with
            // IndexOf would match its first occurrence, which can be inside a
            // directory name (e.g. "C:\runs.fa\out.fa") and truncate the prefix.
            // An empty extension leaves the full name unchanged.
            string filePrefix = file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

            long seqCount = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
            long fileCount = seqCount / Settings.OutputSequenceCount;
            if (seqCount % Settings.OutputSequenceCount != 0)
            {
                // A partially filled last file is needed for the remainder.
                fileCount++;
            }

            // Update the UI
            updateSimulationStats(seqCount, fileCount);

            int fileIndex = 1;
            FastAFormatter formatter = null;
            List<ISequence> generatedSequenceList = new List<ISequence>();

            for (long i = 0; i < seqCount; i++)
            {
                generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

                // Flush a full batch to its own file.
                if (generatedSequenceList.Count >= Settings.OutputSequenceCount)
                {
                    FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
                    formatter = new FastAFormatter();
                    using (formatter.Open(outFile.FullName))
                    {
                        formatter.Format(generatedSequenceList);
                    }
                    generatedSequenceList.Clear();
                }
            }

            // Pick off any remaining sequences into the final file.
            if (generatedSequenceList.Count > 0)
            {
                FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
                formatter = new FastAFormatter();
                using (formatter.Open(outFile.FullName))
                {
                    formatter.Format(generatedSequenceList);
                }
                simulationComplete(formatter.Name);
            }

            // Either we ended exactly on the boundary with no additional sequences
            // generated, OR we never generated any files.
            else
            {
                simulationComplete(formatter != null ? formatter.Name : string.Empty);
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Writes the assembled sequences with a FastA formatter, either to
        /// <c>OutputFile</c> or, when no output file is configured, to standard
        /// output. Reports "No sequences assembled." and returns when the
        /// assembly holds no sequences.
        /// </summary>
        /// <param name="assembly">Assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            var contigs = assembly.AssembledSequences;

            if (contigs.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
                return;
            }

            EnsureContigNames(contigs);

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", contigs.Count);

                // Line length is capped at 80 symbols or the console width minus 2, whichever is smaller.
                // NOTE(review): Console.WindowWidth is read unconditionally here — confirm
                // this is safe when standard output is redirected.
                var consoleWriter = new FastAFormatter
                {
                    AutoFlush = true,
                    MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2)
                };

                foreach (ISequence contig in contigs)
                {
                    consoleWriter.Format(Console.OpenStandardOutput(), contig);
                }

                return;
            }

            var fileWriter = new FastAFormatter { AutoFlush = true };
            using (fileWriter.Open(this.OutputFile))
            {
                foreach (ISequence contig in contigs)
                {
                    fileWriter.Format(contig);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", contigs.Count, this.OutputFile);
        }
Esempio n. 14
0
 /// <summary>
 /// Drains the collection of filtered-out ambiguous reads into a FastA file.
 /// Keeps taking reads, waiting without a timeout when none is available,
 /// until the collection reports that it is completed.
 /// </summary>
 /// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
 /// <param name="ambiguousFilename">File to write.</param>
 private static void WriteAmbiguousReads(BlockingCollection<ISequence> ambiguousReads, string ambiguousFilename)
 {
     var fastaWriter = new FastAFormatter() { AutoFlush = true };
     using (fastaWriter.Open(ambiguousFilename))
     {
         // The consuming enumerable removes each read it yields and ends once
         // the collection is completed.
         foreach (ISequence read in ambiguousReads.GetConsumingEnumerable())
         {
             fastaWriter.Format(read);
         }
     }
 }
Esempio n. 15
0
        /// <summary>
        ///     Writes a single sequence to a temporary FastA file, parses the file
        ///     back from a stream and verifies that the symbols of the first parsed
        ///     sequence equal the expected sequence text from the xml.
        /// </summary>
        public void FastAFormatterValidateWriteWithStream()
        {
            string expectedSymbols = string.Empty;
            var fastaWriter = new FastAFormatter();

            using (fastaWriter.Open(Constants.FastaTempFileName))
            {
                // Gets the actual sequence and the alphabet from the Xml
                expectedSymbols = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                  Constants.ExpectedSequenceNode);
                string alphabetName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                      Constants.AlphabetNameNode);

                // Logs information to the log file
                ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                       expectedSymbols, alphabetName));

                var original = new Sequence(Utility.GetAlphabet(alphabetName), expectedSymbols) { ID = "" };
                Assert.IsNotNull(original);

                // Use the formatter to write the original sequence to the temp file.
                ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                       Constants.FastaTempFileName));
                fastaWriter.Format(original);
                fastaWriter.Close();
            }

            using (var reader = File.OpenRead(Constants.FastaTempFileName))
            {
                // Read the new file from the stream, then compare the sequences
                var parser = new FastAParser { Alphabet = Alphabets.Protein };

                // Create a list of sequences.
                List<ISequence> parsedSequences = parser.Parse(reader).ToList();
                Assert.IsNotNull(parsedSequences);

                char[] parsedSymbols = parsedSequences[0].Select(symbol => (char) symbol).ToArray();
                Assert.AreEqual(expectedSymbols, new string(parsedSymbols));
            }

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
Esempio n. 16
0
        /// <summary>
        /// Writes the sequences with a FastA formatter to <c>OutputFile</c> and
        /// reports how many were written, or, when no output file is configured,
        /// writes each ID and its symbols through <c>Output</c>.
        /// </summary>
        /// <param name="sequences">Sequences to write.</param>
        private void WriteSequences(IEnumerable<ISequence> sequences)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                // No file configured: emit ID and symbols through the Output channel.
                Output.WriteLine(OutputLevel.Information, "Results:");

                foreach (ISequence item in sequences)
                {
                    Output.WriteLine(OutputLevel.Results, item.ID);
                    char[] symbols = item.Select(symbol => (char)symbol).ToArray();
                    Output.WriteLine(OutputLevel.Results, new string(symbols));
                }

                return;
            }

            int written = 0;
            var fastaWriter = new FastAFormatter { AutoFlush = true };
            using (fastaWriter.Open(this.OutputFile))
            {
                foreach (ISequence item in sequences)
                {
                    written++;
                    fastaWriter.Format(item);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
        }
Esempio n. 17
0
        /// <summary>
        /// Writes the scaffolds with a FastA formatter, either to
        /// <c>OutputFile</c> or, when no output file is configured, to standard
        /// output. Reports "No Scaffolds generated." and returns when the list is empty.
        /// </summary>
        /// <param name="scaffolds">The list of scaffolds sequence.</param>
        private void WriteContigs(IList<ISequence> scaffolds)
        {
            if (scaffolds.Count == 0)
            {
                Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
                return;
            }

            EnsureContigNames(scaffolds);

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);

                // Line length is capped at 80 symbols or the console width minus 2, whichever is smaller.
                // NOTE(review): Console.WindowWidth is read unconditionally here — confirm
                // this is safe when standard output is redirected.
                var consoleWriter = new FastAFormatter
                {
                    MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2),
                    AutoFlush = true
                };
                consoleWriter.Format(Console.OpenStandardOutput(), scaffolds);
                return;
            }

            var fileWriter = new FastAFormatter { AutoFlush = true };
            using (fileWriter.Open(this.OutputFile))
            {
                fileWriter.Format(scaffolds);
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
        }