Ejemplo n.º 1
0
        /// <summary>
        /// Writes the configured sequences to <c>OutputFile</c> through a <c>FastAFormatter</c>.
        /// Every entry of <c>SequenceList</c> (when it is set) is written first, followed by
        /// <c>Sequence</c> (when it is set). When neither is set the file is opened and closed
        /// without any sequence being written.
        /// </summary>
        /// <param name="executionContext">The execution context.</param>
        /// <returns>Always <c>ActivityExecutionStatus.Closed</c>.</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            FastAFormatter formatter = new FastAFormatter();

            formatter.Open(OutputFile);

            try
            {
                // The list always precedes the single sequence, whichever combination is set.
                if (SequenceList != null)
                {
                    foreach (ISequence sequence in SequenceList)
                    {
                        formatter.Write(sequence);
                    }
                }

                if (Sequence != null)
                {
                    formatter.Write(Sequence);
                }
            }
            finally
            {
                // Close even when a write fails so the output file is not left open.
                formatter.Close();
            }

            return ActivityExecutionStatus.Closed;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Emits the generated scaffolds: to <c>OutputFile</c> when one is configured,
        /// otherwise to standard output. Logs a notice and returns when there is nothing to write.
        /// </summary>
        /// <param name="scaffolds">The scaffold sequences to write.</param>
        private void WriteContigs(IList <ISequence> scaffolds)
        {
            if (scaffolds.Count == 0)
            {
                Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
                return;
            }

            EnsureContigNames(scaffolds);

            // No output file configured: format straight to the console stream.
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);

                FastAFormatter consoleFormatter = new FastAFormatter();
                consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
                consoleFormatter.AutoFlush = true;
                consoleFormatter.Format(Console.OpenStandardOutput(), scaffolds);
                return;
            }

            FastAFormatter fileFormatter = new FastAFormatter();
            fileFormatter.AutoFlush = true;

            using (fileFormatter.Open(this.OutputFile))
            {
                fileFormatter.Format(scaffolds);
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes the assembled contigs either to <c>ContigFileName</c> (when
        /// <c>DiagnosticFilePrefix</c> is set) or to standard output.
        /// </summary>
        /// <param name="assembly">The assembly whose <c>AssembledSequences</c> are written.</param>
        protected void writeContigs(PadenaAssembly assembly)
        {
            var contigs = assembly.AssembledSequences;

            if (contigs.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "\tNo sequences assembled.");
                return;
            }

            ensureContigNames(contigs);

            // Without a diagnostic prefix the contigs go to the console.
            if (string.IsNullOrEmpty(this.DiagnosticFilePrefix))
            {
                Output.WriteLine(OutputLevel.Information, "\tAssembled Sequence Results: {0} sequences", contigs.Count);

                using (FastAFormatter consoleFormatter = new FastAFormatter())
                {
                    consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));
                    consoleFormatter.MaxSymbolsAllowedPerLine = decideOutputWidth();
                    consoleFormatter.AutoFlush = true;

                    foreach (ISequence contig in contigs)
                    {
                        consoleFormatter.Write(contig);
                    }
                }

                return;
            }

            using (FastAFormatter fileFormatter = new FastAFormatter(ContigFileName))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence contig in contigs)
                {
                    fileFormatter.Write(contig);
                }
            }

            Output.WriteLine(OutputLevel.Information, "\tWrote {0} sequences to {1}", contigs.Count, ContigFileName);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Writes the given sequences to <c>OutputFile</c> in FASTA format, or, when no
        /// output file is configured, prints each sequence ID and its symbols through <c>Output</c>.
        /// </summary>
        /// <param name="sequences">The sequences to write.</param>
        private void WriteSequences(IEnumerable <ISequence> sequences)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Results:");

                foreach (ISequence current in sequences)
                {
                    Output.WriteLine(OutputLevel.Results, current.ID);

                    // Each sequence item is cast to a char to build the printable text.
                    char[] symbols = current.Select(symbol => (char)symbol).ToArray();
                    Output.WriteLine(OutputLevel.Results, new string(symbols));
                }

                return;
            }

            int written = 0;
            var fastaWriter = new FastAFormatter();
            fastaWriter.AutoFlush = true;

            using (fastaWriter.Open(this.OutputFile))
            {
                foreach (ISequence current in sequences)
                {
                    written++;
                    fastaWriter.Format(current);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
        }
Ejemplo n.º 5
0
        public void FastAFormatterValidateWriteWithStream()
        {
            // Round-trip check: write one sequence through a StreamWriter-backed formatter,
            // parse the temp file back and compare the symbols with the expected text.
            string expectedSequence = string.Empty;

            using (var fastaFormatter = new FastAFormatter())
            {
                using (var tempWriter = new StreamWriter(Constants.FastaTempFileName))
                {
                    fastaFormatter.Open(tempWriter);

                    // Expected sequence text and alphabet name come from the XML test data.
                    expectedSequence = utilityObj.xmlUtil.GetTextValue(
                        Constants.SimpleFastaNodeName,
                        Constants.ExpectedSequenceNode);
                    string alphabetName = utilityObj.xmlUtil.GetTextValue(
                        Constants.SimpleFastaNodeName,
                        Constants.AlphabetNameNode);

                    string validatingMessage = string.Format(
                        null,
                        "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                        expectedSequence,
                        alphabetName);
                    ApplicationLog.WriteLine(validatingMessage);

                    var original = new Sequence(Utility.GetAlphabet(alphabetName), expectedSequence);
                    original.ID = "";
                    Assert.IsNotNull(original);

                    // Write the original sequence to the stream and close the formatter
                    // before the file is read back.
                    string creatingMessage = string.Format(
                        null,
                        "FastA Formatter BVT: Creating the Temp file '{0}'.",
                        Constants.FastaTempFileName);
                    ApplicationLog.WriteLine(creatingMessage);

                    fastaFormatter.Write(original);
                    fastaFormatter.Close();
                }

                // Read the new file back and compare the sequences.
                using (var tempReader = new StreamReader(Constants.FastaTempFileName))
                using (var fastaParser = new FastAParser())
                {
                    fastaParser.Alphabet = Alphabets.Protein;
                    IEnumerable <ISequence> parsed = fastaParser.Parse(tempReader);

                    List <ISequence> parsedList = parsed.ToList();
                    Assert.IsNotNull(parsedList);

                    char[] parsedSymbols = parsedList[0].Select(symbol => (char)symbol).ToArray();
                    Assert.AreEqual(expectedSequence, new string(parsedSymbols));
                }

                // Only reached when every assertion passed; on failure the temp file
                // is left in place for debugging.
                File.Delete(Constants.FastaTempFileName);
                ApplicationLog.WriteLine("Deleted the temp file created.");
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses the reads of a FASTQ file, assembles them with an
        /// <c>OverlapDeNovoAssembler</c> and writes the assembled sequences to
        /// "assembled_sequences.fasta".
        /// </summary>
        /// <remarks>
        /// The method performs no asynchronous work, so it is a plain synchronous method:
        /// an <c>async void</c> method would report failures away from the caller's stack
        /// instead of throwing to the caller.
        /// </remarks>
        /// <param name="fastqFileName">Path of the FASTQ file to read.</param>
        public static void AssemblySequences(string fastqFileName)
        {
            var parser = new FastQParser();
            List <IQualitativeSequence> sequences;

            // The input is only read, so open it read-only (also works for read-only files).
            using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read))
            {
                // Materialize while the stream is still open.
                sequences = parser.Parse(fileStream).ToList();
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
            IDeNovoAssembly        assembly  = assembler.Assemble(sequences);

            FastAFormatter outputFormatter = new FastAFormatter();

            outputFormatter.Open("assembled_sequences.fasta");
            try
            {
                outputFormatter.Format(assembly.AssembledSequences);
            }
            finally
            {
                // Release the output file even when formatting fails.
                outputFormatter.Close();
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Drains the collection of filtered-out ambiguous reads into a FASTA file,
        /// blocking for new items until the collection is marked complete and is empty.
        /// </summary>
        /// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
        /// <param name="ambiguousFilename">File to write.</param>
        private static void WriteAmbiguousReads(BlockingCollection <ISequence> ambiguousReads, string ambiguousFilename)
        {
            FastAFormatter fastaWriter = new FastAFormatter();
            fastaWriter.AutoFlush = true;

            using (fastaWriter.Open(ambiguousFilename))
            {
                // Consuming enumeration takes items with an infinite wait and ends
                // once the collection is completed.
                foreach (ISequence read in ambiguousReads.GetConsumingEnumerable())
                {
                    fastaWriter.Format(read);
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Writes the assembled contigs to <c>OutputFile</c> when one is configured,
        /// otherwise to standard output. Logs a notice and returns when nothing was assembled.
        /// </summary>
        /// <param name="assembly">The assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            var contigs = assembly.AssembledSequences;

            if (contigs.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
                return;
            }

            EnsureContigNames(contigs);

            // No output file configured: stream the contigs to the console.
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", contigs.Count);

                using (FastAFormatter consoleFormatter = new FastAFormatter())
                {
                    consoleFormatter.Open(new StreamWriter(Console.OpenStandardOutput()));
                    consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
                    consoleFormatter.AutoFlush = true;

                    foreach (ISequence contig in contigs)
                    {
                        consoleFormatter.Write(contig);
                    }
                }

                return;
            }

            using (FastAFormatter fileFormatter = new FastAFormatter(this.OutputFile))
            {
                fileFormatter.AutoFlush = true;

                foreach (ISequence contig in contigs)
                {
                    fileFormatter.Write(contig);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", contigs.Count, this.OutputFile);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Writes the given sequences to <c>OutputFile</c> in FASTA format, or prints each
        /// ID and its symbols through <c>Output</c> when no output file is configured.
        /// Sequences without an ID receive one generated from their 1-based position.
        /// </summary>
        /// <param name="assembly">The assembled sequences to write.</param>
        protected void WriteContigs(IEnumerable <ISequence> assembly)
        {
            int position = 1;

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");

                foreach (ISequence contig in assembly)
                {
                    if (string.IsNullOrEmpty(contig.ID))
                    {
                        contig.ID = GenerateSequenceId(position);
                    }

                    Output.WriteLine(OutputLevel.Results, contig.ID);

                    char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
                    Output.WriteLine(OutputLevel.Results, new string(symbols));

                    position++;
                }

                return;
            }

            FastAFormatter fastaWriter = new FastAFormatter();
            fastaWriter.AutoFlush = true;

            using (fastaWriter.Open(this.OutputFile))
            {
                foreach (ISequence contig in assembly)
                {
                    if (string.IsNullOrEmpty(contig.ID))
                    {
                        contig.ID = GenerateSequenceId(position);
                    }

                    fastaWriter.Format(contig);
                    position++;
                }
            }

            Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Copies the reads of a FASTA file to an output file, dropping every read in which
        /// a symbol is immediately repeated <paramref name="repeatLength"/> times in a row.
        /// Progress is reported on the console. When the input file does not exist a message
        /// is printed and nothing is written.
        /// </summary>
        /// <param name="inputFile">Input File</param>
        /// <param name="outputFile">Output File</param>
        /// <param name="repeatLength">
        /// Number of consecutive repeats of the same symbol (after its first occurrence)
        /// that causes a read to be skipped.
        /// </param>
        static void FilterTestData(string inputFile, string outputFile, int repeatLength)
        {
            if (!File.Exists(inputFile))
            {
                Console.WriteLine("Enter Valid File Path.");
                return;
            }

            Console.WriteLine("Processing the file '{0}'.", inputFile);

            // Read the input file with the FastA parser and write survivors with the formatter.
            FastAParser    parserObj    = new FastAParser();
            FastAFormatter outputWriter = new FastAFormatter();

            using (parserObj.Open(inputFile))
            using (outputWriter.Open(outputFile))
            {
                // Go through the file read by read.
                foreach (ISequence seq in parserObj.Parse())
                {
                    byte[] actualRead = seq.ToArray();

                    byte repeatedSymbol;
                    bool ignoreRead = HasRepeatRun(actualRead, repeatLength, out repeatedSymbol);

                    if (ignoreRead)
                    {
                        Console.WriteLine("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped",
                                          (char)repeatedSymbol, repeatLength, seq.ID);
                    }

                    Console.WriteLine("Read '{0}' Processed.", seq.ID);

                    // Only reads without an over-long repeat run reach the output file.
                    if (!ignoreRead)
                    {
                        outputWriter.Format(seq);
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Filtering Completed!!");
        }

        /// <summary>
        /// Scans a read for a symbol that is followed by <paramref name="repeatLength"/>
        /// consecutive copies of itself. An empty read never contains such a run.
        /// </summary>
        /// <param name="read">Symbols of the read.</param>
        /// <param name="repeatLength">Number of consecutive repeats that counts as a run.</param>
        /// <param name="repeatedSymbol">The repeated symbol when a run is found; otherwise 0.</param>
        /// <returns>True when a run was found; otherwise false.</returns>
        static bool HasRepeatRun(byte[] read, int repeatLength, out byte repeatedSymbol)
        {
            repeatedSymbol = 0;

            // Guard: indexing read[0] on an empty read would throw.
            if (read.Length == 0)
            {
                return false;
            }

            byte previous = read[0];
            int  repeats  = 0;

            for (int i = 1; i < read.Length; i++)
            {
                if (read[i] == previous)
                {
                    repeats++;

                    if (repeats == repeatLength)
                    {
                        repeatedSymbol = previous;
                        return true;
                    }
                }
                else
                {
                    // A different symbol starts a new run.
                    repeats  = 0;
                    previous = read[i];
                }
            }

            return false;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Generates subsequences of <c>SequenceToSplit</c> and writes them to numbered FASTA
        /// files named "&lt;output path without extension&gt;_&lt;index&gt;.fa", with at most
        /// <c>Settings.OutputSequenceCount</c> sequences per file.
        /// </summary>
        /// <param name="outputFileName">
        /// Path that supplies the output directory and the file name prefix; its extension is dropped.
        /// </param>
        /// <param name="updateSimulationStats">
        /// Callback invoked once, before any file is written, with the number of sequences
        /// and the number of files that will be produced.
        /// </param>
        /// <param name="simulationComplete">
        /// Callback invoked once at the end with the <c>Name</c> of the last formatter used,
        /// or an empty string when no file was written.
        /// </param>
        /// <exception cref="ArgumentException">
        /// The output directory does not exist, or <c>Settings.OutputSequenceCount</c> or
        /// <c>Settings.SequenceLength</c> is not greater than zero.
        /// </exception>
        public void DoSimulation(string outputFileName, Action <long, long> updateSimulationStats, Action <string> simulationComplete)
        {
            const string filePostfix = "_{0}.fa";

            FileInfo file = new FileInfo(outputFileName);

            if (file.Directory == null || !file.Directory.Exists)
            {
                throw new ArgumentException("Could not write to the output directory for " + outputFileName);
            }

            if (Settings.OutputSequenceCount <= 0)
            {
                throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
            }

            if (Settings.SequenceLength <= 0)
            {
                throw new ArgumentException("'Mean Output Length' should be greater than zero.");
            }

            // Strip the extension from the END of the path. Searching for its first occurrence
            // would cut the path short whenever a directory name contains the same text
            // (e.g. "C:\runs.fa\out.fa"). An empty extension removes nothing.
            string filePrefix = file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

            long seqCount  = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
            long fileCount = seqCount / Settings.OutputSequenceCount;

            // A partially filled last file still counts as a file.
            if (seqCount % Settings.OutputSequenceCount != 0)
            {
                fileCount++;
            }

            // Update the UI
            updateSimulationStats(seqCount, fileCount);

            int              fileIndex             = 1;
            FastAFormatter   formatter             = null;
            List <ISequence> generatedSequenceList = new List <ISequence>();

            for (long i = 0; i < seqCount; i++)
            {
                generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

                // Flush when the batch is full, or on the last sequence so that the
                // remaining (possibly partial) batch lands in the final file.
                bool batchFull      = generatedSequenceList.Count >= Settings.OutputSequenceCount;
                bool isLastSequence = i == seqCount - 1;

                if (batchFull || isLastSequence)
                {
                    FileInfo outFile = new FileInfo(filePrefix + string.Format(filePostfix, fileIndex++));
                    formatter = new FastAFormatter();
                    using (formatter.Open(outFile.FullName))
                    {
                        formatter.Format(generatedSequenceList);
                    }
                    generatedSequenceList.Clear();
                }
            }

            // formatter is still null when no sequence (and therefore no file) was generated.
            simulationComplete(formatter != null ? formatter.Name : string.Empty);
        }
Ejemplo n.º 12
0
        public void FastAFormatterValidateWrite1()
        {
            // Round-trip check for Format(): write the same sequence three times to a temp
            // file, parse the file back and compare count, ID and symbols with the original.
            var formatter = new FastAFormatter();

            using (formatter.Open(Constants.FastaTempFileName))
            {
                // Gets the actual sequence and the alphabet from the Xml
                string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                        Constants.ExpectedSequenceNode);
                string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                 Constants.AlphabetNameNode);

                // Logs information to the log file
                ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                       actualSequence, alpName));
                var seqOriginal = new Sequence(Utility.GetAlphabet(alpName),
                                               actualSequence)
                {
                    ID = ""
                };
                Assert.IsNotNull(seqOriginal);

                // Use the formatter to write the original sequences to a temp file
                ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                       Constants.FastaTempFileName));
                var seqList = new List <ISequence> {
                    seqOriginal, seqOriginal, seqOriginal
                };
                formatter.Format(seqList);

                // Close before reading the file back.
                formatter.Close();

                // Read the new file, then compare the sequences
                var parser = new FastAParser();
                parser.Alphabet = Alphabets.Protein;

                // Materialize once: the parse result would otherwise be re-enumerated
                // by every ElementAt()/Count() call.
                List <ISequence> seqsNew = parser.Parse(Constants.FastaTempFileName).ToList();

                Assert.AreEqual(3, seqsNew.Count);
                ApplicationLog.WriteLine("The Number of sequences are matching.");

                ISequence firstNew    = seqsNew[0];
                var       newSequence = new string(firstNew.Select(a => (char)a).ToArray());
                ApplicationLog.WriteLine(string.Format(null,
                                                       "FastA Formatter BVT: New Sequence is '{0}'.",
                                                       newSequence));

                // Now compare the sequences. The parsed symbols are checked against the
                // sequence that was written, not against a second read of the same file.
                Assert.AreEqual(seqOriginal.ID, firstNew.ID);
                Assert.AreEqual(actualSequence, newSequence);

                ApplicationLog.WriteLine(string.Format(null,
                                                       "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.",
                                                       newSequence));

                // Passed all the tests, delete the tmp file. If we failed an Assert,
                // the tmp file will still be there in case we need it for debugging.
                File.Delete(Constants.FastaTempFileName);
                ApplicationLog.WriteLine("Deleted the temp file created.");
            }
        }