예제 #1
0
        /// <summary>
        /// Emits the generated scaffolds in FastA format: to <c>OutputFile</c> when one
        /// is set, otherwise to the console's standard output stream.
        /// Logs a message and returns without writing when the list is empty.
        /// </summary>
        /// <param name="scaffolds">The scaffold sequences to write.</param>
        private void WriteContigs(IList <ISequence> scaffolds)
        {
            if (scaffolds.Count == 0)
            {
                Output.WriteLine(OutputLevel.Information, "No Scaffolds generated.");
                return;
            }

            EnsureContigNames(scaffolds);

            bool writeToConsole = string.IsNullOrEmpty(this.OutputFile);
            if (writeToConsole)
            {
                Output.WriteLine(OutputLevel.Information, "Scaffold Results: {0} sequences", scaffolds.Count);

                // Limit each sequence line to the console width (minus a margin), capped at 80 symbols.
                var consoleFormatter = new FastAFormatter();
                consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
                consoleFormatter.AutoFlush = true;
                consoleFormatter.Format(Console.OpenStandardOutput(), scaffolds);
                return;
            }

            var fileFormatter = new FastAFormatter();
            fileFormatter.AutoFlush = true;
            using (fileFormatter.Open(this.OutputFile))
            {
                fileFormatter.Format(scaffolds);
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} scaffolds to {1}", scaffolds.Count, this.OutputFile);
        }
예제 #2
0
        /// <summary>
        /// Writes the given sequences in FastA format to <c>OutputFile</c>, or, when no
        /// output file is set, prints each sequence's ID and symbols through <c>Output</c>.
        /// </summary>
        /// <param name="sequences">Sequences to write; enumerated once.</param>
        private void WriteSequences(IEnumerable <ISequence> sequences)
        {
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Results:");

                foreach (ISequence current in sequences)
                {
                    Output.WriteLine(OutputLevel.Results, current.ID);

                    // Cast every symbol to char to obtain a printable string.
                    char[] symbols = current.Select(symbol => (char)symbol).ToArray();
                    Output.WriteLine(OutputLevel.Results, new string(symbols));
                }

                return;
            }

            int written = 0;
            var fileFormatter = new FastAFormatter();
            fileFormatter.AutoFlush = true;
            using (fileFormatter.Open(this.OutputFile))
            {
                foreach (ISequence current in sequences)
                {
                    fileFormatter.Format(current);
                    written++;
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}.", written, this.OutputFile);
        }
예제 #3
0
        /// <summary>
        ///     Round-trips the FastA file named by the given xml node: parses it, formats
        ///     the sequences to a temporary file, parses that file again and checks that
        ///     every sequence (and the sequence count) matches the original.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateParseFormatGeneralTestCases(string nodeName)
        {
            // Look up the input file path and the alphabet name in the test xml.
            string sourcePath   = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode).TestDir();
            string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

            Assert.IsTrue(File.Exists(sourcePath));

            // Remove any leftover output from an earlier run.
            string roundTripPath = Path.Combine(Path.GetTempPath(), "temp.fasta");
            if (File.Exists(roundTripPath))
            {
                File.Delete(roundTripPath);
            }

            // Parse the original file.
            var sourceParser = new FastAParser();
            sourceParser.Alphabet = Utility.GetAlphabet(alphabetName);
            List <ISequence> expected = sourceParser.Parse(sourcePath).ToList();
            Assert.IsFalse(expected.Count == 0);

            // Write the parsed sequences back out.
            var roundTripFormatter = new FastAFormatter();
            roundTripFormatter.Format(expected, roundTripPath);

            try
            {
                // Parse the freshly written file and compare sequence by sequence.
                var roundTripParser = new FastAParser();
                roundTripParser.Alphabet = Utility.GetAlphabet(alphabetName);
                IEnumerable <ISequence> actual = roundTripParser.Parse(roundTripPath);
                Assert.IsNotNull(actual);

                int index = 0;
                foreach (ISequence reparsed in actual)
                {
                    string expectedText = expected[index].ConvertToString();
                    string actualText   = reparsed.ConvertToString();
                    Assert.AreEqual(expectedText, actualText);
                    index++;
                }

                Assert.AreEqual(index, expected.Count, "Number of sequences is different.");
            }
            finally
            {
                // The temporary file is removed whether or not the comparison passed.
                File.Delete(roundTripPath);
            }
        }
예제 #4
0
        /// <summary>
        /// Builds a single sequence from the given xml node, formats it to the temp
        /// FastA file, parses the file back and verifies the sequence text, the ID and
        /// that exactly one sequence was written.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateFormatterGeneralTestCases(string nodeName)
        {
            // Read the expected sequence text and the alphabet name from the xml.
            string rawSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

            // Strip carriage returns, line feeds and spaces from the xml text.
            string cleanSequence = rawSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");

            string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);

            ApplicationLog.WriteLine(string.Format(null,
                                                   "FastA Formatter : Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                   rawSequence, alphabetName));

            ISequence written = new Sequence(Utility.GetAlphabet(alphabetName), cleanSequence)
            {
                ID = "test"
            };

            Assert.IsNotNull(written);

            // Format the sequence to the temp file.
            var tempFormatter = new FastAFormatter();
            ApplicationLog.WriteLine(string.Format("FastA Formatter : Creating the Temp file '{0}'.",
                                                   Constants.FastaTempFileName));
            tempFormatter.Format(written, Constants.FastaTempFileName);

            // Parse the temp file and compare with what was written.
            var tempParser = new FastAParser();
            tempParser.Alphabet = Utility.GetAlphabet(alphabetName);
            IEnumerable <ISequence> parsed = tempParser.Parse(Constants.FastaTempFileName);

            ISequence first = parsed.FirstOrDefault();
            Assert.IsNotNull(first);

            string roundTripped = first.ConvertToString();
            ApplicationLog.WriteLine(string.Format(null, "FastA Formatter : New Sequence is '{0}'.", roundTripped));
            Assert.AreEqual(cleanSequence, roundTripped);
            Assert.AreEqual(written.ID, first.ID);

            // The file must hold exactly one sequence.
            Assert.AreEqual(1, parsed.Count());

            // Only reached when every assert passed; on failure the temp file is
            // left behind so it can be inspected.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
예제 #5
0
        /// <summary>
        /// Formats a single sequence to the temp FastA file, parses the file back and
        /// verifies that exactly one sequence with the original ID and the original
        /// symbols is read.
        /// </summary>
        public void FastAFormatterValidateWrite()
        {
            var formatter = new FastAFormatter();

            // Gets the actual sequence and the alphabet from the Xml
            string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                    Constants.ExpectedSequenceNode);
            string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                             Constants.AlphabetNameNode);

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                   actualSequence, alpName));

            var seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence);
            seqOriginal.ID = "";
            Assert.IsNotNull(seqOriginal);

            // Use the formatter to write the original sequence to a temp file
            ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                   Constants.FastaTempFileName));
            formatter.Format(seqOriginal, Constants.FastaTempFileName);

            // Read the new file, then compare the sequences
            var parser = new FastAParser();
            parser.Alphabet = Alphabets.Protein;
            IEnumerable <ISequence> parsed = parser.Parse(Constants.FastaTempFileName);

            // Check for null BEFORE the result is dereferenced.
            Assert.IsNotNull(parsed);

            // Materialize once so the file is not re-parsed for every access below.
            List <ISequence> seqsNew = parsed.ToList();
            Assert.AreEqual(1, seqsNew.Count);

            var newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
            ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: New Sequence is '{0}'.",
                                                   newSequence));
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);

            // Compare against the ORIGINAL sequence; building this string from the
            // re-parsed sequence would compare the new sequence with itself.
            var orgSeq = new string(seqOriginal.Select(a => (char)a).ToArray());
            Assert.AreEqual(orgSeq, newSequence);

            ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.",
                                                   newSequence));

            // Passed all the tests, delete the tmp file. If we failed an Assert,
            // the tmp file will still be there in case we need it for debugging.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
예제 #6
0
        /// <summary>
        /// Writes a sequence through a formatter opened on the temp FastA file, then
        /// parses that file from a stream and checks that the first parsed sequence
        /// has the expected symbols.
        /// </summary>
        public void FastAFormatterValidateWriteWithStream()
        {
            string expectedText = string.Empty;

            var tempFormatter = new FastAFormatter();
            using (tempFormatter.Open(Constants.FastaTempFileName))
            {
                // Read the expected sequence text and the alphabet name from the xml.
                expectedText = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                               Constants.ExpectedSequenceNode);
                string alphabetName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
                                                                      Constants.AlphabetNameNode);

                ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.",
                                                       expectedText, alphabetName));

                var source = new Sequence(Utility.GetAlphabet(alphabetName), expectedText)
                {
                    ID = ""
                };
                Assert.IsNotNull(source);

                // Write the sequence through the opened formatter.
                ApplicationLog.WriteLine(string.Format("FastA Formatter BVT: Creating the Temp file '{0}'.",
                                                       Constants.FastaTempFileName));
                tempFormatter.Format(source);
                tempFormatter.Close();
            }

            using (var input = File.OpenRead(Constants.FastaTempFileName))
            {
                // Parse the written file from the stream.
                var tempParser = new FastAParser();
                tempParser.Alphabet = Alphabets.Protein;

                List <ISequence> parsed = tempParser.Parse(input).ToList();
                Assert.IsNotNull(parsed);

                char[] symbols = parsed[0].Select(symbol => (char)symbol).ToArray();
                Assert.AreEqual(expectedText, new string(symbols));
            }

            // Only reached when every assert passed; on failure the temp file is
            // left behind so it can be inspected.
            File.Delete(Constants.FastaTempFileName);
            ApplicationLog.WriteLine("Deleted the temp file created.");
        }
예제 #7
0
        /// <summary>
        /// Parses the reads in a FASTQ file, assembles them with an
        /// <c>OverlapDeNovoAssembler</c> and writes the assembled sequences to
        /// "assembled_sequences.fasta" (a relative path).
        /// </summary>
        /// <remarks>
        /// The method contains no awaits, so it is declared as a plain synchronous
        /// method: with <c>async void</c> any exception would bypass the caller and
        /// be raised on the synchronization context or thread pool instead.
        /// </remarks>
        /// <param name="fastqFileName">Path of the FASTQ file to read.</param>
        public static void AssemblySequences(string fastqFileName)
        {
            var parser = new FastQParser();
            List <IQualitativeSequence> sequences;

            // The input is only read, so request read access instead of the
            // read/write default, which fails on read-only files.
            using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // ToList() forces the parse to complete while the stream is still open.
                sequences = parser.Parse(fileStream).ToList();
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
            IDeNovoAssembly        assembly  = assembler.Assemble(sequences);

            FastAFormatter outputFormatter = new FastAFormatter();

            outputFormatter.Open("assembled_sequences.fasta");
            try
            {
                outputFormatter.Format(assembly.AssembledSequences);
            }
            finally
            {
                // Close the output even when formatting throws.
                outputFormatter.Close();
            }
        }
예제 #8
0
        /// <summary>
        /// Drains the collection of filtered-out ambiguous reads and writes each one,
        /// in FastA format, to the given file until the collection is completed.
        /// </summary>
        /// <param name="ambiguousReads">Reads with ambiguous symbols.</param>
        /// <param name="ambiguousFilename">File to write.</param>
        private static void WriteAmbiguousReads(BlockingCollection <ISequence> ambiguousReads, string ambiguousFilename)
        {
            var readWriter = new FastAFormatter();
            readWriter.AutoFlush = true;

            using (readWriter.Open(ambiguousFilename))
            {
                // Blocks while waiting for the next read; the enumeration ends once the
                // collection has been marked complete and is empty.
                foreach (ISequence read in ambiguousReads.GetConsumingEnumerable())
                {
                    readWriter.Format(read);
                }
            }
        }
예제 #9
0
파일: Genome.cs 프로젝트: rmmiller22/Spritz
        /// <summary>
        /// Writes the sequences to <paramref name="filePath"/> in FastA format, then
        /// rewrites the file line by line so that every line ends with a single '\n'.
        /// </summary>
        /// <param name="sequences">Sequences to write.</param>
        /// <param name="filePath">Destination file; a sibling "&lt;filePath&gt;.tmp" file is used while rewriting.</param>
        public static void WriteFasta(IEnumerable <ISequence> sequences, string filePath)
        {
            var fastaFormatter = new FastAFormatter();

            using (FileStream output = File.Create(filePath))
            {
                fastaFormatter.Format(output, sequences);
            }

            // Copy into the ".tmp" file, terminating each line with a bare '\n'.
            string tempPath = filePath + ".tmp";
            using (StreamReader source = new StreamReader(filePath))
            using (StreamWriter target = new StreamWriter(tempPath))
            {
                string line;
                while ((line = source.ReadLine()) != null)
                {
                    target.Write(line + '\n');
                }
            }

            // Replace the original file with the rewritten copy.
            File.Delete(filePath);
            File.Move(tempPath, filePath);
        }
예제 #10
0
        /// <summary>
        /// Writes the assembled sequences either to <c>OutputFile</c> in FastA format
        /// or, when no output file is set, as ID / symbol lines through <c>Output</c>.
        /// A sequence without an ID is assigned one from <c>GenerateSequenceId</c>,
        /// using its 1-based position in the enumeration.
        /// </summary>
        /// <param name="assembly">The assembled sequences to write; enumerated once.</param>
        protected void WriteContigs(IEnumerable <ISequence> assembly)
        {
            // 1-based position of the current sequence, advanced for every sequence.
            int position = 1;

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results:");
                foreach (ISequence contig in assembly)
                {
                    if (string.IsNullOrEmpty(contig.ID))
                    {
                        contig.ID = GenerateSequenceId(position);
                    }

                    Output.WriteLine(OutputLevel.Results, contig.ID);
                    Output.WriteLine(OutputLevel.Results, new string(contig.Select(symbol => (char)symbol).ToArray()));
                    position++;
                }

                return;
            }

            var fileFormatter = new FastAFormatter();
            fileFormatter.AutoFlush = true;
            using (fileFormatter.Open(this.OutputFile))
            {
                foreach (ISequence contig in assembly)
                {
                    if (string.IsNullOrEmpty(contig.ID))
                    {
                        contig.ID = GenerateSequenceId(position);
                    }

                    fileFormatter.Format(contig);
                    position++;
                }
            }

            Output.WriteLine(OutputLevel.Information, Resources.OutPutWrittenToFileSpecified);
        }
예제 #11
0
        /// <summary>
        /// Creates a remote alignment job for the given subsequences
        /// and returns its id.
        /// </summary>
        /// <param name="subsequencesIds">
        /// Ids of subsequences selected for alignment.
        /// </param>
        /// <returns>
        /// JSON containing the result status (Success / Error)
        /// and either the service response or the error message.
        /// </returns>
        public string CreateAlignmentTask(long[] subsequencesIds)
        {
            try
            {
                ISequence[] bioSequences;

                using (var db = new LibiadaWebEntities())
                {
                    var subsequenceExtractor = new SubsequenceExtractor(db);
                    bioSequences = subsequenceExtractor.GetBioSequencesForFastaConverter(subsequencesIds);
                }

                // Render the sequences as FastA text in memory.
                string         fasta;
                FastAFormatter formatter = new FastAFormatter();
                using (MemoryStream stream = new MemoryStream())
                {
                    formatter.Format(stream, bioSequences);
                    fasta = Encoding.ASCII.GetString(stream.ToArray());
                }

                string result;
                using (var webClient = new WebClient())
                {
                    webClient.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
                    Uri url = new Uri("https://www.ebi.ac.uk/Tools/services/rest/clustalo/run");

                    // The body is form-urlencoded, so the FastA text must be encoded:
                    // raw '&', '=', '+' or '%' characters in sequence headers would
                    // otherwise be read as form syntax and corrupt the submission.
                    string encodedFasta = WebUtility.UrlEncode(fasta);

                    // TODO: make email global parameter
                    // NOTE(review): the leading form field below looks like a redacted
                    // e-mail placeholder - confirm the intended "email=..." value.
                    result = webClient.UploadString(url, $"[email protected]&sequence={encodedFasta}");
                }

                return JsonConvert.SerializeObject(new { Status = "Success", Result = result });
            }
            catch (Exception ex)
            {
                // Any failure is reported to the caller as an Error status.
                return JsonConvert.SerializeObject(new { Status = "Error", ex.Message });
            }
        }
예제 #12
0
        /// <summary>
        /// Writes the assembled sequences of a de novo assembly in FastA format, to
        /// <c>OutputFile</c> when one is set and to standard output otherwise.
        /// Logs a message and returns when the assembly holds no sequences.
        /// </summary>
        /// <param name="assembly">Assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            if (assembly.AssembledSequences.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
                return;
            }

            EnsureContigNames(assembly.AssembledSequences);

            if (string.IsNullOrEmpty(this.OutputFile))
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", assembly.AssembledSequences.Count);

                // Limit each sequence line to the console width (minus a margin), capped at 80 symbols.
                var consoleFormatter = new FastAFormatter();
                consoleFormatter.AutoFlush = true;
                consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);

                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    consoleFormatter.Format(Console.OpenStandardOutput(), contig);
                }

                return;
            }

            var fileFormatter = new FastAFormatter();
            fileFormatter.AutoFlush = true;
            using (fileFormatter.Open(this.OutputFile))
            {
                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    fileFormatter.Format(contig);
                }
            }

            Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", assembly.AssembledSequences.Count, this.OutputFile);
        }
예제 #13
0
파일: Program.cs 프로젝트: slogen/bio
        /// <summary>
        /// Copies reads from a FastA input file to an output file, skipping every read
        /// in which some symbol is immediately followed by <paramref name="repeatLength"/>
        /// further copies of itself. Progress is reported on the console.
        /// </summary>
        /// <param name="inputFile">Input File</param>
        /// <param name="outputFile">Output File</param>
        /// <param name="repeatLength">Number of consecutive repeats of a symbol that causes a read to be skipped.</param>
        static void FilterTestData(string inputFile, string outputFile, int repeatLength)
        {
            if (!File.Exists(inputFile))
            {
                Console.WriteLine("Enter Valid File Path.");
                return;
            }

            Console.WriteLine("Processing the file '{0}'.", inputFile);

            // Read the inputfile with the help of FastA Parser
            FastAParser    parserObj    = new FastAParser();
            FastAFormatter outputWriter = new FastAFormatter();

            using (parserObj.Open(inputFile))
            using (outputWriter.Open(outputFile))
            {
                IEnumerable <ISequence> inputReads = parserObj.Parse();

                // Going through read by read in a given file
                foreach (ISequence seq in inputReads)
                {
                    byte[] actualRead = seq.ToArray();

                    byte repeatedSymbol;
                    bool ignoreRead = ContainsRepeatRun(actualRead, repeatLength, out repeatedSymbol);
                    if (ignoreRead)
                    {
                        Console.WriteLine("Character '{0}' repeated more than '{1}' times and read '{2}' is skipped",
                                          (char)repeatedSymbol, repeatLength, seq.ID);
                    }

                    Console.WriteLine("Read '{0}' Processed.", seq.ID);

                    // Only reads without an over-long repeat are written to the output file
                    if (!ignoreRead)
                    {
                        outputWriter.Format(seq);
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Filtering Completed!!");
        }

        /// <summary>
        /// Reports whether a read contains a symbol that is immediately followed by
        /// <paramref name="repeatLength"/> further copies of itself.
        /// </summary>
        /// <param name="read">Symbols of the read.</param>
        /// <param name="repeatLength">Number of consecutive repeats to look for.</param>
        /// <param name="repeatedSymbol">The repeated symbol when the result is true; otherwise 0.</param>
        /// <returns>True when such a run exists.</returns>
        private static bool ContainsRepeatRun(byte[] read, int repeatLength, out byte repeatedSymbol)
        {
            repeatedSymbol = 0;

            // A zero-length read has no symbols to compare; indexing read[0] would throw.
            if (read.Length == 0)
            {
                return false;
            }

            byte previous = read[0];
            int  repeats  = 0;

            for (int j = 1; j < read.Length; j++)
            {
                if (read[j] != previous)
                {
                    // A different symbol starts a new run.
                    previous = read[j];
                    repeats  = 0;
                    continue;
                }

                repeats++;
                if (repeats == repeatLength)
                {
                    repeatedSymbol = previous;
                    return true;
                }
            }

            return false;
        }
예제 #14
0
        /// <summary>
        /// Generates subsequences of <c>SequenceToSplit</c> according to <c>Settings</c>
        /// and writes them to numbered FastA files ("&lt;prefix&gt;_1.fa", "&lt;prefix&gt;_2.fa", ...),
        /// each holding at most <c>Settings.OutputSequenceCount</c> sequences.
        /// </summary>
        /// <param name="outputFileName">Output file name; its path without the extension is used as the prefix of the generated files.</param>
        /// <param name="updateSimulationStats">Called once, before generation, with the sequence count and the file count.</param>
        /// <param name="simulationComplete">Called once at the end with the <c>Name</c> of the last formatter used, or an empty string when no file was written.</param>
        /// <exception cref="ArgumentException">
        /// The output directory does not exist, or the configured output sequence count
        /// or sequence length is not greater than zero.
        /// </exception>
        public void DoSimulation(string outputFileName, Action <long, long> updateSimulationStats, Action <string> simulationComplete)
        {
            const string filePostfix = "_{0}.fa";

            FileInfo file = new FileInfo(outputFileName);

            if (file.Directory == null || !file.Directory.Exists)
            {
                throw new ArgumentException("Could not write to the output directory for " + outputFileName);
            }

            if (Settings.OutputSequenceCount <= 0)
            {
                throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
            }

            if (Settings.SequenceLength <= 0)
            {
                throw new ArgumentException("'Mean Output Length' should be greater than zero.");
            }

            // Cut the extension off the END of the path. Searching for the extension
            // text with IndexOf would match its first occurrence, which may lie inside
            // a directory name (e.g. "C:\runs.fa\out.fa"). An empty extension removes nothing.
            string filePrefix = file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

            long seqCount  = (Settings.DepthOfCoverage * SequenceToSplit.Count) / Settings.SequenceLength;
            long fileCount = seqCount / Settings.OutputSequenceCount;

            // A partially filled last file still counts as a file.
            if (seqCount % Settings.OutputSequenceCount != 0)
            {
                fileCount++;
            }

            // Update the UI
            updateSimulationStats(seqCount, fileCount);

            int              fileIndex             = 1;
            FastAFormatter   formatter             = null;
            List <ISequence> generatedSequenceList = new List <ISequence>();

            for (long i = 0; i < seqCount; i++)
            {
                generatedSequenceList.Add(CreateSubsequence(i, SequenceToSplit, Settings));

                // Flush a full batch to its own file.
                if (generatedSequenceList.Count >= Settings.OutputSequenceCount)
                {
                    formatter = WriteSimulationFile(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
                    generatedSequenceList.Clear();
                }
            }

            // Pick off any remaining sequences into the final file.
            if (generatedSequenceList.Count > 0)
            {
                formatter = WriteSimulationFile(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
                simulationComplete(formatter.Name);
            }

            // Either we ended exactly on the boundary with no additional sequences
            // generated, OR we never generated any files.
            else
            {
                simulationComplete(formatter != null ? formatter.Name : string.Empty);
            }
        }

        /// <summary>
        /// Writes one batch of sequences to a FastA file and returns the formatter used.
        /// </summary>
        /// <param name="path">Path of the file to write.</param>
        /// <param name="sequences">Sequences to write.</param>
        /// <returns>The formatter that wrote the file.</returns>
        private static FastAFormatter WriteSimulationFile(string path, List <ISequence> sequences)
        {
            FileInfo       outFile   = new FileInfo(path);
            FastAFormatter formatter = new FastAFormatter();

            using (formatter.Open(outFile.FullName))
            {
                formatter.Format(sequences);
            }

            return formatter;
        }