Exemplo n.º 1
0
        /// <summary>
        /// Entry point of the trimming utility: reads the input file, trims each read by
        /// length, quality score or regular expression, and writes the result.
        /// usage: SeqcosTrimmerUtil.exe [options] &lt;input&gt; &lt;output&gt;
        /// </summary>
        /// <param name="args">Command line arguments</param>
        static void Main(string[] args)
        {
            Console.Error.WriteLine(SplashString());

            CommandLineOptions myArgs = ProcessCommandLine(args);

            // Determine parser
            InputSubmission input = new InputSubmission(myArgs.InputFile);
            input.DetermineParserUtil();

            // Declared outside the try block so the finally block can close whatever was opened.
            ISequenceFormatter filteredFormatter = null;
            ISequenceFormatter discardedFormatter = null;

            try
            {
                #region Trimming

                // If the format is FASTA, then output will be FASTA.
                // Everything else (assuming quality scores are available)
                // will be outputted to FASTQ.
                if (input.Parser is FastAParser)
                {
                    if (myArgs.TrimByQuality > 0)
                    {
                        // Checked before any output file is created.
                        Console.Error.WriteLine("Cannot trim by quality using a FASTA file.");
                        Environment.Exit(-1);
                    }

                    if (myArgs.DiscardedFile != null)
                    {
                        discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
                    }

                    filteredFormatter = new FastAFormatter(myArgs.OutputFile);
                }
                else
                {
                    if (myArgs.DiscardedFile != null)
                    {
                        discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
                    }

                    filteredFormatter = new FastQFormatter(myArgs.OutputFile);
                }

                // Initialize a Trimmer object
                Trimmer myTrimmer = null;

                // By now, we should have sanity checked the command line arguments. So we should be able to
                // figure out what mode is being used simply by checking the properties.
                if (myArgs.TrimByLength > 0)
                {
                    Console.Error.WriteLine("Trimming reads to length {0}", myArgs.TrimByLength);
                    myTrimmer = new TrimByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByLength, myArgs.Left);
                }
                else if (myArgs.TrimByQuality > 0)
                {
                    if (!(input.Parser is FastQParser))
                    {
                        // Report and exit like every other validation failure in this method,
                        // instead of crashing with an unhandled exception.
                        Console.Error.WriteLine("Input file must be in FASTQ format.");
                        Environment.Exit(-1);
                    }

                    Console.Error.WriteLine("Trimming reads based on quality score {0}", myArgs.TrimByQuality);
                    myTrimmer = new TrimByQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.TrimByQuality, myArgs.Left, (int)Math.Round(myArgs.TrimByLength));
                }
                else if (myArgs.TrimByRegex != null)
                {
                    Console.Error.WriteLine("Trimming reads based on the regular expression pattern {0}", myArgs.TrimByRegex);
                    myTrimmer = new TrimByRegex(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByRegex);
                }
                else
                {
                    // Should never reach this line.
                    Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
                    Environment.Exit(-1);

                    // Environment.Exit does not return; this makes it explicit that
                    // myTrimmer is never dereferenced while null.
                    return;
                }

                myTrimmer.TrimAll();

                #endregion

                if (myArgs.Verbose)
                {
                    Console.Error.WriteLine("Trimmed {0}/{1} sequences.", myTrimmer.TrimCount, myTrimmer.Counted);
                    Console.Error.WriteLine("Discarded {0}/{1} sequences.", myTrimmer.DiscardCount, myTrimmer.Counted);
                    Console.Error.WriteLine("Output saved in {0}.", Path.GetFullPath(myArgs.OutputFile));
                    Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
                }
            }
            finally
            {
                // Release the parser and formatters even when trimming throws part-way through.
                if (input.Parser != null)
                {
                    input.Parser.Close();
                }

                if (filteredFormatter != null)
                {
                    filteredFormatter.Close();
                }

                if (discardedFormatter != null)
                {
                    discardedFormatter.Close();
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Entry point of the discard utility: reads the input file (first entry of the file
        /// list), discards reads by length or by mean quality, and writes the kept reads to
        /// the second entry of the file list. Discarded reads are written to a separate file
        /// when one was requested.
        /// </summary>
        /// <param name="args">Command line arguments</param>
        static void Main(string[] args)
        {
            Console.Error.WriteLine(SplashString());
            CommandLineOptions myArgs = ProcessCommandLine(args);

            // Determine parser
            InputSubmission input = new InputSubmission(myArgs.FileList[0]);
            input.DetermineParserUtil();

            // Declared outside the try block so the finally block can close whatever was opened.
            ISequenceFormatter filteredFormatter = null;
            ISequenceFormatter discardedFormatter = null;

            try
            {
                #region Discarding

                // If the format is FASTA, then output will be FASTA.
                // Everything else (assuming quality scores are available)
                // will be outputted to FASTQ.
                if (input.Parser is FastAParser)
                {
                    filteredFormatter = new FastAFormatter(myArgs.FileList[1]);

                    if (myArgs.DiscardedFile != null)
                    {
                        discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
                    }
                }
                else
                {
                    filteredFormatter = new FastQFormatter(myArgs.FileList[1]);

                    if (myArgs.DiscardedFile != null)
                    {
                        discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
                    }
                }

                // Initialize a Discarder object
                Discarder myDiscarder = null;

                // By now, we should have sanity checked the command line arguments. So we should be able to
                // figure out what mode is being used simply by checking the properties.
                if (myArgs.DiscardByLength > 0)
                {
                    myDiscarder = new DiscardByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.DiscardByLength);
                }
                else if (myArgs.DiscardByQuality > 0)
                {
                    if (!(input.Parser is FastQParser))
                    {
                        Console.Error.WriteLine("Input file must be in FASTQ format.");
                        Environment.Exit(-1);
                    }

                    myDiscarder = new DiscardByMeanQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.DiscardByQuality);
                }
                else
                {
                    // Should never reach this line.
                    Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
                    Environment.Exit(-1);

                    // Environment.Exit does not return; this makes it explicit that
                    // myDiscarder is never dereferenced while null.
                    return;
                }

                myDiscarder.DiscardReads();

                #endregion

                if (myArgs.Verbose)
                {
                    Console.Error.WriteLine("Discarded {0}/{1} sequences.", myDiscarder.DiscardCount, myDiscarder.Counted);
                    Console.Error.WriteLine("Non-discarded sequences saved in {0}.", Path.GetFullPath(myArgs.FileList[1]));
                    if (myArgs.DiscardedFile != null)
                    {
                        // The formatter is closed once, in the finally block below; closing it
                        // here as well would close the same formatter twice.
                        Console.Error.WriteLine("Discarded sequences saved in {0}.", Path.GetFullPath(myArgs.DiscardedFile));
                    }
                    Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
                }
            }
            finally
            {
                // Release the parser and formatters even when discarding throws part-way through.
                if (input.Parser != null)
                {
                    input.Parser.Close();
                }

                if (filteredFormatter != null)
                {
                    filteredFormatter.Close();
                }

                if (discardedFormatter != null)
                {
                    discardedFormatter.Close();
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Run the QC application module: sequence-level QC, quality score-level QC for
        /// non-FASTA input, plot generation and, when requested, a local BLAST search.
        /// </summary>
        /// <param name="myArgs">Parsed command line options that select the input file, the analyses to run and the output location</param>
        private void Run(CommandLineOptions myArgs)
        {
            #region Determine parser
            // Determine parser type
            InputSubmission input = new InputSubmission(myArgs.InputFile);
            input.DetermineParserUtil();
            #endregion

            Stopwatch sw = new Stopwatch();

            try
            {
                if (input.Parser is Bio.IO.FastQ.FastQParser &&
                    myArgs.FastqFormat == null)
                {
                    // NOTE(review): execution continues after this message unless ErrorWriteLine
                    // itself terminates the process - confirm that is intended.
                    myArgs.ErrorWriteLine("For FASTQ input, please provide a valid FASTQ format: [Sanger, Solexa, Illumina]");
                }

                myArgs.WriteLine("Processing the file " + myArgs.InputFile +
                                 "...this may take a while depending on the input size. Please be patient!");

                sw.Start();

                #region Run QC analysis
                // Run QC analysis
                try
                {
                    qcm = new Seqcos(input.Parser, myArgs.InputFile, myArgs.ExecuteSequenceQc, myArgs.ExecuteQualityScoreQc, myArgs.ExecuteBlast, myArgs.FastqFormat, dir: myArgs.OutputDirectory);
                }
                catch (ArgumentNullException e)
                {
                    myArgs.ErrorWriteLine(e.Message);

                    // qcm was not assigned by this call, so nothing below can run safely.
                    return;
                }

                myArgs.WriteLine("Performing sequence-level QC...");
                qcm.SequenceQc.Process();
                var time = ElapsedSeconds(sw);
                Console.WriteLine(time.ToString() + " s");

                if (!(input.Parser is FastAParser))
                {
                    myArgs.WriteLine("Performing quality score-level QC...");
                    qcm.QualityScoreQc.Process();
                    time = ElapsedSeconds(sw, (long)time);
                    Console.WriteLine(time.ToString() + " s");
                }

                #endregion

                #region Display statistics to console
                if (!myArgs.silent)
                {
                    qcm.WriteInputStatistics(myArgs.UseExcelHyperlinks);
                    DisplayInputStatistics();
                }
                #endregion

                #region Generate plots
                myArgs.WriteLine("Generating plots and saving them to: " + qcm.OutputDirectory);

                // Do these last, so that after plotting each section you can free up memory
                qcm.PlotSequenceLevelStats();
                qcm.FinishSequenceQc();      // free up some memory, since we won't be using this anymore

                if (!(input.Parser is FastAParser))
                {
                    qcm.PlotQualityScoreLevelStats();
                    qcm.FinishQualityScoreQC();
                }

                #endregion

                #region Carry out BLAST analysis

                if (myArgs.ExecuteBlast)
                {
                    myArgs.WriteLine("Searching for contaminants...");
                    // Convert FASTQ to FASTA
                    string targetFasta = qcm.OutputDirectory + "/" + qcm.GetPrefix() + ".fa";
                    BioHelper.ConvertToFASTA(qcm.ContaminationFinder.TargetSequences, targetFasta, myArgs.BlastSize, overwrite: true);

                    // Run local NCBI BLAST
                    qcm.ContaminationFinder.RunLocalBlast(myArgs.BlastDbPrefix, targetFasta);
                    time = ElapsedSeconds(sw, (long)time);
                    Console.WriteLine(time.ToString() + " s");

                    // The FASTA file is only an intermediate input for BLAST.
                    File.Delete(targetFasta);

                    if (!myArgs.silent)
                    {
                        HighlightText("NCBI BLAST results from searching against " + myArgs.BlastDbPrefix + " database:\n");
                        DisplayBlastResults();
                    }
                }

                #endregion
            }
            finally
            {
                // Close the parser on every exit path, including the early return and exceptions.
                if (input.Parser != null)
                {
                    input.Parser.Close();
                }
            }

            sw.Stop();
            myArgs.WriteLine("\nTotal time: " + ToSeconds(sw.ElapsedMilliseconds) + " s");
        }