/// <summary>
/// Entry point of the read-trimming utility. Selects a trimmer from the parsed
/// command line options and runs it over the input file.
/// usage: SeqcosTrimmerUtil.exe [options] &lt;input&gt; &lt;output&gt;
/// </summary>
/// <param name="args">Command line arguments</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    #region Trimming

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.InputFile);
    input.DetermineParserUtil();

    bool isFasta = input.Parser is FastAParser;

    // Work out the trim mode up front. The precedence is unchanged:
    // length first, then quality, then regular expression.
    bool useLength = myArgs.TrimByLength > 0;
    bool useQuality = !useLength && myArgs.TrimByQuality > 0;
    bool useRegex = !useLength && !useQuality && myArgs.TrimByRegex != null;

    // All fatal checks happen before any output file is created and before the
    // try/finally below: Environment.Exit terminates the process without running
    // finally blocks, and exiting here avoids leaving empty output files behind.
    if (isFasta && myArgs.TrimByQuality > 0)
    {
        Console.Error.WriteLine("Cannot trim by quality using a FASTA file.");
        Environment.Exit(-1);
    }

    if (useQuality && !(input.Parser is FastQParser))
    {
        // Reported like the other usage errors instead of crashing
        // with an unhandled ArgumentException.
        Console.Error.WriteLine("Input file must be in FASTQ format.");
        Environment.Exit(-1);
    }

    if (!useLength && !useQuality && !useRegex)
    {
        // Should never reach this line.
        Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
        Environment.Exit(-1);
    }

    ISequenceFormatter filteredFormatter = null;
    ISequenceFormatter discardedFormatter = null;

    try
    {
        // If the format is FASTA, then output will be FASTA.
        // Everything else (assuming quality scores are available)
        // will be outputted to FASTQ.
        if (isFasta)
        {
            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
            }

            filteredFormatter = new FastAFormatter(myArgs.OutputFile);
        }
        else
        {
            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
            }

            filteredFormatter = new FastQFormatter(myArgs.OutputFile);
        }

        // Initialize a Trimmer object
        Trimmer myTrimmer;

        if (useLength)
        {
            Console.Error.WriteLine("Trimming reads to length {0}", myArgs.TrimByLength);
            myTrimmer = new TrimByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByLength, myArgs.Left);
        }
        else if (useQuality)
        {
            Console.Error.WriteLine("Trimming reads based on quality score {0}", myArgs.TrimByQuality);

            // NOTE(review): this branch is only reached when TrimByLength is not
            // positive, so the last argument can never be a positive value here.
            // Confirm this is intended.
            myTrimmer = new TrimByQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.TrimByQuality, myArgs.Left, (int)Math.Round(myArgs.TrimByLength));
        }
        else
        {
            Console.Error.WriteLine("Trimming reads based on the regular expression pattern {0}", myArgs.TrimByRegex);
            myTrimmer = new TrimByRegex(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByRegex);
        }

        myTrimmer.TrimAll();

        if (myArgs.Verbose)
        {
            Console.Error.WriteLine("Trimmed {0}/{1} sequences.", myTrimmer.TrimCount, myTrimmer.Counted);
            Console.Error.WriteLine("Discarded {0}/{1} sequences.", myTrimmer.DiscardCount, myTrimmer.Counted);
            Console.Error.WriteLine("Output saved in {0}.", Path.GetFullPath(myArgs.OutputFile));
            Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
        }
    }
    finally
    {
        // Release the input and output files even when trimming fails part-way.
        if (input.Parser != null)
        {
            input.Parser.Close();
        }

        if (filteredFormatter != null)
        {
            filteredFormatter.Close();
        }

        if (discardedFormatter != null)
        {
            discardedFormatter.Close();
        }
    }

    #endregion
}
/// <summary>
/// Entry point of the read-discarding utility. Selects a discarder from the
/// parsed command line options and runs it over the input file.
/// The input path is taken from FileList[0] and the output path from FileList[1].
/// </summary>
/// <param name="args">Command line arguments</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    #region Discarding

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.FileList[0]);
    input.DetermineParserUtil();

    // Work out the discard mode up front. The precedence is unchanged:
    // length first, then mean quality.
    bool useLength = myArgs.DiscardByLength > 0;
    bool useQuality = !useLength && myArgs.DiscardByQuality > 0;

    // All fatal checks happen before any output file is created and before the
    // try/finally below: Environment.Exit terminates the process without running
    // finally blocks, and exiting here avoids leaving empty output files behind.
    if (useQuality && !(input.Parser is FastQParser))
    {
        Console.Error.WriteLine("Input file must be in FASTQ format.");
        Environment.Exit(-1);
    }

    if (!useLength && !useQuality)
    {
        // Should never reach this line.
        Console.Error.WriteLine("Invalid discard mode. Use '-l' or '-q'.");
        Environment.Exit(-1);
    }

    ISequenceFormatter filteredFormatter = null;
    ISequenceFormatter discardedFormatter = null;

    try
    {
        // If the format is FASTA, then output will be FASTA.
        // Everything else (assuming quality scores are available)
        // will be outputted to FASTQ.
        if (input.Parser is FastAParser)
        {
            filteredFormatter = new FastAFormatter(myArgs.FileList[1]);

            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
            }
        }
        else
        {
            filteredFormatter = new FastQFormatter(myArgs.FileList[1]);

            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
            }
        }

        // Initialize a Discarder object
        Discarder myDiscarder;

        if (useLength)
        {
            myDiscarder = new DiscardByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.DiscardByLength);
        }
        else
        {
            myDiscarder = new DiscardByMeanQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.DiscardByQuality);
        }

        myDiscarder.DiscardReads();

        if (myArgs.Verbose)
        {
            Console.Error.WriteLine("Discarded {0}/{1} sequences.", myDiscarder.DiscardCount, myDiscarder.Counted);
            Console.Error.WriteLine("Non-discarded sequences saved in {0}.", Path.GetFullPath(myArgs.FileList[1]));

            if (myArgs.DiscardedFile != null)
            {
                // The formatter is closed once, in the finally block below;
                // it used to be closed here as well, i.e. twice in verbose mode.
                Console.Error.WriteLine("Discarded sequences saved in {0}.", Path.GetFullPath(myArgs.DiscardedFile));
            }

            Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
        }
    }
    finally
    {
        // Release the input and output files even when discarding fails part-way.
        if (input.Parser != null)
        {
            input.Parser.Close();
        }

        if (filteredFormatter != null)
        {
            filteredFormatter.Close();
        }

        if (discardedFormatter != null)
        {
            discardedFormatter.Close();
        }
    }

    #endregion
}
/// <summary>
/// Run the QC application module: sequence-level QC, quality score-level QC
/// (skipped for FASTA input), plotting, and an optional BLAST contaminant search.
/// </summary>
/// <param name="myArgs">Parsed command line options that select the input file and the analyses to run</param>
private void Run(CommandLineOptions myArgs)
{
    #region Determine parser
    // Determine parser type
    InputSubmission input = new InputSubmission(myArgs.InputFile);
    input.DetermineParserUtil();
    #endregion

    Stopwatch sw = new Stopwatch();

    try
    {
        // NOTE(review): whether ErrorWriteLine terminates the program is not
        // visible here; if it only logs, execution continues with a null FastqFormat.
        if (input.Parser is Bio.IO.FastQ.FastQParser && myArgs.FastqFormat == null)
        {
            myArgs.ErrorWriteLine("For FASTQ input, please provide a valid FASTQ format: [Sanger, Solexa, Illumina]");
        }

        bool isFasta = input.Parser is FastAParser;

        myArgs.WriteLine("Processing the file " + myArgs.InputFile + "...this may take a while depending on the input size. Please be patient!");

        sw.Start();

        #region Run QC analysis
        // Run QC analysis
        try
        {
            qcm = new Seqcos(input.Parser, myArgs.InputFile, myArgs.ExecuteSequenceQc, myArgs.ExecuteQualityScoreQc, myArgs.ExecuteBlast, myArgs.FastqFormat, dir: myArgs.OutputDirectory);
        }
        catch (ArgumentNullException e)
        {
            myArgs.ErrorWriteLine(e.Message);

            // Without a freshly constructed QC module there is nothing to analyse;
            // continuing would dereference a qcm that this call never assigned.
            return;
        }

        myArgs.WriteLine("Performing sequence-level QC...");
        qcm.SequenceQc.Process();
        var time = ElapsedSeconds(sw);
        Console.WriteLine(time.ToString() + " s");

        if (!isFasta)
        {
            myArgs.WriteLine("Performing quality score-level QC...");
            qcm.QualityScoreQc.Process();
            time = ElapsedSeconds(sw, (long)time);
            Console.WriteLine(time.ToString() + " s");
        }
        #endregion

        #region Display statistics to console
        if (!myArgs.silent)
        {
            qcm.WriteInputStatistics(myArgs.UseExcelHyperlinks);
            DisplayInputStatistics();
        }
        #endregion

        #region Generate plots
        myArgs.WriteLine("Generating plots and saving them to: " + qcm.OutputDirectory);

        // Do these last, so that after plotting each section you can free up memory
        qcm.PlotSequenceLevelStats();
        qcm.FinishSequenceQc();     // free up some memory, since we won't be using this anymore

        if (!isFasta)
        {
            qcm.PlotQualityScoreLevelStats();
            qcm.FinishQualityScoreQC();
        }
        #endregion

        #region Carry out BLAST analysis
        if (myArgs.ExecuteBlast)
        {
            myArgs.WriteLine("Searching for contaminants...");

            // Convert FASTQ to FASTA
            string targetFasta = qcm.OutputDirectory + "/" + qcm.GetPrefix() + ".fa";

            try
            {
                BioHelper.ConvertToFASTA(qcm.ContaminationFinder.TargetSequences, targetFasta, myArgs.BlastSize, overwrite: true);

                // Run local NCBI BLAST
                qcm.ContaminationFinder.RunLocalBlast(myArgs.BlastDbPrefix, targetFasta);
                time = ElapsedSeconds(sw, (long)time);
                Console.WriteLine(time.ToString() + " s");
            }
            finally
            {
                // The FASTA file is only a temporary BLAST input; remove it
                // even when the conversion or the search fails.
                File.Delete(targetFasta);
            }

            if (!myArgs.silent)
            {
                HighlightText("NCBI BLAST results from searching against " + myArgs.BlastDbPrefix + " database:\n");
                DisplayBlastResults();
            }
        }
        #endregion
    }
    finally
    {
        // Release the input file on every exit path, including failures.
        if (input.Parser != null)
        {
            input.Parser.Close();
        }
    }

    sw.Stop();
    myArgs.WriteLine("\nTotal time: " + ToSeconds(sw.ElapsedMilliseconds) + " s");
}