/// <summary>
/// Initializes the state shared by all filter tool event arguments.
/// </summary>
/// <param name="input">Input information; stored in <c>InputInfo</c></param>
/// <param name="outFile">Output filename; stored in <c>OutputFilename</c></param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="input"/> or <paramref name="outFile"/> is null.
/// </exception>
public FilterToolArgs(InputSubmission input, string outFile)
{
    // Validate both arguments before touching any state; input is checked first.
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    if (outFile == null)
    {
        throw new ArgumentNullException("outFile");
    }

    InputInfo = input;
    OutputFilename = outFile;
}
/// <summary>
/// Entry point of the discard utility. Reads the input file named by the
/// first entry of the parsed file list, discards reads by length or by mean
/// quality, and writes the kept reads to the second entry of the file list
/// (and, optionally, the discarded reads to a separate file).
/// </summary>
/// <param name="args">Command line arguments</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    #region Discarding

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.FileList[0]);
    input.DetermineParserUtil();

    // Create the sequence formatter objects. The discarded-reads formatter
    // stays null unless a discarded-reads file was requested.
    ISequenceFormatter filteredFormatter;
    ISequenceFormatter discardedFormatter = null;

    // If the format is FASTA, then output will be FASTA.
    // Everything else (assuming quality scores are available)
    // will be outputted to FASTQ.
    if (input.Parser is FastAParser)
    {
        filteredFormatter = new FastAFormatter(myArgs.FileList[1]);
        if (myArgs.DiscardedFile != null)
        {
            discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
        }
    }
    else
    {
        filteredFormatter = new FastQFormatter(myArgs.FileList[1]);
        if (myArgs.DiscardedFile != null)
        {
            discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
        }
    }

    // Initialize a Discarder object
    Discarder myDiscarder = null;

    // By now, we should have sanity checked the command line arguments. So we should be able to
    // figure out what mode is being used simply by checking the properties.
    if (myArgs.DiscardByLength > 0)
    {
        myDiscarder = new DiscardByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.DiscardByLength);
    }
    else if (myArgs.DiscardByQuality > 0)
    {
        if (!(input.Parser is FastQParser))
        {
            Console.Error.WriteLine("Input file must be in FASTQ format.");
            Environment.Exit(-1);
        }

        myDiscarder = new DiscardByMeanQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.DiscardByQuality);
    }
    else
    {
        // Should never reach this line.
        Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
        Environment.Exit(-1);
    }

    myDiscarder.DiscardReads();

    #endregion

    if (myArgs.Verbose)
    {
        Console.Error.WriteLine("Discarded {0}/{1} sequences.", myDiscarder.DiscardCount, myDiscarder.Counted);
        Console.Error.WriteLine("Non-discarded sequences saved in {0}.", Path.GetFullPath(myArgs.FileList[1]));
        if (myArgs.DiscardedFile != null)
        {
            // Only report here. The formatter is closed exactly once in the
            // cleanup below; closing it here as well would close it twice
            // whenever verbose output is enabled.
            Console.Error.WriteLine("Discarded sequences saved in {0}.", Path.GetFullPath(myArgs.DiscardedFile));
        }

        Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
    }

    // Release the parser and every formatter that was opened.
    input.Parser.Close();
    filteredFormatter.Close();
    if (discardedFormatter != null)
    {
        discardedFormatter.Close();
    }
}
/// <summary>
/// Creates the event arguments for a Trim By Quality operation and builds
/// the TrimByQuality trimmer associated with them.
/// </summary>
/// <param name="q">Quality value forwarded to the TrimByQuality trimmer</param>
/// <param name="trimFromStart">Trim direction flag forwarded to the trimmer</param>
/// <param name="minLength">Minimum length value forwarded to the trimmer</param>
/// <param name="input">Input information; its Parser is handed to the trimmer</param>
/// <param name="filtered">Output sequence formatter</param>
/// <param name="discarded">Discarded reads sequence formatter</param>
/// <param name="outFile">Output filename</param>
public TrimByQualityArgs(
    byte q,
    bool trimFromStart,
    int minLength,
    InputSubmission input,
    ISequenceFormatter filtered,
    ISequenceFormatter discarded,
    string outFile)
    : base(input, outFile)
{
    // The base constructor has already rejected a null input.
    var parser = input.Parser;
    trimmer = new TrimByQuality(parser, filtered, discarded, q, trimFromStart, minLength);
}
/// <summary>
/// Creates the event arguments for a DiscardByMeanQuality operation and
/// builds the DiscardByMeanQuality discarder associated with them.
/// </summary>
/// <param name="input">Input information; its Parser is handed to the discarder</param>
/// <param name="filtered">Output sequence formatter</param>
/// <param name="discarded">Discarded reads sequence formatter</param>
/// <param name="mean">Minimum mean quality threshold</param>
/// <param name="outFile">Output filename</param>
public DiscardByMeanQualityArgs(
    InputSubmission input,
    ISequenceFormatter filtered,
    ISequenceFormatter discarded,
    byte mean,
    string outFile)
    : base(input, outFile)
{
    // The base constructor has already rejected a null input.
    var parser = input.Parser;
    discarder = new DiscardByMeanQuality(parser, filtered, discarded, mean);
}
/// <summary>
/// Creates the event arguments for a Trim By Regex operation and builds
/// the TrimByRegex trimmer associated with them.
/// </summary>
/// <param name="input">Input information; its Parser is handed to the trimmer</param>
/// <param name="filtered">Output sequence formatter</param>
/// <param name="discarded">Discarded reads sequence formatter</param>
/// <param name="pattern">Pattern string forwarded to the TrimByRegex trimmer</param>
/// <param name="outFile">Output filename</param>
public TrimByRegexArgs(
    InputSubmission input,
    ISequenceFormatter filtered,
    ISequenceFormatter discarded,
    string pattern,
    string outFile)
    : base(input, outFile)
{
    // The base constructor has already rejected a null input.
    var parser = input.Parser;
    trimmer = new TrimByRegex(parser, filtered, discarded, pattern);
}
/// <summary>
/// Shows an OpenFileDialog so the user can pick the input file, then
/// updates the input-related controls according to the selection.
/// </summary>
private void LaunchBrowseInputFileWindow()
{
    using (System.Windows.Forms.OpenFileDialog dialog = new System.Windows.Forms.OpenFileDialog())
    {
        dialog.CheckFileExists = true;
        dialog.CheckPathExists = true;

        // Single selection only (multiselection may be supported in the future)
        dialog.Multiselect = false;

        // File type filters
        dialog.Filter = String.Join("|", fileTypes.ToArray());
        dialog.FilterIndex = 2;

        // Nothing to do unless the user confirmed a selection.
        if (dialog.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        #region Validate input

        // Try to detect the parser from the file name alone.
        this.input = new InputSubmission(dialog.FileNames[0]);
        bool parserDetected = this.input.CanParseInputByFileName();
        string detectedParser = parserDetected ? this.input.Parser.Name : null;

        #endregion

        #region Update fields

        // Show the chosen filename
        this.textInputFilename.Text = dialog.FileNames[0];

        // Enable molecule type
        //this.comboMoleculeType.IsEnabled = true;
        //this.comboMoleculeType.SelectedIndex = 0;

        // Allow the user to pick a parser
        this.comboParserType.IsEnabled = true;

        #endregion

        #region Update combo box

        if (!parserDetected)
        {
            // Fall back to the first entry and tell the user that the
            // parser type could not be detected.
            this.comboParserType.SelectedIndex = 0;
            System.Windows.MessageBox.Show(Resource.AUTOPARSE_FAIL);
            return;
        }

        // Select the combo box entry whose text matches the detected parser.
        foreach (ComboBoxItem item in this.comboParserType.Items)
        {
            string itemParser = item.Content as string;
            if (itemParser == null || !itemParser.Equals(detectedParser))
            {
                continue;
            }

            this.comboParserType.SelectedItem = item;

            // Set QC Analyzer checkboxes
            UpdateAnalyzerCheckBoxes(itemParser);

            // Set Blast configuration checkboxes
            EnableBlastConfig();
            break;
        }

        // FASTQ input enables the FASTQ type selection; any other detected
        // parser enables and focuses the run button instead.
        bool isFastQ = detectedParser.Equals(SequenceParsers.FastQ.Name);
        if (isFastQ)
        {
            this.comboFastqType.IsEnabled = true;
            this.comboFastqType.SelectedIndex = 0;
        }
        else
        {
            this.btnRun.IsEnabled = true;
            this.btnRun.Focus();
        }

        #endregion
    }
}
/// <summary>
/// Creates the event arguments for a Trim By Length operation and builds
/// the TrimByLength trimmer associated with them.
/// </summary>
/// <param name="newLength">Length value forwarded to the TrimByLength trimmer</param>
/// <param name="trimFromStart">Trim direction flag forwarded to the trimmer</param>
/// <param name="input">Input information; its Parser is handed to the trimmer</param>
/// <param name="filtered">Output sequence formatter</param>
/// <param name="discarded">Discarded reads sequence formatter</param>
/// <param name="outFile">Output filename</param>
public TrimByLengthArgs(
    double newLength,
    bool trimFromStart,
    InputSubmission input,
    ISequenceFormatter filtered,
    ISequenceFormatter discarded,
    string outFile)
    : base(input, outFile)
{
    // The base constructor has already rejected a null input.
    var parser = input.Parser;
    trimmer = new TrimByLength(parser, filtered, discarded, newLength, trimFromStart);
}
/// <summary>
/// Entry point of the trimmer utility. Trims the reads of the input file by
/// length, by quality score or by regular expression and writes the result
/// to the output file (and, optionally, discarded reads to a separate file).
/// usage: SeqcosTrimmerUtil.exe [options] &lt;input&gt; &lt;output&gt;
/// </summary>
/// <param name="args">Command line arguments</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    #region Trimming

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.InputFile);
    input.DetermineParserUtil();

    // Create the sequence formatter objects. The discarded-reads formatter
    // stays null unless a discarded-reads file was requested.
    ISequenceFormatter filteredFormatter;
    ISequenceFormatter discardedFormatter = null;

    // If the format is FASTA, then output will be FASTA.
    // Everything else (assuming quality scores are available)
    // will be outputted to FASTQ.
    if (input.Parser is FastAParser)
    {
        if (myArgs.TrimByQuality > 0)
        {
            Console.Error.WriteLine("Cannot trim by quality using a FASTA file.");
            Environment.Exit(-1);
        }

        if (myArgs.DiscardedFile != null)
        {
            discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
        }

        filteredFormatter = new FastAFormatter(myArgs.OutputFile);
    }
    else
    {
        if (myArgs.DiscardedFile != null)
        {
            discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
        }

        filteredFormatter = new FastQFormatter(myArgs.OutputFile);
    }

    // Initialize a Trimmer object
    Trimmer myTrimmer = null;

    // By now, we should have sanity checked the command line arguments. So we should be able to
    // figure out what mode is being used simply by checking the properties.
    if (myArgs.TrimByLength > 0)
    {
        Console.Error.WriteLine("Trimming reads to length {0}", myArgs.TrimByLength);
        myTrimmer = new TrimByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByLength, myArgs.Left);
    }
    else if (myArgs.TrimByQuality > 0)
    {
        if (!(input.Parser is FastQParser))
        {
            // Report on stderr and exit like every other validation failure
            // in this method, instead of crashing with an unhandled exception.
            Console.Error.WriteLine("Input file must be in FASTQ format.");
            Environment.Exit(-1);
        }

        Console.Error.WriteLine("Trimming reads based on quality score {0}", myArgs.TrimByQuality);

        // NOTE(review): this branch only runs when TrimByLength <= 0, so the
        // last argument passed below can never be positive here - confirm
        // whether the length and quality modes were meant to be combinable.
        myTrimmer = new TrimByQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.TrimByQuality, myArgs.Left, (int)Math.Round(myArgs.TrimByLength));
    }
    else if (myArgs.TrimByRegex != null)
    {
        Console.Error.WriteLine("Trimming reads based on the regular expression pattern {0}", myArgs.TrimByRegex);
        myTrimmer = new TrimByRegex(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByRegex);
    }
    else
    {
        // Should never reach this line.
        Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
        Environment.Exit(-1);
    }

    myTrimmer.TrimAll();

    #endregion

    if (myArgs.Verbose)
    {
        Console.Error.WriteLine("Trimmed {0}/{1} sequences.", myTrimmer.TrimCount, myTrimmer.Counted);
        Console.Error.WriteLine("Discarded {0}/{1} sequences.", myTrimmer.DiscardCount, myTrimmer.Counted);
        Console.Error.WriteLine("Output saved in {0}.", Path.GetFullPath(myArgs.OutputFile));
        Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
    }

    // Release the parser and every formatter that was opened.
    input.Parser.Close();
    filteredFormatter.Close();
    if (discardedFormatter != null)
    {
        discardedFormatter.Close();
    }
}
/// <summary>
/// Run the QC application module: sequence-level QC, quality score-level QC
/// (skipped for FASTA input), statistics output, plotting and, when
/// requested, a local BLAST contamination search.
/// </summary>
/// <param name="myArgs">Parsed command line options that drive the run</param>
private void Run(CommandLineOptions myArgs)
{
    #region Determine parser

    // Determine parser type
    InputSubmission input = new InputSubmission(myArgs.InputFile);
    input.DetermineParserUtil();

    // FASTQ input needs an explicit FASTQ format.
    // NOTE(review): execution continues past this point unless
    // ErrorWriteLine itself terminates the process - confirm.
    if (input.Parser is Bio.IO.FastQ.FastQParser && myArgs.FastqFormat == null)
    {
        myArgs.ErrorWriteLine("For FASTQ input, please provide a valid FASTQ format: [Sanger, Solexa, Illumina]");
    }

    #endregion

    myArgs.WriteLine("Processing the file " + myArgs.InputFile + "...this may take a while depending on the input size. Please be patient!");

    // Times the individual stages as well as the whole run.
    Stopwatch sw = new Stopwatch();
    sw.Start();

    #region Run QC analysis

    // Run QC analysis
    try
    {
        qcm = new Seqcos(input.Parser, myArgs.InputFile, myArgs.ExecuteSequenceQc, myArgs.ExecuteQualityScoreQc, myArgs.ExecuteBlast, myArgs.FastqFormat, dir: myArgs.OutputDirectory);
    }
    catch (ArgumentNullException e)
    {
        // NOTE(review): qcm is not assigned when construction throws, yet it
        // is dereferenced right below unless ErrorWriteLine terminates the
        // process - confirm.
        myArgs.ErrorWriteLine(e.Message);
    }

    myArgs.WriteLine("Performing sequence-level QC...");
    qcm.SequenceQc.Process();
    var time = ElapsedSeconds(sw);
    Console.WriteLine(time.ToString() + " s");

    // Quality score-level QC is skipped for FASTA input.
    if (!(input.Parser is FastAParser))
    {
        myArgs.WriteLine("Performing quality score-level QC...");
        qcm.QualityScoreQc.Process();
        time = ElapsedSeconds(sw, (long)time);
        Console.WriteLine(time.ToString() + " s");
    }

    #endregion

    #region Display statistics to console

    if (!myArgs.silent)
    {
        qcm.WriteInputStatistics(myArgs.UseExcelHyperlinks);
        DisplayInputStatistics();
    }

    #endregion

    #region Generate plots

    myArgs.WriteLine("Generating plots and saving them to: " + qcm.OutputDirectory);

    // Do these last, so that after plotting each section its data can be freed
    qcm.PlotSequenceLevelStats();
    qcm.FinishSequenceQc(); // free up some memory, since we won't be using this anymore

    if (!(input.Parser is FastAParser))
    {
        qcm.PlotQualityScoreLevelStats();
        qcm.FinishQualityScoreQC();
    }

    #endregion

    #region Carry out BLAST analysis

    if (myArgs.ExecuteBlast)
    {
        myArgs.WriteLine("Searching for contaminants...");

        // Convert FASTQ to FASTA (written to a temporary .fa file in the output directory)
        string targetFasta = qcm.OutputDirectory + "/" + qcm.GetPrefix() + ".fa";
        BioHelper.ConvertToFASTA(qcm.ContaminationFinder.TargetSequences, targetFasta, myArgs.BlastSize, overwrite: true);

        // Run local NCBI BLAST
        qcm.ContaminationFinder.RunLocalBlast(myArgs.BlastDbPrefix, targetFasta);
        time = ElapsedSeconds(sw, (long)time);
        Console.WriteLine(time.ToString() + " s");

        // The FASTA file was only needed as BLAST input.
        File.Delete(targetFasta);

        if (!myArgs.silent)
        {
            HighlightText("NCBI BLAST results from searching against " + myArgs.BlastDbPrefix + " database:\n");
            DisplayBlastResults();
        }
    }

    #endregion

    input.Parser.Close();

    sw.Stop();
    myArgs.WriteLine("\nTotal time: " + ToSeconds(sw.ElapsedMilliseconds) + " s");
}
/// <summary>
/// Creates the event arguments for a DiscardByLength operation and builds
/// the DiscardByLength discarder associated with them.
/// </summary>
/// <param name="input">Input information; its Parser is handed to the discarder</param>
/// <param name="filtered">Output sequence formatter</param>
/// <param name="discarded">Discarded reads sequence formatter</param>
/// <param name="length">Length threshold for discarding reads</param>
/// <param name="outFile">Output filename</param>
public DiscardByLengthArgs(
    InputSubmission input,
    ISequenceFormatter filtered,
    ISequenceFormatter discarded,
    long length,
    string outFile)
    : base(input, outFile)
{
    // The base constructor has already rejected a null input.
    var parser = input.Parser;
    discarder = new DiscardByLength(parser, filtered, discarded, length);
}