/// <summary>
/// Verifies that the filters returned by BioHelper.QuerySupportedFileFilters
/// include the "All files" entry.
/// </summary>
public void QuerySupportedFileFiltersTest()
{
    const string allFilesFilter = "All files (*.*)|*.*";

    List<string> filters = BioHelper.QuerySupportedFileFilters();
    bool containsAllFiles = filters.Any(filter => filter.Equals(allFilesFilter));

    Assert.IsTrue(containsAllFiles);
}
/// <summary>
/// Verifies that BioHelper.GetQualityFormatType maps the string "Solexa"
/// to FastQFormatType.Solexa.
/// </summary>
public void GetQualityFormatTypeTest()
{
    const string formatName = "Solexa";

    FastQFormatType parsed = BioHelper.GetQualityFormatType(formatName);

    Assert.AreEqual(FastQFormatType.Solexa, parsed);
}
/// <summary>
/// Trims a DNA sequence with the end-anchored pattern TTTAAA$ and checks that
/// the result matches the input without its trailing TTTAAA.
/// </summary>
public void FastaTrimRegex2()
{
    Sequence input = new Sequence(Alphabets.DNA, "TTTAAAGATTACATTTAAA");
    Sequence expected = new Sequence(Alphabets.DNA, "TTTAAAGATTACA");

    TrimByRegex trimmer = new TrimByRegex(
        new FastAParser(),
        new FastAFormatter(),
        new FastAFormatter(),
        @"TTTAAA$");
    ISequence trimmed = trimmer.Trim(input);

    string expectedBases = BioHelper.GetStringSequence(expected);
    string trimmedBases = BioHelper.GetStringSequence(trimmed);
    Assert.AreEqual(expectedBases, trimmedBases);
}
/// <summary>
/// Verifies that BioHelper.GetStringSequence returns the base string a
/// qualitative sequence was constructed from.
/// </summary>
public void GetStringSequenceTest()
{
    const string bases = "GGCGCACTTACACCCTACATCCATTG";
    ISequence sequence = new QualitativeSequence(
        Alphabets.DNA,
        FastQFormatType.Sanger,
        bases,
        "IIIIG1?II;IIIII1IIII1%.I7I");

    string result = BioHelper.GetStringSequence(sequence);

    Assert.AreEqual(bases, result);
}
/// <summary>
/// Trims a qualitative sequence with the pattern GATTACA and checks that both
/// the bases and the encoded quality scores match the input with the matched
/// region removed.
/// </summary>
public void FastqTrimRegex1()
{
    QualitativeSequence input = new QualitativeSequence(
        Alphabets.DNA,
        FastQFormatType.Sanger,
        "GGGCCCGATTACATTTAAA",
        "ABCABCIIIIIIIABCABC");
    QualitativeSequence expected = new QualitativeSequence(
        Alphabets.DNA,
        FastQFormatType.Sanger,
        "GGGCCCTTTAAA",
        "ABCABCABCABC");

    TrimByRegex trimmer = new TrimByRegex(
        new FastQParser(),
        new FastQFormatter(),
        new FastQFormatter(),
        "GATTACA");
    ISequence trimmed = trimmer.Trim(input);

    // Bases first, then the quality string of the same read.
    Assert.AreEqual(
        BioHelper.GetStringSequence(expected),
        BioHelper.GetStringSequence(trimmed));
    Assert.AreEqual(
        BioHelper.GetEncodedQualityScoreStringSequence(expected),
        BioHelper.GetEncodedQualityScoreStringSequence(trimmed as QualitativeSequence));
}
/// <summary>
/// Verifies that BioHelper.QueryValidFastqFormats returns exactly three names
/// and that each of them is one of Illumina, Solexa or Sanger.
/// </summary>
public void QueryValidFastqFormatsTest()
{
    string[] knownFormats = { "Illumina", "Solexa", "Sanger" };

    string[] reported = BioHelper.QueryValidFastqFormats();

    Assert.AreEqual(knownFormats.Length, reported.Length);
    for (int i = 0; i < reported.Length; i++)
    {
        string name = reported[i];
        Assert.IsTrue(knownFormats.Any(known => known.Equals(name)));
    }
}
/// <summary>
/// Runs TrimByLength with a trim length of 1 and trimFromStart set to true on a
/// qualitative sequence, and checks the result against the single base "G"
/// with quality "I".
/// </summary>
public void FastqTrimFromLeftTest2()
{
    ISequence input = new QualitativeSequence(
        Alphabets.DNA,
        FastQFormatType.Sanger,
        "GGCGCACTTACACCCTACATCCATTG",
        "IIIIG1?II;IIIII1IIII1%.I7I");
    ISequence expected = new QualitativeSequence(Alphabets.DNA, FastQFormatType.Sanger, "G", "I");

    double length = 1;
    bool fromStart = true;
    TrimByLength trimmer = new TrimByLength(new FastQParser(), new FastQFormatter(), null, length, fromStart);

    ISequence trimmed = trimmer.Trim(input);

    Assert.AreEqual(BioHelper.GetStringSequence(expected), BioHelper.GetStringSequence(trimmed));

    // NOTE(review): this compares QualityScores via ToString(); confirm that
    // ToString() yields the score content rather than just a type name.
    QualitativeSequence expectedQualitative = expected as QualitativeSequence;
    string expectedScores = expectedQualitative.QualityScores.ToString();
    QualitativeSequence trimmedQualitative = trimmed as QualitativeSequence;
    string trimmedScores = trimmedQualitative.QualityScores.ToString();
    Assert.AreEqual(expectedScores, trimmedScores);
}
/// <summary>
/// Builds the open-file dialog and fills the parser type, FASTQ format and
/// BLAST database combo boxes, selecting the first entry of each.
/// </summary>
/// <param name="fileTypes">List of valid file types; stored on the dialog.</param>
public OpenFileDialog(List<string> fileTypes)
{
    InitializeComponent();
    this.btnBrowse.Focus();

    #region Parser type combo box

    // The first entry is the "manual choice" placeholder; it carries no Tag.
    this.comboParserType.Items.Add(new ComboBoxItem { Content = Resource.MANUAL_CHOICE });

    string[] parserNames = { SequenceParsers.FastQ.Name, SequenceParsers.Fasta.Name };
    foreach (string name in parserNames)
    {
        this.comboParserType.Items.Add(new ComboBoxItem { Content = name, Tag = name });
    }

    this.comboParserType.SelectedIndex = 0;
    this.fileTypes = fileTypes;

    #endregion

    #region FASTQ format combo box

    // Same layout as above: placeholder first, then one tagged item per format.
    this.comboFastqType.Items.Add(new ComboBoxItem { Content = Resource.MANUAL_CHOICE });

    string[] fastqFormats = BioHelper.QueryValidFastqFormats();
    for (int i = 0; i < fastqFormats.Length; i++)
    {
        string formatName = fastqFormats[i];
        this.comboFastqType.Items.Add(new ComboBoxItem { Content = formatName, Tag = formatName });
    }

    this.comboFastqType.SelectedIndex = 0;

    #endregion

    #region BLAST database combo box

    // NOTE(review): availableBlastDatabases starts empty and is not filled from
    // the query result in this constructor - confirm it is populated elsewhere.
    this.availableBlastDatabases = new List<string>();

    // Query for available BLAST databases; this combo box has no placeholder entry.
    List<string> blastDatabases = BlastTools.QueryAvailableBlastDatabases();
    foreach (string databaseName in blastDatabases)
    {
        this.comboBlastDb.Items.Add(new ComboBoxItem { Content = databaseName, Tag = databaseName });
    }

    this.comboBlastDb.SelectedIndex = 0;

    #endregion
}
/// <summary>
/// Parse command line arguments, print the help text when requested, and
/// validate the input file, output directory and BLAST options.
/// Any failure (and the help request) terminates the process with exit code -1.
/// </summary>
/// <param name="args">Raw command line arguments to parse.</param>
/// <returns>The populated and validated options.</returns>
static CommandLineOptions ProcessCommandLine(string[] args)
{
    CommandLineOptions myArgs = new CommandLineOptions();
    CommandLineArguments parser = new CommandLineArguments();

    AddParameters(parser);

    try
    {
        parser.Parse(args, myArgs);
    }
    catch (Exception e)
    {
        Console.Error.WriteLine("\nException while processing command line arguments [{0}]", e.Message);
        Environment.Exit(-1);
    }

    if (myArgs.help)
    {
        string[] validFastqFormats = BioHelper.QueryValidFastqFormats();

        string helpString = "Usage: SeqcosUtil.exe [options] <input file>\n"
            + "\nDescription: Evaluates the quality of sequencing reads and summarizes\n"
            + " the results in the form of text and plots. BLAST may be optionally performed\n"
            + " against a custom database (i.e. when looking for sequence contamination)."
            + "\n\n/help (/h)\n Show this help information"
            + "\n\n/silent (/s)\n Show less details displayed in the console"
            + "\n\n/FastQFormat (/q)\n Required for FASTQ input files. Choose from [" + string.Join(",", validFastqFormats) + "]"
            + "\n\n/OutputDirectory:<string> (/o)\n The name of the output directory where all output files will be saved. Relative/Absolute paths are not currently supported."
            + "\n\n/UseExcelHyperlinks (/e)\n Outputs Excel-formatted hyperlinks in the csv file"
            + "\n\n\n*** BLAST Options ***\n"
            + "\n\n/ExecuteBlast (/B)\n Perform a BLAST of the input sequences against a custom database. Windows NCBI BLAST must be installed first"
            + "\n\n/BlastDbPrefix:<string> (/D)\n Database to use for BLAST. The default is UniVec (http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html)"
            + "\n\n/BlastSize:<int> (/S)\n Limit the number of sequences to be searched by BLAST. Default is " + Resource.BLAST_MAX_SEQUENCES_DEFAULT + "."
            ;

        Console.WriteLine(helpString);
        Environment.Exit(-1);
    }

    // Process all the arguments for correctness
    if (!File.Exists(myArgs.InputFile))
    {
        Console.Error.WriteLine("The file {0} could not be found.", myArgs.InputFile);
        Environment.Exit(-1);
    }

    // Default the output directory to the input file's name without extension.
    if (myArgs.OutputDirectory == null)
    {
        myArgs.OutputDirectory = Path.GetFileNameWithoutExtension(myArgs.InputFile);
    }

    if (!Directory.Exists(myArgs.OutputDirectory))
    {
        Directory.CreateDirectory(myArgs.OutputDirectory);
    }

    if (myArgs.ExecuteBlast)
    {
        // check to make sure Windows BLAST is installed
        if (!BlastLocalHandler.IsLocalBLASTInstalled(BlastLocalHandler.BlastVersion))
        {
            // The {0} placeholder needs an argument, otherwise the literal "{0}" is printed.
            Console.Error.WriteLine(
                "Unable to find {0} in your PATH environment variable. Please check that BLAST is correctly installed.",
                BlastLocalHandler.BlastVersion);
            Environment.Exit(-1);
        }

        // NOTE(review): the check rejects only negative values while the message
        // says "greater than 0" - confirm whether a BlastSize of 0 is meant to be valid.
        if (myArgs.BlastSize < 0)
        {
            Console.Error.WriteLine("/BlastSize must be greater than 0.");
            Environment.Exit(-1);
        }
    }

    return myArgs;
}
/// <summary>
/// Run the QC application module: determine the parser, run sequence-level and
/// (for non-FASTA input) quality score-level QC, write statistics, generate
/// plots and optionally search for contaminants with local BLAST.
/// </summary>
/// <param name="myArgs">Parsed command line options controlling the run.</param>
private void Run(CommandLineOptions myArgs)
{
    #region Determine parser

    // Determine parser type
    InputSubmission input = new InputSubmission(myArgs.InputFile);
    input.DetermineParserUtil();

    if (input.Parser is Bio.IO.FastQ.FastQParser && myArgs.FastqFormat == null)
    {
        // NOTE(review): execution continues past this point unless
        // ErrorWriteLine terminates the process - confirm.
        myArgs.ErrorWriteLine("For FASTQ input, please provide a valid FASTQ format: [Sanger, Solexa, Illumina]");
    }

    #endregion

    myArgs.WriteLine("Processing the file " + myArgs.InputFile + "...this may take a while depending on the input size. Please be patient!");

    Stopwatch sw = new Stopwatch();
    sw.Start();

    #region Run QC analysis

    // Run QC analysis
    try
    {
        qcm = new Seqcos(input.Parser, myArgs.InputFile, myArgs.ExecuteSequenceQc, myArgs.ExecuteQualityScoreQc, myArgs.ExecuteBlast, myArgs.FastqFormat, dir: myArgs.OutputDirectory);
    }
    catch (ArgumentNullException e)
    {
        // NOTE(review): qcm is not assigned when construction fails, yet it is
        // used below unless ErrorWriteLine terminates the process - confirm.
        myArgs.ErrorWriteLine(e.Message);
    }

    myArgs.WriteLine("Performing sequence-level QC...");
    qcm.SequenceQc.Process();

    var time = ElapsedSeconds(sw);
    Console.WriteLine(time.ToString() + " s");

    if (!(input.Parser is FastAParser))
    {
        myArgs.WriteLine("Performing quality score-level QC...");
        qcm.QualityScoreQc.Process();

        time = ElapsedSeconds(sw, (long)time);
        Console.WriteLine(time.ToString() + " s");
    }

    #endregion

    #region Display statistics to console

    if (!myArgs.silent)
    {
        qcm.WriteInputStatistics(myArgs.UseExcelHyperlinks);
        DisplayInputStatistics();
    }

    #endregion

    #region Generate plots

    myArgs.WriteLine("Generating plots and saving them to: " + qcm.OutputDirectory);

    // Do these last, so that after plotting each section you can free up memory
    qcm.PlotSequenceLevelStats();
    qcm.FinishSequenceQc(); // free up some memory, since we won't be using this anymore

    if (!(input.Parser is FastAParser))
    {
        qcm.PlotQualityScoreLevelStats();
        qcm.FinishQualityScoreQC();
    }

    #endregion

    #region Carry out BLAST analysis

    if (myArgs.ExecuteBlast)
    {
        myArgs.WriteLine("Searching for contaminants...");

        // Convert FASTQ to FASTA
        string targetFasta = qcm.OutputDirectory + "/" + qcm.GetPrefix() + ".fa";
        BioHelper.ConvertToFASTA(qcm.ContaminationFinder.TargetSequences, targetFasta, myArgs.BlastSize, overwrite: true);

        try
        {
            // Run local NCBI BLAST
            qcm.ContaminationFinder.RunLocalBlast(myArgs.BlastDbPrefix, targetFasta);

            time = ElapsedSeconds(sw, (long)time);
            Console.WriteLine(time.ToString() + " s");
        }
        finally
        {
            // Remove the temporary FASTA file even when BLAST fails.
            File.Delete(targetFasta);
        }

        if (!myArgs.silent)
        {
            HighlightText("NCBI BLAST results from searching against " + myArgs.BlastDbPrefix + " database:\n");
            DisplayBlastResults();
        }
    }

    #endregion

    input.Parser.Close();

    sw.Stop();
    myArgs.WriteLine("\nTotal time: " + ToSeconds(sw.ElapsedMilliseconds) + " s");
}
/// <summary>
/// Initializes the main application window and stores the file filters
/// returned by BioHelper.QuerySupportedFileFilters.
/// </summary>
public SeqcosMainWindow()
{
    InitializeComponent();

    List<string> supportedFilters = BioHelper.QuerySupportedFileFilters();
    this.fileTypes = supportedFilters;
}
/// <summary>
/// This event is fired by analysisThread when the thread is invoked.
/// This event calls the main Qc application framework and executes the analysis:
/// sequence-level QC, quality score-level QC and BLAST, each only when enabled
/// in the supplied arguments. Progress is reported through the dispatcher, and
/// cancellation is checked between the QC steps.
/// </summary>
/// <param name="sender">BackgroundWorker instance</param>
/// <param name="e">Event parameters; Argument is expected to be an OpenFileArgs</param>
private void DoQcAnalysis(object sender, DoWorkEventArgs e)
{
    BackgroundWorker worker = sender as BackgroundWorker;
    if (worker == null)
    {
        return;
    }

    try
    {
        OpenFileArgs args = e.Argument as OpenFileArgs;
        System.Windows.Threading.Dispatcher dispatcher = run.Dispatcher;
        UpdateProgressDelegate update = new UpdateProgressDelegate(UpdateProgressText);

        if (args == null)
        {
            return;
        }

        dispatcher.BeginInvoke(update, 0, "Starting analysis...please be patient!");

        application = new Seqcos(args.InputInfo.Parser, args.InputInfo.Filename, args.CanRunSequenceQc, args.CanRunQualityScoreQc, args.CanRunBlast, args.FastqFormat);

        #region Sequence-level QC

        // Run sequence level QC
        if (args.CanRunSequenceQc)
        {
            // Content by position
            dispatcher.BeginInvoke(update, 25, "Performing sequence-level QC...analyzing base positions");
            application.SequenceQc.ContentByPosition();

            dispatcher.BeginInvoke(update, 45, "Performing sequence-level QC...analyzing base positions");
            application.SequenceQc.GCContentByPosition();

            if (PauseAndCheckCancellation(worker, e))
            {
                return;
            }

            // Content by sequence
            dispatcher.BeginInvoke(update, 65, "Performing sequence-level QC...analyzing sequences");
            application.SequenceQc.ContentBySequence();

            dispatcher.BeginInvoke(update, 85, "Performing sequence-level QC...analyzing sequences");

            if (PauseAndCheckCancellation(worker, e))
            {
                return;
            }

            // Plot results; 90 keeps the reported progress moving forward
            // after the 85 reported above.
            dispatcher.BeginInvoke(update, 90, "Generating plots...");
            application.PlotSequenceLevelStats();

            if (PauseAndCheckCancellation(worker, e))
            {
                return;
            }

            dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Send, update, 100, "Done sequence-level QC!");
        }

        #endregion

        #region Quality score-level QC

        // Run quality score level QC
        if (args.CanRunQualityScoreQc)
        {
            dispatcher.BeginInvoke(update, 5, "Performing quality score-level QC...analyzing base positions");
            application.QualityScoreQc.ContentByPosition();

            if (PauseAndCheckCancellation(worker, e))
            {
                return;
            }

            dispatcher.BeginInvoke(update, 25, "Performing quality score-level QC...analyzing sequences");
            application.QualityScoreQc.ContentBySequence();

            if (PauseAndCheckCancellation(worker, e))
            {
                return;
            }

            dispatcher.BeginInvoke(update, 55, "Generating plots (the boxplot will take a while...please be patient!)");
            application.PlotQualityScoreLevelStats();

            if (PauseAndCheckCancellation(worker, e))
            {
                return;
            }

            dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Send, update, 100, "Done quality score-level QC!");
        }

        // These two calls run regardless of which QC stages were enabled.
        application.WriteInputStatistics(excelFormat: false);
        application.FinishQualityScoreQC();

        #endregion

        #region BLAST

        // Run BLAST
        if (args.CanRunBlast)
        {
            dispatcher.BeginInvoke(update, 10, "Generating temporary FASTA file for BLAST...");

            // execute BLAST here..
            string targetFasta = application.OutputDirectory + "/" + application.GetPrefix() + ".fa";
            BioHelper.ConvertToFASTA(application.ContaminationFinder.TargetSequences, targetFasta, args.BlastArgs.NumInputSequences, false);

            try
            {
                dispatcher.BeginInvoke(update, 70, "Running BLAST...");
                application.ContaminationFinder.RunLocalBlast(args.BlastArgs.Database, targetFasta);

                dispatcher.BeginInvoke(update, 100, "Finished! Deleting FASTA file...");
            }
            finally
            {
                // Remove the temporary FASTA file even when BLAST fails.
                File.Delete(targetFasta);
            }
        }

        #endregion
    }
    catch (Exception ex)
    {
        // Any failure marks the work as cancelled; the message box is skipped
        // when the user had already requested cancellation.
        e.Cancel = true;

        if (worker.CancellationPending)
        {
            return;
        }

        MessageBox.Show(ex.TargetSite + ": " + ex.Message);
    }
}

/// <summary>
/// Sleeps for 100 ms and then, if cancellation is pending on the worker,
/// marks the event as cancelled.
/// </summary>
/// <param name="worker">Worker whose CancellationPending flag is checked</param>
/// <param name="e">Event parameters whose Cancel flag is set on cancellation</param>
/// <returns>true when the work was cancelled and the caller should stop</returns>
private static bool PauseAndCheckCancellation(BackgroundWorker worker, DoWorkEventArgs e)
{
    System.Threading.Thread.Sleep(100);

    if (worker.CancellationPending)
    {
        e.Cancel = true;
        return true;
    }

    return false;
}