/// <summary>
/// Looks up the sequence formatter that matches the given file name.
/// </summary>
/// <param name="fileName">Name of the file a formatter is needed for.</param>
/// <returns>
/// A new GenBank, GFF, FASTA or FASTQ formatter when the name matches one of those
/// formats; null when the name is null/empty or nothing matches.
/// </returns>
public static ISequenceFormatter FindFormatterByFile(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    // The checks run in a fixed order; the first format that matches wins.
    if (Helper.IsGenBank(fileName))
    {
        return new GenBankFormatter();
    }

    if (fileName.EndsWith(Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
    {
        return new GffFormatter();
    }

    if (Helper.IsFasta(fileName))
    {
        return new FastaFormatter();
    }

    if (Helper.IsFastQ(fileName))
    {
        return new FastQFormatter();
    }

    return null;
}
/// <summary>
/// Checks that each of the listed FASTA-style file names resolves to a FastaFormatter.
/// </summary>
public void FindFastaFromater()
{
    string[] fastaFileNames =
    {
        "dummy.fasta",
        "dummy.fa",
        "dummy.mpfa",
        "dummy.fna",
        "dummy.faa",
        "dummy.fsa",
        "dummy.fas"
    };

    foreach (string candidate in fastaFileNames)
    {
        ISequenceFormatter resolved = SequenceFormatters.FindFormatterByFile(candidate);
        Assert.IsInstanceOf(typeof(FastaFormatter), resolved);
    }
}
/// <summary>
/// Save in one of .NET Bio supported formats like fasta or GenBank.
/// </summary>
/// <param name="path">Filename; the formatter is chosen from it by
/// SequenceFormatters.FindFormatterByFileName.</param>
/// <exception cref="ArgumentException">Thrown when no formatter can be determined for <paramref name="path"/>.</exception>
public void SaveAsBio(String path)
{
    ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(path);
    if (formatter == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException on the next line.
        throw new ArgumentException("Could not determine formatter for " + path, "path");
    }

    try
    {
        formatter.Write(this.Sequence);
    }
    finally
    {
        // Close even when writing fails so the output file is not left open.
        formatter.Close();
    }
}
/// <summary>
/// Click handler of the save button: asks for a target file and writes the current
/// sequence to it as DNA using the formatter matching the chosen file name.
/// Any failure is reported to the user in a message dialog.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Routed event args.</param>
private void saveButton_Click(object sender, RoutedEventArgs e)
{
    if (sequenceTextBox.Text == null)
    {
        return;
    }

    InitializeSaveSequenceFileDialog();
    if (saveSequenceFileDialog.ShowDialog() != true)
    {
        return;
    }

    try
    {
        Fragment toSave = new Fragment();
        toSave.Name = sequenceName;
        ISequence seq = new Bio.Sequence(Alphabets.DNA, sequence);
        toSave.Sequence = seq;
        toSave.Length = sequence.Length;

        ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(saveSequenceFileDialog.FileName);
        if (formatter == null)
        {
            // Surface a meaningful message instead of a NullReferenceException.
            throw new ArgumentException("Could not determine formatter for " + saveSequenceFileDialog.FileName);
        }

        try
        {
            formatter.Write(seq);
        }
        finally
        {
            // Always release the output file, even when writing fails.
            formatter.Close();
        }
        //toSave.Construct.SaveAsBio(saveSequenceFileDialog.FileName);
    }
    catch (Exception ex)
    {
        ModernDialog.ShowMessage(ex.Message, "Exception", MessageBoxButton.OK);
    }
}
/// <summary>
/// Checks that a file name with an unknown extension yields no formatter.
/// </summary>
public void ReturnNoFormatter()
{
    string dummyFileName = "dummy.abc";
    ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFile(dummyFileName);

    // Assert.IsNull states the intent directly; the previous AreEqual call passed
    // the actual value in the "expected" position, which garbles failure messages.
    Assert.IsNull(formatter);
}
/// <summary>
/// Runs the activity: writes every input sequence to the configured output file.
/// </summary>
/// <param name="context">The execution context under which the activity executes.</param>
/// <exception cref="ArgumentException">Thrown when no formatter can be determined for the file name.</exception>
protected override void Execute(CodeActivityContext context)
{
    string targetFile = Filename.Get(context);

    ISequenceFormatter writer = SequenceFormatters.FindFormatterByFileName(targetFile);
    if (writer == null)
    {
        throw new ArgumentException("Could not determine formatter for " + targetFile);
    }

    if (LogOutput)
    {
        // Prefer a TextWriter registered as an extension; otherwise log to the console.
        TextWriter log = context.GetExtension<TextWriter>();
        if (log == null)
        {
            log = Console.Out;
        }

        log.WriteLine("Writing sequences to " + targetFile);
    }

    try
    {
        foreach (var sequence in Sequences.Get(context))
        {
            writer.Format(sequence);
        }
    }
    finally
    {
        writer.Close();
    }
}
/// <summary>
/// Creates the formatter registered under the given name for the specified file.
/// </summary>
/// <param name="fileName">File name passed to the created formatter.</param>
/// <param name="formatterName">Name of the formatter to use.</param>
/// <returns>
/// A new FastA, FastQ, GenBank or GFF formatter for the file; null when the file
/// name is null/empty or the formatter name is not recognised.
/// </returns>
public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (formatterName == Properties.Resource.FastAName)
    {
        return new FastAFormatter(fileName);
    }

    if (formatterName == Properties.Resource.FastQName)
    {
        return new FastQFormatter(fileName);
    }

    if (formatterName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankFormatter(fileName);
    }

    if (formatterName == Properties.Resource.GFF_NAME)
    {
        return new GffFormatter(fileName);
    }

    return null;
}
/// <summary>
/// Checks that a ".gb" file name resolves to a GenBankFormatter.
/// </summary>
public void FindGenBankFromatter()
{
    const string genBankFileName = "dummy.gb";

    ISequenceFormatter resolved = SequenceFormatters.FindFormatterByFile(genBankFileName);

    Assert.IsInstanceOf(typeof(GenBankFormatter), resolved);
}
/// <summary>
/// Returns the formatter registered under the given name.
/// </summary>
/// <param name="fileName">File name the formatter is wanted for; only checked for null/empty here.</param>
/// <param name="formatterName">Name of the formatter to use.</param>
/// <returns>
/// The shared FastA, FastQ, GenBank or GFF formatter, or the first entry of
/// <c>All</c> with a matching name; null when the file name is null/empty or nothing matches.
/// </returns>
public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (formatterName == Properties.Resource.FastAName)
    {
        return fasta;
    }

    if (formatterName == Properties.Resource.FastQName)
    {
        return fastq;
    }

    if (formatterName == Properties.Resource.GENBANK_NAME)
    {
        return genBank;
    }

    if (formatterName == Properties.Resource.GFF_NAME)
    {
        return gff;
    }

    // Fall back to the known formatters so custom ones added through add-ins are found.
    return All.FirstOrDefault(candidate => candidate.Name == formatterName);
}
/// <summary>
/// Returns the formatter that supports the specified file.
/// </summary>
/// <param name="fileName">File name for which the formatter is required.</param>
/// <returns>
/// The shared FastA, FastQ, GenBank or GFF formatter, or the first entry of
/// <c>All</c> whose supported file types contain the file's extension;
/// null when the file name is null/empty or nothing matches.
/// </returns>
public static ISequenceFormatter FindFormatterByFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (IsFasta(fileName))
    {
        return fasta;
    }

    if (IsFastQ(fileName))
    {
        return fastq;
    }

    if (IsGenBank(fileName))
    {
        return genBank;
    }

    if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.OrdinalIgnoreCase))
    {
        return gff;
    }

    // Fall back to the known formatters so custom ones added through add-ins are found.
    string extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension))
    {
        return null;
    }

    return All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));
}
/// <summary>
/// Returns the formatter registered under the given name, opened on the specified file.
/// </summary>
/// <param name="fileName">File the formatter is created for or opened on.</param>
/// <param name="formatterName">Name of the formatter to use.</param>
/// <returns>
/// A new FastA, FastQ or GenBank formatter for the file, or the first entry of
/// <c>All</c> with a matching name after opening it on the file; null when the
/// file name is null/empty or nothing matches.
/// </returns>
public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (formatterName == Properties.Resource.FastAName)
    {
        return new FastAFormatter(fileName);
    }

    if (formatterName == Properties.Resource.FastQName)
    {
        return new FastQFormatter(fileName);
    }

    if (formatterName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankFormatter(fileName);
    }

    // Fall back to the known formatters so custom ones added through add-ins are found.
    ISequenceFormatter custom = All.FirstOrDefault(candidate => candidate.Name == formatterName);
    if (custom != null)
    {
        // The built-in branches hand the file name to the constructor; open the
        // looked-up formatter explicitly so the caller gets it in the same state.
        custom.Open(fileName);
    }

    return custom;
}
/// <summary>
/// Constructor for discarding reads based on minimum length.
/// </summary>
/// <param name="parser">Input sequence parser</param>
/// <param name="filtered">Output sequence formatter</param>
/// <param name="discarded">Output discarded sequences formatter</param>
/// <param name="length">The minimum length that reads must satisfy in
/// order to not be discarded. Must be greater than zero.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="length"/> is zero or negative.</exception>
public DiscardByLength(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, long length)
    : base(parser, filtered, discarded)
{
    if (length <= 0)
    {
        // The single-string constructor interprets its argument as the parameter
        // name, so the parameter name and the message are passed separately.
        throw new ArgumentOutOfRangeException("length", "Minimum length must be > 0.");
    }

    minLengthThreshold = length;
}
/// <summary>
/// If all input parameters are satisfactory, this method is called to
/// prepare the parameters for analysis/post-QC filtering
/// </summary>
/// <param name="sender">Run button element</param>
/// <param name="e">Routed event args</param>
/// <exception cref="ApplicationException">Thrown when no output formatter could be
/// created or no trimming mode is selected.</exception>
protected override void PrepareRun(object sender, RoutedEventArgs e)
{
    #region Prepare event arguments for trimming

    // Build event arguments
    bool trimFromStart = this.comboItemFromLeft.IsSelected;

    // Verify input parser
    bool canAutoParse = base.VerifyInputParser(this.ioControl.Input, this.ioControl.SelectedInputParserType);
    if (!canAutoParse)
    {
        MessageBox.Show(Resource.AUTOPARSE_FAIL);
    }
    else
    {
        ISequenceFormatter filtered = null;
        ISequenceFormatter discarded = null;

        try
        {
            // Verify output sequence formatters
            filtered = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.OutputFilename);
            discarded = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.DiscardedFilename);

            if (filtered == null)
            {
                // The program shouldn't reach here
                throw new ApplicationException(Resource.NonsenseError);
            }

            FilterToolArgs args;

            if (this.IsInByLengthMode)
            {
                args = new TrimByLengthArgs(
                    Convert.ToDouble(this.trimLengthValue.Text),
                    trimFromStart,
                    this.ioControl.Input,
                    filtered,
                    discarded,
                    this.ioControl.OutputFilename);
            }
            else if (this.IsInByQualityMode)
            {
                // An empty minimum-length box means "use the default".
                int minLength = this.trimQualityMinLengthValue.Text.Equals("")
                    ? TrimByQuality.MIN_LENGTH_DEFAULT
                    : Convert.ToInt32(this.trimQualityMinLengthValue.Text);

                args = new TrimByQualityArgs(
                    Convert.ToByte(this.trimQualityValue.Text),
                    trimFromStart,
                    minLength,
                    this.ioControl.Input,
                    filtered,
                    discarded,
                    this.ioControl.OutputFilename);
            }
            else if (this.IsInByRegexMode)
            {
                args = new TrimByRegexArgs(
                    this.ioControl.Input,
                    filtered,
                    discarded,
                    this.trimRegexPattern.Text,
                    this.ioControl.OutputFilename);
            }
            else
            {
                // The program shouldn't reach here either.
                throw new ApplicationException(Resource.NonsenseError);
            }

            this.PrepareToTrim(sender, args);
        }
        finally
        {
            // Release the output files on every path, including the error paths
            // (bad numeric input, missing formatter, failures while trimming).
            if (filtered != null)
            {
                filtered.Dispose();
            }

            if (discarded != null)
            {
                discarded.Dispose();
            }
        }
    }

    #endregion
}
/// <summary>
/// Constructor for trimming sequences based on length or percentage
/// </summary>
/// <param name="parser">Input sequences parser</param>
/// <param name="filtered">Output sequences formatter</param>
/// <param name="discarded">Discarded sequences formatter</param>
/// <param name="newLength">If > 1, this is the minimum length (rounded to the nearest integer)
/// sequences will be trimmed at. If between 0 and 1, this will be treated as
/// a percentage and reads are trimmed based on this amount (i.e. if newLength = 0.5, 50%
/// of the read will be trimmed). Must be greater than zero.</param>
/// <param name="fromStart">Trim from the start of the read</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="newLength"/> is zero or negative.</exception>
public TrimByLength(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, double newLength, bool fromStart)
    : base(parser, filtered, discarded, fromStart)
{
    if (newLength <= 0)
    {
        // Pass the parameter name and the message separately (the single-string
        // constructor would treat the message as the parameter name). The message
        // now matches the check, which also rejects zero.
        throw new ArgumentOutOfRangeException("newLength", "Trim length must be greater than zero.");
    }

    this.TrimLength = newLength;
}
/// <summary>
/// If all input parameters are satisfactory, this method is called to
/// prepare the parameters for analysis/post-QC filtering
/// </summary>
/// <param name="sender">Run button element</param>
/// <param name="e">Routed event args</param>
/// <exception cref="ApplicationException">Thrown when no output formatter could be
/// created or no discard mode is selected.</exception>
protected override void PrepareRun(object sender, RoutedEventArgs e)
{
    #region Prepare event arguments for discarding

    // Verify input parser
    bool canAutoParse = base.VerifyInputParser(this.ioControl.Input, this.ioControl.SelectedInputParserType);
    if (!canAutoParse)
    {
        MessageBox.Show(Resource.AUTOPARSE_FAIL);
    }
    else
    {
        ISequenceFormatter filtered = null;
        ISequenceFormatter discarded = null;

        try
        {
            // Verify output sequence formatters
            filtered = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.OutputFilename);
            discarded = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.DiscardedFilename);

            if (filtered == null)
            {
                // The program shouldn't reach here
                throw new ApplicationException(Resource.NonsenseError);
            }

            FilterToolArgs args;

            if (this.IsInByLengthMode)
            {
                args = new DiscardByLengthArgs(
                    this.ioControl.Input,
                    filtered,
                    discarded,
                    Convert.ToInt64(this.discardLengthValue.Text),
                    this.ioControl.OutputFilename);
            }
            else if (this.IsInByQualityMode)
            {
                // Convert the text entered in the control, like the other modes do;
                // converting the control object itself cannot produce a byte.
                args = new DiscardByMeanQualityArgs(
                    this.ioControl.Input,
                    filtered,
                    discarded,
                    Convert.ToByte(this.discardQualityValue.Text),
                    this.ioControl.OutputFilename);
            }
            else if (this.IsInByRegexMode)
            {
                args = new DiscardByRegexArgs(
                    this.ioControl.Input,
                    filtered,
                    discarded,
                    this.discardRegexPattern.Text,
                    this.ioControl.OutputFilename);
            }
            else
            {
                // The program shouldn't reach here either.
                throw new ApplicationException(Resource.NonsenseError);
            }

            this.PrepareToDiscard(sender, args);
        }
        finally
        {
            // Release the output files on every path, including the error paths.
            if (filtered != null)
            {
                filtered.Dispose();
            }

            if (discarded != null)
            {
                discarded.Dispose();
            }
        }
    }

    #endregion
}
/// <summary>
/// Constructor for discarding reads based on minimum mean quality score.
///
/// NOTE:
/// Require Sanger Phred-base scores (i.e. ASCII-33)
/// </summary>
/// <param name="parser">Input sequence parser; must be a FastQParser</param>
/// <param name="filtered">Formatter for filtered reads</param>
/// <param name="discarded">Formatter for discarded reads</param>
/// <param name="mean">Sanger Phred-based mean quality score</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="parser"/> is not a FastQParser.</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="mean"/> exceeds the Sanger score range.</exception>
public DiscardByMeanQuality(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, byte mean)
    : base(parser, filtered, discarded)
{
    if (!(parser is FastQParser))
    {
        throw new ArgumentException("Invalid SequenceParser type.");
    }

    // A byte can never be negative, so only the upper bound needs checking.
    if (mean > QualitativeSequence.SangerMaxQualScore - QualitativeSequence.SangerMinQualScore)
    {
        // Parameter name first, message second; the single-string constructor
        // would treat the message as the parameter name.
        throw new ArgumentOutOfRangeException("mean", "Invalid Phred-based quality score threshold.");
    }

    this.MeanQualityThreshold = mean;
}
/// <summary>
/// Asserts that the runtime type of the given formatter can be obtained; when the
/// formatter is null the resulting NullReferenceException is logged instead of propagated.
/// </summary>
/// <param name="formatter">Formatter under test; may be null.</param>
public void ValidateFormatterExceptions(ISequenceFormatter formatter)
{
    try
    {
        Type runtimeType = formatter.GetType();
        Assert.IsNotNull(runtimeType);
    }
    catch (NullReferenceException nullReference)
    {
        // Log to VSTest GUI.
        string logEntry = string.Format(
            (IFormatProvider)null,
            "Sequence Formatter P2 : Validated Exception {0} successfully",
            nullReference.Message);
        ApplicationLog.WriteLine(logEntry);
    }
}
/// <summary>
/// Converts a sequence file to another format.
/// Expects exactly two arguments: the source file name and the destination file name.
/// </summary>
/// <param name="args">Command line arguments: source and destination file names.</param>
static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine("Need source and destination filenames.");
        return;
    }

    string sourceFilename = args[0];
    string destFilename = args[1];

    ISequenceParser parser = SequenceParsers.FindParserByFileName(sourceFilename);
    if (parser == null)
    {
        // Fall back to any registered parser that lists the file's extension.
        string sourceExtension = Path.GetExtension(sourceFilename);
        parser = SequenceParsers.All.FirstOrDefault(
            sp => sp.SupportedFileTypes.Contains(sourceExtension));
        if (parser == null)
        {
            Console.WriteLine("Failed to locate parser for {0}", sourceFilename);
            return;
        }

        parser.Open(sourceFilename);
    }

    try
    {
        ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(destFilename);
        if (formatter == null)
        {
            // Fall back to any registered formatter that lists the file's extension.
            string destExtension = Path.GetExtension(destFilename);
            formatter = SequenceFormatters.All.FirstOrDefault(
                sp => sp.SupportedFileTypes.Contains(destExtension));
            if (formatter == null)
            {
                Console.WriteLine("Failed to locate formatter for {0}", destFilename);
                return;
            }

            formatter.Open(destFilename);
        }

        try
        {
            foreach (var sequence in parser.Parse())
            {
                formatter.Write(sequence);
            }
        }
        finally
        {
            formatter.Close();
        }
    }
    finally
    {
        // Runs on every exit path, including the early return when no formatter
        // is found, so the input file is never left open.
        parser.Close();
    }
}
/// <summary>
/// Base constructor shared by the read discarders: starts parsing the input and
/// stores the writers and zeroed counters.
/// </summary>
/// <param name="parser">Parser supplying the input sequences; required.</param>
/// <param name="filtered">Formatter receiving the filtered data; required.</param>
/// <param name="discarded">Formatter receiving the discarded data; not validated here.</param>
/// <exception cref="ArgumentNullException">Thrown when parser or filtered is null.</exception>
public Discarder(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded)
{
    if (parser == null)
    {
        throw new ArgumentNullException("parser");
    }

    if (filtered == null)
    {
        throw new ArgumentNullException("filtered");
    }

    Sequences = parser.Parse();

    FilteredWriter = filtered;
    DiscardedWriter = discarded;

    Counted = 0;
    DiscardCount = 0;
}
/// <summary>
/// Convert input file to output file using the specified format conversion.
/// </summary>
/// <exception cref="Exception">Thrown when the input file does not exist or no
/// parser/formatter matches the input/output file name.</exception>
/// <exception cref="OperationCanceledException">Thrown when parsing or formatting a
/// sequence fails; the original failure is available as the inner exception.</exception>
public void ConvertFile()
{
    //make sure input file is valid
    if (!File.Exists(this.InputFile))
    {
        throw new Exception("Input file does not exist.");
    }

    //Finds a parser and opens the file
    ISequenceParser inputParser = SequenceParsers.FindParserByFileName(this.InputFile);
    if (inputParser == null)
    {
        throw new Exception("Input file not a valid file format to parse.");
    }

    try
    {
        //Finds a formatter and opens the file
        ISequenceFormatter outputFormatter = SequenceFormatters.FindFormatterByFileName(this.OutputFile);
        if (outputFormatter == null)
        {
            throw new Exception("Output file not a valid file format for conversion.");
        }

        try
        {
            foreach (ISequence sequence in inputParser.Parse())
            {
                outputFormatter.Format(sequence);
            }
        }
        catch (Exception ex)
        {
            // Keep the root cause as the inner exception instead of discarding it.
            throw new OperationCanceledException(
                string.Format(
                    "Unable to convert sequences from {0} to {1} - verify that the input sequences have the appropriate type of data to convert to the output formatter.",
                    inputParser.Name,
                    outputFormatter.Name),
                ex);
        }
        finally
        {
            outputFormatter.Close();
        }
    }
    finally
    {
        // Also reached when no formatter is found, so the parser is never leaked.
        inputParser.Close();
    }
}
/// <summary>
/// Validates the name, description and supported file types of the parser or
/// formatter found for each configured file path.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="IsParser">True to validate the parsers found for the files,
/// false to validate the formatters.</param>
void ValidateSequenceFileParser(string nodeName, bool IsParser)
{
    // Expected values and the files to probe come from the Xml configuration.
    string[] filePaths = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.FilePathsNode);
    string parserDescription = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DescriptionNode);
    string parserName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ParserNameNode);
    string fileTypes = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FileTypesNode);

    foreach (string filePath in filePaths)
    {
        if (!IsParser)
        {
            using (ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(filePath))
            {
                // Line breaks are stripped before comparing the description.
                string flattened = formatter.Description.Replace("\r", "").Replace("\n", "");

                Assert.AreEqual(parserName, formatter.Name);
                Assert.AreEqual(parserDescription, flattened);
                Assert.AreEqual(fileTypes, formatter.SupportedFileTypes);
            }

            continue;
        }

        using (ISequenceParser parser = SequenceParsers.FindParserByFileName(filePath))
        {
            // Line breaks are stripped before comparing the description.
            string flattened = parser.Description.Replace("\r", "").Replace("\n", "");

            Assert.AreEqual(parserName, parser.Name);
            Assert.AreEqual(parserDescription, flattened);
            Assert.AreEqual(fileTypes, parser.SupportedFileTypes);
        }
    }

    Console.WriteLine(string.Format((IFormatProvider)null,
        "SequenceParser : Type of the parser is validated successfully"));
    ApplicationLog.WriteLine("Type of the parser is validated successfully");
}
/// <summary>
/// Base constructor shared by the read trimmers: starts parsing the input and
/// stores the writers, zeroed counters and the trim direction.
/// </summary>
/// <param name="parser">Parser supplying the input sequences; required.</param>
/// <param name="filtered">Formatter receiving the output data; required.</param>
/// <param name="discarded">Formatter receiving the discarded data; not validated here.</param>
/// <param name="fromLeft">Trim from the start of the read.</param>
/// <exception cref="ArgumentNullException">Thrown when parser or filtered is null.</exception>
public Trimmer(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, bool fromLeft)
{
    if (parser == null)
    {
        throw new ArgumentNullException("parser");
    }

    if (filtered == null)
    {
        throw new ArgumentNullException("filtered");
    }

    Sequences = parser.Parse();

    FilteredWriter = filtered;
    DiscardedWriter = discarded;

    Counted = 0;
    TrimCount = 0;
    DiscardCount = 0;

    TrimFromStart = fromLeft;
}
/// <summary>
/// Create a SequenceFormatter object based on the given format type.
/// </summary>
/// <param name="parserName">Name of the wanted format; compared against the names of
/// the FastA and FastQ formatters.</param>
/// <param name="outputFilename">File the formatter is created for.</param>
/// <returns>
/// A new FastA or FastQ formatter for the file; null when the file name is
/// null/empty or the format name matches neither formatter.
/// </returns>
internal ISequenceFormatter DetermineSequenceFormatter(string parserName, string outputFilename)
{
    // A missing file name (null as well as empty) means "no output wanted".
    if (string.IsNullOrEmpty(outputFilename))
    {
        return null;
    }

    // The static Equals tolerates a null parserName instead of throwing.
    if (string.Equals(parserName, SequenceFormatters.Fasta.Name, StringComparison.Ordinal))
    {
        return new FastAFormatter(outputFilename);
    }

    if (string.Equals(parserName, SequenceFormatters.FastQ.Name, StringComparison.Ordinal))
    {
        return new FastQFormatter(outputFilename);
    }

    return null;
}
/// <summary>
/// Returns a formatter for the specified file, ready to write to it.
/// </summary>
/// <param name="fileName">File name for which the formatter is required.</param>
/// <returns>
/// A new FastA, FastQ, GenBank or GFF formatter for the file, or the first entry of
/// <c>All</c> whose supported file types contain the file's extension after opening
/// it on the file; null when the file name is null/empty or nothing matches.
/// </returns>
public static ISequenceFormatter FindFormatterByFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (IsFasta(fileName))
    {
        return new FastAFormatter(fileName);
    }

    if (IsFastQ(fileName))
    {
        return new FastQFormatter(fileName);
    }

    if (IsGenBank(fileName))
    {
        return new GenBankFormatter(fileName);
    }

    if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
    {
        return new GffFormatter(fileName);
    }

    // Fall back to the known formatters so custom ones added through add-ins are found.
    string extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension))
    {
        return null;
    }

    ISequenceFormatter custom = All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));
    if (custom != null)
    {
        // The built-in branches hand the file name to the constructor; open the
        // looked-up formatter explicitly so the caller gets it in the same state.
        custom.Open(fileName);
    }

    return custom;
}
/// <summary>
/// Trim sequences based on quality
///
/// NOTE:
/// Currently, the application assumes the FASTQ format is SANGER
/// (Illumina 1.8+ is adopting Sanger format anyways). Thus, the
/// Phred quality score threshold must be within 0 and 93.
/// </summary>
/// <param name="parser">Input sequences parser</param>
/// <param name="filtered">Output sequences formatter</param>
/// <param name="discarded">Discarded sequences formatter</param>
/// <param name="q">Sanger Phred-based quality score threshold</param>
/// <param name="fromStart">Indicates whether trimming from the start of the read is permitted</param>
/// <param name="minLength">Minimum trim length; must not be negative</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="q"/> exceeds the
/// Sanger score range or <paramref name="minLength"/> is negative.</exception>
public TrimByQuality(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, byte q, bool fromStart, int minLength = 1)
    : base(parser, filtered, discarded, fromStart)
{
    // A byte can never be negative, so only the upper bound needs checking.
    if (q > QualitativeSequence.SangerMaxQualScore - QualitativeSequence.SangerMinQualScore)
    {
        // Parameter name first, message second; the single-string constructor
        // would treat the message as the parameter name.
        throw new ArgumentOutOfRangeException("q", "Invalid Phred-based quality score threshold.");
    }

    if (minLength < 0)
    {
        throw new ArgumentOutOfRangeException("minLength", "Minimum length cannot be less than zero.");
    }

    this.QualityThreshold = q;
    this.MinLength = minLength;
}
/// <summary>
/// Instantiates every formatter implementation reported by the registration service.
/// </summary>
/// <returns>The formatters that could be created; implementations that fail to
/// instantiate are skipped.</returns>
private static IEnumerable<ISequenceFormatter> GetSequenceFormatters()
{
    List<ISequenceFormatter> created = new List<ISequenceFormatter>();

    foreach (var implementation in BioRegistrationService.LocateRegisteredParts<ISequenceFormatter>())
    {
        try
        {
            ISequenceFormatter instance = Activator.CreateInstance(implementation) as ISequenceFormatter;
            if (instance == null)
            {
                continue;
            }

            created.Add(instance);
        }
        catch
        {
            // Cannot create - no default ctor?
        }
    }

    return(created);
}
/// <summary>
/// Click handler of the save button: asks for a target file and writes the edited
/// text to it as a DNA sequence using the formatter matching the chosen file name.
/// Any failure is reported to the user in a message dialog.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Routed event args.</param>
private void save_Click(object sender, RoutedEventArgs e)
{
    if (changeTextBox.Text == null)
    {
        return;
    }

    InitializeSaveSequenceFileDialog();
    if (saveEditSequenceFileDialog.ShowDialog() != true)
    {
        return;
    }

    try
    {
        string sequenceToSave = changeTextBox.Text;
        ISequence seq = new Bio.Sequence(Alphabets.DNA, sequenceToSave);

        ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(saveEditSequenceFileDialog.FileName);
        if (formatter == null)
        {
            // Surface a meaningful message instead of a NullReferenceException.
            throw new ArgumentException("Could not determine formatter for " + saveEditSequenceFileDialog.FileName);
        }

        try
        {
            formatter.Write(seq);
        }
        finally
        {
            // Always release the output file, even when writing fails.
            formatter.Close();
        }
    }
    catch (Exception ex)
    {
        ModernDialog.ShowMessage(ex.Message, "Exception", MessageBoxButton.OK);
    }
}
/// <summary>
/// Command line entry point: discards reads from the input file by length or by
/// mean quality and writes the kept (and optionally the discarded) reads out.
/// </summary>
/// <param name="args">Command line arguments, interpreted by ProcessCommandLine.</param>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.FileList[0]);
    input.DetermineParserUtil();

    // Create a sequence formatter object
    ISequenceFormatter filteredFormatter = null;
    ISequenceFormatter discardedFormatter = null;

    try
    {
        #region Discarding

        // If the format is FASTA, then output will be FASTA.
        // Everything else (assuming quality scores are available)
        // will be outputted to FASTQ.
        if (input.Parser is FastAParser)
        {
            filteredFormatter = new FastAFormatter(myArgs.FileList[1]);
            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
            }
        }
        else
        {
            filteredFormatter = new FastQFormatter(myArgs.FileList[1]);
            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
            }
        }

        // Initialize a Discarder object
        Discarder myDiscarder = null;

        // By now, we should have sanity checked the command line arguments. So we should be able to
        // figure out what mode is being used simply by checking the properties.
        if (myArgs.DiscardByLength > 0)
        {
            myDiscarder = new DiscardByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.DiscardByLength);
        }
        else if (myArgs.DiscardByQuality > 0)
        {
            if (!(input.Parser is FastQParser))
            {
                Console.Error.WriteLine("Input file must be in FASTQ format.");
                Environment.Exit(-1);
            }

            myDiscarder = new DiscardByMeanQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.DiscardByQuality);
        }
        else
        {
            // Should never reach this line.
            Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
            Environment.Exit(-1);
        }

        myDiscarder.DiscardReads();

        #endregion

        if (myArgs.Verbose)
        {
            Console.Error.WriteLine("Discarded {0}/{1} sequences.", myDiscarder.DiscardCount, myDiscarder.Counted);
            Console.Error.WriteLine("Non-discarded sequences saved in {0}.", Path.GetFullPath(myArgs.FileList[1]));
            if (myArgs.DiscardedFile != null)
            {
                Console.Error.WriteLine("Discarded sequences saved in {0}.", Path.GetFullPath(myArgs.DiscardedFile));
            }

            Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
        }
    }
    finally
    {
        // Single clean-up point: each resource is closed exactly once (the verbose
        // branch no longer closes the discarded formatter a second time), and
        // everything is released when an exception interrupts the run.
        if (input.Parser != null)
        {
            input.Parser.Close();
        }

        if (filteredFormatter != null)
        {
            filteredFormatter.Close();
        }

        if (discardedFormatter != null)
        {
            discardedFormatter.Close();
        }
    }
}
/// <summary>
/// Method called when the user clicks Ok button on the export selection dialog.
/// Builds one sequence per selected item (plus format-specific metadata for FastQ,
/// GenBank and GFF), hands the result to the completion callback and closes the dialog.
/// In case there was an error parsing, the failing item is flagged, the error message is
/// shown and the selection dialog is displayed again.
/// </summary>
/// <param name="dialog">Dialog which raised this event; it is used as an ExportSelectionDialog.</param>
private void OnExportSequenceDialogSubmit(ISelectionDialog dialog)
{
    // NOTE(review): the result of the 'as' cast is used without a null check.
    ExportSelectionDialog selectionDialog = dialog as ExportSelectionDialog;
    List <ISequence> parsedSequences = new List <ISequence>();
    List <Range> rangesInCurrentSequenceItem;
    List <InputSequenceItem> sequenceItems = selectionDialog.GetSequences();
    // The formatter the export targets is the first callback argument; it decides
    // which kind of metadata is parsed below.
    ISequenceFormatter formatterUsed = argsForCallback[0] as ISequenceFormatter;

    try
    {
        foreach (InputSequenceItem currentSequenceItem in sequenceItems)
        {
            try
            {
                ISequence sequenceForCurrentItem = null;

                // Parse sequence
                if (formatterUsed is GffFormatter && string.IsNullOrWhiteSpace(currentSequenceItem.SequenceAddress))
                {
                    // For GFF an item without a sequence address gets an empty DNA sequence.
                    sequenceForCurrentItem = new Sequence(Alphabets.DNA, "");
                }
                else
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.SequenceAddress);
                    if (rangesInCurrentSequenceItem.Count > 0)
                    {
                        // get from cache with default UI options.
                        sequenceForCurrentItem = SequenceCache.TryGetSequence(rangesInCurrentSequenceItem, selectionDialog.InputParamsAsKey) as ISequence;
                        if (sequenceForCurrentItem == null) // if not in cache
                        {
                            sequenceForCurrentItem = ExcelSelectionParser.RangeToSequence(rangesInCurrentSequenceItem, selectionDialog.TreatBlankCellsAsGaps, selectionDialog.MoleculeType, currentSequenceItem.SequenceName); //added default from UI as auto detect and ignore space
                            SequenceCache.Add(rangesInCurrentSequenceItem, sequenceForCurrentItem, selectionDialog.InputParamsAsKey);
                        }
                        else
                        {
                            // Set the ID
                            sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, currentSequenceItem.SequenceName);
                        }
                    }
                    else
                    {
                        // No ranges for this item: the sequence stays null and is still
                        // added to the result list below.
                        currentSequenceItem.SetErrorStatus(false);
                    }
                }

                //Parse metadata
                if (formatterUsed is Bio.IO.FastQ.FastQFormatter)
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                    if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                    {
                        sequenceForCurrentItem = ExcelSelectionParser.RangeToQualitativeSequence(rangesInCurrentSequenceItem, sequenceForCurrentItem);
                    }
                }
                else if (formatterUsed is GenBankFormatter)
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                    if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                    {
                        try
                        {
                            GenBankMetadata metadata = ExcelSelectionParser.RangeToGenBankMetadata(rangesInCurrentSequenceItem);
                            sequenceForCurrentItem.Metadata[Helper.GenBankMetadataKey] = metadata;
                            if (string.IsNullOrEmpty(sequenceForCurrentItem.ID))
                            {
                                // Set the ID
                                sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, metadata.Locus.Name);
                            }
                        }
                        catch
                        {
                            // Any failure in this block is replaced by a generic parse error;
                            // the original exception is not kept.
                            throw new Exception(Properties.Resources.GenbankMetadataParseError);
                        }
                    }
                }
                else if (formatterUsed is GffFormatter)
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                    if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                    {
                        ExcelSelectionParser.RangeToGffMetadata(sequenceForCurrentItem, rangesInCurrentSequenceItem);
                    }
                }

                // Add the parsed sequence to the list of parsed sequences
                parsedSequences.Add(sequenceForCurrentItem);
            }
            catch
            {
                // Set error status on item and re-throw
                currentSequenceItem.SetErrorStatus(true);
                throw;
            }
        }

        // On successful parsing...
        if (inputSequenceSelectionComplete != null)
        {
            inputSequenceSelectionComplete(parsedSequences, this.argsForCallback);
        }

        // NOTE(review): this detaches OnInputSequenceDialogSubmit, not this method
        // (OnExportSequenceDialogSubmit) - confirm that is the intended handler.
        selectionDialog.InputSelectionDialogSubmitting -= OnInputSequenceDialogSubmit;
        selectionDialog.Close();
    }
    catch (Exception ex)
    {
        // Report the problem and show the dialog again.
        MessageBox.Show(ex.Message, Resources.CAPTION, MessageBoxButtons.OK, MessageBoxIcon.Error);
        selectionDialog.ShowDialog();
    }
}
/// <summary>
/// Creates the event arguments for a trim-by-quality run and the TrimByQuality
/// trimmer they carry.
/// </summary>
/// <param name="q">Quality threshold handed to the trimmer.</param>
/// <param name="trimFromStart">Trim direction flag handed to the trimmer.</param>
/// <param name="minLength">Minimum length handed to the trimmer.</param>
/// <param name="input">Input information; its parser feeds the trimmer.</param>
/// <param name="filtered">Formatter for the trimmer's output sequences.</param>
/// <param name="discarded">Formatter for the trimmer's discarded sequences.</param>
/// <param name="outFile">Output filename, passed to the base constructor.</param>
public TrimByQualityArgs(byte q, bool trimFromStart, int minLength, InputSubmission input, ISequenceFormatter filtered, ISequenceFormatter discarded, string outFile)
    : base(input, outFile)
{
    var inputParser = input.Parser;

    this.trimmer = new TrimByQuality(
        inputParser,
        filtered,
        discarded,
        q,
        trimFromStart,
        minLength);
}
/// <summary>
/// Creates the event arguments for a discard-by-mean-quality run and the
/// DiscardByMeanQuality discarder they carry.
/// </summary>
/// <param name="input">Input information; its parser feeds the discarder.</param>
/// <param name="filtered">Output sequence formatter.</param>
/// <param name="discarded">Discarded reads sequence formatter.</param>
/// <param name="mean">Minimum mean quality threshold.</param>
/// <param name="outFile">Output filename, passed to the base constructor.</param>
public DiscardByMeanQualityArgs(InputSubmission input, ISequenceFormatter filtered, ISequenceFormatter discarded, byte mean, string outFile)
    : base(input, outFile)
{
    var inputParser = input.Parser;

    this.discarder = new DiscardByMeanQuality(
        inputParser,
        filtered,
        discarded,
        mean);
}
/// <summary>
/// Reads all sequences from the input file and writes them to the output file
/// with the given formatter. The formatter is closed and disposed afterwards.
/// </summary>
/// <param name="inputFileName">File to read; the parser is chosen from this name.</param>
/// <param name="outputFileName">File the formatter is opened on.</param>
/// <param name="targetFormatter">Formatter used to write the sequences.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="targetFormatter"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when no parser can be determined for the input file.</exception>
public static void ConvertFromOneFormatToAnother(string inputFileName, string outputFileName, ISequenceFormatter targetFormatter)
{
    if (targetFormatter == null)
    {
        throw new ArgumentNullException("targetFormatter");
    }

    var parser = SequenceParsers.FindParserByFileName(inputFileName);
    if (parser == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException below.
        throw new ArgumentException("Could not determine parser for " + inputFileName, "inputFileName");
    }

    try
    {
        // Materialise the input before the output file is opened.
        var sequences = Helper.ConvertIenumerableToList(parser.Parse());

        targetFormatter.Open(outputFileName);
        try
        {
            foreach (var sequence in sequences)
            {
                targetFormatter.Write(sequence);
            }
        }
        finally
        {
            // Release the output file even when a write fails.
            targetFormatter.Close();
            targetFormatter.Dispose();
        }
    }
    finally
    {
        // The parser was previously never closed.
        parser.Close();
    }
}