Exemplo n.º 1
0
        /// <summary>
        /// Picks a sequence formatter based on the supplied file name.
        /// </summary>
        /// <param name="fileName">File name for which a formatter is required.</param>
        /// <returns>
        /// A new GenBank, GFF, FASTA or FASTQ formatter when the file name matches one of
        /// those checks; null when <paramref name="fileName"/> is null or empty, or when
        /// nothing matches.
        /// </returns>
        public static ISequenceFormatter FindFormatterByFile(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            // First match wins, so the order of the checks is significant.
            if (Helper.IsGenBank(fileName))
            {
                return new GenBankFormatter();
            }

            if (fileName.EndsWith(Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
            {
                return new GffFormatter();
            }

            if (Helper.IsFasta(fileName))
            {
                return new FastaFormatter();
            }

            if (Helper.IsFastQ(fileName))
            {
                return new FastQFormatter();
            }

            return null;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Checks that each listed file name resolves to a <c>FastaFormatter</c>.
        /// </summary>
        public void FindFastaFromater()
        {
            // Checked in this order; the first failing name aborts the test.
            string[] fastaFileNames =
            {
                "dummy.fasta",
                "dummy.fa",
                "dummy.mpfa",
                "dummy.fna",
                "dummy.faa",
                "dummy.fsa",
                "dummy.fas"
            };

            foreach (string candidate in fastaFileNames)
            {
                ISequenceFormatter resolved = SequenceFormatters.FindFormatterByFile(candidate);
                Assert.IsInstanceOf(typeof(FastaFormatter), resolved);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Save in one of .NET Bio supported formats like fasta or GenBank.
        /// </summary>
        /// <param name="path">Filename; it is used to look up the formatter.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when no formatter can be determined for <paramref name="path"/>.
        /// </exception>
        public void SaveAsBio(String path)
        {
            ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(path);

            // The lookup returns null for unknown file names; fail with a clear message
            // instead of a NullReferenceException on the first use.
            if (formatter == null)
            {
                throw new ArgumentException("Could not determine formatter for " + path, "path");
            }

            try
            {
                formatter.Write(this.Sequence);
            }
            finally
            {
                // Close the formatter even if the write fails.
                formatter.Close();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Handles the save button: asks for a target file and writes the current
        /// sequence to it using the formatter selected from the chosen file name.
        /// Any failure is reported to the user in a message dialog.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Routed event args.</param>
        private void saveButton_Click(object sender, RoutedEventArgs e)
        {
            if (sequenceTextBox.Text != null)
            {
                InitializeSaveSequenceFileDialog();

                if (saveSequenceFileDialog.ShowDialog() == true)
                {
                    try
                    {
                        // NOTE(review): toSave is populated but never written; only seq is
                        // passed to the formatter. Confirm whether the Fragment is still needed.
                        Fragment toSave = new Fragment();
                        toSave.Name = sequenceName;
                        ISequence seq = new Bio.Sequence(Alphabets.DNA, sequence);
                        toSave.Sequence = seq;
                        toSave.Length   = sequence.Length;

                        string fileName = saveSequenceFileDialog.FileName;
                        ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(fileName);
                        if (formatter == null)
                        {
                            // Give the user a meaningful message rather than a null-reference error.
                            throw new ArgumentException("Could not determine formatter for " + fileName);
                        }

                        try
                        {
                            formatter.Write(seq);
                        }
                        finally
                        {
                            // Close the formatter even if the write fails.
                            formatter.Close();
                        }
                    }
                    catch (Exception ex)
                    {
                        ModernDialog.ShowMessage(ex.Message, "Exception", MessageBoxButton.OK);
                    }
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Checks that a file name with an unrecognized extension yields no formatter.
        /// </summary>
        public void ReturnNoFormatter()
        {
            string             dummyFileName = "dummy.abc";
            ISequenceFormatter formatter     = SequenceFormatters.FindFormatterByFile(dummyFileName);

            // IsNull states the intent directly and avoids swapping expected/actual.
            Assert.IsNull(formatter);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Writes every sequence supplied to the activity to the configured file,
        /// using the formatter selected from the file name.
        /// </summary>
        /// <param name="context">The execution context under which the activity executes.</param>
        /// <exception cref="ArgumentException">No formatter could be determined for the file name.</exception>
        protected override void Execute(CodeActivityContext context)
        {
            string targetPath = Filename.Get(context);
            ISequenceFormatter writer = SequenceFormatters.FindFormatterByFileName(targetPath);

            if (writer == null)
            {
                throw new ArgumentException("Could not determine formatter for " + targetPath);
            }

            if (LogOutput)
            {
                // Use the TextWriter extension when one is registered, otherwise the console.
                TextWriter log = context.GetExtension<TextWriter>();
                if (log == null)
                {
                    log = Console.Out;
                }

                log.WriteLine("Writing sequences to " + targetPath);
            }

            try
            {
                foreach (var sequence in Sequences.Get(context))
                {
                    writer.Format(sequence);
                }
            }
            finally
            {
                // Always close the formatter, even when formatting fails part-way.
                writer.Close();
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates the formatter identified by <paramref name="formatterName"/> for the given file.
        /// </summary>
        /// <param name="fileName">File name passed to the formatter's constructor.</param>
        /// <param name="formatterName">Name of the formatter to use.</param>
        /// <returns>
        /// A new FastA, FastQ, GenBank or GFF formatter; null when <paramref name="fileName"/>
        /// is null or empty, or when the name matches none of them.
        /// </returns>
        public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            if (formatterName == Properties.Resource.FastAName)
            {
                return new FastAFormatter(fileName);
            }

            if (formatterName == Properties.Resource.FastQName)
            {
                return new FastQFormatter(fileName);
            }

            if (formatterName == Properties.Resource.GENBANK_NAME)
            {
                return new GenBankFormatter(fileName);
            }

            if (formatterName == Properties.Resource.GFF_NAME)
            {
                return new GffFormatter(fileName);
            }

            return null;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Checks that a ".gb" file name resolves to a <c>GenBankFormatter</c>.
        /// </summary>
        public void FindGenBankFromatter()
        {
            ISequenceFormatter resolved = SequenceFormatters.FindFormatterByFile("dummy.gb");

            Assert.IsInstanceOf(typeof(GenBankFormatter), resolved);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Looks up the formatter identified by <paramref name="formatterName"/>.
        /// </summary>
        /// <param name="fileName">File name for which the formatter is required; must be non-empty.</param>
        /// <param name="formatterName">Name of the formatter to use.</param>
        /// <returns>
        /// The matching built-in formatter, otherwise the first entry of <c>All</c> whose
        /// name matches; null when nothing matches or <paramref name="fileName"/> is null or empty.
        /// </returns>
        public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            if (formatterName == Properties.Resource.FastAName)
            {
                return fasta;
            }

            if (formatterName == Properties.Resource.FastQName)
            {
                return fastq;
            }

            if (formatterName == Properties.Resource.GENBANK_NAME)
            {
                return genBank;
            }

            if (formatterName == Properties.Resource.GFF_NAME)
            {
                return gff;
            }

            // Fall back to the known formatters so that custom ones added through add-ins are found.
            return All.FirstOrDefault(candidate => candidate.Name == formatterName);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Looks up the formatter that supports the specified file.
        /// </summary>
        /// <param name="fileName">File name for which the formatter is required.</param>
        /// <returns>
        /// The matching built-in formatter, otherwise the first entry of <c>All</c> whose
        /// supported file types contain the file's extension; null when nothing matches
        /// or <paramref name="fileName"/> is null or empty.
        /// </returns>
        public static ISequenceFormatter FindFormatterByFileName(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            // Built-in formats are tried first; the first match wins.
            if (IsFasta(fileName))
            {
                return fasta;
            }

            if (IsFastQ(fileName))
            {
                return fastq;
            }

            if (IsGenBank(fileName))
            {
                return genBank;
            }

            if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.OrdinalIgnoreCase))
            {
                return gff;
            }

            // Fall back to the known formatters so that custom ones added through add-ins are found.
            string extension = Path.GetExtension(fileName);
            if (string.IsNullOrEmpty(extension))
            {
                return null;
            }

            return All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));
        }
        /// <summary>
        /// Returns the formatter identified by <paramref name="formatterName"/>, opened on the given file.
        /// </summary>
        /// <param name="fileName">File name the formatter is created for or opened on.</param>
        /// <param name="formatterName">Name of the formatter to use.</param>
        /// <returns>
        /// A new FastA, FastQ or GenBank formatter, otherwise the first entry of <c>All</c>
        /// with a matching name; null when nothing matches or <paramref name="fileName"/>
        /// is null or empty.
        /// </returns>
        public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            if (formatterName == Properties.Resource.FastAName)
            {
                return new FastAFormatter(fileName);
            }

            if (formatterName == Properties.Resource.FastQName)
            {
                return new FastQFormatter(fileName);
            }

            if (formatterName == Properties.Resource.GENBANK_NAME)
            {
                return new GenBankFormatter(fileName);
            }

            // Fall back to the known formatters so that custom ones added through add-ins are found.
            ISequenceFormatter custom = All.FirstOrDefault(candidate => candidate.Name == formatterName);

            // A formatter found by name is opened on the file here, mirroring the
            // branches above where the file name is handed to the constructor.
            if (custom != null)
            {
                custom.Open(fileName);
            }

            return custom;
        }
Exemplo n.º 12
0
 /// <summary>
 /// Constructor for discarding reads based on minimum length.
 /// </summary>
 /// <param name="parser">Input sequence parser</param>
 /// <param name="filtered">Output sequence formatter</param>
 /// <param name="discarded">Output discarded sequences formatter</param>
 /// <param name="length">The minimum length that reads must satisfy in
 /// order to not be discarded.</param>
 /// <exception cref="ArgumentOutOfRangeException">
 /// Thrown when <paramref name="length"/> is zero or negative.
 /// </exception>
 public DiscardByLength(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, long length)
     : base(parser, filtered, discarded)
 {
     if (length <= 0)
     {
         // The single-string constructor treats its argument as the parameter name,
         // so the parameter name and the message are passed separately.
         throw new ArgumentOutOfRangeException("length", "Minimum length must be > 0.");
     }
     minLengthThreshold = length;
 }
Exemplo n.º 13
0
        /// <summary>
        /// If all input parameters are satisfactory, this method is called to
        /// prepare the parameters for analysis/post-QC filtering (trimming).
        /// The output formatters created here are disposed before the method
        /// returns, including when an exception is thrown.
        /// </summary>
        /// <param name="sender">Run button element</param>
        /// <param name="e">Routed event args</param>
        protected override void PrepareRun(object sender, RoutedEventArgs e)
        {
            #region Prepare event arguments for trimming

            // Build event arguments
            bool trimFromStart = this.comboItemFromLeft.IsSelected;

            // Verify input parser
            bool canAutoParse = base.VerifyInputParser(this.ioControl.Input, this.ioControl.SelectedInputParserType);
            if (!canAutoParse)
            {
                MessageBox.Show(Resource.AUTOPARSE_FAIL);
            }
            else
            {
                ISequenceFormatter filtered  = null;
                ISequenceFormatter discarded = null;

                try
                {
                    // Verify output sequence formatters
                    filtered  = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.OutputFilename);
                    discarded = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.DiscardedFilename);
                    if (filtered == null)
                    {
                        // The program shouldn't reach here
                        throw new ApplicationException(Resource.NonsenseError);
                    }

                    FilterToolArgs args;

                    if (this.IsInByLengthMode)
                    {
                        args = new TrimByLengthArgs(Convert.ToDouble(this.trimLengthValue.Text), trimFromStart, this.ioControl.Input, filtered, discarded, this.ioControl.OutputFilename);
                    }
                    else if (this.IsInByQualityMode)
                    {
                        // An empty minimum-length box falls back to the default minimum length.
                        int minLength = this.trimQualityMinLengthValue.Text.Equals("") ? TrimByQuality.MIN_LENGTH_DEFAULT : Convert.ToInt32(this.trimQualityMinLengthValue.Text);
                        args = new TrimByQualityArgs(Convert.ToByte(this.trimQualityValue.Text), trimFromStart, minLength, this.ioControl.Input, filtered, discarded, this.ioControl.OutputFilename);
                    }
                    else if (this.IsInByRegexMode)
                    {
                        args = new TrimByRegexArgs(this.ioControl.Input, filtered, discarded, this.trimRegexPattern.Text, this.ioControl.OutputFilename);
                    }
                    else
                    {
                        // The program shouldn't reach here either.
                        throw new ApplicationException(Resource.NonsenseError);
                    }

                    this.PrepareToTrim(sender, args);
                }
                finally
                {
                    // Release the formatters on every path, including the error paths above
                    // (conversion failures, missing formatter, exceptions from PrepareToTrim).
                    if (filtered != null)
                    {
                        filtered.Dispose();
                    }
                    if (discarded != null)
                    {
                        discarded.Dispose();
                    }
                }
            }
            #endregion
        }
Exemplo n.º 14
0
        /// <summary>
        /// Constructor for trimming sequences based on length or percentage
        /// </summary>
        /// <param name="parser">Input sequences parser</param>
        /// <param name="filtered">Output sequences formatter</param>
        /// <param name="discarded">Discarded sequences formatter</param>
        /// <param name="newLength">If > 1, this is the minimum length (rounded to the nearest integer)
        /// sequences will be trimmed at. If between 0 and 1, this will be treated as
        /// a percentage and reads are trimmed based on this amount (i.e. if newLength = 0.5, 50%
        /// of the read will be trimmed.</param>
        /// <param name="fromStart">Trim from the start of the read</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="newLength"/> is zero or negative.
        /// </exception>
        public TrimByLength(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, double newLength, bool fromStart)
            : base(parser, filtered, discarded, fromStart)
        {
            if (newLength <= 0)
            {
                // The single-string constructor treats its argument as the parameter name,
                // so the parameter name and the message are passed separately. The message
                // also reflects that zero is rejected, not only negative values.
                throw new ArgumentOutOfRangeException("newLength", "Trim length must be greater than zero.");
            }

            this.TrimLength = newLength;
        }
Exemplo n.º 15
0
        /// <summary>
        /// If all input parameters are satisfactory, this method is called to
        /// prepare the parameters for analysis/post-QC filtering (discarding).
        /// The output formatters created here are disposed before the method
        /// returns, including when an exception is thrown.
        /// </summary>
        /// <param name="sender">Run button element</param>
        /// <param name="e">Routed event args</param>
        protected override void PrepareRun(object sender, RoutedEventArgs e)
        {
            #region Prepare event arguments for discarding

            // Verify input parser
            bool canAutoParse = base.VerifyInputParser(this.ioControl.Input, this.ioControl.SelectedInputParserType);
            if (!canAutoParse)
            {
                MessageBox.Show(Resource.AUTOPARSE_FAIL);
            }
            else
            {
                ISequenceFormatter filtered  = null;
                ISequenceFormatter discarded = null;

                try
                {
                    // Verify output sequence formatters
                    filtered  = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.OutputFilename);
                    discarded = DetermineSequenceFormatter(this.ioControl.SelectedOutputParserType, this.ioControl.DiscardedFilename);
                    if (filtered == null)
                    {
                        // The program shouldn't reach here
                        throw new ApplicationException(Resource.NonsenseError);
                    }

                    FilterToolArgs args;

                    if (this.IsInByLengthMode)
                    {
                        args = new DiscardByLengthArgs(this.ioControl.Input, filtered, discarded, Convert.ToInt64(this.discardLengthValue.Text), this.ioControl.OutputFilename);
                    }
                    else if (this.IsInByQualityMode)
                    {
                        // Convert the text typed into the control, not the control object itself.
                        // NOTE(review): assumes discardQualityValue exposes Text like
                        // discardLengthValue does - confirm against the XAML.
                        args = new DiscardByMeanQualityArgs(this.ioControl.Input, filtered, discarded, Convert.ToByte(this.discardQualityValue.Text), this.ioControl.OutputFilename);
                    }
                    else if (this.IsInByRegexMode)
                    {
                        args = new DiscardByRegexArgs(this.ioControl.Input, filtered, discarded, this.discardRegexPattern.Text, this.ioControl.OutputFilename);
                    }
                    else
                    {
                        // The program shouldn't reach here either.
                        throw new ApplicationException(Resource.NonsenseError);
                    }

                    this.PrepareToDiscard(sender, args);
                }
                finally
                {
                    // Release the formatters on every path, including the error paths above
                    // (conversion failures, missing formatter, exceptions from PrepareToDiscard).
                    if (filtered != null)
                    {
                        filtered.Dispose();
                    }
                    if (discarded != null)
                    {
                        discarded.Dispose();
                    }
                }
            }

            #endregion
        }
Exemplo n.º 16
0
        /// <summary>
        /// Constructor for discarding reads based on minimum mean quality score.
        ///
        /// NOTE:
        /// Require Sanger Phred-base scores (i.e. ASCII-33)
        /// </summary>
        /// <param name="parser">Input sequence parser; must be a FastQParser</param>
        /// <param name="filtered">Formatter for filtered reads</param>
        /// <param name="discarded">Formatter for discarded reads</param>
        /// <param name="mean">Sanger Phred-based mean quality score</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="parser"/> is not a FastQParser.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="mean"/> exceeds the Sanger quality score range.
        /// </exception>
        public DiscardByMeanQuality(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, byte mean)
            : base(parser, filtered, discarded)
        {
            if (!(parser is FastQParser))
            {
                throw new ArgumentException("Invalid SequenceParser type.", "parser");
            }

            // A byte can never be negative, so only the upper bound needs checking.
            if (mean > QualitativeSequence.SangerMaxQualScore - QualitativeSequence.SangerMinQualScore)
            {
                // The single-string constructor treats its argument as the parameter name,
                // so the parameter name and the message are passed separately.
                throw new ArgumentOutOfRangeException("mean", "Invalid Phred-based quality score threshold.");
            }

            this.MeanQualityThreshold = mean;
        }
 /// <summary>
 /// Asserts that the runtime type of the formatter can be obtained. When the
 /// formatter is null, the resulting NullReferenceException is logged instead
 /// of failing the test.
 /// </summary>
 /// <param name="formatter">Formatter under test; may be null.</param>
 public void ValidateFormatterExceptions(ISequenceFormatter formatter)
 {
     try
     {
         Type runtimeType = formatter.GetType();
         Assert.IsNotNull(runtimeType);
     }
     catch (NullReferenceException nullReference)
     {
         // Log to VSTest GUI.
         string logEntry = string.Format(
             (IFormatProvider)null,
             "Sequence Formatter P2 : Validated Exception {0} successfully",
             nullReference.Message);
         ApplicationLog.WriteLine(logEntry);
     }
 }
        /// <summary>
        /// Converts a sequence file from one format to another. The parser and the
        /// formatter are chosen from the source and destination file names.
        /// </summary>
        /// <param name="args">Exactly two entries: source file name and destination file name.</param>
        static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine("Need source and destination filenames.");
                return;
            }

            string sourceFilename = args[0];
            string destFilename   = args[1];

            ISequenceParser parser = SequenceParsers.FindParserByFileName(sourceFilename);

            if (parser == null)
            {
                // Fall back to searching all known parsers by file extension.
                parser = SequenceParsers.All.FirstOrDefault(
                    sp => sp.SupportedFileTypes.Contains(Path.GetExtension(sourceFilename)));
                if (parser == null)
                {
                    Console.WriteLine("Failed to locate parser for {0}", sourceFilename);
                    return;
                }
                parser.Open(sourceFilename);
            }

            try
            {
                ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(destFilename);

                if (formatter == null)
                {
                    // Fall back to searching all known formatters by file extension.
                    formatter = SequenceFormatters.All.FirstOrDefault(
                        sp => sp.SupportedFileTypes.Contains(Path.GetExtension(destFilename)));
                    if (formatter == null)
                    {
                        Console.WriteLine("Failed to locate formatter for {0}", destFilename);
                        // The outer finally still closes the parser on this path.
                        return;
                    }
                    formatter.Open(destFilename);
                }

                try
                {
                    foreach (var sequence in parser.Parse())
                    {
                        formatter.Write(sequence);
                    }
                }
                finally
                {
                    // Close the output even when parsing or writing fails part-way.
                    formatter.Close();
                }
            }
            finally
            {
                parser.Close();
            }
        }
Exemplo n.º 19
0
        /// <summary>
        /// Base constructor for discarding sequences. Stores the parsed input
        /// sequences and both writers, and resets the counters to zero.
        /// </summary>
        /// <param name="parser">SequenceParser for input data; must not be null</param>
        /// <param name="filtered">SequenceFormatter for filtered data; must not be null</param>
        /// <param name="discarded">SequenceFormatter for discarded data; not null-checked here</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="parser"/> or <paramref name="filtered"/> is null.
        /// </exception>
        public Discarder(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded)
        {
            if (parser == null)
            {
                throw new ArgumentNullException("parser");
            }

            if (filtered == null)
            {
                throw new ArgumentNullException("filtered");
            }

            // Counters start at zero.
            Counted      = 0;
            DiscardCount = 0;

            Sequences       = parser.Parse();
            FilteredWriter  = filtered;
            DiscardedWriter = discarded;
        }
Exemplo n.º 20
0
        /// <summary>
        /// Convert input file to output file using the specified format conversion.
        /// The parser and formatter are chosen from the input and output file names
        /// and are closed before the method returns, including on failure.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the input file does not exist, or when no parser or formatter
        /// can be found for the input or output file name.
        /// </exception>
        /// <exception cref="OperationCanceledException">
        /// Thrown when parsing or formatting fails; the original failure is kept
        /// as the inner exception.
        /// </exception>
        public void ConvertFile()
        {
            //make sure input file is valid
            if (!File.Exists(this.InputFile))
            {
                throw new Exception("Input file does not exist.");
            }

            //Finds a parser and opens the file
            ISequenceParser inputParser = SequenceParsers.FindParserByFileName(this.InputFile);

            if (inputParser == null)
            {
                throw new Exception("Input file not a valid file format to parse.");
            }

            try
            {
                //Finds a formatter and opens the file
                ISequenceFormatter outputFormatter = SequenceFormatters.FindFormatterByFileName(this.OutputFile);

                if (outputFormatter == null)
                {
                    // The outer finally closes the parser on this path.
                    throw new Exception("Output file not a valid file format for conversion.");
                }

                try
                {
                    foreach (ISequence sequence in inputParser.Parse())
                    {
                        outputFormatter.Format(sequence);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the original failure as the inner exception so the cause is not lost.
                    throw new OperationCanceledException(
                              string.Format(
                                  "Unable to convert sequences from {0} to {1} - verify that the input sequences have the appropriate type of data to convert to the output formatter.",
                                  inputParser.Name,
                                  outputFormatter.Name),
                              ex);
                }
                finally
                {
                    outputFormatter.Close();
                }
            }
            finally
            {
                inputParser.Close();
            }
        }
Exemplo n.º 21
0
        /// <summary>
        /// Validates the name, description and supported file types of the parser or
        /// formatter found for each file path listed under the given XML node.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="IsParser">true to validate parsers, false to validate formatters.</param>
        void ValidateSequenceFileParser(string nodeName, bool IsParser)
        {
            // Expected values are read from the XML node.
            string[] filePaths = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.FilePathsNode);
            string parserDescription = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DescriptionNode);
            string parserName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ParserNameNode);
            string fileTypes = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FileTypesNode);

            // Look up the default parser or formatter for every listed file.
            foreach (string filePath in filePaths)
            {
                if (IsParser)
                {
                    using (ISequenceParser parser = SequenceParsers.FindParserByFileName(filePath))
                    {
                        // Line breaks are stripped from the description before comparing.
                        string description = parser.Description.Replace("\r", "").Replace("\n", "");

                        Assert.AreEqual(parserName, parser.Name);
                        Assert.AreEqual(parserDescription, description);
                        Assert.AreEqual(fileTypes, parser.SupportedFileTypes);
                    }
                }
                else
                {
                    using (ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(filePath))
                    {
                        // Line breaks are stripped from the description before comparing.
                        string description = formatter.Description.Replace("\r", "").Replace("\n", "");

                        Assert.AreEqual(parserName, formatter.Name);
                        Assert.AreEqual(parserDescription, description);
                        Assert.AreEqual(fileTypes, formatter.SupportedFileTypes);
                    }
                }
            }

            Console.WriteLine("SequenceParser : Type of the parser is validated successfully");
            ApplicationLog.WriteLine("Type of the parser is validated successfully");
        }
Exemplo n.º 22
0
        /// <summary>
        /// Base constructor for trimming sequences. Stores the parsed input sequences,
        /// both writers and the trim direction, and resets the counters to zero.
        /// </summary>
        /// <param name="parser">SequenceParser for input data; must not be null</param>
        /// <param name="filtered">SequenceFormatter for output data; must not be null</param>
        /// <param name="discarded">SequenceFormatter for discarded data; not null-checked here</param>
        /// <param name="fromLeft">Trim from the start of the read</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="parser"/> or <paramref name="filtered"/> is null.
        /// </exception>
        public Trimmer(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, bool fromLeft)
        {
            if (parser == null)
            {
                throw new ArgumentNullException("parser");
            }

            if (filtered == null)
            {
                throw new ArgumentNullException("filtered");
            }

            // Counters start at zero.
            Counted      = 0;
            TrimCount    = 0;
            DiscardCount = 0;

            Sequences       = parser.Parse();
            FilteredWriter  = filtered;
            DiscardedWriter = discarded;
            TrimFromStart   = fromLeft;
        }
Exemplo n.º 23
0
        /// <summary>
        /// Create a SequenceFormatter object based on the given format type.
        /// </summary>
        /// <param name="parserName">
        /// Name of the desired format; compared against the names of the FASTA and
        /// FASTQ formatters.
        /// </param>
        /// <param name="outputFilename">File name handed to the created formatter.</param>
        /// <returns>
        /// A new FastA or FastQ formatter for <paramref name="outputFilename"/>; null when the
        /// file name is null or empty, or when <paramref name="parserName"/> matches neither format.
        /// </returns>
        internal ISequenceFormatter DetermineSequenceFormatter(string parserName, string outputFilename)
        {
            // Treat a missing file name like an empty one instead of throwing on null.
            if (string.IsNullOrEmpty(outputFilename))
            {
                return null;
            }

            // The static Equals is null-safe, so a null parserName simply matches nothing.
            if (string.Equals(parserName, SequenceFormatters.Fasta.Name))
            {
                return new FastAFormatter(outputFilename);
            }

            if (string.Equals(parserName, SequenceFormatters.FastQ.Name))
            {
                return new FastQFormatter(outputFilename);
            }

            return null;
        }
Exemplo n.º 24
0
        /// <summary>
        /// Returns a formatter for the specified file, opened on that file.
        /// </summary>
        /// <param name="fileName">File name for which the formatter is required.</param>
        /// <returns>
        /// A new FastA, FastQ, GenBank or GFF formatter, otherwise the first entry of
        /// <c>All</c> whose supported file types contain the file's extension; null when
        /// nothing matches or <paramref name="fileName"/> is null or empty.
        /// </returns>
        public static ISequenceFormatter FindFormatterByFileName(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            // Built-in formats are tried first; the first match wins.
            if (IsFasta(fileName))
            {
                return new FastAFormatter(fileName);
            }

            if (IsFastQ(fileName))
            {
                return new FastQFormatter(fileName);
            }

            if (IsGenBank(fileName))
            {
                return new GenBankFormatter(fileName);
            }

            if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
            {
                return new GffFormatter(fileName);
            }

            // Fall back to the known formatters so that custom ones added through add-ins are found.
            string extension = Path.GetExtension(fileName);
            if (string.IsNullOrEmpty(extension))
            {
                return null;
            }

            ISequenceFormatter custom = All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));

            // A formatter found by extension is opened on the file here, mirroring the
            // branches above where the file name is handed to the constructor.
            if (custom != null)
            {
                custom.Open(fileName);
            }

            return custom;
        }
Exemplo n.º 25
0
        /// <summary>
        /// Trim sequences based on quality
        ///
        /// NOTE:
        /// Currently, the application assumes the FASTQ format is SANGER
        /// (Illumina 1.8+ is adopting Sanger format anyways). Thus, the
        /// Phred quality score threshold must be within 0 and 93.
        /// </summary>
        /// <param name="parser">Input sequences parser</param>
        /// <param name="filtered">Output sequences formatter</param>
        /// <param name="discarded">Discarded sequences formatter</param>
        /// <param name="q">Sanger Phred-based quality score threshold</param>
        /// <param name="fromStart">Indicates whether trimming from the start of the read is permitted</param>
        /// <param name="minLength">Minimum trim length</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="q"/> exceeds the Sanger quality score range or
        /// <paramref name="minLength"/> is negative.
        /// </exception>
        public TrimByQuality(ISequenceParser parser, ISequenceFormatter filtered, ISequenceFormatter discarded, byte q, bool fromStart, int minLength = 1)
            : base(parser, filtered, discarded, fromStart)
        {
            // q is an unsigned byte, so it can never be negative; only the upper bound
            // of the Sanger score range needs checking.
            if (q > QualitativeSequence.SangerMaxQualScore - QualitativeSequence.SangerMinQualScore)
            {
                // Use the (paramName, message) overload: the single-string constructor
                // would interpret the text as the parameter name.
                throw new ArgumentOutOfRangeException("q", "Invalid Phred-based quality score threshold.");
            }

            if (minLength < 0)
            {
                throw new ArgumentOutOfRangeException("minLength", "Minimum length cannot be less than zero.");
            }

            this.QualityThreshold = q;
            this.MinLength        = minLength;
        }
Exemplo n.º 26
0
        /// <summary>
        /// Creates one instance of every sequence formatter type reported by the
        /// registration service (core folder and, optionally, add-in folders).
        /// </summary>
        /// <returns>
        /// The formatters that could be instantiated. Types whose construction fails,
        /// or that do not yield an ISequenceFormatter, are left out.
        /// </returns>
        private static IEnumerable <ISequenceFormatter> GetSequenceFormatters()
        {
            var formatters = new List <ISequenceFormatter>();

            foreach (var formatterType in BioRegistrationService.LocateRegisteredParts <ISequenceFormatter>())
            {
                try
                {
                    var instance = Activator.CreateInstance(formatterType) as ISequenceFormatter;
                    if (instance == null)
                    {
                        continue;
                    }

                    formatters.Add(instance);
                }
                catch
                {
                    // Best effort: a type that cannot be created (for example, one
                    // without a default constructor) is skipped.
                }
            }

            return(formatters);
        }
        /// <summary>
        /// Click handler that saves the text of the edit box as a DNA sequence to the
        /// file chosen in the save dialog. Any failure is reported in a message dialog.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void save_Click(object sender, RoutedEventArgs e)
        {
            if (changeTextBox.Text == null)
            {
                return;
            }

            InitializeSaveSequenceFileDialog();

            // ShowDialog returns a nullable bool; only an explicit "true" means a file was chosen.
            if (saveEditSequenceFileDialog.ShowDialog() != true)
            {
                return;
            }

            try
            {
                string    sequenceToSave = changeTextBox.Text;
                ISequence seq            = new Bio.Sequence(Alphabets.DNA, sequenceToSave);

                ISequenceFormatter formatter = SequenceFormatters.FindFormatterByFileName(saveEditSequenceFileDialog.FileName);
                if (formatter == null)
                {
                    // No formatter matches the chosen file name; report that clearly
                    // instead of failing with a NullReferenceException below.
                    throw new NotSupportedException("No sequence formatter is available for the selected file type.");
                }

                try
                {
                    formatter.Write(seq);
                }
                finally
                {
                    // Release the output file even when writing fails.
                    formatter.Close();
                }
            }
            catch (Exception ex)
            {
                ModernDialog.ShowMessage(ex.Message, "Exception", MessageBoxButton.OK);
            }
        }
Exemplo n.º 28
0
        /// <summary>
        /// Command-line entry point. Reads sequences from the first file in the
        /// argument list, discards reads by length or by mean quality, writes the
        /// kept reads to the second file and, optionally, the discarded reads to a
        /// separate file.
        /// </summary>
        /// <param name="args">Raw command-line arguments, interpreted by ProcessCommandLine.</param>
        static void Main(string[] args)
        {
            Console.Error.WriteLine(SplashString());
            CommandLineOptions myArgs = ProcessCommandLine(args);

            #region Discarding
            // Determine parser
            InputSubmission input = new InputSubmission(myArgs.FileList[0]);
            input.DetermineParserUtil();

            // Create a sequence formatter object
            ISequenceFormatter filteredFormatter;
            ISequenceFormatter discardedFormatter = null;

            // If the format is FASTA, then output will be FASTA.
            // Everything else (assuming quality scores are available)
            // will be outputted to FASTQ.
            if (input.Parser is FastAParser)
            {
                filteredFormatter = new FastAFormatter(myArgs.FileList[1]);

                if (myArgs.DiscardedFile != null)
                {
                    discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
                }
            }
            else
            {
                filteredFormatter = new FastQFormatter(myArgs.FileList[1]);

                if (myArgs.DiscardedFile != null)
                {
                    discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
                }
            }

            // Initialize a Discarder object
            Discarder myDiscarder = null;

            // By now, we should have sanity checked the command line arguments. So we should be able to
            // figure out what mode is being used simply by checking the properties.
            if (myArgs.DiscardByLength > 0)
            {
                myDiscarder = new DiscardByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.DiscardByLength);
            }

            else if (myArgs.DiscardByQuality > 0)
            {
                // Mean-quality filtering is only offered for FASTQ input.
                if (!(input.Parser is FastQParser))
                {
                    Console.Error.WriteLine("Input file must be in FASTQ format.");
                    Environment.Exit(-1);
                }

                myDiscarder = new DiscardByMeanQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.DiscardByQuality);
            }

            else
            {
                // Should never reach this line.
                Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
                Environment.Exit(-1);
            }

            myDiscarder.DiscardReads();

            #endregion

            if (myArgs.Verbose)
            {
                Console.Error.WriteLine("Discarded {0}/{1} sequences.", myDiscarder.DiscardCount, myDiscarder.Counted);
                Console.Error.WriteLine("Non-discarded sequences saved in {0}.", Path.GetFullPath(myArgs.FileList[1]));
                if (myArgs.DiscardedFile != null)
                {
                    // Only report here; the formatter is closed exactly once below.
                    Console.Error.WriteLine("Discarded sequences saved in {0}.", Path.GetFullPath(myArgs.DiscardedFile));
                }
                Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
            }

            // Release the input and output files.
            input.Parser.Close();
            filteredFormatter.Close();
            if (discardedFormatter != null)
            {
                discardedFormatter.Close();
            }
        }
Exemplo n.º 29
0
        /// <summary>
        /// Method called when the user clicks Ok button on the export selection dialog.
        /// Takes care of parsing the selections and returning the result to the user.
        /// In case there was an error parsing, it will show the selection dialog again with the sequence highlighted.
        /// </summary>
        /// <param name="dialog">Dialog which raised this event; it is used as an ExportSelectionDialog.</param>
        private void OnExportSequenceDialogSubmit(ISelectionDialog dialog)
        {
            ExportSelectionDialog    selectionDialog = dialog as ExportSelectionDialog;
            List <ISequence>         parsedSequences = new List <ISequence>();
            List <Range>             rangesInCurrentSequenceItem;
            List <InputSequenceItem> sequenceItems = selectionDialog.GetSequences();

            // The first callback argument carries the formatter chosen for the export;
            // it decides which kind of metadata is parsed below.
            ISequenceFormatter       formatterUsed = argsForCallback[0] as ISequenceFormatter;

            try
            {
                foreach (InputSequenceItem currentSequenceItem in sequenceItems)
                {
                    try
                    {
                        ISequence sequenceForCurrentItem = null;

                        // Parse sequence
                        if (formatterUsed is GffFormatter && string.IsNullOrWhiteSpace(currentSequenceItem.SequenceAddress))
                        {
                            // For GFF export with no sequence address, an empty DNA sequence is used.
                            sequenceForCurrentItem = new Sequence(Alphabets.DNA, "");
                        }
                        else
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.SequenceAddress);

                            if (rangesInCurrentSequenceItem.Count > 0)
                            {
                                // get from cache with default UI options.
                                sequenceForCurrentItem = SequenceCache.TryGetSequence(rangesInCurrentSequenceItem, selectionDialog.InputParamsAsKey) as ISequence;
                                if (sequenceForCurrentItem == null) // if not in cache
                                {
                                    sequenceForCurrentItem = ExcelSelectionParser.RangeToSequence(rangesInCurrentSequenceItem, selectionDialog.TreatBlankCellsAsGaps, selectionDialog.MoleculeType, currentSequenceItem.SequenceName);
                                    //added default from UI as auto detect and ignore space
                                    SequenceCache.Add(rangesInCurrentSequenceItem, sequenceForCurrentItem, selectionDialog.InputParamsAsKey);
                                }
                                else
                                {
                                    // Set the ID
                                    sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, currentSequenceItem.SequenceName);
                                }
                            }
                            else
                            {
                                // No ranges were resolved for this item; the sequence stays null.
                                currentSequenceItem.SetErrorStatus(false);
                            }
                        }
                        //Parse metadata
                        if (formatterUsed is Bio.IO.FastQ.FastQFormatter)
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                            if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                            {
                                sequenceForCurrentItem = ExcelSelectionParser.RangeToQualitativeSequence(rangesInCurrentSequenceItem, sequenceForCurrentItem);
                            }
                        }
                        else if (formatterUsed is GenBankFormatter)
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                            if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                            {
                                try
                                {
                                    GenBankMetadata metadata = ExcelSelectionParser.RangeToGenBankMetadata(rangesInCurrentSequenceItem);
                                    sequenceForCurrentItem.Metadata[Helper.GenBankMetadataKey] = metadata;
                                    if (string.IsNullOrEmpty(sequenceForCurrentItem.ID))
                                    {
                                        // Set the ID
                                        sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, metadata.Locus.Name);
                                    }
                                }
                                catch (Exception metadataError)
                                {
                                    // Show the friendly message to the user but keep the original
                                    // failure as InnerException so it is not lost for diagnosis.
                                    throw new Exception(Properties.Resources.GenbankMetadataParseError, metadataError);
                                }
                            }
                        }
                        else if (formatterUsed is GffFormatter)
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                            if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                            {
                                ExcelSelectionParser.RangeToGffMetadata(sequenceForCurrentItem, rangesInCurrentSequenceItem);
                            }
                        }

                        // Add the parsed sequence to the list of parsed sequences
                        // (this may be null when no sequence ranges were resolved above).
                        parsedSequences.Add(sequenceForCurrentItem);
                    }
                    catch
                    {
                        // Set error status on item and re-throw
                        currentSequenceItem.SetErrorStatus(true);
                        throw;
                    }
                }

                // On successful parsing...
                if (inputSequenceSelectionComplete != null)
                {
                    inputSequenceSelectionComplete(parsedSequences, this.argsForCallback);
                }
                // NOTE(review): this detaches OnInputSequenceDialogSubmit, not this handler -
                // confirm against the code that subscribes to InputSelectionDialogSubmitting.
                selectionDialog.InputSelectionDialogSubmitting -= OnInputSequenceDialogSubmit;
                selectionDialog.Close();
            }
            catch (Exception ex)
            {
                // Report the failure and let the user correct the selection.
                MessageBox.Show(ex.Message, Resources.CAPTION, MessageBoxButtons.OK, MessageBoxIcon.Error);
                selectionDialog.ShowDialog();
            }
        }
Exemplo n.º 30
0
 /// <summary>
 /// Creates the event arguments for a trim-by-quality run and builds the
 /// TrimByQuality trimmer that goes with them.
 /// </summary>
 /// <param name="q">Quality score threshold forwarded to the trimmer.</param>
 /// <param name="trimFromStart">Forwarded to the trimmer as its trim-from-start flag.</param>
 /// <param name="minLength">Minimum length forwarded to the trimmer.</param>
 /// <param name="input">Input submission; its Parser feeds the trimmer, and it is also passed to the base constructor.</param>
 /// <param name="filtered">Formatter forwarded to the trimmer for filtered output.</param>
 /// <param name="discarded">Formatter forwarded to the trimmer for discarded output.</param>
 /// <param name="outFile">Output file name, passed to the base constructor.</param>
 public TrimByQualityArgs(byte q, bool trimFromStart, int minLength, InputSubmission input, ISequenceFormatter filtered, ISequenceFormatter discarded, string outFile)
     : base(input, outFile)
 {
     this.trimmer = new TrimByQuality(
         input.Parser,
         filtered,
         discarded,
         q,
         trimFromStart,
         minLength);
 }
Exemplo n.º 31
0
 /// <summary>
 /// Creates the event arguments for a discard-by-mean-quality run and builds the
 /// DiscardByMeanQuality discarder that goes with them.
 /// </summary>
 /// <param name="input">Input submission; its Parser feeds the discarder, and it is also passed to the base constructor.</param>
 /// <param name="filtered">Formatter forwarded to the discarder for the output sequences.</param>
 /// <param name="discarded">Formatter forwarded to the discarder for the discarded reads.</param>
 /// <param name="mean">Minimum mean quality threshold forwarded to the discarder.</param>
 /// <param name="outFile">Output file name, passed to the base constructor.</param>
 public DiscardByMeanQualityArgs(InputSubmission input, ISequenceFormatter filtered, ISequenceFormatter discarded, byte mean, string outFile)
     : base(input, outFile)
 {
     this.discarder = new DiscardByMeanQuality(
         input.Parser,
         filtered,
         discarded,
         mean);
 }
Exemplo n.º 32
0
        /// <summary>
        /// Reads every sequence from <paramref name="inputFileName"/> with the parser
        /// selected for that file name and writes them to <paramref name="outputFileName"/>
        /// using <paramref name="targetFormatter"/>.
        /// </summary>
        /// <param name="inputFileName">File to read; also used to select the parser.</param>
        /// <param name="outputFileName">File the formatter is opened on.</param>
        /// <param name="targetFormatter">
        /// Formatter that produces the output. It is closed and disposed by this method
        /// once it has been opened, whether or not writing succeeds.
        /// </param>
        public static void ConvertFromOneFormatToAnother(string inputFileName, string outputFileName, ISequenceFormatter targetFormatter)
        {
            var parser = SequenceParsers.FindParserByFileName(inputFileName);

            try
            {
                // Materialise the parsed sequences before any output is produced.
                var sequenceList = parser.Parse();
                var sequences = Helper.ConvertIenumerableToList(sequenceList);

                targetFormatter.Open(outputFileName);

                try
                {
                    foreach (var sequence in sequences)
                    {
                        targetFormatter.Write(sequence);
                    }
                }
                finally
                {
                    // Release the output file even when a write fails.
                    targetFormatter.Close();
                    targetFormatter.Dispose();
                }
            }
            finally
            {
                // The parser is local to this method; release it if it holds resources.
                var disposableParser = parser as System.IDisposable;
                if (disposableParser != null)
                {
                    disposableParser.Dispose();
                }
            }
        }