/// <summary>
/// Checks TrimByRegex on a plain DNA sequence using the end-anchored pattern
/// "TTTAAA$": trimming TTTAAAGATTACATTTAAA is expected to yield TTTAAAGATTACA.
/// </summary>
public void FastaTrimRegex2()
{
    const string trailingPattern = @"TTTAAA$";

    Sequence untrimmed = new Sequence(Alphabets.DNA, "TTTAAAGATTACATTTAAA");
    Sequence reference = new Sequence(Alphabets.DNA, "TTTAAAGATTACA");

    TrimByRegex trimmer = new TrimByRegex(
        new FastAParser(),
        new FastAFormatter(),
        new FastAFormatter(),
        trailingPattern);

    ISequence trimmed = trimmer.Trim(untrimmed);

    // Compare the textual form of both sequences.
    var expectedText = BioHelper.GetStringSequence(reference);
    var actualText = BioHelper.GetStringSequence(trimmed);
    Assert.AreEqual(expectedText, actualText);
}
/// <summary>
/// Checks TrimByRegex on a FASTQ read using the pattern "GATTACA": the test expects
/// the matched bases to be removed from the middle of the read (GGGCCCGATTACATTTAAA
/// becomes GGGCCCTTTAAA) and the encoded quality scores to shrink accordingly
/// (ABCABCIIIIIIIABCABC becomes ABCABCABCABC).
/// </summary>
public void FastqTrimRegex1()
{
    QualitativeSequence seqObj = new QualitativeSequence(
        Alphabets.DNA, FastQFormatType.Sanger, "GGGCCCGATTACATTTAAA", "ABCABCIIIIIIIABCABC");
    QualitativeSequence expected = new QualitativeSequence(
        Alphabets.DNA, FastQFormatType.Sanger, "GGGCCCTTTAAA", "ABCABCABCABC");

    TrimByRegex target = new TrimByRegex(new FastQParser(), new FastQFormatter(), new FastQFormatter(), "GATTACA");
    ISequence actual = target.Trim(seqObj);

    // The sequence symbols must match the expected trimmed read.
    Assert.AreEqual(BioHelper.GetStringSequence(expected), BioHelper.GetStringSequence(actual));

    // 'as' yields null when the result is not a QualitativeSequence; assert on that
    // explicitly so the failure is reported as an assertion instead of a null being
    // passed into the quality-score helper.
    QualitativeSequence actualQualitative = actual as QualitativeSequence;
    Assert.IsNotNull(actualQualitative, "Trim did not return a QualitativeSequence.");

    // The quality scores must have been trimmed at the same positions as the symbols.
    Assert.AreEqual(
        BioHelper.GetEncodedQualityScoreStringSequence(expected),
        BioHelper.GetEncodedQualityScoreStringSequence(actualQualitative));
}
/// <summary>
/// Builds the argument set for a regular-expression trim. The input and output file
/// are forwarded to the base constructor, and a TrimByRegex instance is created from
/// the input's parser, the two formatters and the pattern.
/// </summary>
/// <param name="input">Input submission; forwarded to the base constructor and used as the source of the parser.</param>
/// <param name="filtered">Formatter passed to TrimByRegex as its second argument.</param>
/// <param name="discarded">Formatter passed to TrimByRegex as its third argument.</param>
/// <param name="pattern">Regular expression pattern passed to TrimByRegex.</param>
/// <param name="outFile">Output file name; forwarded to the base constructor.</param>
public TrimByRegexArgs(
    InputSubmission input,
    ISequenceFormatter filtered,
    ISequenceFormatter discarded,
    string pattern,
    string outFile)
    : base(input, outFile)
{
    var sourceParser = input.Parser;
    trimmer = new TrimByRegex(sourceParser, filtered, discarded, pattern);
}
/// <summary>
/// Entry point of the trimmer utility.
/// usage: SeqcosTrimmerUtil.exe [options] &lt;input&gt; &lt;output&gt;
/// Prints the splash text, processes the command line, selects a trimmer
/// (by length, by quality or by regular expression) based on the parsed options,
/// runs it over the input and closes the parser and formatters afterwards.
/// </summary>
/// <param name="args">Command line arguments</param>
/// <exception cref="ArgumentException">
/// Thrown when trimming by quality is requested but the detected parser is not a FASTQ parser.
/// </exception>
static void Main(string[] args)
{
    Console.Error.WriteLine(SplashString());

    CommandLineOptions myArgs = ProcessCommandLine(args);

    // Determine parser
    InputSubmission input = new InputSubmission(myArgs.InputFile);
    input.DetermineParserUtil();

    // Formatters are declared outside the try block so the finally block can close
    // whichever ones were actually created, even if a later step throws.
    ISequenceFormatter filteredFormatter = null;
    ISequenceFormatter discardedFormatter = null;

    try
    {
        #region Trimming

        // If the format is FASTA, then output will be FASTA.
        // Everything else (assuming quality scores are available)
        // will be outputted to FASTQ.
        if (input.Parser is FastAParser)
        {
            if (myArgs.TrimByQuality > 0)
            {
                Console.Error.WriteLine("Cannot trim by quality using a FASTA file.");

                // Environment.Exit terminates the process without running the finally
                // block below; no formatter has been created at this point.
                Environment.Exit(-1);
            }

            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastAFormatter(myArgs.DiscardedFile);
            }

            filteredFormatter = new FastAFormatter(myArgs.OutputFile);
        }
        else
        {
            if (myArgs.DiscardedFile != null)
            {
                discardedFormatter = new FastQFormatter(myArgs.DiscardedFile);
            }

            filteredFormatter = new FastQFormatter(myArgs.OutputFile);
        }

        // Initialize a Trimmer object
        Trimmer myTrimmer = null;

        // By now, we should have sanity checked the command line arguments. So we should be able to
        // figure out what mode is being used simply by checking the properties.
        if (myArgs.TrimByLength > 0)
        {
            Console.Error.WriteLine("Trimming reads to length {0}", myArgs.TrimByLength);
            myTrimmer = new TrimByLength(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByLength, myArgs.Left);
        }
        else if (myArgs.TrimByQuality > 0)
        {
            if (!(input.Parser is FastQParser))
            {
                // The finally block closes the formatters opened above.
                throw new ArgumentException("Input file must be in FASTQ format.");
            }

            Console.Error.WriteLine("Trimming reads based on quality score {0}", myArgs.TrimByQuality);
            myTrimmer = new TrimByQuality(input.Parser, filteredFormatter, discardedFormatter, (byte)myArgs.TrimByQuality, myArgs.Left, (int)Math.Round(myArgs.TrimByLength));
        }
        else if (myArgs.TrimByRegex != null)
        {
            Console.Error.WriteLine("Trimming reads based on the regular expression pattern {0}", myArgs.TrimByRegex);
            myTrimmer = new TrimByRegex(input.Parser, filteredFormatter, discardedFormatter, myArgs.TrimByRegex);
        }
        else
        {
            // Should never reach this line.
            // NOTE(review): the message lists only '-l' and '-q' although a regex mode is
            // handled above - confirm the regex option name and extend the message.
            Console.Error.WriteLine("Invalid trim mode. Use '-l' or '-q'.");
            Environment.Exit(-1);
        }

        myTrimmer.TrimAll();

        #endregion

        if (myArgs.Verbose)
        {
            Console.Error.WriteLine("Trimmed {0}/{1} sequences.", myTrimmer.TrimCount, myTrimmer.Counted);
            Console.Error.WriteLine("Discarded {0}/{1} sequences.", myTrimmer.DiscardCount, myTrimmer.Counted);
            Console.Error.WriteLine("Output saved in {0}.", Path.GetFullPath(myArgs.OutputFile));
            Console.Error.WriteLine("Warning: Output may not be in the same order as the original input.");
        }
    }
    finally
    {
        // Close the parser and formatters on both the success and the exception path,
        // in the same order as before.
        input.Parser.Close();

        if (filteredFormatter != null)
        {
            filteredFormatter.Close();
        }

        if (discardedFormatter != null)
        {
            discardedFormatter.Close();
        }
    }
}