/// <summary>
/// Creates the sequence parser that corresponds to the given parser name for the specified file.
/// </summary>
/// <param name="fileName">File name for which the parser is required.</param>
/// <param name="parserName">Name of the parser to use.</param>
/// <returns>
/// A FastA, FastQ, GenBank or GFF parser constructed with <paramref name="fileName"/> when
/// <paramref name="parserName"/> equals the matching resource name; otherwise null. Null is
/// also returned when <paramref name="fileName"/> is null or empty.
/// </returns>
public static ISequenceParser FindParserByName(string fileName, string parserName)
{
    // Without a file name there is nothing to build a parser for.
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (parserName == Properties.Resource.FastAName)
    {
        return new FastAParser(fileName);
    }

    if (parserName == Properties.Resource.FastQName)
    {
        return new FastQParser(fileName);
    }

    if (parserName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankParser(fileName);
    }

    if (parserName == Properties.Resource.GFF_NAME)
    {
        return new GffParser(fileName);
    }

    // Unknown parser name.
    return null;
}
/// <summary>
/// Parses a FastA file that holds one or more sequences.
/// </summary>
/// <param name="filename">Path to the file to be parsed.</param>
/// <returns>The sequences returned by the parser's <c>Parse()</c> call.</returns>
static IEnumerable<ISequence> ParseFastA(string filename)
{
    // NOTE(review): the parser created here is never disposed by this method - confirm
    // whether the caller is expected to finish enumerating before it can be released.
    return new FastAParser(filename).Parse();
}
/// <summary>
/// Picks a sequence parser for the specified file and constructs it with that file name.
/// </summary>
/// <param name="fileName">File name for which the parser is required.</param>
/// <returns>
/// A FastA, FastQ or GenBank parser when the corresponding <c>Is...</c> check accepts the
/// file name, a GFF parser when the name ends with the GFF file extension (culture-invariant,
/// case-insensitive), otherwise null. Null is also returned for a null or empty file name.
/// </returns>
public static ISequenceParser FindParserByFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (IsFasta(fileName))
    {
        return new FastAParser(fileName);
    }

    if (IsFastQ(fileName))
    {
        return new FastQParser(fileName);
    }

    if (IsGenBank(fileName))
    {
        return new GenBankParser(fileName);
    }

    // GFF has no dedicated helper here; it is recognised by its extension only.
    bool hasGffExtension = fileName.EndsWith(
        Properties.Resource.GFF_FILEEXTENSION,
        StringComparison.InvariantCultureIgnoreCase);

    return hasGffExtension ? new GffParser(fileName) : null;
}
/// <summary>
/// Validates <c>SequenceStatistics.ToString()</c> for a hand-built DNA sequence and for
/// every sequence parsed from the simple FastA test file.
/// </summary>
public void ValidateSequenceStatisticsToString()
{
    ISequence seq = new Sequence(Alphabets.DNA, "ATCGATCG");
    var seqStats = new SequenceStatistics(seq);
    string actualString = seqStats.ToString();
    string expectedString = "A - 2\r\nC - 2\r\nG - 2\r\nT - 2\r\n".Replace("\r\n", System.Environment.NewLine);

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);

    // Gets the FastA file path from the Xml
    string filePath = this.utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.FilePathNode);

    List<ISequence> seqsList;
    using (var reader = File.OpenRead(filePath))
    {
        var parser = new FastAParser();
        parser.Alphabet = Alphabets.Protein;

        // The list is built inside the using block, before the stream is closed.
        seqsList = parser.Parse(reader).ToList();
    }

    foreach (ISequence parsedSequence in seqsList)
    {
        string seqStatStr = new SequenceStatistics(parsedSequence).ToString();

        // Each statistics line has the form "<symbol> - <count>".
        Assert.IsTrue(seqStatStr.Contains(" - "));
    }
}
/// <summary>
/// Runs the PAMSAM multiple sequence aligner over the un-aligned DNA benchmark dataset
/// and checks that aligned sequences are produced.
/// </summary>
public void testBug2()
{
    // Test on DNA benchmark dataset
    string benchmarkPath = @"TestUtils\122_raw.afa".TestDir();
    var fastaParser = new FastAParser();
    IList<ISequence> alignedInput = fastaParser.Parse(benchmarkPath).ToList();
    List<ISequence> sequences = MsaUtils.UnAlign(alignedInput);

    // Aligner-wide switches.
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    // Numeric alignment parameters.
    const int gapOpenPenalty = -13;
    const int gapExtendPenalty = -5;
    const int kmerLength = 2;
    const int numberOfDegrees = 2;     // Environment.ProcessorCount;
    const int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

    // Algorithm selection.
    var distanceFunction = DistanceFunctionTypes.EuclideanDistance;
    var clusteringMethod = UpdateDistanceMethodsTypes.Average;
    var profileAligner = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    var profileScoreFunction = ProfileScoreFunctionNames.InnerProductFast;
    var similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    var msa = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        distanceFunction,
        clusteringMethod,
        profileAligner,
        profileScoreFunction,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        numberOfPartitions,
        numberOfDegrees);

    Assert.IsNotNull(msa.AlignedSequences);
}
/// <summary>
/// Validates that NUCmer.GetClusters() returns the configured number of clusters for
/// the medium size reference/query FastA pair.
/// </summary>
public void ValidateNUCmerGetClusters()
{
    // NOTE: Nigel ran this test with the same data through mmummer and mgaps and got the same result.

    // Reference sequence file.
    string referencePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.MediumSizeSequenceNodeName,
        Constants.FilePathNode);

    // Query sequence file.
    string queryPath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.MediumSizeSequenceNodeName,
        Constants.SearchSequenceFilePathNode);

    var fastaParser = new FastAParser();
    IEnumerable<ISequence> referenceSequences = fastaParser.Parse(referencePath);
    IEnumerable<ISequence> querySequences = fastaParser.Parse(queryPath);

    var nucmer = new Bio.Algorithms.Alignment.NUCmer(referenceSequences.First());
    nucmer.LengthOfMUM = 5;
    nucmer.MinimumScore = 0;

    var clusters = nucmer.GetClusters(querySequences.First());

    string expectedClusterCount = this.utilityObj.xmlUtil.GetTextValue(
        Constants.MediumSizeSequenceNodeName,
        Constants.ClustCount1Node);
    string actualClusterCount = clusters.Count.ToString(CultureInfo.InvariantCulture);

    Assert.AreEqual(expectedClusterCount, actualClusterCount);
}
/// <summary>
/// Returns the parser registered under the given name, set up for the specified file.
/// </summary>
/// <param name="fileName">File name for which the parser is required.</param>
/// <param name="parserName">Name of the parser to use.</param>
/// <returns>
/// A FastA, FastQ or GenBank parser constructed with <paramref name="fileName"/>, or the
/// first parser in <c>All</c> whose name equals <paramref name="parserName"/> after
/// <c>Open(fileName)</c> has been called on it; null when either argument is null or
/// empty or no parser matches.
/// </returns>
public static ISequenceParser FindParserByName(string fileName, string parserName)
{
    if (string.IsNullOrEmpty(fileName) || string.IsNullOrEmpty(parserName))
    {
        return null;
    }

    if (parserName == Properties.Resource.FastAName)
    {
        return new FastAParser(fileName);
    }

    if (parserName == Properties.Resource.FastQName)
    {
        return new FastQParser(fileName);
    }

    if (parserName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankParser(fileName);
    }

    // Search the known parsers to pick up custom parsers added through add-ins.
    ISequenceParser registered = All.FirstOrDefault(p => p.Name == parserName);

    // A parser matched by name is opened on the file explicitly, mirroring the
    // branches above where the file name is handed to the parser's constructor.
    if (registered != null)
    {
        registered.Open(fileName);
    }

    return registered;
}
/// <summary>
/// Validates that NUCmer.GetClusters() returns the configured number of clusters for
/// the medium size reference/query FastA pair.
/// </summary>
public void ValidateNUCmerGetClusters()
{
    // Reference sequence file.
    string referencePath = utilityObj.xmlUtil.GetTextValue(
        Constants.MediumSizeSequenceNodeName,
        Constants.FilePathNode);

    // Query sequence file.
    string queryPath = utilityObj.xmlUtil.GetTextValue(
        Constants.MediumSizeSequenceNodeName,
        Constants.SearchSequenceFilePathNode);

    using (FastAParser referenceParser = new FastAParser(referencePath))
    using (FastAParser queryParser = new FastAParser(queryPath))
    {
        IEnumerable<ISequence> referenceSequences = referenceParser.Parse();
        IEnumerable<ISequence> querySequences = queryParser.Parse();

        NUCmer nucmer = new NUCmer((Sequence)referenceSequences.ElementAt(0))
        {
            LengthOfMUM = 5,
            MinimumScore = 0,
            BreakLength = 0
        };

        IList<Cluster> clusters = nucmer.GetClusters(querySequences.ElementAt(0), true);

        string expectedClusterCount = utilityObj.xmlUtil.GetTextValue(
            Constants.MediumSizeSequenceNodeName,
            Constants.ClustCount1Node);
        string actualClusterCount = clusters.Count.ToString((IFormatProvider)null);

        Assert.AreEqual(expectedClusterCount, actualClusterCount);
    }
}
/// <summary>
/// Writes a sequence built from the configured test data to a temporary FastA file with
/// FastAFormatter, parses the file back and validates that the round trip preserved the
/// sequence count, ID and symbols.
/// </summary>
public void FastAFormatterValidateWrite()
{
    using (FastAFormatter formatter = new FastAFormatter(Constants.FastaTempFileName))
    {
        // Gets the actual sequence and the alphabet from the Xml
        string actualSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
        string alpName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

        // Logs information to the log file
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: Validating with Sequence '{0}' and Alphabet '{1}'.", actualSequence, alpName));

        Sequence seqOriginal = new Sequence(Utility.GetAlphabet(alpName), actualSequence);
        seqOriginal.ID = "";

        // Use the formatter to write the original sequences to a temp file
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: Creating the Temp file '{0}'.", Constants.FastaTempFileName));
        formatter.Write(seqOriginal);
        formatter.Close();

        // Read the new file, then compare the sequences
        using (FastAParser parser = new FastAParser(Constants.FastaTempFileName))
        {
            parser.Alphabet = Alphabets.Protein;

            // Materialize once so the file is not re-read for every Count/ElementAt call.
            IList<ISequence> seqsNew = parser.Parse().ToList();

            string newSequence = new string(seqsNew[0].Select(a => (char)a).ToArray());
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: New Sequence is '{0}'.", newSequence));

            // Now compare the sequences.
            Assert.AreEqual(1, seqsNew.Count);
            ApplicationLog.WriteLine("The Number of sequences are matching.");

            Assert.AreEqual(seqOriginal.ID, seqsNew[0].ID);

            // Compare against the ORIGINAL sequence; building both strings from the
            // re-parsed sequence would make this assertion pass unconditionally.
            string orgSeq = new string(((ISequence)seqOriginal).Select(a => (char)a).ToArray());
            Assert.AreEqual(orgSeq, newSequence);

            Console.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method and is as expected.", newSequence));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Formatter BVT: The FASTA sequences '{0}' are matching with Format() method.", newSequence));
        }

        // Passed all the tests, delete the tmp file. If we failed an Assert,
        // the tmp file will still be there in case we need it for debugging.
        File.Delete(Constants.FastaTempFileName);
        ApplicationLog.WriteLine("Deleted the temp file created.");
    }
}
/// <summary>
/// Parses the file named by <c>this.Filename</c> with a FastA parser.
/// </summary>
/// <returns>The sequences returned by the parser's <c>Parse()</c> call.</returns>
protected IEnumerable<ISequence> ParseFile()
{
    // TODO: Add other parsers.
    return new FastAParser(this.Filename).Parse();
}
/// <summary>
/// Translates the gene model built from the "HardReverseStrand" annotation and checks
/// both the retrieved coding sequence and the first translated protein.
/// </summary>
public void TranslateHardReverseStrand()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string genomePath = Path.Combine(testDirectory, "Homo_sapiens.GRCh38.dna.chromosome.14.fa");
    string annotationPath = Path.Combine(testDirectory, "HardReverseStrand", "reverse.gff3");
    string codingSequencePath = Path.Combine(testDirectory, "HardReverseStrand", "codingSeq.fa");

    Genome genome = new Genome(genomePath);
    GeneModel geneModel = new GeneModel(genome, annotationPath);
    List<Protein> proteins = geneModel.Translate(true).ToList();

    // The coding sequence stored on disk is the reference for the first transcript.
    ISequence codingSequence = new FastAParser().Parse(codingSequencePath).First();
    var expectedCoding = SequenceExtensions.ConvertToString(codingSequence);
    var actualCoding = SequenceExtensions.ConvertToString(geneModel.Genes[0].Transcripts[0].RetrieveCodingSequence());
    Assert.AreEqual(expectedCoding, actualCoding);

    const string expectedProtein =
        "MNLQAQPKAQNKRKRCLFGGQEPAPKEQPPPLQPPQQSIRVKEEQYLGHEGPGGAVSTSQ" +
        "PVELPPPSSLALLNSVVYGPERTSAAMLSQQVASVKWPNSVMAPGRGPERGGGGGVSDSS" +
        "WQQQPGQPPPHSTWNCHSLSLYSATKGSPHPGVGVPTYYNHPEALKREKAGGPQLDRYVR" +
        "PMMPQKVQLEVGRPQAPLNSFHAAKKPPNQSLPLQPFQLAFGHQVNRQVFRQGPPPPNPV" +
        "AAFPPQKQQQQQQPQQQQQQQQAALPQMPLFENFYSMPQQPSQQPQDFGLQPAGPLGQSH" +
        "LAHHSMAPYPFPPNPDMNPELRKALLQDSAPQPALPQVQIPFPRRSRRLSKEGILPPSAL" +
        "DGAGTQPGQEATGNLFLHHWPLQQPPPGSLGQPHPEALGFPLELRESQLLPDGERLAPNG" +
        "REREAPAMGSEEGMRAVSTGDCGQVLRGGVIQSTRRRRRASQEANLLTLAQKAVELASLQ" +
        "NAKDGSGSEEKRKSVLASTTKCGVEFSEPSLATKRAREDSGMVPLIIPVSVPVRTVDPTE" +
        "AAQAGGLDEDGKGPEQNPAEHKPSVIVTRRRSTRIPGTDAQAQAEDMNVKLEGEPSVRKP" +
        "KQRPRPEPLIIPTKAGTFIAPPVYSNITPYQSHLRSPVRLADHPSERSFELPPYTPPPIL" +
        "SPVREGSGLYFNAIISTSTIPAPPPITPKSAHRTLLRTNSAEVTPPVLSVMGEATPVSIE" +
        "PRINVGSRFQAEIPLMRDRALAAADPHKADLVWQPWEDLESSREKQRQVEDLLTAACSSI" +
        "FPGAGTNQELALHCLHESRGDILETLNKLLLKKPLRPHNHPLATYHYTGSDQWKMAERKL" +
        "FNKGIAIYKKDFFLVQKLIQTKTVAQCVEFYYTYKKQVKIGRNGTLTFGDVDTSDEKSAQ" +
        "EEVEVDIKTSQKFPRVPLPRRESPSEERLEPKREVKEPRKEGEEEVPEIQEKEEQEEGRE" +
        "RSRRAAAVKATQTLQANESASDILILRSHESNAPGSAGGQASEKPREGTGKSRRALPFSE" +
        "KKKKTETFSKTQNQENTFPCKKCGR";

    Assert.AreEqual(expectedProtein, proteins[0].BaseSequence);
}
/// <summary>
/// Parses the FastA file for GI 186972391 with the protein alphabet and validates the
/// length, symbols, alphabet and ID of every sequence it yields.
/// </summary>
public void TestFastaFor186972391()
{
    string expectedSequence =
        "IFYEPVEILGYDNKSSLVLVKRLITRMYQQKSLISSLNDSNQNEFWGHKNSFSSHFSSQMVSEGFGVILE" +
        "IPFSSRLVSSLEEKRIPKSQNLRSIHSIFPFLEDKLSHLNYVSDLLIPHPIHLEILVQILQCWIKDVPSL" +
        "HLLRLFFHEYHNLNSLITLNKSIYVFSKRKKRFFGFLHNSYVYECEYLFLFIRKKSSYLRSISSGVFLER" +
        "THFYGKIKYLLVVCCNSFQRILWFLKDTFIHYVRYQGKAIMASKGTLILMKKWKFHLVNFWQSYFHFWFQ" +
        "PYRINIKQLPNYSFSFLGYFSSVRKNPLVVRNQMLENSFLINTLTQKLDTIVPAISLIGSLSKAQFCTVL" +
        "GHPISKPIWTDLSDSDILDRFCRICRNLCRYHSGSSKKQVLYRIKYIFRLSCARTLARKHKSTVRTFMRR" +
        "LGSGFLEEFFLEEE";

    // parse
    string filepath = System.IO.Path.Combine("TestUtils", "Fasta", "186972391.fasta").TestDir();
    Assert.IsTrue(File.Exists(filepath));

    FastAParser parser = new FastAParser { Alphabet = Alphabets.Protein };
    foreach (ISequence seq in parser.Parse(filepath))
    {
        Assert.IsNotNull(seq);
        Assert.AreEqual(434, seq.Count);

        // Build the text in one pass instead of concatenating one character at a time.
        string actual = new string(seq.Select(b => (char)b).ToArray());
        Assert.AreEqual(expectedSequence, actual);

        // Expected value first, so a failure message labels the values correctly.
        Assert.AreEqual("Protein", seq.Alphabet.Name);
        Assert.AreEqual("gi|186972391|gb|ACC99454.1| maturase K [Scaphosepalum rapax]", seq.ID);
    }
}
/// <summary>
/// Validates <c>DeltaAlignment.ToString()</c> for an in-memory reference sequence and for
/// a reverse-direction alignment against the first sequence of the simple FastA file.
/// </summary>
public void ValidateDeltaAlignmentToString()
{
    ISequence referenceSequence = new Sequence(Alphabets.DNA, "ATCGGGGGGGGAAAAAAATTTTCCCCGGGGG");
    ISequence querySequence = new Sequence(Alphabets.DNA, "GGGGG");

    // Case 1: alignment over the in-memory reference.
    var forwardDelta = new DeltaAlignment(referenceSequence, querySequence);
    forwardDelta.FirstSequenceEnd = 21;
    forwardDelta.SecondSequenceEnd = 20;

    string actualForward = forwardDelta.ToString();
    string expectedForward = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.DeltaAlignmentExpectedNode);
    Assert.AreEqual(expectedForward, actualForward);

    // Case 2: the reference comes from the FastA file named in the Xml.
    string fastaPath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName,
        Constants.FilePathNode);

    List<ISequence> parsedSequences;
    using (var fastaStream = File.OpenRead(fastaPath))
    {
        var fastaParser = new FastAParser();
        fastaParser.Alphabet = Alphabets.Protein;
        parsedSequences = fastaParser.Parse(fastaStream).ToList();
    }

    var reverseDelta = new DeltaAlignment(parsedSequences[0], querySequence);
    reverseDelta.FirstSequenceEnd = 21;
    reverseDelta.SecondSequenceStart = 20;
    reverseDelta.QueryDirection = Cluster.ReverseDirection;

    string actualReverse = reverseDelta.ToString();
    string expectedReverse = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.DeltaAlignmentExpected2Node);
    Assert.AreEqual(expectedReverse, actualReverse);
}
/// <summary>
/// Validates <c>ToString()</c> on a large DNA sequence and <c>ConvertToString(start, length)</c>
/// on the first sequence parsed from the simple FastA file.
/// </summary>
public void ValidateSequenceConvertToString()
{
    // Large sequence: ToString() output is compared with the formatted template from the Xml.
    string largeSequenceText = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.seqLargeStringNode);
    ISequence largeSequence = new Sequence(Alphabets.DNA, largeSequenceText);
    string actualLargeString = largeSequence.ToString();

    string expectedTemplate = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.seqLargeExpected2Node);
    string expectedLargeString = string.Format(
        CultureInfo.CurrentCulture,
        expectedTemplate,
        (largeSequence.Count - Helper.AlphabetsToShowInToString));

    Assert.AreEqual(expectedLargeString, actualLargeString);

    // Parsed sequence: a sub-range converted to text must match the same substring.
    string fastaPath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName,
        Constants.FilePathNode);

    var fastaParser = new FastAParser();
    fastaParser.Alphabet = Alphabets.Protein;
    List<ISequence> parsedSequences = fastaParser.Parse(fastaPath).ToList();

    ISequence firstSequence = parsedSequences[0];
    char[] firstSequenceChars = firstSequence.Select(a => (char)a).ToArray();
    var firstSequenceText = new string(firstSequenceChars);

    Assert.AreEqual(
        firstSequenceText.Substring(2, 5),
        ((Sequence)firstSequence).ConvertToString(2, 5));
}
/// <summary>
/// Generates a large FastA file, parses it with the protein alphabet and validates the
/// length and alphabet of every sequence; the generated file is always deleted afterwards.
/// </summary>
public void TestFastaForMemoryMapFiles()
{
    int sequenceCount = 300 * 1024 * 1024; // 300 MB of data
    string filePath = CreateData(sequenceCount);
    Assert.IsTrue(File.Exists(filePath));

    try
    {
        // The using block disposes the parser BEFORE the file is deleted and copes with
        // a constructor failure; the previous finally block dereferenced a null parser
        // in that case and tried to delete the file while the parser was still alive.
        using (FastAParser parser = new FastAParser(filePath))
        {
            parser.Alphabet = (IAlphabet)Alphabets.Protein;
            foreach (ISequence seq in parser.Parse())
            {
                Assert.IsNotNull(seq);
                Assert.AreEqual(sequenceCount, seq.Count);

                // Expected value first, so a failure message labels the values correctly.
                Assert.AreEqual("Protein", seq.Alphabet.Name);
            }
        }
    }
    finally
    {
        File.Delete(filePath);
    }
}
/// <summary>
/// Parses the five-sequence FastA test file with the protein alphabet and validates the
/// length of each sequence as well as the total number of sequences.
/// </summary>
public void TestFastaWhenParsingOneOfMany()
{
    // parse
    string relativepath = @"\TestUtils\Fasta\5_sequences.fasta";

    // NOTE(review): Substring(6) appears to strip a leading "file:\" from the directory
    // of the assembly code base - confirm before changing how this path is derived.
    string assemblypath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().GetName().CodeBase).Substring(6);
    string filepath = assemblypath + relativepath;

    // Expected symbol count of each sequence, in file order.
    int[] expectedCounts = { 27, 29, 30, 35, 32 };

    using (FastAParser parser = new FastAParser(filepath))
    {
        parser.Alphabet = (IAlphabet)Alphabets.Protein;

        int i = 0;
        foreach (ISequence seq in parser.Parse())
        {
            Assert.IsNotNull(seq);

            // Expected value first, so a failure message labels the values correctly.
            Assert.AreEqual(expectedCounts[i], seq.Count);
            i++;
        }

        // Without this check the test passes even if no sequence was parsed at all.
        Assert.AreEqual(expectedCounts.Length, i);
    }
}
/// <summary>
/// On 64-bit processes only: generates a large FastA file, parses it with the protein
/// alphabet and validates that exactly one sequence of the expected length is returned.
/// The generated file is always deleted afterwards.
/// </summary>
public void TestLargeFasta()
{
    // The test body is skipped entirely when the process is not 64-bit.
    if (!Environment.Is64BitProcess)
    {
        return;
    }

    int sequenceCount = 300 * 1024 * 1024; // 300 MB of data
    string filePath = CreateData(sequenceCount);
    Assert.IsTrue(File.Exists(filePath));

    try
    {
        var parser = new FastAParser { Alphabet = Alphabets.Protein };
        int count = 0;
        foreach (ISequence seq in parser.Parse(filePath))
        {
            Assert.IsNotNull(seq);
            Assert.AreEqual(sequenceCount, seq.Count);

            // Expected value first, so a failure message labels the values correctly.
            Assert.AreEqual("Protein", seq.Alphabet.Name);
            count++;
        }

        Assert.AreEqual(1, count);
    }
    finally
    {
        File.Delete(filePath);
    }
}
/// <summary>
/// Validates general Parse test cases with the xml node name specified: the file must
/// contain exactly one sequence whose symbols, length, alphabet and ID match the Xml.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateParseGeneralTestCases(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode).TestDir();
    string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(filePath));

    var parserObj = new FastAParser();
    parserObj.Alphabet = Utility.GetAlphabet(alphabet);
    IList<ISequence> seqs = parserObj.Parse(filePath).ToList();
    Assert.AreEqual(1, seqs.Count);

    // Gets the expected sequence from the Xml
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);

    ISequence seq = seqs[0];
    var newSequence = seq.ConvertToString();
    Assert.AreEqual(expectedSequence, newSequence);

    var tmpEncodedSeq = new byte[seq.Count];
    seq.ToArray().CopyTo(tmpEncodedSeq, 0);
    Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);

    Assert.IsNotNull(seq.Alphabet);

    // Case-insensitive alphabet name check. The expected (Xml) value goes first so a
    // failure message labels the values correctly; the name read at the top is reused.
    Assert.AreEqual(
        alphabet.ToLower(CultureInfo.CurrentCulture),
        seq.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));

    Assert.AreEqual(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceIdNode), seq.ID);
}
/// <summary>
/// Parses the given file with a FastA parser.
/// </summary>
/// <param name="fileName">Path of the file handed to the FastA parser.</param>
/// <returns>The sequences returned by the parser's <c>Parse()</c> call.</returns>
private IEnumerable<ISequence> ParseFile(string fileName)
{
    // TODO: Add other parsers.
    return new FastAParser(fileName).Parse();
}
/// <summary>
/// Validates that the parsers exposed by the SequenceParsers class report the parser
/// names configured in the Xml, and that the list of all parsers is available.
/// </summary>
public void ValidateSeqParserProperties()
{
    // Expected parser names from the Xml.
    string expectedFastaName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.FastAFileParserNode, Constants.ParserNameNode);
    string expectedGenBankName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileParserNode, Constants.ParserNameNode);
    string expectedGffName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.GffFileParserNode, Constants.ParserNameNode);
    string expectedFastQName = this.utilityObj.xmlUtil.GetTextValue(
        Constants.FastQFileParserNode, Constants.ParserNameNode);

    // Parsers published by the SequenceParsers class.
    FastAParser fastaParser = SequenceParsers.Fasta;
    IReadOnlyList<ISequenceParser> allParsers = SequenceParsers.All;
    GenBankParser genBankParser = SequenceParsers.GenBank;
    FastQParser fastQParser = SequenceParsers.FastQ;
    GffParser gffParser = SequenceParsers.Gff;

    // Validate Sequence parsers
    Assert.AreEqual(expectedFastaName, fastaParser.Name);
    Assert.AreEqual(expectedGenBankName, genBankParser.Name);
    Assert.AreEqual(expectedGffName, gffParser.Name);
    Assert.AreEqual(expectedFastQName, fastQParser.Name);
    Assert.IsNotNull(allParsers);

    ApplicationLog.WriteLine("Type of the parser is validated successfully");
}
/// <summary>
/// Parses the simple FastA file with the protein alphabet and validates that exactly one
/// sequence is returned and that its symbols match the expected sequence from the Xml.
/// </summary>
public void FastAParserValidateMoveNext()
{
    string fastaPath = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName,
        Constants.FilePathNode);
    Assert.IsTrue(File.Exists(fastaPath));

    // Logs information to the log file
    string fileExistsMessage = string.Format(
        (IFormatProvider)null,
        "FastA Parser BVT: File Exists in the Path '{0}'.",
        fastaPath);
    ApplicationLog.WriteLine(fileExistsMessage);

    using (FastAParser fastaParser = new FastAParser(fastaPath))
    {
        fastaParser.Alphabet = Alphabets.Protein;

        IEnumerable<ISequence> parsed = fastaParser.Parse();
        Assert.IsNotNull(parsed);
        Assert.AreEqual(1, parsed.Count());

        string countMessage = string.Format(
            (IFormatProvider)null,
            "FastA Parser BVT: Number of Sequences found are '{0}'.",
            parsed.Count());
        ApplicationLog.WriteLine(countMessage);

        string expectedSequence = utilityObj.xmlUtil.GetTextValue(
            Constants.SimpleFastaNodeName,
            Constants.ExpectedSequenceNode);

        Assert.IsNotNull(parsed.ElementAt(0));

        char[] parsedSymbols = parsed.ElementAt(0).Select(a => (char)a).ToArray();
        Assert.AreEqual(expectedSequence, new string(parsedSymbols));
    }
}
/// <summary>
/// Validate formatter all format method overloads with filePath\textwriter: the SAM file
/// is parsed, its first alignment is written to the temp SAM file, the temp file is
/// parsed again and every aligned sequence is compared with the expected FastA sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="formatTypes">enum type to execute different overload</param>
void ValidateSAMFormatter(string nodeName, ParseOrFormatTypes formatTypes)
{
    // Gets the input file and the expected sequence file from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedSequenceFile = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence);

    // Keep a typed reference for disposal so no unchecked "as" cast is needed; parsing
    // itself still goes through the ISequenceAlignmentParser interface as before.
    SAMParser samParser = new SAMParser();
    ISequenceAlignmentParser parser = samParser;
    try
    {
        IList<ISequenceAlignment> alignments = parser.Parse(filePath);
        SAMFormatter formatter = new SAMFormatter();

        switch (formatTypes)
        {
            case ParseOrFormatTypes.ParseOrFormatText:
                using (TextWriter writer = new StreamWriter(Constants.SAMTempFileName))
                {
                    formatter.Format(alignments[0], writer);
                }

                break;

            case ParseOrFormatTypes.ParseOrFormatFileName:
                formatter.Format(alignments[0], Constants.SAMTempFileName);
                break;
        }

        alignments = parser.Parse(Constants.SAMTempFileName);

        // Get expected sequences
        using (FastAParser parserObj = new FastAParser(expectedSequenceFile))
        {
            IList<ISequence> expectedSequencesList = parserObj.Parse().ToList();

            // Validate parsed output with expected output; "count" walks the expected
            // list in the same order in which the aligned sequences are visited.
            int count = 0;
            foreach (ISequenceAlignment alignment in alignments)
            {
                foreach (IAlignedSequence alignedSequence in alignment.AlignedSequences)
                {
                    foreach (ISequence actualSequence in alignedSequence.Sequences)
                    {
                        Assert.AreEqual(
                            new string(expectedSequencesList[count].Select(a => (char)a).ToArray()),
                            new string(actualSequence.Select(a => (char)a).ToArray()));
                        count++;
                    }
                }
            }
        }
    }
    finally
    {
        samParser.Dispose();
    }
}
/// <summary>
/// Validates general FastA Parser test cases which are further Formatted
/// with the xml node name specified: the file is parsed, written to a temp file with
/// FastAFormatter, parsed again and compared sequence by sequence with the original.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateParseFormatGeneralTestCases(string nodeName)
{
    // Gets the input file and alphabet from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string alphabet = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(filePath));

    string filepathTmp = Path.Combine(Path.GetTempPath(), "temp.fasta");

    // Ensure output is deleted
    if (File.Exists(filepathTmp))
    {
        File.Delete(filepathTmp);
    }

    List<ISequence> seqsOriginal;
    using (var parserObj = new FastAParser(filePath))
    {
        // Read the original file
        parserObj.Alphabet = Utility.GetAlphabet(alphabet);
        seqsOriginal = parserObj.Parse().ToList();
        Assert.IsFalse(seqsOriginal.Count == 0);
    }

    try
    {
        // Write to a new file. This happens inside the try block so the temp file is
        // removed even when the formatter fails part-way through.
        using (var formatter = new FastAFormatter(filepathTmp))
        {
            formatter.Write(seqsOriginal);
        }

        // Compare original with new file
        using (var parserObjNew = new FastAParser(filepathTmp))
        {
            // Read the new file, then compare the sequences
            parserObjNew.Alphabet = Utility.GetAlphabet(alphabet);
            IEnumerable<ISequence> seqsNew = parserObjNew.Parse();
            Assert.IsNotNull(seqsNew);

            int count = 0;
            foreach (ISequence newSequence in seqsNew)
            {
                // Fail with a readable message instead of an index-out-of-range
                // exception when the new file holds more sequences than the original.
                Assert.IsTrue(count < seqsOriginal.Count, "Number of sequences is different.");

                string s1 = seqsOriginal[count].ConvertToString();
                string s2 = newSequence.ConvertToString();
                Assert.AreEqual(s1, s2);
                count++;
            }

            // Expected value first, so a failure message labels the values correctly.
            Assert.AreEqual(seqsOriginal.Count, count, "Number of sequences is different.");
        }
    }
    finally
    {
        // Delete new file
        File.Delete(filepathTmp);
    }
}
/// <summary>
/// Validate Submit Job and Fetch ResultSync() using multiple input sequences
/// </summary>
/// <param name="nodeName">xml node name</param>
void ValidateFetchResultSync(string nodeName)
{
    // Read input from config file
    string fastaPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string emailId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.EmailIDNode);
    string clusterOption = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ClusterOptionNode);
    string actionAlign = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ActionAlignNode);

    // Service handler wired with a ClustalW parser and browser-proxy configuration
    var configParameters = new ConfigParameters();
    var clustalParser = new ClustalWParser();
    configParameters.UseBrowserProxy = true;
    var handler = new TestIClustalWServiceHandler(clustalParser, configParameters);

    // Request parameters
    var requestParameters = new ClustalWParameters();
    requestParameters.Values[ClustalWParameters.Email] = emailId;
    requestParameters.Values[ClustalWParameters.ClusterOption] = clusterOption;
    requestParameters.Values[ClustalWParameters.ActionAlign] = actionAlign;

    // Get the input sequences
    using (FastAParser fastaParser = new FastAParser(fastaPath))
    {
        IEnumerable<ISequence> inputSequences = fastaParser.Parse();

        // Submit job and validate it returned valid job id and control id
        ServiceParameters serviceParameters = handler.SubmitRequest(inputSequences.ToList(), requestParameters);
        Assert.IsFalse(string.IsNullOrEmpty(serviceParameters.JobId));
        ApplicationLog.WriteLine(string.Concat("JobId", serviceParameters.JobId));

        foreach (string key in serviceParameters.Parameters.Keys)
        {
            Assert.IsFalse(string.IsNullOrEmpty(serviceParameters.Parameters[key].ToString()));
            ApplicationLog.WriteLine(string.Format(
                (IFormatProvider)null,
                "{0}:{1}",
                key,
                serviceParameters.Parameters[key].ToString()));
        }

        // Get the results and validate it is not null.
        ClustalWResult result = handler.FetchResultsSync(serviceParameters);
        Assert.IsNotNull(result);
        Assert.IsNotNull(result.SequenceAlignment);

        foreach (IAlignedSequence alignedSequence in result.SequenceAlignment.AlignedSequences)
        {
            ApplicationLog.WriteLine("Aligned Sequence Sequences :");
            foreach (ISequence alignedMember in alignedSequence.Sequences)
            {
                ApplicationLog.WriteLine(string.Concat("Sequence:", alignedMember.ToString()));
            }
        }
    }

    ApplicationLog.WriteLine(@"ClustalWServiceHandler BVT : Submit job and Get Results is successfully completed using FetchResultSync()");
}
/// <summary>
/// Parses a single sequence from the given FastA stream and disposes the stream.
/// </summary>
/// <param name="fastaFileStream">
/// The fasta file stream. It is disposed by this method, whether or not parsing succeeds.
/// </param>
/// <returns>
/// The <see cref="ISequence"/> returned by <c>FastAParser.ParseOne</c>.
/// </returns>
public static ISequence GetFastaSequence(Stream fastaFileStream)
{
    // The using statement releases the stream even when ParseOne throws; previously the
    // stream was only disposed on the success path.
    using (fastaFileStream)
    {
        var fastaParser = new FastAParser();
        return fastaParser.ParseOne(fastaFileStream);
    }
}
/// <summary>
/// Validates the sequences returned by the given FastA parser against the simple FastA
/// DNA node of the Xml: one sequence with the expected symbols, length, alphabet and ID.
/// </summary>
/// <param name="parserObj">Parser whose <c>Parse()</c> output is validated.</param>
void ValidateParserGeneralTestCases(FastAParser parserObj)
{
    IEnumerable<ISequence> seqs = parserObj.Parse();
    Assert.IsNotNull(seqs);
    Assert.AreEqual(1, seqs.Count());
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: Number of Sequences found are '{0}'.", seqs.Count()));

    // Gets the expected sequence from the Xml
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaDnaNodeName, Constants.ExpectedSequenceNode);

    Sequence seq = (Sequence)seqs.ElementAt(0);
    Assert.IsNotNull(seq);

    char[] seqString = seqs.ElementAt(0).Select(a => (char)a).ToArray();
    string newSequence = new string(seqString);
    Assert.AreEqual(expectedSequence, newSequence);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: DNA Sequence is '{0}' and is as expected.", newSequence));

    // Logs to the NUnit GUI (Console.Out) window
    Console.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: DNA Sequence is '{0}' and is as expected.", newSequence));

    byte[] tmpEncodedSeq = new byte[seq.Count];
    (seq as IEnumerable<byte>).ToArray().CopyTo(tmpEncodedSeq, 0);
    Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: DNA Sequence Length is '{0}' and is as expected.", expectedSequence.Length));

    Assert.IsNotNull(seq.Alphabet);

    // Case-insensitive alphabet name check; the expected (Xml) value goes first so a
    // failure message labels the values correctly.
    Assert.AreEqual(
        utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaDnaNodeName, Constants.AlphabetNameNode).ToLower(CultureInfo.CurrentCulture),
        seq.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: The Sequence Alphabet is '{0}' and is as expected.", seq.Alphabet.Name));

    Assert.AreEqual(utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaDnaNodeName, Constants.SequenceIdNode), seq.ID);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: DNA Sequence ID is '{0}' and is as expected.", seq.ID));

    // Logs to the NUnit GUI (Console.Out) window
    Console.WriteLine(string.Format((IFormatProvider)null, "FastA Parser with Alphabet: DNA Sequence ID is '{0}' and is as expected.", seq.ID));
}
/// <summary>
/// Parses the simple FastA file and validates the symbols, length, ID and alphabet of
/// its first sequence against the values configured in the Xml.
/// </summary>
public void ValidateFastaAFileSequence()
{
    // Gets the expected sequence from the Xml
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
    string fastAFilePath = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.FilePathNode);
    string alphabet = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);
    Assert.IsTrue(File.Exists(fastAFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Concat("Sequence BVT: The File exist in the Path ", fastAFilePath));

    using (FastAParser parser = new FastAParser(fastAFilePath))
    {
        // Parse a FastA file Using Parse method and convert the same to sequence.
        parser.Alphabet = Utility.GetAlphabet(alphabet);
        IEnumerable<ISequence> sequence = parser.Parse();
        Assert.IsNotNull(sequence);

        Sequence fastASequence = (Sequence)sequence.ElementAt(0);
        Assert.IsNotNull(fastASequence);

        char[] seqString = sequence.ElementAt(0).Select(a => (char)a).ToArray();
        string newSequence = new string(seqString);
        Assert.AreEqual(expectedSequence, newSequence);
        ApplicationLog.WriteLine("Sequence BVT: The Sequence is as expected.");

        byte[] tmpEncodedSeq = new byte[fastASequence.Count];
        (fastASequence as IEnumerable<byte>).ToArray().CopyTo(tmpEncodedSeq, 0);
        Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
        ApplicationLog.WriteLine("Sequence BVT: Sequence Length is as expected.");

        Assert.AreEqual(utilityObj.xmlUtil.GetTextValue(Constants.SimpleProteinAlphabetNode, Constants.SequenceIdNode), fastASequence.ID);
        ApplicationLog.WriteLine("Sequence BVT: SequenceID is as expected.");

        // The expected (Xml) alphabet name goes first so a failure message labels the
        // values correctly; the name read at the top of the test is reused.
        Assert.AreEqual(alphabet, fastASequence.Alphabet.Name);
        ApplicationLog.WriteLine("Sequence BVT: Sequence Alphabet is as expected.");

        // Logs to Nunit GUI.
        Console.WriteLine("Sequence BVT: Validation of FastaA file Sequence is completed successfully.");
    }
}
private void ValidateMUMmerAlignGeneralTestCases(string nodeName)
{
    // Aligns the first reference sequence against the query sequences with MUMmerAligner
    // (Needleman-Wunsch as the pairwise algorithm) and compares the result with the
    // alignment described under 'nodeName' in the XML test configuration.

    // Reference sequence: first record of the configured reference FastA file.
    string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode).TestDir();
    Assert.IsNotNull(referencePath);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer P2 : Successfully validated the File Path '{0}'.", referencePath));

    ISequence reference = new FastAParser().Parse(referencePath).ElementAt(0);

    // Query sequences: every record of the configured search FastA file.
    string searchPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode).TestDir();
    Assert.IsNotNull(searchPath);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer P2 : Successfully validated the Search File Path '{0}'.", searchPath));

    IEnumerable<ISequence> queries = new FastAParser().Parse(searchPath);

    // Configure the aligner from the XML values.
    string configuredMumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
    var aligner = new MUMmerAligner();
    aligner.LengthOfMUM = long.Parse(configuredMumLength, null);
    aligner.StoreMUMs = true;
    aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();
    aligner.GapOpenCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    IList<IPairwiseSequenceAlignment> actualAlignment = aligner.Align(reference, queries);

    // Validate FinalMUMs and MUMs Properties.
    Assert.IsNotNull(aligner.MUMs);

    // Build the expected alignment from the configured score and aligned sequences.
    string scoreText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] alignedTexts = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    var expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(reference.Alphabet, alignedTexts[0]);
    expectedPair.SecondSequence = new Sequence(reference.Alphabet, alignedTexts[1]);
    expectedPair.Score = Convert.ToInt32(scoreText, null);
    expectedPair.FirstOffset = Int32.MinValue;
    expectedPair.SecondOffset = Int32.MinValue;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput =
        new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actualAlignment, expectedOutput));
    ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
}
public void FastAParserValidateParseWithStream()
{
    // Parses the configured FastA file through FastAParser.Parse(Stream) and checks the
    // first sequence's symbols, alphabet name and ID against the XML test configuration.

    // Gets the input file path from the Xml
    string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
        Constants.FilePathNode).TestDir();
    Assert.IsTrue(File.Exists(filePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: File Exists in the Path '{0}'.", filePath));

    List<ISequence> seqsList;
    using (var reader = File.OpenRead(filePath))
    {
        var parser = new FastAParser();
        parser.Alphabet = Alphabets.Protein;

        // Create the list inside the using block so the stream is still open while the
        // parse result is enumerated (presumably Parse is deferred - TODO confirm).
        seqsList = parser.Parse(reader).ToList();
    }

    // ToList() never returns null, so check that at least one sequence was parsed;
    // otherwise the indexer below would throw instead of failing an assertion.
    Assert.IsNotNull(seqsList);
    Assert.IsTrue(seqsList.Count > 0);
    ISequence firstSequence = seqsList[0];

    // Validate the sequence symbols.
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
        Constants.ExpectedSequenceNode);
    var seqString = new string(firstSequence.Select(a => (char)a).ToArray());
    Assert.AreEqual(expectedSequence, seqString);
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: The FASTA sequence '{0}' validation after Parse(Stream) is found to be as expected.",
        seqString));

    //Validate Alphabet type for a sequence.
    Assert.IsNotNull(firstSequence.Alphabet);
    string expectedAlphabetName = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
        Constants.AlphabetNameNode);
    // Expected value first, actual second, so a failure message reads correctly.
    Assert.AreEqual(
        expectedAlphabetName.ToLower(CultureInfo.CurrentCulture),
        firstSequence.Alphabet.Name.ToLower(CultureInfo.CurrentCulture));
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: The Sequence Alphabet is '{0}' and is as expected.",
        firstSequence.Alphabet.Name));

    //Validate ID for the sequence.
    Assert.AreEqual(
        utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName, Constants.SequenceIdNode),
        firstSequence.ID);
    ApplicationLog.WriteLine(string.Format(null,
        "FastA Parser BVT: The Sequence ID is '{0}' and is as expected.", firstSequence.ID));
}
public void ValidateSequenceToString()
{
    // Exercises Sequence.ToString() on a short literal sequence, a long sequence taken
    // from the XML test configuration, an empty sequence, and a sequence parsed from the
    // configured FastA file.

    ISequence shortSeq = new Sequence(Alphabets.DNA, "ATCG");
    string longSeqText = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
        Constants.seqLargeStringNode);
    ISequence longSeq = new Sequence(Alphabets.DNA, longSeqText);

    string shortActual = shortSeq.ToString();
    string longActual = longSeq.ToString();

    // The expected long text is a format string from the configuration; its argument is
    // the sequence count minus Helper.AlphabetsToShowInToString.
    string longTemplate = utilityObj.xmlUtil.GetTextValue(Constants.ToStringNodeName,
        Constants.seqLargeExpected2Node);
    var remainingCount = longSeq.Count - Helper.AlphabetsToShowInToString;
    string longExpected = string.Format(CultureInfo.CurrentCulture, longTemplate, remainingCount);

    Assert.AreEqual("ATCG", shortActual);
    Assert.AreEqual(longExpected, longActual);

    // An empty sequence must render as an empty string.
    var emptySeq = new Sequence(Alphabets.DNA, "");
    Assert.AreEqual(string.Empty, emptySeq.ToString());

    // Read a sequence from the FastA file named in the Xml.
    string filePath = utilityObj.xmlUtil.GetTextValue(Constants.SimpleFastaNodeName,
        Constants.FilePathNode);

    List<ISequence> seqsList;
    using (var reader = new StreamReader(filePath))
    using (var parser = new FastAParser())
    {
        parser.Alphabet = Alphabets.Protein;

        // Create a list of sequences while the reader and parser are still open.
        seqsList = parser.Parse(reader).ToList();
    }

    ISequence parsedSeq = seqsList[0];
    var fullText = new string(parsedSeq.Select(a => (char)a).ToArray());

    if (fullText.Length <= Helper.AlphabetsToShowInToString)
    {
        // Short enough: ToString must match the full sequence text.
        Assert.AreEqual(fullText, parsedSeq.ToString());
        return;
    }

    // Longer sequence: the leading part of ToString must occur in the full sequence
    // text, and ToString must contain the "... +[" marker.
    string shownText = parsedSeq.ToString();
    Assert.IsTrue(fullText.Contains(shownText.Substring(0, Helper.AlphabetsToShowInToString)));
    Assert.IsTrue(shownText.Contains("... +["));
}