/// <summary>
/// Reads the resource named by <c>_sampleGenBankFile1</c> through a
/// <c>BioPatMBF_Reader</c> and checks that the reader hands back a result.
/// </summary>
/// <remarks>
/// The read is performed before the call to <c>Debug.WriteLine</c> rather than inside
/// its argument list: <c>Debug.WriteLine</c> is marked <c>[Conditional("DEBUG")]</c>,
/// so a call nested in its arguments is dropped entirely from builds that do not
/// define DEBUG, which would leave this test exercising nothing.
/// </remarks>
public void TestReadSampleGenbankFileAsTextReaderObj()
{
    using (reader = new BioPatMBF_Reader())
    {
        var parsed = reader.Read(Global.GetResourceReader(_sampleGenBankFile1));

        // Minimal automatic check so the test can actually fail when nothing is returned.
        Assert.IsNotNull(parsed);

        // Still dump the result for manual inspection in debug builds.
        System.Diagnostics.Debug.WriteLine(parsed);
    }
}
/// <summary>
/// Loads the pattern definition from Motif.xml, searches the first sequence read from
/// the resource named by <c>_singleDnaSeqGenBankFilename</c>, and spot-checks the
/// resulting matches.
/// </summary>
public void TestMotifPattern_Motif()
{
    BiopatMLFilePath = "BioPaperTestData/MotifPattern/Motif.xml";

    using (BioPatMBF_Reader genBankReader = new BioPatMBF_Reader())
    {
        BioList = genBankReader.Read(Global.GetResourceReader(_singleDnaSeqGenBankFilename));
    }

    MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

    FeatureList hits = BioList[0].Search(SearchPosition, BioList[0].Length, MyPatterns.Pattern);

    // According to the Jacobi library the total number of matches should be 57.
    Assert.AreEqual(57, hits.Count);
    Assert.AreEqual("Pribnow-box", hits.Name);

    // Spot-check a few of the 57 entries, beginning with the 11th one.
    Match eleventh = (Match)hits[10];
    Assert.AreEqual(0.66, eleventh.Similarity, 1e-2);
    Assert.AreEqual(6, eleventh.Length);
    Assert.AreEqual("ttttat", eleventh.Letters());

    // The very first match.
    Match first = (Match)hits[0];
    Assert.AreEqual(AlphabetFactory.Instance(AlphabetType.DNA), first.Alphabet);
    Assert.AreEqual(6, first.Length);
    Assert.AreEqual(0.5, first.Similarity, 1e-2);

    // The last match.
    Match last = (Match)hits[56];
    Assert.AreEqual(0.5, last.Similarity, 1e-2);
    Assert.AreEqual("tttctt", last.Letters());
}
/// <summary>
/// Loads the pattern definition from SeriesAll.xml, searches the first sequence read
/// from the resource named by <c>_singleDnaSeqGenBankFilename</c>, and verifies the
/// match count together with the sub-matches of the first match.
/// </summary>
public void TestStructuredPattern_SeriesAll()
{
    BiopatMLFilePath = "BioPaperTestData/StructuredPattern/SeriesAll.xml";

    using (BioPatMBF_Reader genBankReader = new BioPatMBF_Reader())
    {
        BioList = genBankReader.Read(Global.GetResourceReader(_singleDnaSeqGenBankFilename));
    }

    MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

    FeatureList hits = BioList[0].Search(SearchPosition, BioList[0].Length, MyPatterns.Pattern);

    // 49 matches are expected, based on the old Jacobi result.
    Assert.AreEqual(49, hits.Count);

    // Inspect the first entry of the match list.
    Match first = (Match)hits[0];
    var parts = first.SubMatches;

    // It should be composed of exactly three sub-matches.
    Assert.AreEqual(3, parts.Count);
    Assert.AreEqual("aattt", parts[0].Letters());
    Assert.AreEqual("tataagtg", parts[1].Letters());
    Assert.AreEqual("ttcaa", parts[2].Letters());

    // And finally the letters of the main match itself.
    Assert.AreEqual("aattttataagtgttcaa", first.Letters());
}
/// <summary>
/// Reads the resource named by <c>_sampleGenBankFile2</c> through a
/// <c>BioPatMBF_Reader</c> and checks that the reader hands back a result.
/// </summary>
/// <remarks>
/// The read is performed before the call to <c>Debug.WriteLine</c> rather than inside
/// its argument list: <c>Debug.WriteLine</c> is marked <c>[Conditional("DEBUG")]</c>,
/// so a call nested in its arguments is dropped entirely from builds that do not
/// define DEBUG.
/// </remarks>
public void TestReadSampleGenbankFileAsFilePath()
{
    using (reader = new BioPatMBF_Reader())
    {
        var parsed = reader.Read(Global.GetResourceReader(_sampleGenBankFile2));

        // Minimal automatic check so the test can actually fail when nothing is returned.
        // TODO: assert on the parsed content itself once the expected values are known.
        Assert.IsNotNull(parsed);

        // Still dump the result for manual inspection in debug builds.
        System.Diagnostics.Debug.WriteLine(parsed);
    }
}
/// <summary>
/// Loads the pattern definition from RegionalGap.xml, searches the first sequence read
/// from the resource named by <c>_singleProteinSeqGenBankFilename</c>, and checks the
/// match count plus the letters covered by the first and last match.
/// </summary>
public void TestRegionalPattern_Gap()
{
    BiopatMLFilePath = "BioPaperTestData/RegionalPattern/RegionalGap.xml";

    // Dispose the reader as soon as the sequences are loaded, like the sibling tests do.
    using (BioPatMBF_Reader gbReader = new BioPatMBF_Reader())
    {
        BioList = gbReader.Read(Global.GetResourceReader(_singleProteinSeqGenBankFilename));
    }

    MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

    FeatureList matches = BioList[0].Search(SearchPosition, BioList[0].Length, MyPatterns.Pattern);

    // The inherited comment quotes 309 total matches from the old Jacobi library, yet
    // the assertion expects 410. NOTE(review): confirm which count is authoritative.
    Assert.AreEqual(410, matches.Count);

    // Check the first match.
    var first = matches[0];
    Assert.AreEqual("MT", first.MainSequence.Letters(first.Start, first.End));

    // Check that the last match sits at the correct start and end positions.
    var last = matches[409];
    Assert.AreEqual("CE", last.MainSequence.Letters(last.Start, last.End));
}
/// <summary>
/// Parses GenBank-formatted text and copies the first parsed sequence into a
/// <c>SequenceContract</c>.
/// </summary>
/// <param name="gbContent">The GenBank content, as text.</param>
/// <returns>
/// A contract carrying the value of the "SequenceName" annotation (null when that value
/// is not a string), the alphabet name and the letters of the first parsed sequence.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="gbContent"/> is null.
/// </exception>
public SequenceContract ParseGenbankFile(string gbContent)
{
    // Fail up front with the caller's parameter name instead of relying on the
    // StringReader constructor to reject null.
    if (gbContent == null)
    {
        throw new System.ArgumentNullException("gbContent");
    }

    SequenceList bioSeqList; // BioPatML's data structure

    // Both readers are disposable; release them as soon as parsing is finished.
    using (StringReader sr = new StringReader(gbContent))
    using (ReaderBase reader = new BioPatMBF_Reader())
    {
        bioSeqList = reader.Read(sr);
    }

    // For now only the first sequence is ever used.
    var first = bioSeqList[0];

    SequenceContract sequence = new SequenceContract();

    // The name is taken from the "SequenceName" annotation; an earlier assignment from
    // bioSeqList[0].Name is disabled in favour of it.
    sequence.Name = first.Annotations("SequenceName").AnnotationValue as string;
    sequence.AlphabetName = first.Alphabet.Name;
    sequence.Characters = first.Letters();

    return sequence;
}
/// <summary>
/// Loads the pattern definition from RegionalComposition.xml, searches the first
/// sequence read from the resource named by <c>_singleProteinSeqGenBankFilename</c>,
/// and checks the match count, the list name and the first and last match.
/// </summary>
public void TestRegionalPattern_Composition()
{
    BiopatMLFilePath = "BioPaperTestData/RegionalPattern/RegionalComposition.xml";

    using (BioPatMBF_Reader genBankReader = new BioPatMBF_Reader())
    {
        BioList = genBankReader.Read(Global.GetResourceReader(_singleProteinSeqGenBankFilename));
    }

    MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

    FeatureList hits = BioList[0].Search(SearchPosition, BioList[0].Length, MyPatterns.Pattern);

    // The inherited comment quotes 309 total matches from the old Jacobi library, yet
    // the assertion expects 49. NOTE(review): confirm which count is authoritative.
    Assert.AreEqual(49, hits.Count);

    // Check the letters covered by the first match.
    var first = hits[0];
    Assert.AreEqual("GATLFKTRCLQCHTV", first.MainSequence.Letters(first.Start, first.End));

    // The match list should carry the name of the pattern that produced it.
    Assert.AreEqual("transmembrane domain", hits.Name);
    Assert.AreEqual(12, hits[0].Start);
    Assert.AreEqual(26, hits[0].End);

    // Check that the last match sits at the correct start and end positions.
    var last = hits[48];
    Assert.AreEqual("KDRNDLITYLKKACE", last.MainSequence.Letters(last.Start, last.End));
}
/// <summary>
/// Tests searching a sequence with a <c>SeriesAll</c> pattern built from two DNA motifs
/// ("attata" and "tgtcaa") with one <c>Gap</c> element between them, using the first
/// sequence of the Chlamydia trachomatis GenBank resource.
/// </summary>
public void TestMatchSigma70InChlamydia()
{
    SequenceList genomes;

    using (BioPatMBF_Reader genBankReader = new BioPatMBF_Reader())
    {
        genomes = genBankReader.Read(
            Global.GetResourceReader("data/GenBank/NC_000117-Chlamydia trachomatis D-UW-3CX.gbk"));
    }

    Sequence genome = genomes[0];

    // Assemble the series element by element: motif, gap, motif.
    SeriesAll sigma70 = new SeriesAll();

    Motif leadingMotif = new Motif("", AlphabetType.DNA, "attata", 1.0);
    sigma70.Add(leadingMotif);

    Gap spacer = new Gap("", 1, 200, 1);
    sigma70.Add(spacer);

    Motif trailingMotif = new Motif("", AlphabetType.DNA, "tgtcaa", 1.0);
    sigma70.Add(trailingMotif);

    // Note: the search starts at position 2 here.
    FeatureList hits = genome.Search(2, genome.Length, sigma70);

    Assert.AreEqual(11, hits.Count);

    // Disabled assertions kept from the original test:
    //Assert.AreEqual( "taaaagcc", matches[0].Letters() );
    //Assert.AreEqual( 1.0, ( (Match) matches[0] ).Similarity, 1e-1 );
    //Assert.AreEqual( "taaaagccc", matches[1].Letters() );
    //Assert.AreEqual( 1.0, ( (Match) matches[1] ).Similarity, 1e-1 );
    //Assert.AreEqual( "taaaagccc", matches[2].Letters() );
    //Assert.AreEqual( 0.9, ( (Match) matches[2] ).Similarity, 1e-1 );
}
/// <summary>
/// Loads the pattern definition from PWM.xml, searches the first sequence read from
/// the resource named by <c>_sampleGenBankFile2</c>, and checks position, similarity
/// and letters of the first, second and last match.
/// </summary>
public void TestMotifPattern_PWM()
{
    BiopatMLFilePath = "BioPaperTestData/MotifPattern/PWM.xml";

    using (BioPatMBF_Reader genBankReader = new BioPatMBF_Reader())
    {
        BioList = genBankReader.Read(Global.GetResourceReader(_sampleGenBankFile2));
    }

    MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

    FeatureList hits = BioList[0].Search(SearchPosition, BioList[0].Length, MyPatterns.Pattern);

    Assert.AreEqual(14, hits.Count);
    Assert.AreEqual("Pribnow-box", hits.Name);

    // First match.
    Match first = (Match)hits[0];
    Assert.AreEqual(17, first.Start);
    Assert.AreEqual(22, first.End);
    Assert.AreEqual(0.67, first.Similarity, 1e-2);
    Assert.AreEqual("tctcct", first.Letters());

    // Second match.
    Match second = (Match)hits[1];
    Assert.AreEqual(25, second.Start);
    Assert.AreEqual(30, second.End);
    Assert.AreEqual(0.61, second.Similarity, 1e-2);
    Assert.AreEqual("ttggct", second.Letters());

    // Last of the 14 matches.
    Match last = (Match)hits[13];
    Assert.AreEqual(268, last.Start);
    Assert.AreEqual(273, last.End);
    Assert.AreEqual(0.67, last.Similarity, 1e-2);
    Assert.AreEqual("tgtgct", last.Letters());
}
/// <summary>
/// Loads the pattern definition from Block.xml, searches the first sequence read from
/// the resource named by <c>_sampleGenBankFile2</c>, and checks similarity and letters
/// of all three expected matches.
/// </summary>
public void TestMotifPattern_Block()
{
    BiopatMLFilePath = "BioPaperTestData/MotifPattern/Block.xml";

    // Dispose the reader as soon as the sequences are loaded, like the sibling tests do.
    using (BioPatMBF_Reader gbReader = new BioPatMBF_Reader())
    {
        BioList = gbReader.Read(Global.GetResourceReader(_sampleGenBankFile2));
    }

    MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

    FeatureList matches = BioList[0].Search(SearchPosition, BioList[0].Length, MyPatterns.Pattern);

    Assert.AreEqual(3, matches.Count);
    Assert.AreEqual("Pribnow-box", matches.Name);

    // First match.
    Match matched = (Match)matches[0];
    Assert.AreEqual(0.83, matched.Similarity, 1e-2);
    Assert.AreEqual("tataac", matched.Letters());

    // Second match.
    matched = (Match)matches[1];
    Assert.AreEqual(0.76, matched.Similarity, 1e-2);
    Assert.AreEqual("taacat", matched.Letters());

    // Third and last match.
    matched = (Match)matches[2];
    Assert.AreEqual(0.72, matched.Similarity, 1e-2);
    Assert.AreEqual("cataaa", matched.Letters());
}