/// <summary>
/// Resets the state shared by the tests: the pattern file path becomes empty,
/// the loaded sequence list and pattern definition are dropped, and the
/// search start position is set back to 1.
/// </summary>
public void SetUp()
 {
     BiopatMLFilePath = string.Empty;
     BioList = null;
     MyPatterns = null;
     SearchPosition = 1;
 }
コード例 #2
0
        /// <summary>
        /// Parses a GenBank file with the MBF library and converts every
        /// parsed sequence into a BioPatML sequence.
        /// </summary>
        /// <param name="genbankFileURL">Path of the GenBank file to parse.</param>
        /// <returns>The converted sequences, in the order the parser produced them.</returns>
        /// <exception cref="System.NotImplementedException">
        /// Thrown when <c>IsOnline</c> is set; online reading is not supported.</exception>
        private SequenceList ParseSequencePath
                                    (string genbankFileURL)
        {
            if (IsOnline)
            {
                throw new NotImplementedException
                    ("online genbank reading is not supported in this version!");
            }

            // Create the parser first; it is always asked for all sequences in the file.
            ISequenceParser gbParser = new GenBankParser();
            List<ISequence> mbfSequences = gbParser.Parse(genbankFileURL);

            SequenceList bioSeqList = new SequenceList();

            // Convert each parsed sequence exactly once and keep the result.
            foreach (Sequence mbfseq in mbfSequences)
            {
                bioSeqList.Add(ConvertToBioPatMLSeq(mbfseq));
            }

            return bioSeqList;
        }
		/// <summary>
		/// Searches the sequence read from the resource named by
		/// <c>_singleDnaSeqGenBankFilename</c> with the Motif pattern definition and
		/// spot-checks the 11th, the first and the last of the 57 expected matches.
		/// </summary>
		public void TestMotifPattern_Motif () {
			BiopatMLFilePath = "BioPaperTestData/MotifPattern/Motif.xml";

			using ( BioPatMBF_Reader reader = new BioPatMBF_Reader() ) {
				BioList = reader.Read( Global.GetResourceReader( _singleDnaSeqGenBankFilename ) );
			}

			MyPatterns = DefinitionIO.Read( Global.GetResourceReader( BiopatMLFilePath ) );

			var target = BioList[0];
			FeatureList hits = target.Search( SearchPosition, target.Length, MyPatterns.Pattern );

			// The Jacobi library reports 57 matches in total for this pattern.
			Assert.AreEqual( 57, hits.Count );
			Assert.AreEqual( "Pribnow-box", hits.Name );

			// Spot check: the 11th match.
			Match eleventh = (Match) hits[10];
			Assert.AreEqual( 0.66, eleventh.Similarity, 1e-2 );
			Assert.AreEqual( 6, eleventh.Length );
			Assert.AreEqual( "ttttat", eleventh.Letters() );

			// Spot check: the first match.
			Match first = (Match) hits[0];
			Assert.AreEqual( AlphabetFactory.Instance( AlphabetType.DNA ), first.Alphabet );
			Assert.AreEqual( 6, first.Length );
			Assert.AreEqual( 0.5, first.Similarity, 1e-2 );

			// Spot check: the last match.
			Match last = (Match) hits[56];
			Assert.AreEqual( 0.5, last.Similarity, 1e-2 );
			Assert.AreEqual( "tttctt", last.Letters() );
		}
        /// <summary>
        /// Searches the sequence read from the resource named by
        /// <c>_singleDnaSeqGenBankFilename</c> with the SeriesAll pattern definition
        /// and checks the first match together with its three sub matches.
        /// </summary>
        public void TestStructuredPattern_SeriesAll()
        {
            BiopatMLFilePath = "BioPaperTestData/StructuredPattern/SeriesAll.xml";

            using (BioPatMBF_Reader reader = new BioPatMBF_Reader())
            {
                BioList = reader.Read(Global.GetResourceReader(_singleDnaSeqGenBankFilename));
            }

            MyPatterns = DefinitionIO.Read(Global.GetResourceReader(BiopatMLFilePath));

            var target = BioList[0];
            FeatureList hits = target.Search(SearchPosition, target.Length, MyPatterns.Pattern);

            // The old jacobi result gives 49 matches.
            Assert.AreEqual(49, hits.Count);

            // The first match of the list must consist of three sub matches.
            Match first = (Match)hits[0];
            Assert.AreEqual(3, first.SubMatches.Count);

            // Letters of the sub matches, in series order.
            string[] expectedParts = { "aattt", "tataagtg", "ttcaa" };
            for (int part = 0; part < expectedParts.Length; part++)
            {
                Assert.AreEqual(expectedParts[part], first.SubMatches[part].Letters());
            }

            // The enclosing match spans all three parts.
            Assert.AreEqual("aattttataagtgttcaa", first.Letters());
        }
コード例 #5
0
 /// <summary>
 /// Builds a Block from a list of aligned sequences
 /// (<see cref="QUT.Bio.BioPatML.Patterns.PWM"> PWM </see>).
 /// </summary>
 /// <param name="name">Name of the block element.</param>
 /// <param name="sequenceList">List of aligned sequences. The alphabet of the
 /// first sequence is passed on to the base constructor.</param>
 /// <param name="background">Histogram with base counts of the background
 /// sequences.</param>
 /// <param name="threshold">Similarity threshold.</param>
 public Block(
     string name,
     SequenceList sequenceList,
     HistogramSymbol background,
     double threshold)
     : base(name, sequenceList[0].Alphabet, threshold)
 {
     this.Estimate(sequenceList, background);
 }
コード例 #6
0
        /** Checks the per-symbol counts after a list of two DNA sequences is added to the histogram. */
        public void TestAddSequenceList()
        {
            SequenceList sequences = new SequenceList();
            sequences.Add(new Sequence(AlphabetType.DNA, "actga"));
            sequences.Add(new Sequence(AlphabetType.DNA, "ctaca"));

            histo.Add(sequences);

            // "actga" and "ctaca" together hold a x4, c x3, t x2 and g x1.
            Assert.AreEqual(4, histo.HistoValue(alpha['a']));
            Assert.AreEqual(3, histo.HistoValue(alpha['c']));
            Assert.AreEqual(2, histo.HistoValue(alpha['t']));
            Assert.AreEqual(1, histo.HistoValue(alpha['g']));
        }
コード例 #7
0
        /** Builds a Block from the aligned sequences "aa" and "at" and checks the value stored for every symbol at both positions. */
        public void TestConstructor()
        {
            const double Tolerance = 1e-3;

            SequenceList aligned = new SequenceList();
            aligned.Add(new Sequence(AlphabetType.DNA, "aa", false));
            aligned.Add(new Sequence(AlphabetType.DNA, "at", false));

            // No background histogram is supplied and the threshold is 0.0.
            Block pwmBlock = new Block("test", aligned, null, 0.0);

            // Position 0: both sequences carry an 'a'.
            Assert.AreEqual(1.000, pwmBlock.Get('a', 0), Tolerance);
            Assert.AreEqual(-0.584, pwmBlock.Get('c', 0), Tolerance);
            Assert.AreEqual(-0.584, pwmBlock.Get('t', 0), Tolerance);
            Assert.AreEqual(-0.584, pwmBlock.Get('g', 0), Tolerance);

            // Position 1: one sequence carries an 'a', the other a 't'.
            Assert.AreEqual(0.415, pwmBlock.Get('a', 1), Tolerance);
            Assert.AreEqual(-0.584, pwmBlock.Get('c', 1), Tolerance);
            Assert.AreEqual(0.415, pwmBlock.Get('t', 1), Tolerance);
            Assert.AreEqual(-0.584, pwmBlock.Get('g', 1), Tolerance);
        }
		/// <summary>
		/// Searches the sequence read from the resource named by
		/// <c>_singleProteinSeqGenBankFilename</c> with the RegionalGap pattern
		/// definition and checks the match count plus the letters covered by the
		/// first and the last match.
		/// </summary>
		public void TestRegionalPattern_Gap () {
			BiopatMLFilePath = "BioPaperTestData/RegionalPattern/RegionalGap.xml";

			// The reader is disposable, so release it as soon as the sequences are loaded.
			using ( BioPatMBF_Reader gbReader = new BioPatMBF_Reader() ) {
				BioList = gbReader.Read( Global.GetResourceReader( _singleProteinSeqGenBankFilename ) );
			}

			MyPatterns = DefinitionIO.Read( Global.GetResourceReader( BiopatMLFilePath ) );

			FeatureList Matches = BioList[0].Search( SearchPosition, BioList[0].Length, MyPatterns.Pattern );

			// NOTE(review): a figure of 309 matches was attributed to old jacobi, but the
			// assertions here (count 410, last index 409) expect 410 - confirm the reference value.
			Assert.AreEqual( 410, Matches.Count );
			// Checks the first match
			Assert.AreEqual( "MT", Matches[0].MainSequence.Letters( Matches[0].Start, Matches[0].End ) );
			// Checks that the last match sits at the correct start and end position
			Assert.AreEqual( "CE", Matches[409].MainSequence.Letters( Matches[409].Start, Matches[409].End ) );
		}
コード例 #9
0
        /// <summary>
        /// Parses FASTA content from a text reader and converts every parsed
        /// sequence into a BioPatML sequence.
        /// </summary>
        /// <param name="reader">Reader that supplies the FASTA content.</param>
        /// <returns>List of BioPatML sequences.</returns>
        public override SequenceList Read(TextReader reader)
        {
            ISequenceParser parser = new FastaParser();
            List<ISequence> parsed = parser.Parse(reader);

            SequenceList converted = new SequenceList();

            // Each parsed entry is cast to Sequence before it is converted.
            for (int i = 0; i < parsed.Count; i++)
            {
                Sequence current = (Sequence)parsed[i];
                converted.Add(ConvertToBioPatMLSeq(current));
            }

            return converted;
        }
コード例 #10
0
        /// <summary>
        /// Parses GenBank content from a text reader with the MBF library and
        /// converts every parsed sequence into a BioPatML sequence.
        /// The reader could also be a StringReader.
        /// </summary>
        /// <param name="reader">Reader that supplies the GenBank content.</param>
        /// <returns>The converted sequences, in the order the parser produced them.</returns>
        private SequenceList ParseSequencePath
                                    (TextReader reader)
        {
            // Create the parser first; it is always asked for all sequences in the reader.
            ISequenceParser gbParser = new GenBankParser();
            List<ISequence> mbfSequences = gbParser.Parse(reader);

            SequenceList bioSeqList = new SequenceList();

            // Convert each parsed sequence exactly once and keep the result.
            foreach (Sequence mbfseq in mbfSequences)
            {
                bioSeqList.Add(ConvertToBioPatMLSeq(mbfseq));
            }

            return bioSeqList;
        }
コード例 #11
0
        /// <summary>
        /// Estimates the weights of the PWM that backs a Block pattern.
        /// </summary>
        /// <exception cref="System.ArgumentException">
        /// Thrown when the sequences do not all have the same length.</exception>
        /// <param name="sequenceList">List of aligned sequences.</param>
        /// <param name="background">Histogram with base counts of the background
        /// sequences. May be null; a histogram to which every symbol of the
        /// pattern alphabet is added once is used instead.</param>
        private void Estimate
            (SequenceList sequenceList, HistogramSymbol background)
        {
            int shortest = sequenceList.MinLength();
            bool sameLength = sequenceList.MaxLength() == shortest;

            if (!sameLength)
            {
                throw new ArgumentException("Sequences must be of equal length!");
            }

            HistogramSymbol counts = background;

            if (counts == null)
            {
                // No background supplied: add every alphabet symbol once.
                counts = new HistogramSymbol();

                foreach (Symbol sym in PWMalphabet)
                {
                    counts.Add(sym);
                }
            }

            base.Init(shortest);
            base.Estimate(sequenceList, 1, counts);
        }
		/// <summary>
		/// Searches the sequence read from the resource named by
		/// <c>_singleProteinSeqGenBankFilename</c> with the RegionalComposition
		/// pattern definition and checks count, name, and the first and last match.
		/// </summary>
		public void TestRegionalPattern_Composition () {
			BiopatMLFilePath = "BioPaperTestData/RegionalPattern/RegionalComposition.xml";

			using ( BioPatMBF_Reader reader = new BioPatMBF_Reader() ) {
				BioList = reader.Read( Global.GetResourceReader( _singleProteinSeqGenBankFilename ) );
			}

			MyPatterns = DefinitionIO.Read( Global.GetResourceReader( BiopatMLFilePath ) );

			var target = BioList[0];
			FeatureList hits = target.Search( SearchPosition, target.Length, MyPatterns.Pattern );

			// 49 matches are expected.
			// NOTE(review): a figure of 309 attributed to old jacobi does not match this assertion - confirm.
			Assert.AreEqual( 49, hits.Count );

			// Letters covered by the first match.
			var first = hits[0];
			Assert.AreEqual( "GATLFKTRCLQCHTV", first.MainSequence.Letters( first.Start, first.End ) );

			// The match list carries the name of the pattern that produced it.
			Assert.AreEqual( "transmembrane domain", hits.Name );
			Assert.AreEqual( 12, hits[0].Start );
			Assert.AreEqual( 26, hits[0].End );

			// Letters covered by the last match confirm its start and end position.
			var last = hits[48];
			Assert.AreEqual( "KDRNDLITYLKKACE", last.MainSequence.Letters( last.Start, last.End ) );
		}
コード例 #13
0
 /// <summary>
 ///  Adds every sequence of the provided list to this histogram.
 /// </summary>
 /// <param name="sequenceList"> List of sequences to add. </param>
 public void Add(SequenceList sequenceList)
 {
     int index = 0;

     while (index < sequenceList.Count)
     {
         Add(sequenceList[index]);
         index++;
     }
 }
コード例 #14
0
 /// <summary>
 ///  Creates a histogram and adds the provided list of sequences to it.
 /// </summary>
 /// <param name="sequenceList"> Sequences that are added to the new histogram. </param>
 public HistogramSymbol(SequenceList sequenceList)
 {
     this.Add(sequenceList);
 }
コード例 #15
0
        /// <summary>
        /// Reads the attributes of the given node (name, threshold, impact and
        /// alphabet), collects the text of every child "Sequence" element and
        /// estimates the pattern weights from those sequences.
        /// </summary>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when a Sequence element in the block contains no letters.</exception>
        /// <param name="node">Node whose attributes and child elements are read.</param>
        /// <param name="definition">The Definition element where the node sits in</param>
        public override void ReadNode
            (XmlNode node, Definition definition)
        {
            PatternName = XMLHelper.GetAttrValueString(node, "name");
            Threshold = XMLHelper.GetAttrValDouble(node, "threshold");
            Impact = XMLHelper.GetAttrValDouble(node, "impact");

            PWMalphabet = AlphabetFactory.Instance
                        (XMLHelper.GetAttrValueString(node, "alphabet"));
            SequenceList seqList = new SequenceList();

            // Walk the direct children without overwriting the node parameter.
            for (XmlNode child = node.FirstChild; child != null; child = child.NextSibling)
            {
                if (!child.Name.Equals("Sequence"))
                {
                    continue;
                }

                String letters = child.InnerText.Trim();

                // Trim() never returns null, so a missing sequence shows up as an
                // empty string; the two-argument constructor keeps the text as the
                // exception message rather than as a parameter name.
                if (letters.Length == 0)
                {
                    throw new ArgumentNullException
                        ("node", "Sequences in Block are missing!");
                }

                seqList.Add(new Sequence(PWMalphabet, letters, false));
            }

            Estimate(seqList, null);
        }
コード例 #16
0
		/// <summary>
		/// Searches the sequence read from the resource named by
		/// <c>_sampleGenBankFile2</c> with the Block pattern definition and checks
		/// similarity and letters of all three expected matches.
		/// </summary>
		public void TestMotifPattern_Block () {
			BiopatMLFilePath = "BioPaperTestData/MotifPattern/Block.xml";

			// The reader is disposable, so release it as soon as the sequences are loaded.
			using ( BioPatMBF_Reader gbReader = new BioPatMBF_Reader() ) {
				BioList = gbReader.Read( Global.GetResourceReader( _sampleGenBankFile2 ) );
			}

			MyPatterns = DefinitionIO.Read( Global.GetResourceReader( BiopatMLFilePath ) );
			FeatureList Matches = BioList[0].Search( SearchPosition, BioList[0].Length, MyPatterns.Pattern );

			Assert.AreEqual( 3, Matches.Count );
			Assert.AreEqual( "Pribnow-box", Matches.Name );

			// First match
			Match matched = (Match) Matches[0];
			Assert.AreEqual( 0.83, matched.Similarity, 1e-2 );
			Assert.AreEqual( "tataac", matched.Letters() );

			// Second match
			matched = (Match) Matches[1];
			Assert.AreEqual( 0.76, matched.Similarity, 1e-2 );
			Assert.AreEqual( "taacat", matched.Letters() );

			// Third match
			matched = (Match) Matches[2];
			Assert.AreEqual( 0.72, matched.Similarity, 1e-2 );
			Assert.AreEqual( "cataaa", matched.Letters() );
		}
コード例 #17
0
		/// <summary>
		/// Searches the sequence read from the resource named by
		/// <c>_sampleGenBankFile2</c> with the PWM pattern definition and checks
		/// position, similarity and letters of the first, second and last match.
		/// </summary>
		public void TestMotifPattern_PWM () {
			BiopatMLFilePath = "BioPaperTestData/MotifPattern/PWM.xml";

			using ( BioPatMBF_Reader reader = new BioPatMBF_Reader() ) {
				BioList = reader.Read( Global.GetResourceReader( _sampleGenBankFile2 ) );
			}

			MyPatterns = DefinitionIO.Read( Global.GetResourceReader( BiopatMLFilePath ) );

			var target = BioList[0];
			FeatureList hits = target.Search( SearchPosition, target.Length, MyPatterns.Pattern );

			Assert.AreEqual( 14, hits.Count );
			Assert.AreEqual( "Pribnow-box", hits.Name );

			// First match.
			Match first = (Match) hits[0];
			Assert.AreEqual( 17, first.Start );
			Assert.AreEqual( 22, first.End );
			Assert.AreEqual( 0.67, first.Similarity, 1e-2 );
			Assert.AreEqual( "tctcct", first.Letters() );

			// Second match.
			Match second = (Match) hits[1];
			Assert.AreEqual( 25, second.Start );
			Assert.AreEqual( 30, second.End );
			Assert.AreEqual( 0.61, second.Similarity, 1e-2 );
			Assert.AreEqual( "ttggct", second.Letters() );

			// Last of the 14 matches.
			Match last = (Match) hits[13];
			Assert.AreEqual( 268, last.Start );
			Assert.AreEqual( 273, last.End );
			Assert.AreEqual( 0.67, last.Similarity, 1e-2 );
			Assert.AreEqual( "tgtgct", last.Letters() );
		}