/// <summary>
/// Matches a composition pattern five times in a row at position 1 and checks
/// the length, the letters and the reported increment after every call.
/// </summary>
public void TestMatchIncrement()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "actgactg");
    Composition pattern = new Composition(
        "Composition", AlphabetType.DNA, 1, 7, 1.7, Composition.MatchMode.ALL, 0.0);

    // Expected results of the successive calls, in call order.
    int[] expectedLengths = { 1, 3, 4, 6, 7 };
    int[] expectedIncrements = { 0, 0, 0, 0, 1 };
    string[] expectedLetters = { "a", "act", "actg", "actgac", "actgact" };

    for (int round = 0; round < expectedLengths.Length; round++)
    {
        Match hit = pattern.Match(dna, 1);

        Assert.AreEqual(expectedLengths[round], hit.Length);
        Assert.AreEqual(expectedIncrements[round], pattern.Increment);
        Assert.AreEqual(expectedLetters[round], hit.Letters());
    }
}
/// <summary>
/// Checks regular-expression matching at fixed positions and a search over
/// the whole sequence.
/// </summary>
public void TestMatch()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "acctccgg");

    // "ctc." does not match at position 1 but does at position 3.
    RegularExp pattern = new RegularExp("test", "ctc.");
    Match hit = pattern.Match(dna, 1);
    Assert.AreEqual(null, hit);

    hit = pattern.Match(dna, 3);
    Assert.AreEqual(3, hit.Start);
    Assert.AreEqual(4, hit.Length);
    Assert.AreEqual(1, hit.Strand);
    Assert.AreEqual(1.0, hit.Similarity, 1e-2);

    // Match at the very start of the sequence.
    pattern = new RegularExp("test", "acc");
    hit = pattern.Match(dna, 1);
    Assert.AreEqual(1, hit.Start);

    // Match that ends on the last symbol of the sequence.
    pattern = new RegularExp("test", "cgg");
    hit = pattern.Match(dna, 6);
    Assert.AreEqual(8, hit.End);

    //last test
    //ISSUE: could be star problem
    pattern = new RegularExp("test", "c.*g");
    FeatureList found = dna.Search(0, 0, pattern);
    Assert.AreEqual(4, found.Count);

    string[] expectedLetters = { "cctccgg", "ctccgg", "ccgg", "cgg" };
    for (int i = 0; i < expectedLetters.Length; i++)
    {
        Assert.AreEqual(expectedLetters[i], found[i].Letters());
    }
}
/// <summary>
/// Creates a match object that remembers the pattern it belongs to and is
/// initialized with the given location and similarity.
/// </summary>
/// <param name="pattern">The referenced matching pattern.</param>
/// <param name="sequence">Sequence the match was found on.</param>
/// <param name="start">Start position of the match.</param>
/// <param name="length">Length of the match.</param>
/// <param name="strand">Strand the match belongs to. +1 = forward strand,
/// -1 = backward strand, 0 = n.a. or unknown.</param>
/// <param name="similarity">Similarity value handed on to <c>Set</c>.</param>
public Match(
    IPattern pattern,
    Sequence sequence,
    int start,
    int length,
    int strand,
    double similarity)
{
    MatchPattern = pattern;
    Set(sequence, start, length, strand, similarity);
}
/// <summary>
/// Implementation of the IMatcher interface. Every call tests one candidate
/// length: the minimum length plus <c>Counter</c> length increments (rounded),
/// capped at the maximum length. Once the maximum is reached the increment is
/// set to 1 and the counter restarts from zero.
/// <see cref="QUT.Bio.BioPatML.Patterns.IMatcher">IMatcher interface</see>.
/// </summary>
/// <param name="sequence">Sequence to compare with.</param>
/// <param name="position">Matching position.</param>
/// <returns>The matched item, or null when the similarity of the tested
/// length is below the composition's threshold.</returns>
public Match Match(Sequence sequence, int position)
{
    Match result = Composition.Matched;

    // Length to test in this call.
    int steppedLength = (int)(Composition.MinLength + Counter * Composition.IncLength + 0.5);
    CurrLength = Math.Min(steppedLength, Composition.MaxLength);
    increment = 0;
    Counter++;

    // Add up the weights of all symbols covered by the tested length.
    double weightSum = 0.0;
    int offset = 0;
    while (offset < CurrLength)
    {
        weightSum += Composition.Weight(sequence.GetSymbol(position + offset));
        offset++;
    }

    // All lengths have been tried at this position: signal the advance and start over.
    bool reachedMaximum = CurrLength >= Composition.MaxLength;
    if (reachedMaximum)
    {
        increment = 1;
        Counter = 0;
    }

    double similarity = weightSum / (CurrLength * Composition.maxWeight);
    if (similarity < Composition.Threshold)
    {
        return null;
    }

    result.Set(sequence, position, CurrLength, sequence.Strand, similarity);
    return result;
}
/// <summary>
/// Implementation of the IMatcher interface. An any pattern matches any
/// sequence: the shared match object is always filled in and returned, with a
/// similarity of 1.0 and the length supplied by <c>NextLength()</c>.
/// <see cref="QUT.Bio.BioPatML.Patterns.IMatcher">IMatcher interface</see>.
/// </summary>
/// <param name="sequence">Sequence to compare with.</param>
/// <param name="position">Matching position.</param>
/// <returns>A match object containing the search result.</returns>
public override Match Match(Sequence sequence, int position)
{
    Match result = Matched;
    result.Set(sequence, position, NextLength(), sequence.Strand, 1.0);
    return result;
}
/// <summary>
/// Sets sequence, start, length, strand and similarity in one call. The end
/// position passed to the base setter is <c>start + length - 1</c>.
/// </summary>
/// <param name="seq">Sequence the match belongs to.</param>
/// <param name="start">Start position of the match.</param>
/// <param name="length">Length of the match.</param>
/// <param name="strand">Strand the match belongs to. +1 = forward strand,
/// -1 = backward strand, 0 = n.a. or unknown.</param>
/// <param name="similarity">Similarity of the match. Should be in interval [0,1].</param>
public void Set(Sequence seq, int start, int length, int strand, double similarity)
{
    int end = start + length - 1;

    base.Set(start, end, strand);
    SetSequence(seq);
    Similarity = similarity;
}
/// <summary>
/// Searches a sequence with a series pattern, first with a single motif and
/// then with a motif, a gap and a second motif, and checks the letters, start
/// positions and similarities of the reported matches.
/// </summary>
public void TestMatch1()
{
    Sequence seq = new Sequence(AlphabetType.DNA, "taaacc");
    SeriesAll series = new SeriesAll();
    FeatureList matches;

    // Series with a single motif: four matches are expected.
    series.Add(new Motif("motif1", AlphabetType.DNA, "aa", 0.5));
    matches = seq.Search(1, seq.Length, series);
    Assert.AreEqual(4, matches.Count);
    Assert.AreEqual("ta", matches[0].Letters());
    //There might be some error with data structure
    Assert.AreEqual(1, matches[0].Start);
    Assert.AreEqual("aa", matches[1].Letters());
    Assert.AreEqual(2, matches[1].Start);
    Assert.AreEqual("aa", matches[2].Letters());
    Assert.AreEqual(3, matches[2].Start);
    Assert.AreEqual("ac", matches[3].Letters());
    Assert.AreEqual(4, matches[3].Start);

    // Extend the same series with a gap and a second motif and search again.
    series.Add(new Gap("gap1", 1, 2, 1));
    series.Add(new Motif("motif2", AlphabetType.DNA, "cc", 0.5));
    matches = seq.Search(1, seq.Length, series);

    // Use the Count property, as in the first assertion of this test,
    // instead of the LINQ Count() extension method.
    Assert.AreEqual(3, matches.Count);
    Assert.AreEqual("taaac", matches[0].Letters());
    Assert.AreEqual(0.666, ((Match) matches[0]).Similarity, 1e-3);
    Assert.AreEqual("taaacc", matches[1].Letters());
    Assert.AreEqual(0.833, ((Match) matches[1]).Similarity, 1e-3);
    Assert.AreEqual("aaacc", matches[2].Letters());
    Assert.AreEqual(1.000, ((Match) matches[2]).Similarity, 1e-3);
}
/// <summary>
/// Matches iterations of the motif "aag" with different repeat bounds at
/// position 4 and checks the resulting matches.
/// </summary>
public void TestMatch()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "tttaagaacaagttt");
    Motif aag = new Motif("motif", AlphabetType.DNA, "aag", 0.5);

    // Bounds 1..3: the match spans nine letters.
    Iteration bounds1To3 = new Iteration("test", aag, 1, 3, 0.0);
    Match hit = bounds1To3.Match(dna, 4);
    Assert.AreEqual(4, hit.Start);
    Assert.AreEqual(9, hit.Length);
    Assert.AreEqual(1, hit.Strand);
    Assert.AreEqual("aagaacaag", hit.Letters());
    Assert.AreEqual(dna, hit.BaseSequence);
    Assert.AreEqual(0.888, hit.Similarity, 1e-3);

    // Bounds 1..1: only the first three letters.
    Iteration bounds1To1 = new Iteration("test", aag, 1, 1, 0.0);
    hit = bounds1To1.Match(dna, 4);
    Assert.AreEqual("aag", hit.Letters());
    Assert.AreEqual(1.0, hit.Similarity, 1e-3);

    // Bounds 1..2: six letters.
    Iteration bounds1To2 = new Iteration("test", aag, 1, 2, 0.0);
    hit = bounds1To2.Match(dna, 4);
    Assert.AreEqual("aagaac", hit.Letters());
    Assert.AreEqual(0.833, hit.Similarity, 1e-3);

    // Bounds 4..5: no match is expected.
    Iteration bounds4To5 = new Iteration("test", aag, 4, 5, 0.0);
    Assert.AreEqual(null, bounds4To5.Match(dna, 4));
}
/// <summary>
/// Implementation of the IMatcher interface. Starting from the minimum length,
/// every length up to the maximum is examined; a length is accepted when its
/// distance from the minimum length lies within 0.5 of a multiple of the
/// length increment. Among accepted lengths the one with the highest average
/// weight wins, and a later length replaces an earlier one on a tie.
/// <see cref="QUT.Bio.BioPatML.Patterns.IMatcher">IMatcher interface</see>.
/// </summary>
/// <param name="sequence">Sequence to compare with.</param>
/// <param name="position">Matching position.</param>
/// <returns>The matched item, or null when the best similarity is below the
/// composition's threshold.</returns>
public Match Match(Sequence sequence, int position)
{
    Match result = Matched;
    int longest = Composition.MaxLength;
    int shortest = Composition.MinLength;
    double step = Composition.IncLength;

    // Weight sum over the mandatory minimum length.
    double runningSum = 0.0;
    for (int offset = 0; offset < shortest; offset++)
    {
        runningSum += Composition.Weight(sequence.GetSymbol(position + offset));
    }

    double bestSum = runningSum;
    int bestLen = shortest;

    // Grow the window one symbol at a time and remember the best accepted length.
    for (int len = shortest + 1; len <= longest; len++)
    {
        runningSum += Composition.Weight(sequence.GetSymbol(position + len - 1));

        double extra = len - shortest;
        double offGrid = Math.Abs(extra - step * (int)(extra / step + 0.5));
        bool notWorse = runningSum / len >= bestSum / bestLen;

        if (notWorse && offGrid < 0.5)
        {
            bestSum = runningSum;
            bestLen = len;
        }
    }

    double similarity = bestSum / (bestLen * Composition.MaxWeight);
    if (similarity < Composition.Threshold)
    {
        return null;
    }

    result.Set(sequence, position, bestLen, sequence.Strand, similarity);
    return result;
}
/// <summary>
/// A new sequence reports no annotations; after one annotation is added it
/// reports that it has annotations.
/// </summary>
public void TestHasAnnotations()
{
    Sequence sequence = new Sequence(AlphabetType.DNA, "ACTG");
    Assert.AreEqual(false, sequence.HasAnnotations());

    Annotation added = new Annotation("name", "value");
    sequence.Annotations.Add(added);
    Assert.AreEqual(true, sequence.HasAnnotations());
}
/// <summary>
/// Covers the sequence constructors: letters with an explicit alphabet, copy
/// from another sequence, unknown alphabet, and construction from a symbol array.
/// </summary>
public void TestConstructor()
{
    // Explicit amino acid alphabet.
    Sequence protein = new Sequence(AlphabetType.AA, "ArTGü");
    Assert.AreEqual(5, protein.Length);
    Assert.AreEqual("AA", protein.Alphabet.Name);
    Assert.AreEqual("ARTGX", protein.Letters());

    // Explicit RNA alphabet.
    Sequence rna = new Sequence(AlphabetType.RNA, "AcUGz");
    Assert.AreEqual("acugn", rna.Letters());

    // Construction from an existing sequence.
    Sequence rnaCopy = new Sequence(AlphabetType.RNA, rna);
    Assert.AreEqual("acugn", rnaCopy.Letters());

    // Unknown alphabet: the DNA alphabet is expected for these letters.
    Sequence untypedDna = new Sequence(AlphabetType.UNKNOWN, "actgactg");
    Assert.AreEqual("actgactg", untypedDna.Letters());
    Assert.AreEqual(AlphabetFactory.Instance(AlphabetType.DNA), untypedDna.Alphabet);

    // Unknown alphabet: the protein alphabet is expected for these letters.
    Sequence untypedProtein = new Sequence(AlphabetType.UNKNOWN, "AMCMKQQRTAYWY");
    Assert.AreEqual("AMCMKQQRTAYWY", untypedProtein.Letters());
    Assert.AreEqual(AlphabetFactory.Instance(AlphabetType.PROTEIN), untypedProtein.Alphabet);

    // Construction from a symbol array.
    Alphabet dnaAlphabet = DnaAlphabet.Instance();
    Symbol[] symbols = new Symbol[] { dnaAlphabet['A'], dnaAlphabet['C'], dnaAlphabet['T'] };
    Sequence fromSymbols = new Sequence(dnaAlphabet, symbols, false);

    //shouldnt be able to change the symbol
    symbols[0] = dnaAlphabet['G'];
    Assert.AreEqual("act", fromSymbols.Letters());
}
/// <summary>
/// Matches every contained pattern at the given position and keeps the match
/// with the highest similarity that reaches the threshold. The implementation
/// ensures that a match fails for a given position if there is no match;
/// otherwise the matcher might return a match at a different position.
/// The increment of this pattern becomes the smallest increment reported by
/// the contained patterns.
/// <see cref="QUT.Bio.BioPatML.Patterns.IPattern">IPattern Match(Sequence, int) method</see>
/// </summary>
/// <param name="seq">The sequence for comparing.</param>
/// <param name="position">Matching position.</param>
/// <returns>The best match, or null when no contained pattern matches with a
/// similarity of at least the threshold.</returns>
public override Match Match(Sequence seq, int position)
{
    Match best = null;
    int smallestIncrement = int.MaxValue;

    int index = 0;
    while (index < base.Count)
    {
        IPattern candidatePattern = this[index];
        Match candidate = candidatePattern.Match(seq, position);

        // The increment is read after matching and the minimum is tracked.
        int step = candidatePattern.Increment;
        smallestIncrement = step < smallestIncrement ? step : smallestIncrement;

        // Keep the candidate if it reaches the threshold and beats the best so far.
        bool qualifies = candidate != null && candidate.Similarity >= Threshold;
        if (qualifies && (best == null || candidate.Similarity > best.Similarity))
        {
            best = candidate;
        }

        index++;
    }

    increment = smallestIncrement;
    return best;
}
/// <summary>
/// A motif with bracketed alternatives matches "atgc" at position 1 with full
/// similarity.
/// </summary>
public void MotifTestMatchAlternatives()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "atgc");
    Motif withAlternatives = new Motif("test", AlphabetType.DNA, "[ga]tg[c]", 0.0);

    Match hit = withAlternatives.Match(dna, 1);

    Assert.AreEqual(1.0, hit.Similarity, 1e-2);
}
/** Tests the weighted matching of a gap: the first match is "a" with similarity 1.0. */
public void TestMatchWeighted()
{
    double[] weights = new double[] { 0.5, 0.5, 0.5 };
    Gap weightedGap = new Gap("test", 1, 4, 1, weights, 0.0);
    Sequence dna = new Sequence(AlphabetType.DNA, "actga");

    Match hit = weightedGap.Match(dna, 1);

    Assert.AreEqual("a", hit.Letters());
    Assert.AreEqual(1.0, hit.Similarity, 1e-1);
}
/** Tests the construction of a feature list and its registration with a sequence. */
public void TestConstructor()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "acgt");
    FeatureList list = new FeatureList("Test");

    dna.AddFeatures(list);

    Assert.AreEqual(1, dna.FeatureLists.Count);
    Assert.AreEqual("Test", list.Name);
    // Disabled check, kept from the original test:
    // Assert.AreEqual(seq, featureList.GetSequence());
}
/// <summary>
/// A feature covering positions 2 to 4 of "acgta" yields the letters "cgt"
/// once its feature list has been added to the sequence.
/// </summary>
public void TestGetFeatureSequence()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "acgta");
    FeatureList list = new FeatureList("test");
    Feature middle = new Feature("feature1", 2, 4, 1);

    list.Add(middle);
    dna.AddFeatures(list);

    Assert.AreEqual("cgt", middle.Letters());
}
/// <summary>
/// Creates the shared test sequence and an AND logic pattern made of three
/// motifs.
/// </summary>
public void SetUp()
{
    seq = new Sequence(AlphabetType.DNA, "ctgcagatgaaa");
    logic = new Logic("logic", Logic.OperationType.AND, 0.0);

    Motif first = new Motif("motif1", AlphabetType.DNA, "ntg", 1.0);
    logic.Add(first);

    Motif second = new Motif("motif2", AlphabetType.DNA, "cng", 1.0);
    logic.Add(second);

    Motif third = new Motif("motif3", AlphabetType.DNA, "cnn", 1.0);
    logic.Add(third);
}
/// <summary>
/// A void pattern matched at position 1 gives an empty match: start 1, end 0,
/// length 0 and no letters.
/// </summary>
public void TestMatchStart()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "atgc");
    VoidPattern nothing = new VoidPattern("Void");

    Match hit = nothing.Match(dna, 1);

    Assert.AreEqual(1, hit.Start);
    Assert.AreEqual(0, hit.End);
    Assert.AreEqual(0, hit.Length);
    Assert.AreEqual("", hit.Letters());
}
/// <summary>
/// The full match constructor stores start, length, strand, sequence and
/// similarity, and the match exposes the covered letters.
/// </summary>
public void TestConstructor()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "atcg");

    Match built = new Match(voidPattern, dna, 3, 2, 1, 1);

    Assert.AreEqual(3, built.Start);
    Assert.AreEqual(2, built.Length);
    Assert.AreEqual(1, built.Strand);
    Assert.AreEqual(dna, built.BaseSequence);
    Assert.AreEqual("cg", built.Letters());
    Assert.AreEqual(1.0, built.Similarity, 1e-3);
}
/** Tests the calculation of increments by searching with a two-element profile. */
public void TestGetIncrement()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "aggtccagtccagcgt");

    Profile profile = new ProfileAll();
    RegularExp leading = new RegularExp("regex1", "ag");
    profile.Add(leading);
    RegularExp trailing = new RegularExp("regex2", "gt");
    profile.Add(-1, 1, trailing);

    FeatureList found = dna.Search(0, 0, profile);

    Assert.AreEqual(3, found.Count);
    string[] expectedLetters = { "aggt", "agt", "agcgt" };
    for (int i = 0; i < expectedLetters.Length; i++)
    {
        Assert.AreEqual(expectedLetters[i], found[i].Letters());
    }
}
/// <summary>
/// The motif "attg" matches at the first position of a sequence given in
/// mixed case.
/// </summary>
public void MotifTestMatchAtStart()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "aTtgattaca");
    Motif attg = new Motif("test", AlphabetType.DNA, "attg", 0.0);

    Match hit = attg.Match(dna, 1);

    Assert.AreEqual(1, hit.Start);
    Assert.AreEqual(4, hit.Length);
    Assert.AreEqual(1, hit.Strand);
    Assert.AreEqual(1.0, hit.Similarity, 1e-2);
    Assert.AreEqual("attg", hit.Letters());
}
/// <summary>
/// Iterates a weighted gap (a pattern of variable length) and checks the
/// letters and similarity of the match found at position 4.
/// </summary>
public void TestMatchVariablePattern()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "tttaagaacaagttt");
    double[] weights = new double[] { 0.0, 0.1, 1.0, 0.5 };
    Gap weightedGap = new Gap("gap", 1, 4, 1, weights, 0.0);
    Iteration repeated = new Iteration("test", weightedGap, 1, 3, 0.0);

    Match hit = repeated.Match(dna, 4);

    Assert.AreEqual("aagaacaag", hit.Letters());
    Assert.AreEqual(1.0, hit.Similarity, 1e-3);
}
/// <summary>
/// Checks DistanceStartEnd in both directions for two features on the same
/// sequence.
/// </summary>
public void TestDistanceStartEnd()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "acgtactg");

    Feature left = new Feature("feature1", 2, 4, 1);
    Feature right = new Feature("feature2", 5, 7, 1);
    left.SetSequence(dna);
    right.SetSequence(dna);

    Assert.AreEqual(5, left.DistanceStartEnd(right));
    Assert.AreEqual(7, right.DistanceStartEnd(left));
}
/// <summary>
/// Covers two feature constructors: one taking a name, and one taking a
/// sequence that carries a "Name" annotation.
/// </summary>
public void TestConstructor()
{
    // Named feature.
    Feature named = new Feature("test", 1, 2, 1);
    Assert.AreEqual("test", named.Name);

    // Feature created on an annotated sequence.
    Sequence dna = new Sequence(AlphabetType.DNA, "acgta");
    Annotation nameAnnotation = new Annotation("Name", "test");
    dna.Annotations.Add(nameAnnotation);

    Feature onSequence = new Feature(dna, 2, 4, 1);
    Assert.AreEqual("test", onSequence.Name);
    Assert.AreEqual("cgt", onSequence.Letters());
}
/**
 * Tests the adding of features: a feature added with the flag set to false
 * keeps a null base sequence, one added with true gets the list's sequence.
 */
public void TestAdd()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "acgt");
    FeatureList list = new FeatureList("Test");
    dna.AddFeatures(list);

    Feature detached = new Feature("Test1", 1, 3, 1);
    Feature attached = new Feature("Test2", 1, 3, 1);
    list.Add(detached, false);
    list.Add(attached, true);

    Assert.AreEqual(2, list.Count);
    Assert.AreEqual(null, detached.BaseSequence);
    Assert.AreEqual(dna, attached.BaseSequence);
}
/// <summary>
/// Values passed to Match.Set are readable through the match's properties,
/// and the mismatch and match counts follow from length and similarity.
/// </summary>
public void TestSetGet()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "atcg");
    Match target = new Match(null);

    target.Set(dna, 3, 4, 1, 0.75);

    Assert.AreEqual(dna, target.BaseSequence);
    Assert.AreEqual(3, target.Start);
    Assert.AreEqual(4, target.Length);
    Assert.AreEqual(1, target.Strand);
    Assert.AreEqual(0.75, target.Similarity, 1e-3);
    Assert.AreEqual(1, target.CalcMismatches());
    Assert.AreEqual(3, target.Matches);
}
/// <summary>
/// Smoke test: a regular expression match at position 3 and a search over the
/// whole sequence both return a non-null result.
/// </summary>
public void TestTest()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "acctccctcccgacgg");
    RegularExp pattern = new RegularExp("test", "ctc.");

    Match hit = pattern.Match(dna, 3);
    Assert.IsNotNull(hit);

    FeatureList found = dna.Search(0, 0, pattern);
    Assert.IsNotNull(found);
}
/** Tests the match method of a series of patterns that contains a weighted gap. */
public void TestMatchWeightedGap()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "taaacc");

    SeriesAll series = new SeriesAll();
    series.Add(new Motif("motif1", AlphabetType.DNA, "ta", 1.0));
    double[] gapWeights = new double[] { 0, 1 };
    series.Add(new Gap("gap", 1, 2, 1, gapWeights, 0.0));
    series.Add(new Motif("motif2", AlphabetType.DNA, "cc", 0.5));

    FeatureList found = dna.Search(1, dna.Length, series);

    // Expected letters and similarity of the first two matches, in order.
    string[] expectedLetters = { "taaac", "taaacc" };
    double[] expectedSimilarity = { 0.500, 1.000 };
    for (int i = 0; i < expectedLetters.Length; i++)
    {
        Assert.AreEqual(expectedLetters[i], found[i].Letters());
        Assert.AreEqual(expectedSimilarity[i], ((Match) found[i]).Similarity, 1e-3);
    }
}
/// <summary>
/// The standard override match method that performs the match based on the
/// inverted match algorithm: the symbols of the reference sequence are read
/// from its last position backwards and compared with the target symbols read
/// forwards from the given position. Matching stops as soon as the remaining
/// similarity drops below the repeat's threshold.
/// </summary>
/// <param name="sequence">Sequence to compare.</param>
/// <param name="position">Matching position.</param>
/// <returns>The repeat's match object, or null when the threshold is missed.</returns>
public override Match Match(Sequence sequence, int position)
{
    Init();

    int offset = 0;
    while (offset < matchLen)
    {
        // Reference is walked back-to-front, the target front-to-back.
        remSim -= (1.0 - Compare(
            matchSeq.GetSymbol(matchLen - offset),
            sequence.GetSymbol(position + offset)));

        if (remSim / matchLen < repeat.Threshold)
        {
            return null;
        }

        offset++;
    }

    Match result = repeat.Matched;
    result.Set(sequence, position, matchLen, matchSeq.Strand, remSim / matchLen);
    return result;
}
/// <summary>
/// Searches for the best match of a motif / weighted gap / motif series and
/// checks its letters and similarity.
/// </summary>
public void TestMatch2()
{
    Sequence dna = new Sequence(AlphabetType.DNA, "taaaccc");

    SeriesBest series = new SeriesBest();
    series.Add(new Motif("motif1", AlphabetType.DNA, "ta", 0.5));
    double[] gapWeights = new double[] { 1, 2, 1 };
    series.Add(new Gap("gap", 1, 3, 1, gapWeights, 0.0));
    series.Add(new Motif("motif2", AlphabetType.DNA, "cc", 0.5));

    Match best = dna.SearchBest(1, dna.Length, series);

    Assert.AreEqual("taaacc", best.Letters());
    Assert.AreEqual(1.00, best.Similarity, 1e-2);
}