/// <summary>
/// Aligns two sequences using an affine gap penalty (separate costs for opening and for
/// extending a gap). This is the general engine shared by the derived aligners
/// (NeedlemanWunsch, SmithWaterman and PairwiseOverlap).
/// </summary>
/// <param name="similarityMatrix">Scoring matrix.</param>
/// <param name="gapOpenPenalty">Gap open penalty (by convention, a negative number).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (by convention, a negative number).</param>
/// <param name="aInput">First input sequence.</param>
/// <param name="bInput">Second input sequence.</param>
/// <returns>A list of sequence alignments.</returns>
public IList<IPairwiseSequenceAlignment> Align(
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    ISequence aInput,
    ISequence bInput)
{
    // The primer initializes and validates the alignment but records only the gap
    // open penalty, so the extension penalty is stored separately here.
    SimpleAlignPrimer(similarityMatrix, gapOpenPenalty, aInput, bInput);
    _gapExtensionCost = gapExtensionPenalty;

    FillMatrixAffine();

    ////DumpF();  // Writes matrix to application log, used for development and testing

    // Traceback reports the best score and fills the per-alignment output lists.
    List<byte[]> tracedSequences;
    List<int> tracedOffsets;
    List<int> tracedStartOffsets;
    List<int> tracedEndOffsets;
    List<int> tracedInsertions;
    int bestScore = Traceback(
        out tracedSequences,
        out tracedOffsets,
        out tracedStartOffsets,
        out tracedEndOffsets,
        out tracedInsertions);

    return CollateResults(
        aInput,
        bInput,
        tracedSequences,
        tracedOffsets,
        bestScore,
        tracedStartOffsets,
        tracedEndOffsets,
        tracedInsertions);
}
// Regression test: runs the PAMSAM multiple sequence aligner on the DNA benchmark
// dataset and checks that an aligned-sequence collection is produced.
public void testBug3()
{
    ISequenceParser parser = new FastaParser();
    try
    {
        // Test on DNA benchmark dataset
        string filepath = @"TestUtils\122_raw.afa";
        MoleculeType mt = MoleculeType.DNA;

        IList<ISequence> orgSequences = parser.Parse(filepath);

        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

        // NOTE: these are static switches on the aligner type, so they affect
        // every aligner created afterwards in the same process.
        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = false;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 2;

        int numberOfDegrees = 2;      // Environment.ProcessorCount;
        int numberOfPartitions = 16;  // Environment.ProcessorCount * 2;

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

        // Pick the standard scoring matrix that matches the molecule type.
        SimilarityMatrix similarityMatrix = null;
        switch (mt)
        {
            case (MoleculeType.DNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;
            case (MoleculeType.RNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;
            case (MoleculeType.Protein):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;
            default:
                throw new InvalidDataException("Invalid molecular type");
        }

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
            (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
            numberOfPartitions, numberOfDegrees);

        Assert.IsNotNull(msa.AlignedSequences);
    }
    finally
    {
        // Release the parser even when parsing, alignment or the assertion throws;
        // previously it was disposed only on the success path.
        ((FastaParser)parser).Dispose();
    }
}
/// <summary>
/// Aligns two sequences using an affine gap penalty. The derived aligners
/// (NeedlemanWunsch, SmithWaterman and PairwiseOverlap) all share this engine.
/// After the alignment job finishes, a simple consensus is added to every
/// aligned sequence in the result.
/// </summary>
/// <param name="localSimilarityMatrix">Scoring matrix.</param>
/// <param name="gapOpenPenalty">Gap open penalty (by convention, a negative number).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (by convention, a negative number).</param>
/// <param name="inputA">First input sequence.</param>
/// <param name="inputB">Second input sequence.</param>
/// <returns>A list of sequence alignments.</returns>
public IList<IPairwiseSequenceAlignment> Align(
    SimilarityMatrix localSimilarityMatrix,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    ISequence inputA,
    ISequence inputB)
{
    // The primer initializes and validates the alignment but only sets the gap open
    // penalty, so the extension penalty is assigned explicitly afterwards.
    SimpleAlignPrimer(localSimilarityMatrix, gapOpenPenalty, inputA, inputB);
    GapExtensionCost = gapExtensionPenalty;

    DynamicProgrammingPairwiseAlignerJob job = this.CreateAffineAlignmentJob(inputA, inputB);
    IList<IPairwiseSequenceAlignment> alignments = job.Align();

    // Decorate each aligned pair with its consensus before handing the result back.
    foreach (IPairwiseSequenceAlignment pairwiseAlignment in alignments)
    {
        foreach (PairwiseAlignedSequence alignedPair in pairwiseAlignment.AlignedSequences)
        {
            AddSimpleConsensusToResult(alignedPair);
        }
    }

    return alignments;
}
// Regression test: aligns seven protein sequences with the PAMSAM multiple
// sequence aligner and checks that an aligned-sequence collection is produced.
public void testBug()
{
    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.Protein, "MQEPQSELNIDPPLSQETFSELWNLLPENNVLSSELCPAVDELLLPESVVNWLDEDSDDAPRMPATSAP"),
        new Sequence(Alphabets.Protein, "PLSQETFSDLWNLLPENNLLSSELSAPVDDLLPYTDVATWLDECPNEAPQMPEPSAPAAPPPATPAPATSWPLSSFVPSQKTYPGNYGFRLGF"),
        new Sequence(Alphabets.Protein, "MEPSSETGMDPPLSQETFEDLWSLLPDPLQTVTCRLDNLSEFPDYPLAADMSVLQEGLMGNAVPTVTSCAPSTDDYAGKYGLQLDFQQNGTAKS"),
        new Sequence(Alphabets.Protein, "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLS"),
        new Sequence(Alphabets.Protein, "MEESQAELGVEPPLSQETFSDLWKLLPENNLLSSELSPAVDDLLLSPEDVANWLDERPDEAPQMPEPPAPAAPTPAAPAPATSWPLSSFVPSQK"),
        new Sequence(Alphabets.Protein, "MTAMEESQSDISLELPLSQETFSGLWKLLPPEDILPSPHCMDDLLLPQDVEEFFEGPSEALRVSGAPAAQDPVTETPGPVAPAPATPWPLSSFVPSQKTYQGNYGFHLGFLQ"),
        new Sequence(Alphabets.Protein, "FRLGFLHSGTAKSVTWTYSPLLNKLFCQLAKTCPVQLWVSSPPPPNTCVRAMAIYKKSEFVTEVVRRCPHHERCSDSSDGLAPPQHLIRVEGNLRAKYLDDRNTFRHSVV"),
    };

    SimilarityMatrix blosum50 = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);

    // Names follow the argument order used by the sibling test testBug2.
    const int kmerLength = 2;
    const int gapOpenPenalty = -8;
    const int gapExtendPenalty = -1;
    const int numberOfPartitions = 2;
    const int numberOfDegrees = 16;

    PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.WeightedEuclideanDistance,
        blosum50,
        gapOpenPenalty,
        gapExtendPenalty,
        numberOfPartitions,
        numberOfDegrees);

    Assert.IsNotNull(aligner.AlignedSequences);
}
// Regression test: runs the PAMSAM multiple sequence aligner on the DNA benchmark
// dataset and checks that an aligned-sequence collection is produced.
public void testBug2()
{
    // Load the benchmark file and strip the existing alignment from it.
    string benchmarkPath = @"TestUtils\122_raw.afa".TestDir();
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> parsedSequences = fastaParser.Parse(benchmarkPath).ToList();
    List<ISequence> unalignedSequences = MsaUtils.UnAlign(parsedSequences);

    // Static aligner switches (shared by every aligner instance).
    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    const int gapOpenPenalty = -13;
    const int gapExtendPenalty = -5;
    const int kmerLength = 2;
    const int numberOfDegrees = 2;      // Environment.ProcessorCount;
    const int numberOfPartitions = 16;  // Environment.ProcessorCount * 2;

    SimilarityMatrix dnaMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
        unalignedSequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProductFast,
        dnaMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        numberOfPartitions,
        numberOfDegrees);

    Assert.IsNotNull(aligner.AlignedSequences);
}
/// <summary>
/// Scans every off-diagonal entry of <paramref name="matrix"/> and returns the index
/// pair holding the largest value. The search is seeded with entry (0, 1), and only a
/// strictly greater value replaces the current best, so the first maximum found in
/// row-major order wins.
/// </summary>
/// <param name="matrix">Similarity matrix to search; entry [0, 1] is read unconditionally.</param>
/// <returns>The (row, column) indices of the most similar pair.</returns>
private (int I, int J) FindMostSimilarNodes(SimilarityMatrix matrix)
{
    double highest = matrix[0, 1];
    (int I, int J) winner = (0, 1);

    for (int row = 0; row < matrix.Dimension; row++)
    {
        for (int column = 0; column < matrix.Dimension; column++)
        {
            // The diagonal compares a node with itself, so it is skipped.
            if (row != column && matrix[row, column] > highest)
            {
                highest = matrix[row, column];
                winner = (row, column);
            }
        }
    }

    return winner;
}
/// <summary>
/// Constructs a progressive aligner: creates the requested profile aligner,
/// configures it with the scoring matrix and gap penalties, and initializes an
/// empty list of aligned sequences.
/// </summary>
/// <param name="profileAlignerName">ProfileAlignerNames member selecting the profile aligner.</param>
/// <param name="similarityMatrix">Similarity matrix assigned to the profile aligner.</param>
/// <param name="gapOpenPenalty">Negative gap open penalty.</param>
/// <param name="gapExtendPenalty">Negative gap extension penalty.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="profileAlignerName"/> is not a supported profile aligner.
/// </exception>
public ProgressiveAligner(
    ProfileAlignerNames profileAlignerName,
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtendPenalty)
{
    // Get ProfileAligner ready
    switch (profileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            _profileAligner = new NeedlemanWunschProfileAlignerSerial();
            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            _profileAligner = new SmithWatermanProfileAlignerSerial();
            break;
        default:
            // ArgumentException (still an Exception, so existing catch blocks keep
            // working) identifies the offending parameter instead of a bare Exception.
            throw new ArgumentException("Invalid profile aligner name", "profileAlignerName");
    }

    _profileAligner.SimilarityMatrix = similarityMatrix;
    _profileAligner.GapOpenCost = gapOpenPenalty;
    _profileAligner.GapExtensionCost = gapExtendPenalty;

    _alignedSequences = new List<ISequence>();
}
/// <summary>
/// Initializes a pairwise alignment job: stores the scoring parameters and the two
/// input sequences, reads the default gap symbol from the first sequence's alphabet,
/// and derives the matrix and grid dimensions from the sequence lengths.
/// </summary>
/// <param name="similarityMatrix">Scoring matrix.</param>
/// <param name="gapOpenCost">Gap open cost.</param>
/// <param name="gapExtensionCost">Gap extension cost (not used for a linear gap penalty).</param>
/// <param name="aInput">First input sequence; its alphabet supplies the gap symbol.</param>
/// <param name="bInput">Second input sequence.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="aInput"/> or <paramref name="bInput"/> is null.
/// </exception>
protected DynamicProgrammingPairwiseAlignerJob(
    SimilarityMatrix similarityMatrix,
    int gapOpenCost,
    int gapExtensionCost,
    ISequence aInput,
    ISequence bInput)
{
    if (aInput == null)
    {
        throw new ArgumentNullException("aInput");
    }

    // Without this guard a null second sequence surfaced later as a
    // NullReferenceException when its Count was read below.
    if (bInput == null)
    {
        throw new ArgumentNullException("bInput");
    }

    aInput.Alphabet.TryGetDefaultGapSymbol(out gapCode);

    // Set Gap Penalty and Similarity Matrix
    this.gapOpenCost = gapOpenCost;

    // note that gapExtensionCost is not used for linear gap penalty
    this.gapExtensionCost = gapExtensionCost;
    this.similarityMatrix = similarityMatrix;

    // Keep references to the input sequences (no conversion is performed here).
    this.sequenceI = aInput;
    this.sequenceJ = bInput;

    // One extra row and column beyond the sequence lengths.
    colHeight = sequenceI.Count + 1;
    rowWidth = sequenceJ.Count + 1;

    // Number of grid columns/rows when the matrix is sampled every gridStride cells.
    gridCols = (int)((rowWidth - 1) / gridStride) + 1;
    gridRows = (int)((colHeight - 1) / gridStride) + 1;
}
/// <summary>
/// Simple driver program: builds a toy dataset and its sparse similarity matrix, runs
/// Affinity Propagation on it, prints the resulting centers and grouped clusters, and
/// reports the elapsed time. Any exception raised while clustering is reported on the
/// console by message only.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Testing:Driver program for Affinity Propagation clustering algorithm.");

    var toyData = new ToyDataset();
    Stopwatch timer = new Stopwatch();
    var points = toyData.DataSet();
    var similarities = SimilarityMatrix.SparseSimilarityMatrix(points);

    Console.WriteLine($"Data size:{points.Length} ; SimilarityMatrix size:{similarities.Length}");
    Console.WriteLine($"Start at:{DateTime.Now}");

    // Only the clustering itself is timed; dataset construction happens before Start().
    timer.Start();
    try
    {
        AffinityPropagation clusterer = new AffinityPropagation(points.Length);
        var clusterCenters = clusterer.Fit(similarities);
        Print(clusterCenters);

        ClusterUtility.AssignClusterCenters(points, clusterCenters);

        int[] centerIndices = new int[clusterer.Centers.Count];
        clusterer.Centers.CopyTo(centerIndices);

        // print the clusters (grouped)
        var groupedClusters = ClusterUtility.GroupClusters(points, clusterCenters, centerIndices);
        Print(groupedClusters);
    }
    catch (Exception failure)
    {
        Console.WriteLine($"\a{failure.Message}");
    }
    timer.Stop();

    Console.WriteLine($"\nEnding at:{DateTime.Now}");
    Console.WriteLine($"Ellapsed time: {timer.ElapsedMilliseconds} ms | {timer.Elapsed.TotalSeconds} s | {timer.Elapsed.TotalMinutes} m");
}
/// <summary>
/// Initializes a new instance of the NeedlemanWunschProfileAlignerParallel class.
/// All arguments are forwarded unchanged to the base class constructor; this
/// constructor performs no work of its own.
/// </summary>
/// <param name="similarityMatrix">Similarity matrix.</param>
/// <param name="profileScoreFunctionName">Profile score function to use (enum member).</param>
/// <param name="gapOpenPenalty">Gap open penalty (negative integer).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (negative integer).</param>
/// <param name="numberOfPartitions">Number of partitions (positive integer).</param>
public NeedlemanWunschProfileAlignerParallel(
    SimilarityMatrix similarityMatrix,
    ProfileScoreFunctionNames profileScoreFunctionName,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    int numberOfPartitions)
    : base(
        similarityMatrix,
        profileScoreFunctionName,
        gapOpenPenalty,
        gapExtensionPenalty,
        numberOfPartitions)
{
}
// Loads a similarity matrix from the IUPAC nucleic-acid test file and checks that
// the underlying matrix data was populated.
public void IupacNASimilarityMatrices()
{
    string matrixFile = @"TestUtils\SimilarityMatrices\TestIupacNA.txt";

    SimilarityMatrix loadedMatrix = new SimilarityMatrix(matrixFile);

    Assert.IsNotNull(loadedMatrix.Matrix);
}
/// <summary>
/// Initializes a new instance of the SmithWatermanProfileAlignerSerial class.
/// All arguments are forwarded unchanged to the base class constructor; this
/// constructor performs no work of its own.
/// </summary>
/// <param name="similarityMatrix">Similarity matrix.</param>
/// <param name="profileScoreFunctionName">Profile score function to use (enum member).</param>
/// <param name="gapOpenPenalty">Gap open penalty (negative integer).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (negative integer).</param>
/// <param name="numberOfPartitions">Number of partitions (positive integer).</param>
public SmithWatermanProfileAlignerSerial(
    SimilarityMatrix similarityMatrix,
    ProfileScoreFunctionNames profileScoreFunctionName,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    int numberOfPartitions)
    : base(
        similarityMatrix,
        profileScoreFunctionName,
        gapOpenPenalty,
        gapExtensionPenalty,
        numberOfPartitions)
{
}
// Aligns two single-sequence DNA profiles with the Needleman-Wunsch profile aligner,
// rebuilds the gapped sequences from the generated edit strings, and checks both the
// aligned text and the multiple-alignment score.
public void TestNeedlemanWunschProfileAligner()
{
    // Register the profile item set: each symbol of the template maps to its index.
    ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
    Dictionary<ISequenceItem, int> symbolIndices = new Dictionary<ISequenceItem, int>();
    for (int index = 0; index < templateSequence.Count; ++index)
    {
        symbolIndices.Add(templateSequence[index], index);
    }
    Profiles.ItemSet = symbolIndices;

    // Configure the aligner.
    int gapOpenPenalty = -8;
    int gapExtendPenalty = -1;
    SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrices.AmbiguousDna);

    IProfileAligner profileAligner = new NeedlemanWunschProfileAligner();
    profileAligner.SimilarityMatrix = similarityMatrix;
    profileAligner.GapOpenCost = gapOpenPenalty;
    profileAligner.GapExtensionCost = gapExtendPenalty;

    // Inputs.
    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
    };

    IProfileAlignment firstProfile = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
    IProfileAlignment secondProfile = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

    profileAligner.Align(firstProfile, secondProfile);

    // Turn each aligned profile into an edit string and apply it to the raw sequence.
    List<int> firstEString = profileAligner.GenerateEString(profileAligner.AlignedA);
    List<int> secondEString = profileAligner.GenerateEString(profileAligner.AlignedB);

    List<ISequence> alignedSequences = new List<ISequence>();
    alignedSequences.Add(profileAligner.GenerateSequenceFromEString(firstEString, sequences[0]));
    alignedSequences.Add(profileAligner.GenerateSequenceFromEString(secondEString, sequences[1]));

    float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

    ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
    ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

    Assert.AreEqual(expectedSeqA.ToString(), alignedSequences[0].ToString());
    Assert.AreEqual(expectedSeqB.ToString(), alignedSequences[1].ToString());
    Assert.AreEqual(40, profileScore);
}
/// <summary>
/// Initializes a new instance of the DynamicProgrammingPairwiseAligner class, the
/// common base of the pairwise aligners (NeedlemanWunsch, SmithWaterman, Overlap).
/// Installs a default diagonal similarity matrix and default gap penalties; callers
/// will typically replace them with values suited to their own sequences.
/// </summary>
protected DynamicProgrammingPairwiseAligner()
{
    // Defaults intended to be reasonable for many cases. A diagonal similarity
    // matrix is used because, per the original design note, it also works for DNA
    // and RNA even though the molecule type is treated as protein.
    InternalSimilarityMatrix = new DiagonalSimilarityMatrix(2, -2);

    // Default gap open and gap extension costs.
    this.GapOpenCost = -8;
    this.GapExtensionCost = -1;
}
/// <summary>
/// Stores the Redis client and configuration, creates an empty input-matrix registry,
/// and builds the similarity matrix under the "similarities" key using the same
/// neighbour limit, prefix and client.
/// </summary>
/// <param name="maxNeighbours">Value stored in MaxNeighbours and passed to the similarity matrix options.</param>
/// <param name="redisPrefix">Value stored in RedisPrefix and passed to the similarity matrix options.</param>
/// <param name="redisClient">Redis client stored on this instance and handed to the similarity matrix.</param>
public Base(int maxNeighbours, string redisPrefix, IRedisClient redisClient)
{
    RedisClient = redisClient;
    MaxNeighbours = maxNeighbours;
    RedisPrefix = redisPrefix;
    InputMatrices = new Dictionary<string, InputMatrix>();

    // The options are built from the properties just assigned above.
    var similarityOptions = new Options
    {
        Key = "similarities",
        MaxNeighbours = MaxNeighbours,
        RedisPrefix = RedisPrefix
    };
    SimilarityMatrix = new SimilarityMatrix(similarityOptions, redisClient);
}
// Parameterized test: runs WithGapPenalty.StartNoGapAlgorithm on X and Y using the
// given matrix text and penalty function, then checks the evaluation value and both
// aligned strings.
public void TestStartNoGapAlgorithm(string matrix, string X, string Y, Func<int, double> penaltyFunc, double evaluation, string resultX, string resultY)
{
    var scoring = new SimilarityMatrix(matrix);
    var aligner = new WithGapPenalty(scoring, penaltyFunc);

    var outcome = aligner.StartNoGapAlgorithm(X, Y);

    // Item1 carries the evaluation; Item2 carries the pair of aligned strings.
    Assert.AreEqual(evaluation, outcome.Item1);
    Assert.AreEqual(resultX, outcome.Item2.Item1);
    Assert.AreEqual(resultY, outcome.Item2.Item2);
}
// Parameterized test: runs the Hirschberg algorithm on X and Y with the given matrix
// text and checks both aligned strings.
// Example matrix text:
//   "A G T C\n0 -2 -2 -2 -2\n-2 2 -1 -1 -1\n-2 -1 2 -1 -1\n-2 -1 -1 2 -1\n-2 -1 -1 -1 2"
public void TestStartAlgorithm(string matrix, string X, string Y, string resultX, string resultY)
{
    Hirschberg aligner = new Hirschberg(new SimilarityMatrix(matrix));

    Tuple<string, string> aligned = aligner.StartAlgorithm(X, Y);

    Assert.AreEqual(resultX, aligned.Item1);
    Assert.AreEqual(resultY, aligned.Item2);
}
/// <summary>
/// Builds a summary with degree-centrality LexRank: each phrase is weighted by the
/// number of phrases whose cosine similarity to it exceeds the configured threshold,
/// phrases are ranked by that weight, and the summary text is produced by
/// Util.SummarizeByCompressionRatio.
/// </summary>
/// <param name="mySummaryParameters">
/// Summary parameters; must be a DegreeCentralityLexRankParameters instance (cast unconditionally).
/// </param>
/// <param name="newsDirectory">Directory passed to the TDM constructor.</param>
/// <param name="cacheFileName">Cache file name passed to the TDM and similarity matrix constructors.</param>
public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
{
    // Cast once and reuse; the original cast the same argument twice.
    var lexRankParameters = (DegreeCentralityLexRankParameters)mySummaryParameters;
    Mis = lexRankParameters;

    Debug.WriteLine("Starting execution of DegreeCentralityLexRank.");

    // Stopwatch measures elapsed time with a monotonic clock; subtracting
    // DateTime.Now values is skewed by system clock or DST adjustments.
    var stopwatch = Stopwatch.StartNew();

    var myTDM = new TDM(newsDirectory, Mis.MyTDMParameters, cacheFileName);
    var mySimilarityMatrix = new SimilarityMatrix(myTDM, cacheFileName, lexRankParameters.SimilarityNormalized);

    var totalPhrases = myTDM.PhrasesList.Count;
    var myCosineSimilarities = mySimilarityMatrix.CosineSimilarityBetweenPhrases;

    // Save candidate phrases with their weight (relevance): the weight of phrase i
    // is the count of phrases j whose similarity to i is above the threshold.
    var phrasesList = new List<PositionValue>(totalPhrases);
    for (var i = 0; i < totalPhrases; i++)
    {
        var degree = 0.0d;
        for (var j = 0; j < totalPhrases; j++)
        {
            if (myCosineSimilarities[i][j] > Mis.DegreeCentrality)
            {
                degree++;
            }
        }

        phrasesList.Add(new PositionValue(i, degree));
    }

    // Order by weight, highest first; weights closer than 1e-07 count as ties and
    // are ordered by the phrase's position in the document.
    phrasesList.Sort(delegate(PositionValue x, PositionValue y)
    {
        if (Math.Abs(x.Value - y.Value) < 1e-07)
        {
            return myTDM.PhrasesList[x.Position].PositionInDocument.CompareTo(
                myTDM.PhrasesList[y.Position].PositionInDocument);
        }

        return -1 * x.Value.CompareTo(y.Value);
    });

    TextSummary = Util.SummarizeByCompressionRatio(myTDM, phrasesList, mySummaryParameters.MySummaryType,
        Mis.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);

    stopwatch.Stop();
    Debug.WriteLine("Minutes of DegreeCentralityLexRank: " + stopwatch.Elapsed.TotalMinutes);
}
/// <summary>
/// Aligns two sequences using an affine gap penalty. Stores the scoring matrix and
/// both gap costs on this aligner, then delegates the actual work to DoAlign.
/// </summary>
/// <param name="localSimilarityMatrix">Scoring matrix.</param>
/// <param name="gapOpenPenalty">Gap open penalty (by convention, a negative number).</param>
/// <param name="gapExtensionPenalty">Gap extension penalty (by convention, a negative number).</param>
/// <param name="inputA">First input sequence.</param>
/// <param name="inputB">Second input sequence.</param>
/// <returns>A list of sequence alignments.</returns>
public IList<IPairwiseSequenceAlignment> Align(
    SimilarityMatrix localSimilarityMatrix,
    int gapOpenPenalty,
    int gapExtensionPenalty,
    ISequence inputA,
    ISequence inputB)
{
    // Record the scoring configuration on the aligner before running.
    this.SimilarityMatrix = localSimilarityMatrix;
    this.GapOpenCost = gapOpenPenalty;
    this.GapExtensionCost = gapExtensionPenalty;

    // NOTE(review): the final 'true' presumably selects the affine gap model - confirm against DoAlign.
    IList<IPairwiseSequenceAlignment> alignments = DoAlign(inputA, inputB, true);
    return alignments;
}
// Aligns two short protein sequences with the pairwise overlap aligner (affine gap,
// Blosum50, UseEARTHToFillMatrix enabled), logs the outcome, and compares it with the
// expected alignment.
public void PairwiseOverlapProteinSeqAffineGapUseEarth()
{
    Sequence firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    Sequence secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");

    SimilarityMatrix blosum50 = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    int gapOpen = -8;

    PairwiseOverlapAligner aligner = new PairwiseOverlapAligner();
    aligner.SimilarityMatrix = blosum50;
    aligner.GapOpenCost = gapOpen;
    aligner.UseEARTHToFillMatrix = true;
    aligner.GapExtensionCost = -1;

    IList<IPairwiseSequenceAlignment> actual = aligner.Align(firstInput, secondInput);

    // Log the aligner configuration and every alignment for diagnostics.
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "{0}, Affine; Matrix {1}; GapOpenCost {2}; GapExtenstionCost {3}",
        aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost, aligner.GapExtensionCost));

    foreach (IPairwiseSequenceAlignment alignment in actual)
    {
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", alignment.PairwiseAlignedSequences[0].Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", alignment.PairwiseAlignedSequences[0].FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", alignment.PairwiseAlignedSequences[0].SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", alignment.PairwiseAlignedSequences[0].Consensus.ToString()));
    }

    // Expected: one alignment holding one aligned pair.
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
        SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
        Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
        Score = 25,
        FirstOffset = 0,
        SecondOffset = 3
    };
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    expected.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(actual, expected));
}
/// <summary>
/// Builds a summary: stores the FSP parameters, constructs the TDM and the similarity
/// matrix, sets the solution size to the number of phrases, runs Execute to rank the
/// phrases, and produces the summary text via Util.SummarizeByCompressionRatio.
/// </summary>
/// <param name="mySummaryParameters">
/// Summary parameters; must be an FSPParameters instance (cast unconditionally).
/// </param>
/// <param name="newsDirectory">Directory passed to the TDM constructor.</param>
/// <param name="cacheFileName">Cache file name passed to the TDM and similarity matrix constructors.</param>
public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
{
    MyParameters = (FSPParameters)mySummaryParameters;

    // Shared state used by Execute() is prepared first.
    MyTDM = new TDM(newsDirectory, MyParameters.MyTDMParameters, cacheFileName);
    MyExternalMDS = new SimilarityMatrix(MyTDM, cacheFileName);
    SolutionSize = MyTDM.PhrasesList.Count;

    var rankedPhrases = Execute();

    TextSummary = Util.SummarizeByCompressionRatio(
        MyTDM,
        rankedPhrases,
        mySummaryParameters.MySummaryType,
        MyParameters.MaximumLengthOfSummaryForRouge,
        out SummaryByPhrases);
}
// Aligns two short protein sequences with Smith-Waterman (simple gap, Blosum50),
// logs the outcome, and compares it with the expected alignment.
public void SmithWatermanProteinSeqSimpleGap()
{
    Sequence firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    Sequence secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");

    SimilarityMatrix blosum50 = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    int gapOpen = -8;

    SmithWatermanAligner aligner = new SmithWatermanAligner();
    aligner.SimilarityMatrix = blosum50;
    aligner.GapOpenCost = gapOpen;

    IList<IPairwiseSequenceAlignment> actual = aligner.AlignSimple(firstInput, secondInput);

    // Log the aligner configuration and every alignment for diagnostics.
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost));

    foreach (IPairwiseSequenceAlignment alignment in actual)
    {
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", alignment.PairwiseAlignedSequences[0].Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", alignment.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", alignment.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", alignment.PairwiseAlignedSequences[0].FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", alignment.PairwiseAlignedSequences[0].SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", alignment.PairwiseAlignedSequences[0].Consensus.ToString()));
    }

    // Expected: one alignment holding one aligned pair.
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "AWGHE"),
        SecondSequence = new Sequence(Alphabets.Protein, "AW-HE"),
        Consensus = new Sequence(Alphabets.Protein, "AWGHE"),
        Score = 28,
        FirstOffset = 0,
        SecondOffset = 3
    };
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    expected.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(actual, expected));
}
// Aligns two short protein sequences with Needleman-Wunsch (affine gap, Blosum50),
// logs the first alignment, and compares the result with the expected alignment.
public void NeedlemanWunschProteinSeqAffineGap()
{
    Sequence firstInput = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    Sequence secondInput = new Sequence(Alphabets.Protein, "PAWHEAE");

    SimilarityMatrix blosum50 = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    int gapOpen = -8;

    NeedlemanWunschAligner aligner = new NeedlemanWunschAligner();
    aligner.SimilarityMatrix = blosum50;
    aligner.GapOpenCost = gapOpen;
    aligner.GapExtensionCost = -1;

    IList<IPairwiseSequenceAlignment> actual = aligner.Align(firstInput, secondInput);

    // Log the aligner configuration and the first alignment for diagnostics.
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "{0}, Affine; Matrix {1}; GapOpenCost {2}; GapExtenstionCost {3}",
        aligner.Name, aligner.SimilarityMatrix.Name, aligner.GapOpenCost, aligner.GapExtensionCost));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", actual[0].PairwiseAlignedSequences[0].Score));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", actual[0].FirstSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", actual[0].SecondSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", actual[0].PairwiseAlignedSequences[0].FirstSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", actual[0].PairwiseAlignedSequences[0].SecondSequence.ToString()));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", actual[0].PairwiseAlignedSequences[0].Consensus));

    // Expected: one alignment holding one aligned pair.
    IList<IPairwiseSequenceAlignment> expected = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "HEAGAWGHE-E"),
        SecondSequence = new Sequence(Alphabets.Protein, "---PAW-HEAE"),
        Consensus = new Sequence(AmbiguousProteinAlphabet.Instance, "HEAXAWGHEAE"),
        Score = 14,
        FirstOffset = 0,
        SecondOffset = 3
    };
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
    expected.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(actual, expected));
}
/// <summary>
/// Prepares the aligner for a run: initializes it from the first sequence, stores the
/// gap open penalty and the similarity matrix, validates both inputs, and records them
/// as the input sequences. The gap extension penalty is NOT set here; callers that
/// need it must assign it separately.
/// </summary>
/// <param name="similarityMatrix">Scoring matrix.</param>
/// <param name="gapPenalty">Gap open penalty (by convention, a negative number).</param>
/// <param name="inputA">First input sequence.</param>
/// <param name="inputB">Second input sequence.</param>
private void SimpleAlignPrimer(
    SimilarityMatrix similarityMatrix,
    int gapPenalty,
    ISequence inputA,
    ISequence inputB)
{
    InitializeAlign(inputA);

    // Gap penalty and similarity matrix (the extension cost is unused for a linear gap penalty).
    this.GapOpenCost = gapPenalty;
    InternalSimilarityMatrix = similarityMatrix;

    // Throws an exception if the input is not valid.
    ValidateAlignInput(inputA, inputB);

    // Remember the validated inputs for the alignment run.
    FirstInputSequence = inputA;
    SecondInputSequence = inputB;
}
// Progressively aligns three DNA sequences along a guide tree built from k-mer
// distances and checks the three gapped results.
public void TestProgressiveAligner()
{
    // Register the profile item set: each symbol of the template maps to its index.
    ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
    Dictionary<ISequenceItem, int> symbolIndices = new Dictionary<ISequenceItem, int>();
    for (int index = 0; index < templateSequence.Count; ++index)
    {
        symbolIndices.Add(templateSequence[index], index);
    }
    Profiles.ItemSet = symbolIndices;

    SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrices.AmbiguousDna);
    int gapOpenPenalty = -8;
    int gapExtendPenalty = -1;
    int kmerLength = 3;

    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
    };

    // Distance matrix -> hierarchical clustering -> binary guide tree.
    KmerDistanceMatrixGenerator distanceGenerator =
        new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
    distanceGenerator.GenerateDistanceMatrix(sequences);

    IHierarchicalClustering clustering = new HierarchicalClusteringSerial(distanceGenerator.DistanceMatrix);
    BinaryGuideTree guideTree = new BinaryGuideTree(clustering);

    IProgressiveAligner progressiveAligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty);
    progressiveAligner.Align(sequences, guideTree);

    ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT");
    ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");
    ISequence expectedSeqC = new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---");

    Assert.AreEqual(expectedSeqA.ToString(), progressiveAligner.AlignedSequences[0].ToString());
    Assert.AreEqual(expectedSeqB.ToString(), progressiveAligner.AlignedSequences[1].ToString());
    Assert.AreEqual(expectedSeqC.ToString(), progressiveAligner.AlignedSequences[2].ToString());
}
/// <summary>
/// Prepares the aligner for a run: initializes it from the first sequence, resets the
/// algorithm-specific members, stores the gap open penalty and the similarity matrix,
/// validates both inputs, and converts them to byte arrays via the similarity matrix.
/// The gap extension penalty is NOT set here; callers that need it must assign it
/// separately.
/// </summary>
/// <param name="similarityMatrix">Scoring matrix.</param>
/// <param name="gapPenalty">Gap open penalty (by convention, a negative number).</param>
/// <param name="aInput">First input sequence.</param>
/// <param name="bInput">Second input sequence.</param>
private void SimpleAlignPrimer(
    SimilarityMatrix similarityMatrix,
    int gapPenalty,
    ISequence aInput,
    ISequence bInput)
{
    InitializeAlign(aInput);
    ResetSpecificAlgorithmMemberVariables();

    // Gap penalty and similarity matrix (_gapExtensionCost is unused for a simple gap penalty).
    _gapOpenCost = gapPenalty;
    _similarityMatrix = similarityMatrix;

    // Throws an exception if the input is not valid.
    ValidateAlignInput(aInput, bInput);

    // Encode both inputs as byte arrays using the similarity matrix mapping.
    string firstText = aInput.ToString();
    _a = similarityMatrix.ToByteArray(firstText);

    string secondText = bInput.ToString();
    _b = similarityMatrix.ToByteArray(secondText);
}
/// <summary>
/// Executes the activity: looks up <c>MatrixName</c> among the supported standard
/// similarity matrices and, on a match, stores a new instance of it in <c>Matrix</c>.
/// The name is compared case-insensitively under the invariant culture. A name that
/// matches none of the supported entries leaves <c>Matrix</c> unchanged.
/// </summary>
/// <param name="executionContext">The execution context.</param>
/// <returns>Always <c>ActivityExecutionStatus.Closed</c>.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    // Parallel lookup tables: supportedNames[i] selects standardMatrices[i].
    // Entries are tested in this order and the first match wins.
    string[] supportedNames =
    {
        "Blosum45", "Blosum50", "Blosum62", "Blosum80", "Blosum90",
        "Pam250", "Pam30", "Pam70",
        "AmbiguousDna", "AmbiguousRna"
    };

    SimilarityMatrix.StandardSimilarityMatrix[] standardMatrices =
    {
        SimilarityMatrix.StandardSimilarityMatrix.Blosum45,
        SimilarityMatrix.StandardSimilarityMatrix.Blosum50,
        SimilarityMatrix.StandardSimilarityMatrix.Blosum62,
        SimilarityMatrix.StandardSimilarityMatrix.Blosum80,
        SimilarityMatrix.StandardSimilarityMatrix.Blosum90,
        SimilarityMatrix.StandardSimilarityMatrix.Pam250,
        SimilarityMatrix.StandardSimilarityMatrix.Pam30,
        SimilarityMatrix.StandardSimilarityMatrix.Pam70,
        SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna,
        SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna
    };

    for (int index = 0; index < supportedNames.Length; index++)
    {
        if (!MatrixName.Equals(supportedNames[index], StringComparison.InvariantCultureIgnoreCase))
        {
            continue;
        }

        Matrix = new SimilarityMatrix(standardMatrices[index]);
        break;
    }

    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Initializes a new instance of the NucmerPairwiseAligner class with every
/// setting assigned its default: the diagonal-score similarity matrix, the default
/// gap open/extension costs and MUM length, the ClusterBuilder defaults, and the
/// default break length defined by ModifiedSmithWaterman.
/// </summary>
public NucmerPairwiseAligner()
{
    // Set the default Similarity Matrix
    SimilarityMatrix = new SimilarityMatrix(
        SimilarityMatrix.StandardSimilarityMatrix.DiagonalScoreMatrix);

    // Set the defaults
    GapOpenCost = DefaultGapOpenCost;
    GapExtensionCost = DefaultGapExtensionCost;
    LengthOfMUM = DefaultLengthOfMUM;

    // Set the ClusterBuilder properties to defaults
    FixedSeparation = ClusterBuilder.DefaultFixedSeparation;
    MaximumSeparation = ClusterBuilder.DefaultMaximumSeparation;
    MinimumScore = ClusterBuilder.DefaultMinimumScore;
    SeparationFactor = ClusterBuilder.DefaultSeparationFactor;

    // Unlike the four values above, this default comes from ModifiedSmithWaterman
    BreakLength = ModifiedSmithWaterman.DefaultBreakLength;
}
/// <summary>
/// Basic smoke test: builds a four-vertex graph whose edges are a1-a2, a2-a3,
/// a3-a1 and a3-a4, constructs a SimilarityMatrix from it, and writes the
/// resulting matrix out through WriteMatrix.
/// </summary>
public static void BasicTest()
{
    BidirectionalGraph gA = new BidirectionalGraph(false);

    IVertex a1 = gA.AddVertex();
    IVertex a2 = gA.AddVertex();
    IVertex a3 = gA.AddVertex();
    IVertex a4 = gA.AddVertex();

    // Triangle a1-a2-a3 plus one extra edge from a3 to a4.
    gA.AddEdge(a1, a2);
    gA.AddEdge(a2, a3);
    gA.AddEdge(a3, a1);
    gA.AddEdge(a3, a4);

    SimilarityMatrix similarity = new SimilarityMatrix(gA);
    WriteMatrix(similarity.Matrix);
}
/// <summary>
/// Aligns two sequences pairwise under a linear gap penalty. This is the shared engine
/// that the derived algorithms (NeedlemanWunsch, SmithWaterman, and PairwiseOverlap)
/// use whenever a linear gap penalty is requested.
/// </summary>
/// <param name="localSimilarityMatrix">Scoring matrix.</param>
/// <param name="gapPenalty">Gap penalty (by convention, use a negative number for this.).</param>
/// <param name="inputA">First input sequence.</param>
/// <param name="inputB">Second input sequence.</param>
/// <returns>A list of sequence alignments, each aligned sequence having had its simple consensus added.</returns>
public IList<IPairwiseSequenceAlignment> AlignSimple(SimilarityMatrix localSimilarityMatrix, int gapPenalty, ISequence inputA, ISequence inputB)
{
    // Initialization and validation for simple alignment happen before the job is created.
    SimpleAlignPrimer(localSimilarityMatrix, gapPenalty, inputA, inputB);

    DynamicProgrammingPairwiseAlignerJob job = CreateSimpleAlignmentJob(inputA, inputB);
    IList<IPairwiseSequenceAlignment> alignments = job.Align();

    // Attach a simple consensus to every aligned sequence of every alignment.
    foreach (IPairwiseSequenceAlignment pairwiseAlignment in alignments)
    {
        foreach (PairwiseAlignedSequence alignedPair in pairwiseAlignment.AlignedSequences)
        {
            AddSimpleConsensusToResult(alignedPair);
        }
    }

    return alignments;
}
/// <summary>
/// Aligns three short DNA sequences with MuscleMultipleSequenceAlignment and checks
/// each aligned row against its expected gapped string, then checks the alignment score.
/// </summary>
public void TestMuscleMultipleSequenceAlignment()
{
    // Alignment parameters.
    int kmerLength = 3;
    int gapOpenPenalty = -8;
    int gapExtendPenalty = -1;

    // Map each symbol of the template sequence to its position and hand the map to Profiles.
    ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
    Dictionary<ISequenceItem, int> symbolPositions = new Dictionary<ISequenceItem, int>();
    int position = 0;
    while (position < templateSequence.Count)
    {
        symbolPositions.Add(templateSequence[position], position);
        position++;
    }

    Profiles.ItemSet = symbolPositions;

    SimilarityMatrix similarityMatrix =
        new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrices.AmbiguousDna);

    // Input sequences, in the order their aligned rows are checked below.
    List<ISequence> sequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
    };

    // Algorithm selections passed to the aligner.
    // NOTE(review): the enum member spellings below are reproduced exactly as this
    // test references them; confirm them against the library version being built.
    DistanceFunctionTypes distanceFunction = DistanceFunctionTypes.EuclieanDistance;
    UpdateDistanceMethodsTypes clusteringUpdate = UpdateDistanceMethodsTypes.Aaverage;
    ProfileAlignerNames profileAligner = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames scoreFunction = ProfileScoreFunctionNames.WeightedInnerProduct;

    MuscleMultipleSequenceAlignment msa = new MuscleMultipleSequenceAlignment(
        sequences,
        MoleculeType.DNA,
        kmerLength,
        distanceFunction,
        clusteringUpdate,
        profileAligner,
        scoreFunction,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty);

    // All expected rows are constructed up front, before the first assertion runs.
    ISequence[] expectedRows =
    {
        new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---"),
        new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---")
    };

    for (int row = 0; row < expectedRows.Length; row++)
    {
        Assert.AreEqual(expectedRows[row].ToString(), msa.AlignedSequences[row].ToString());
    }

    Assert.AreEqual(46, msa.AlignmentScore);
}
/// <summary>
/// Basic smoke test: creates a graph with four vertices joined by the edges
/// a1-a2, a2-a3, a3-a1 and a3-a4, builds a SimilarityMatrix from that graph,
/// and passes the resulting matrix to WriteMatrix.
/// </summary>
public static void BasicTest()
{
    BidirectionalGraph gA = new BidirectionalGraph(false);

    IVertex a1 = gA.AddVertex();
    IVertex a2 = gA.AddVertex();
    IVertex a3 = gA.AddVertex();
    IVertex a4 = gA.AddVertex();

    // a1, a2 and a3 form a triangle; a4 hangs off a3.
    gA.AddEdge(a1, a2);
    gA.AddEdge(a2, a3);
    gA.AddEdge(a3, a1);
    gA.AddEdge(a3, a4);

    SimilarityMatrix similarity = new SimilarityMatrix(gA);
    WriteMatrix(similarity.Matrix);
}