/// <summary>
/// Builds a NUCmer instance over the first reference sequence of the medium-size
/// data set (LengthOfMUM = 5, MinimumScore = 0), computes the clusters against the
/// first query sequence and checks the cluster count against the expected value
/// stored in the test configuration.
/// </summary>
public void ValidateNUCmerGetClusters()
{
    // NOTE: Nigel ran this test with the same data through mmummer and mgaps and got the same result.
    string dataNode = Constants.MediumSizeSequenceNodeName;

    // Paths of the reference and query FastA files.
    string referencePath = this.utilityObj.xmlUtil.GetTextValue(dataNode, Constants.FilePathNode);
    string queryPath = this.utilityObj.xmlUtil.GetTextValue(dataNode, Constants.SearchSequenceFilePathNode);

    var fastaParser = new FastAParser();
    IEnumerable<ISequence> referenceSequences = fastaParser.Parse(referencePath);
    IEnumerable<ISequence> querySequences = fastaParser.Parse(queryPath);

    // Only the first sequence of each file takes part in the test.
    var nucmer = new Bio.Algorithms.Alignment.NUCmer(referenceSequences.First());
    nucmer.LengthOfMUM = 5;
    nucmer.MinimumScore = 0;

    var clusters = nucmer.GetClusters(querySequences.First());

    string expectedCount = this.utilityObj.xmlUtil.GetTextValue(dataNode, Constants.ClustCount1Node);
    string actualCount = clusters.Count.ToString(CultureInfo.InvariantCulture);
    Assert.AreEqual(expectedCount, actualCount);
}
/// <summary>
/// Lazily produces the delta alignments for every reference/query pair.
/// </summary>
/// <param name="referenceSequence">The reference sequences.</param>
/// <param name="originalQuerySequences">The query sequences as supplied by the caller.</param>
/// <returns>
/// One delta-alignment sequence per reference/query pair, yielded reference by reference.
/// The query counter is incremented each time a pair is yielded.
/// </returns>
private IEnumerable<IEnumerable<DeltaAlignment>> GetDelta(IEnumerable<ISequence> referenceSequence, IEnumerable<ISequence> originalQuerySequences)
{
    // Choose the strand(s) to align: forward only, reverse only, or both.
    IEnumerable<ISequence> queries;
    if (Forward)
    {
        queries = originalQuerySequences;
    }
    else if (Reverse)
    {
        queries = ReverseComplementSequenceList(originalQuerySequences);
    }
    else
    {
        queries = AddReverseComplementsToSequenceList(originalQuerySequences);
    }

    foreach (ISequence reference in referenceSequence)
    {
        // A fresh NUCmer per reference, configured from the current option values.
        var aligner = new NUCmer(reference);
        aligner.FixedSeparation = FixedSeparation;
        aligner.BreakLength = BreakLength;
        aligner.LengthOfMUM = MinMatch;
        aligner.MaximumSeparation = MaxGap;
        aligner.MinimumScore = MinCluster;
        aligner.SeparationFactor = (float)DiagFactor;

        foreach (ISequence query in queries)
        {
            _queryCount++;
            bool isReverseComplement = query.IsMarkedAsReverseComplement();
            yield return aligner.GetDeltaAlignments(query, !MaxMatch, isReverseComplement);
        }
    }
}
/// <summary>
/// Computes the clusters for every reference/query pair.
/// </summary>
/// <param name="referenceSequence">The reference sequences.</param>
/// <param name="originalQuerySequences">The query sequences as supplied by the caller.</param>
/// <returns>
/// A list holding a single inner list; the inner list contains one cluster list per
/// reference/query pair, grouped reference by reference.
/// </returns>
private IList<List<IList<Cluster>>> GetCluster(IEnumerable<ISequence> referenceSequence, IEnumerable<ISequence> originalQuerySequences)
{
    // Choose the strand(s) to align: forward only, reverse only, or both.
    IEnumerable<ISequence> queries;
    if (Forward)
    {
        queries = originalQuerySequences;
    }
    else if (Reverse)
    {
        queries = ReverseComplementSequenceList(originalQuerySequences);
    }
    else
    {
        queries = AddReverseComplementsToSequenceList(originalQuerySequences);
    }

    _queryCount += queries.Count();

    var pairClusters = new List<IList<Cluster>>();
    foreach (ISequence reference in referenceSequence)
    {
        var aligner = new NUCmer(reference);
        aligner.FixedSeparation = FixedSeparation;
        aligner.BreakLength = BreakLength;
        aligner.LengthOfMUM = MinMatch;
        aligner.MaximumSeparation = MaxGap;
        aligner.MinimumScore = MinCluster;
        aligner.SeparationFactor = (float)DiagFactor;

        foreach (ISequence query in queries)
        {
            pairClusters.Add(aligner.GetClusters(query, !MaxMatch, query.IsMarkedAsReverseComplement()));
        }
    }

    // The outer list always carries exactly one element.
    return new List<List<IList<Cluster>>> { pairClusters };
}
/// <summary>
/// Main execution method of the aligner: runs NUCmer for every reference/query
/// pair, collects the delta alignments and converts them into pairwise sequence
/// alignments (one per query sequence).
/// </summary>
/// <param name="referenceSequenceList">Reference sequences; the first one supplies the alphabet for the consensus resolver.</param>
/// <param name="querySequenceList">List of input sequences.</param>
/// <returns>
/// A list with one pairwise alignment per query sequence, or an empty list when
/// NUCmer produced no delta alignment at all.
/// </returns>
private IEnumerable<IPairwiseSequenceAlignment> Alignment(
    IEnumerable<ISequence> referenceSequenceList,
    IEnumerable<ISequence> querySequenceList)
{
    // Both inputs are walked several times below. Snapshot them once so that a
    // lazily evaluated source is not re-run and every pass sees the same instances
    // (the per-query matching further down relies on Equals against them).
    IList<ISequence> references = referenceSequenceList as IList<ISequence> ?? referenceSequenceList.ToList();

    ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

    IList<ISequence> queries = querySequenceList as IList<ISequence> ?? querySequenceList.ToList();

    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
    List<DeltaAlignment> deltas = new List<DeltaAlignment>();

    foreach (ISequence refSequence in references)
    {
        this.nucmerAlgo = new NUCmer((Sequence)refSequence);

        // Only push settings that differ from the defaults; anything left
        // untouched keeps the value NUCmer was constructed with.
        if (GapOpenCost != DefaultGapOpenCost)
        {
            this.nucmerAlgo.GapOpenCost = GapOpenCost;
        }

        if (GapExtensionCost != DefaultGapExtensionCost)
        {
            this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
        }

        if (LengthOfMUM != DefaultLengthOfMUM)
        {
            this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
        }

        // ClusterBuilder-related settings.
        if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
        {
            this.nucmerAlgo.FixedSeparation = FixedSeparation;
        }

        if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
        {
            this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
        }

        if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
        {
            this.nucmerAlgo.MinimumScore = MinimumScore;
        }

        if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
        {
            this.nucmerAlgo.SeparationFactor = SeparationFactor;
        }

        if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
        {
            this.nucmerAlgo.BreakLength = BreakLength;
        }

        this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
        if (SimilarityMatrix != null)
        {
            this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
        }

        foreach (ISequence querySequence in queries)
        {
            deltas.AddRange(this.nucmerAlgo.GetDeltaAlignments(querySequence));
        }
    }

    if (deltas.Count == 0)
    {
        return results;
    }

    // With several references the alignment is reported against their concatenation.
    ISequence concatReference = references.Count > 1
        ? ConcatSequence(references)
        : references[0];

    foreach (ISequence querySequence in queries)
    {
        List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
        IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

        // Convert delta alignments to sequence alignments
        IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);
        foreach (PairwiseAlignedSequence align in alignments)
        {
            // Calculate the score of alignment
            align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);

            // Make Consensus
            align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

            sequenceAlignment.PairwiseAlignedSequences.Add(align);
        }

        results.Add(sequenceAlignment);
    }

    return results;
}
/// <summary>
/// Main execution method of the aligner: selects the query strand(s) to use, runs
/// NUCmer for every reference/query pair, collects the delta alignments and
/// converts them into pairwise sequence alignments (one per query sequence).
/// </summary>
/// <param name="referenceSequenceList">Reference sequences; the first one supplies the alphabet for the consensus resolver.</param>
/// <param name="originalQuerySequences">List of input sequences.</param>
/// <returns>
/// A list with one pairwise alignment per (possibly reverse-complemented) query
/// sequence, or an empty list when NUCmer produced no delta alignment at all.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when the shorter of a reference/query pair is shorter than MinimumScore.
/// </exception>
private IEnumerable<IPairwiseSequenceAlignment> Alignment(
    IEnumerable<ISequence> referenceSequenceList,
    IEnumerable<ISequence> originalQuerySequences)
{
    // The references are walked several times below; snapshot them once so a
    // lazily evaluated source is not re-run.
    IList<ISequence> references = referenceSequenceList as IList<ISequence> ?? referenceSequenceList.ToList();

    ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

    IEnumerable<ISequence> orientedQueries =
        ForwardOnly
            ? originalQuerySequences
            : (ReverseOnly
                ? ReverseComplementSequenceList(originalQuerySequences)
                : AddReverseComplementsToSequenceList(originalQuerySequences));

    // The query list is iterated once per reference and once more when results are
    // assembled, where deltas are matched back via QuerySequence.Equals. Snapshot it
    // so both passes see the very same instances.
    // NOTE(review): the reverse-complement helpers are not visible here; if they are
    // lazy, re-enumerating would presumably create new sequence objects - confirm.
    IList<ISequence> querySequenceList = orientedQueries as IList<ISequence> ?? orientedQueries.ToList();

    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
    var deltas = new List<DeltaAlignment>();

    foreach (ISequence refSequence in references)
    {
        this.nucmerAlgo = new NUCmer(refSequence);

        // Only push settings that differ from the defaults; anything left
        // untouched keeps the value NUCmer was constructed with.
        if (GapOpenCost != DefaultGapOpenCost)
        {
            this.nucmerAlgo.GapOpenCost = GapOpenCost;
        }

        if (GapExtensionCost != DefaultGapExtensionCost)
        {
            this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
        }

        if (LengthOfMUM != DefaultLengthOfMUM)
        {
            this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
        }

        // ClusterBuilder-related settings.
        if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
        {
            this.nucmerAlgo.FixedSeparation = FixedSeparation;
        }

        if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
        {
            this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
        }

        if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
        {
            this.nucmerAlgo.MinimumScore = MinimumScore;
        }

        if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
        {
            this.nucmerAlgo.SeparationFactor = SeparationFactor;
        }

        if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
        {
            this.nucmerAlgo.BreakLength = BreakLength;
        }

        this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
        if (SimilarityMatrix != null)
        {
            this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
        }

        foreach (ISequence querySequence in querySequenceList)
        {
            // Check for parameters that would prevent an alignment from being returned.
            if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
            {
                var msg = "Bad parameter settings for NucmerPairwiseAligner. "
                          + "Tried to align a reference of length " + refSequence.Count.ToString()
                          + " to a sequence of length " + querySequence.Count.ToString()
                          + " while requiring a minimum score of MinimumScore = " + MinimumScore
                          + ". This will prevent any alignments from being returned.";
                throw new ArgumentException(msg);
            }

            IEnumerable<DeltaAlignment> deltaAlignment = this.nucmerAlgo.GetDeltaAlignments(
                querySequence,
                !MaxMatch,
                querySequence.IsMarkedAsReverseComplement());
            deltas.AddRange(deltaAlignment);
        }
    }

    if (deltas.Count == 0)
    {
        return results;
    }

    // With several references the alignment is reported against their concatenation.
    ISequence concatReference = references.Count > 1
        ? ConcatSequence(references)
        : references[0];

    foreach (ISequence querySequence in querySequenceList)
    {
        List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
        IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

        // Convert delta alignments to sequence alignments
        IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);
        foreach (PairwiseAlignedSequence align in alignments)
        {
            // Calculate the score of alignment
            align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);

            // Make Consensus
            align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

            sequenceAlignment.PairwiseAlignedSequences.Add(align);
        }

        results.Add(sequenceAlignment);
    }

    return results;
}
/// <summary>
/// Checks that NUCmer (LengthOfMUM = 5, MinimumScore = 0) reports the expected
/// number of clusters when the first query sequence of the medium-size data set
/// is clustered against the first reference sequence.
/// </summary>
public void ValidateNUCmerGetClusters()
{
    // NOTE: Nigel ran this test with the same data through mmummer and mgaps and got the same result.
    string sectionName = Constants.MediumSizeSequenceNodeName;

    // Locations of the reference and query FastA files, taken from the test configuration.
    string referenceFile = this.utilityObj.xmlUtil.GetTextValue(sectionName, Constants.FilePathNode);
    string queryFile = this.utilityObj.xmlUtil.GetTextValue(sectionName, Constants.SearchSequenceFilePathNode);

    var reader = new FastAParser();
    IEnumerable<ISequence> parsedReferences = reader.Parse(referenceFile);
    IEnumerable<ISequence> parsedQueries = reader.Parse(queryFile);

    // Only the first sequence of each file is used.
    var aligner = new Bio.Algorithms.Alignment.NUCmer(parsedReferences.First());
    aligner.LengthOfMUM = 5;
    aligner.MinimumScore = 0;

    var foundClusters = aligner.GetClusters(parsedQueries.First());

    string expected = this.utilityObj.xmlUtil.GetTextValue(sectionName, Constants.ClustCount1Node);
    string actual = foundClusters.Count.ToString(CultureInfo.InvariantCulture);
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Main execution method of the aligner: selects the query strand(s) to use, runs
/// NUCmer for every reference/query pair, collects the delta alignments and
/// converts them into pairwise sequence alignments (one per query sequence).
/// </summary>
/// <param name="referenceSequenceList">Reference sequences; the first one supplies the alphabet for the consensus resolver.</param>
/// <param name="originalQuerySequences">List of input sequences.</param>
/// <returns>
/// A list with one pairwise alignment per (possibly reverse-complemented) query
/// sequence, or an empty list when NUCmer produced no delta alignment at all.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when the shorter of a reference/query pair is shorter than MinimumScore.
/// </exception>
private IEnumerable<IPairwiseSequenceAlignment> Alignment(
    IEnumerable<ISequence> referenceSequenceList,
    IEnumerable<ISequence> originalQuerySequences)
{
    // Snapshot the references: they are indexed, iterated, counted and concatenated
    // below, which would re-run a lazily evaluated source each time.
    IList<ISequence> referenceSnapshot = referenceSequenceList as IList<ISequence> ?? referenceSequenceList.ToList();

    ConsensusResolver = new SimpleConsensusResolver(referenceSnapshot[0].Alphabet);

    IEnumerable<ISequence> strandSelection =
        ForwardOnly
            ? originalQuerySequences
            : (ReverseOnly
                ? ReverseComplementSequenceList(originalQuerySequences)
                : AddReverseComplementsToSequenceList(originalQuerySequences));

    // Snapshot the queries as well: the result-assembly pass matches deltas back to
    // queries with QuerySequence.Equals, so it must see the same instances that were
    // handed to NUCmer in the alignment pass.
    // NOTE(review): the reverse-complement helpers are not visible here; if they are
    // lazy, re-enumerating would presumably create new sequence objects - confirm.
    IList<ISequence> querySequenceList = strandSelection as IList<ISequence> ?? strandSelection.ToList();

    IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
    var deltas = new List<DeltaAlignment>();

    foreach (ISequence refSequence in referenceSnapshot)
    {
        this.nucmerAlgo = new NUCmer(refSequence);

        // Only push settings that differ from the defaults; anything left
        // untouched keeps the value NUCmer was constructed with.
        if (GapOpenCost != DefaultGapOpenCost)
        {
            this.nucmerAlgo.GapOpenCost = GapOpenCost;
        }

        if (GapExtensionCost != DefaultGapExtensionCost)
        {
            this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
        }

        if (LengthOfMUM != DefaultLengthOfMUM)
        {
            this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
        }

        // ClusterBuilder-related settings.
        if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
        {
            this.nucmerAlgo.FixedSeparation = FixedSeparation;
        }

        if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
        {
            this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
        }

        if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
        {
            this.nucmerAlgo.MinimumScore = MinimumScore;
        }

        if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
        {
            this.nucmerAlgo.SeparationFactor = SeparationFactor;
        }

        if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
        {
            this.nucmerAlgo.BreakLength = BreakLength;
        }

        this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
        if (SimilarityMatrix != null)
        {
            this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
        }

        foreach (ISequence querySequence in querySequenceList)
        {
            // Check for parameters that would prevent an alignment from being returned.
            if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
            {
                var msg = "Bad parameter settings for NucmerPairwiseAligner. "
                          + "Tried to align a reference of length " + refSequence.Count.ToString()
                          + " to a sequence of length " + querySequence.Count.ToString()
                          + " while requiring a minimum score of MinimumScore = " + MinimumScore
                          + ". This will prevent any alignments from being returned.";
                throw new ArgumentException(msg);
            }

            IEnumerable<DeltaAlignment> deltaAlignment = this.nucmerAlgo.GetDeltaAlignments(
                querySequence,
                !MaxMatch,
                querySequence.IsMarkedAsReverseComplement());
            deltas.AddRange(deltaAlignment);
        }
    }

    if (deltas.Count == 0)
    {
        return results;
    }

    // With several references the alignment is reported against their concatenation.
    ISequence concatReference = referenceSnapshot.Count > 1
        ? ConcatSequence(referenceSnapshot)
        : referenceSnapshot[0];

    foreach (ISequence querySequence in querySequenceList)
    {
        List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
        IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

        // Convert delta alignments to sequence alignments
        IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);
        foreach (PairwiseAlignedSequence align in alignments)
        {
            // Calculate the score of alignment
            align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);

            // Make Consensus
            align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

            sequenceAlignment.PairwiseAlignedSequences.Add(align);
        }

        results.Add(sequenceAlignment);
    }

    return results;
}
/// <summary>
/// Aligns reads to reference genome using NUCmer.
/// </summary>
/// <param name="nodeName">Name of parent Node which contains the data in xml.</param>
/// <param name="isFilePath">
/// True to read reference and query sequences from the FastA files named in the
/// configuration; false to build them from sequence strings stored in the configuration.
/// </param>
/// <returns>
/// Delta i.e. output from NUCmer: one entry per reference/query pair, grouped
/// reference by reference.
/// </returns>
IList<IEnumerable<DeltaAlignment>> GetDeltaAlignment(string nodeName, bool isFilePath)
{
    List<ISequence> referenceSeqList = new List<ISequence>();
    List<ISequence> searchSeqList = new List<ISequence>();
    IList<IEnumerable<DeltaAlignment>> results = new List<IEnumerable<DeltaAlignment>>();

    if (isFilePath)
    {
        // Gets the reference sequence from the FastA file
        string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "Comparative BVT : Successfully validated the File Path '{0}'.", filePath));

        // Copy the sequences into the list while the parser is still open, then
        // release the file before the query file is touched.
        using (FastASequencePositionParser parser = new FastASequencePositionParser(filePath))
        {
            referenceSeqList.AddRange(parser.Parse());
        }

        // Gets the query sequence from the FastA file
        string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "Comparative BVT : Successfully validated the File Path '{0}'.", queryFilePath));

        using (FastASequencePositionParser queryParser = new FastASequencePositionParser(queryFilePath))
        {
            searchSeqList.AddRange(queryParser.Parse());
        }
    }
    else
    {
        // Gets the reference & search sequences from the configuration file
        string[] referenceSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);

        IAlphabet seqAlphabet = Utility.GetAlphabet(
            utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        foreach (string referenceText in referenceSequences)
        {
            referenceSeqList.Add(new Sequence(seqAlphabet, encodingObj.GetBytes(referenceText)));
        }

        // Only the first search entry is used; it holds a comma-separated list of sequences.
        string[] seqArray = searchSequences.ElementAt(0).Split(',');
        searchSeqList.AddRange(
            seqArray.Select(t => new Sequence(seqAlphabet, encodingObj.GetBytes(t))).Cast<ISequence>());
    }

    // The NUCmer settings are the same for every reference, so read and parse them
    // once. The configuration is machine-readable, hence the invariant culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    int fixedSeparation = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode), invariant);
    int minimumScore = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode), invariant);
    int separationFactor = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode), invariant);
    int lengthOfMum = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode), invariant);

    foreach (ISequence reference in referenceSeqList)
    {
        NUCmer nucmerAligner = new NUCmer((Sequence)reference);
        nucmerAligner.FixedSeparation = fixedSeparation;
        nucmerAligner.MinimumScore = minimumScore;
        nucmerAligner.SeparationFactor = separationFactor;
        nucmerAligner.LengthOfMUM = lengthOfMum;

        foreach (ISequence querySeq in searchSeqList)
        {
            results.Add(nucmerAligner.GetDeltaAlignments(querySeq, true));
        }
    }

    return results;
}