Beispiel #1
0
        public void ValidateNUCmerGetClusters()
        {
            // Checks that the number of clusters NUCmer reports for the first reference/query
            // pair of the medium-size data set equals the count stored in the test configuration.
            // NOTE: Nigel ran this test with the same data through mmummer and mgaps and got the same result.

            // Locations of the reference and query FastA files, read from the configuration XML.
            string referencePath = this.utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeSequenceNodeName,
                Constants.FilePathNode);
            string queryPath = this.utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeSequenceNodeName,
                Constants.SearchSequenceFilePathNode);

            FastAParser fastaParser = new FastAParser();
            IEnumerable<ISequence> referenceSequences = fastaParser.Parse(referencePath);
            IEnumerable<ISequence> querySequences = fastaParser.Parse(queryPath);

            // Only the first sequence of each file takes part in the comparison.
            var nucmer = new Bio.Algorithms.Alignment.NUCmer(referenceSequences.First());
            nucmer.LengthOfMUM = 5;
            nucmer.MinimumScore = 0;

            var clusters = nucmer.GetClusters(querySequences.First());

            // The expected count is stored as text, so compare against the invariant string form.
            string expectedCount = this.utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeSequenceNodeName,
                Constants.ClustCount1Node);
            string actualCount = clusters.Count.ToString(CultureInfo.InvariantCulture);

            Assert.AreEqual(expectedCount, actualCount);
        }
Beispiel #2
0
        /// <summary>
        /// Lazily produces the delta alignments for every reference/query pair.
        /// </summary>
        /// <param name="referenceSequence">The reference sequences; one NUCmer instance is created per element.</param>
        /// <param name="originalQuerySequences">The query sequences as supplied by the caller.</param>
        /// <returns>
        /// An iterator yielding one delta alignment collection per (reference, query) pair,
        /// references in the outer loop and queries in the inner loop.
        /// </returns>
        private IEnumerable<IEnumerable<DeltaAlignment>> GetDelta(IEnumerable<ISequence> referenceSequence, IEnumerable<ISequence> originalQuerySequences)
        {
            // Pick the query set according to the strand options: as given, reverse
            // complemented, or the list extended with reverse complements.
            IEnumerable<ISequence> queries;
            if (Forward)
            {
                queries = originalQuerySequences;
            }
            else if (Reverse)
            {
                queries = ReverseComplementSequenceList(originalQuerySequences);
            }
            else
            {
                queries = AddReverseComplementsToSequenceList(originalQuerySequences);
            }

            foreach (ISequence reference in referenceSequence)
            {
                // Copy this object's settings onto a fresh aligner for the current reference.
                NUCmer aligner = new NUCmer(reference);
                aligner.FixedSeparation = FixedSeparation;
                aligner.BreakLength = BreakLength;
                aligner.LengthOfMUM = MinMatch;
                aligner.MaximumSeparation = MaxGap;
                aligner.MinimumScore = MinCluster;
                aligner.SeparationFactor = (float)DiagFactor;

                foreach (ISequence query in queries)
                {
                    // Counted once per yielded pair, i.e. only as the caller consumes the iterator.
                    _queryCount++;
                    yield return aligner.GetDeltaAlignments(query, !MaxMatch, query.IsMarkedAsReverseComplement());
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Builds the NUCmer clusters for every reference/query pair.
        /// </summary>
        /// <param name="referenceSequence">The reference sequences; one NUCmer instance is created per element.</param>
        /// <param name="originalQuerySequences">The query sequences as supplied by the caller.</param>
        /// <returns>
        /// A list holding a single inner list, which contains one cluster list per
        /// (reference, query) pair in reference-major order.
        /// </returns>
        private IList<List<IList<Cluster>>> GetCluster(IEnumerable<ISequence> referenceSequence, IEnumerable<ISequence> originalQuerySequences)
        {
            // Pick the query set according to the strand options: as given, reverse
            // complemented, or the list extended with reverse complements.
            IEnumerable<ISequence> queries;
            if (Forward)
            {
                queries = originalQuerySequences;
            }
            else if (Reverse)
            {
                queries = ReverseComplementSequenceList(originalQuerySequences);
            }
            else
            {
                queries = AddReverseComplementsToSequenceList(originalQuerySequences);
            }

            // The query count is added once, independent of the number of references.
            _queryCount += queries.Count();

            var clustersPerPair = new List<IList<Cluster>>();

            foreach (ISequence reference in referenceSequence)
            {
                // Copy this object's settings onto a fresh aligner for the current reference.
                NUCmer aligner = new NUCmer(reference);
                aligner.FixedSeparation = FixedSeparation;
                aligner.BreakLength = BreakLength;
                aligner.LengthOfMUM = MinMatch;
                aligner.MaximumSeparation = MaxGap;
                aligner.MinimumScore = MinCluster;
                aligner.SeparationFactor = (float)DiagFactor;

                foreach (ISequence query in queries)
                {
                    clustersPerPair.Add(aligner.GetClusters(query, !MaxMatch, query.IsMarkedAsReverseComplement()));
                }
            }

            return new List<List<IList<Cluster>>> { clustersPerPair };
        }
Beispiel #4
0
        /// <summary>
        /// Main execution method: runs NUCmer for every reference/query pair, collects the
        /// delta alignments and converts them into pairwise sequence alignments.
        /// </summary>
        /// <param name="referenceSequenceList">Reference sequences; must contain at least one element.</param>
        /// <param name="querySequenceList">List of input (query) sequences.</param>
        /// <returns>
        /// One alignment per query sequence, or an empty list when no delta alignments were found.
        /// </returns>
        private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> querySequenceList)
        {
            // Materialize both inputs once. Each is walked several times below; with a deferred
            // source that would repeat the work and could hand out different instances on each
            // pass, which would break the QuerySequence.Equals match further down.
            List<ISequence> references = referenceSequenceList.ToList();
            List<ISequence> queries = querySequenceList.ToList();

            // Throws ArgumentOutOfRangeException when no reference was supplied.
            ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
            List<DeltaAlignment> deltas = new List<DeltaAlignment>();

            foreach (ISequence refSequence in references)
            {
                this.nucmerAlgo = new NUCmer((Sequence)refSequence);

                // Only push settings that differ from the defaults; anything else keeps
                // whatever NUCmer itself initializes.
                if (GapOpenCost != DefaultGapOpenCost)
                {
                    this.nucmerAlgo.GapOpenCost = GapOpenCost;
                }

                if (GapExtensionCost != DefaultGapExtensionCost)
                {
                    this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
                }

                if (LengthOfMUM != DefaultLengthOfMUM)
                {
                    this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
                }

                // ClusterBuilder-related overrides.
                if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
                {
                    this.nucmerAlgo.FixedSeparation = FixedSeparation;
                }

                if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
                {
                    this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
                }

                if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
                {
                    this.nucmerAlgo.MinimumScore = MinimumScore;
                }

                if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
                {
                    this.nucmerAlgo.SeparationFactor = SeparationFactor;
                }

                if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
                {
                    this.nucmerAlgo.BreakLength = BreakLength;
                }

                this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
                if (SimilarityMatrix != null)
                {
                    this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
                }

                foreach (ISequence querySequence in queries)
                {
                    deltas.AddRange(this.nucmerAlgo.GetDeltaAlignments(querySequence));
                }
            }

            if (deltas.Count == 0)
            {
                return results;
            }

            // With several references the alignment is reported against their concatenation.
            ISequence concatReference = references[0];
            if (references.Count > 1)
            {
                concatReference = ConcatSequence(references);
            }

            foreach (ISequence querySequence in queries)
            {
                List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
                IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

                // Convert delta alignments to sequence alignments.
                IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

                foreach (PairwiseAlignedSequence align in alignments)
                {
                    align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);
                    align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

                    sequenceAlignment.PairwiseAlignedSequences.Add(align);
                }

                // A query without matching deltas still gets an (empty) alignment entry.
                results.Add(sequenceAlignment);
            }

            return results;
        }
        /// <summary>
        /// Main execution method: runs NUCmer for every reference/query pair, collects the
        /// delta alignments and converts them into pairwise sequence alignments.
        /// </summary>
        /// <param name="referenceSequenceList">Reference sequences; must contain at least one element.</param>
        /// <param name="originalQuerySequences">
        /// List of input sequences. Used as given when ForwardOnly is set, passed through
        /// ReverseComplementSequenceList when ReverseOnly is set, and through
        /// AddReverseComplementsToSequenceList otherwise.
        /// </param>
        /// <returns>
        /// One alignment per effective query sequence, or an empty list when no delta alignments were found.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the shorter of a reference/query pair is shorter than MinimumScore.
        /// </exception>
        private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> originalQuerySequences)
        {
            // Materialize the references once; they are walked several times below.
            List<ISequence> references = referenceSequenceList.ToList();

            // Throws ArgumentOutOfRangeException when no reference was supplied.
            ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

            IEnumerable<ISequence> selectedQueries =
                ForwardOnly ? originalQuerySequences
                    : (ReverseOnly
                        ? ReverseComplementSequenceList(originalQuerySequences)
                        : AddReverseComplementsToSequenceList(originalQuerySequences));

            // Materialize the queries as well. If the helper above is deferred, enumerating it
            // again could hand out different instances, and the QuerySequence.Equals match
            // further down would then miss the deltas computed in the first pass.
            List<ISequence> querySequenceList = selectedQueries.ToList();

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
            var deltas = new List<DeltaAlignment>();

            foreach (ISequence refSequence in references)
            {
                this.nucmerAlgo = new NUCmer(refSequence);

                // Only push settings that differ from the defaults; anything else keeps
                // whatever NUCmer itself initializes.
                if (GapOpenCost != DefaultGapOpenCost)
                {
                    this.nucmerAlgo.GapOpenCost = GapOpenCost;
                }

                if (GapExtensionCost != DefaultGapExtensionCost)
                {
                    this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
                }

                if (LengthOfMUM != DefaultLengthOfMUM)
                {
                    this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
                }

                // ClusterBuilder-related overrides.
                if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
                {
                    this.nucmerAlgo.FixedSeparation = FixedSeparation;
                }

                if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
                {
                    this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
                }

                if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
                {
                    this.nucmerAlgo.MinimumScore = MinimumScore;
                }

                if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
                {
                    this.nucmerAlgo.SeparationFactor = SeparationFactor;
                }

                if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
                {
                    this.nucmerAlgo.BreakLength = BreakLength;
                }

                this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
                if (SimilarityMatrix != null)
                {
                    this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
                }

                foreach (ISequence querySequence in querySequenceList)
                {
                    //  Check for parameters that would prevent an alignment from being returned.
                    if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
                    {
                        var msg = "Bad parameter settings for NucmerPairwiseAligner. " +
                                  "Tried to align a reference of length " + refSequence.Count.ToString() +
                                  " to a sequence of length " + querySequence.Count.ToString() +
                                  " while requiring a minimum score of MinimumScore = " + MinimumScore +
                                  ". This will prevent any alignments from being returned.";
                        throw new ArgumentException(msg);
                    }

                    deltas.AddRange(this.nucmerAlgo.GetDeltaAlignments(querySequence, !MaxMatch, querySequence.IsMarkedAsReverseComplement()));
                }
            }

            if (deltas.Count == 0)
            {
                return results;
            }

            // With several references the alignment is reported against their concatenation.
            ISequence concatReference = references[0];
            if (references.Count > 1)
            {
                concatReference = ConcatSequence(references);
            }

            foreach (ISequence querySequence in querySequenceList)
            {
                List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
                IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

                // Convert delta alignments to sequence alignments.
                IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

                foreach (PairwiseAlignedSequence align in alignments)
                {
                    align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);
                    align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

                    sequenceAlignment.PairwiseAlignedSequences.Add(align);
                }

                // A query without matching deltas still gets an (empty) alignment entry.
                results.Add(sequenceAlignment);
            }

            return results;
        }
Beispiel #6
0
        public void ValidateNUCmerGetClusters()
        {
            // Compares the cluster count NUCmer produces for the medium-size data set with
            // the expected count stored in the test configuration XML.
            // NOTE: Nigel ran this test with the same data through mmummer and mgaps and got the same result.

            // FastA file holding the reference sequence.
            string referenceFile = this.utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeSequenceNodeName,
                Constants.FilePathNode);

            // FastA file holding the query sequence.
            string queryFile = this.utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeSequenceNodeName,
                Constants.SearchSequenceFilePathNode);

            FastAParser reader = new FastAParser();
            IEnumerable<ISequence> referenceSet = reader.Parse(referenceFile);
            IEnumerable<ISequence> querySet = reader.Parse(queryFile);

            // Only the first sequence from each file is used.
            var aligner = new Bio.Algorithms.Alignment.NUCmer(referenceSet.First());
            aligner.LengthOfMUM = 5;
            aligner.MinimumScore = 0;

            var foundClusters = aligner.GetClusters(querySet.First());

            string expected = this.utilityObj.xmlUtil.GetTextValue(
                Constants.MediumSizeSequenceNodeName,
                Constants.ClustCount1Node);
            string actual = foundClusters.Count.ToString(CultureInfo.InvariantCulture);

            Assert.AreEqual(expected, actual);
        }
Beispiel #7
0
        /// <summary>
        /// Main execution method: runs NUCmer for every reference/query pair, collects the
        /// delta alignments and converts them into pairwise sequence alignments.
        /// </summary>
        /// <param name="referenceSequenceList">Reference sequences; must contain at least one element.</param>
        /// <param name="originalQuerySequences">
        /// List of input sequences. Used as given when ForwardOnly is set, passed through
        /// ReverseComplementSequenceList when ReverseOnly is set, and through
        /// AddReverseComplementsToSequenceList otherwise.
        /// </param>
        /// <returns>
        /// One alignment per effective query sequence, or an empty list when no delta alignments were found.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the shorter of a reference/query pair is shorter than MinimumScore.
        /// </exception>
        private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> originalQuerySequences)
        {
            // Materialize the references once; they are walked several times below.
            List<ISequence> references = referenceSequenceList.ToList();

            // Throws ArgumentOutOfRangeException when no reference was supplied.
            ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

            IEnumerable<ISequence> selectedQueries =
                ForwardOnly ? originalQuerySequences
                    : (ReverseOnly
                        ? ReverseComplementSequenceList(originalQuerySequences)
                        : AddReverseComplementsToSequenceList(originalQuerySequences));

            // Materialize the queries as well. If the helper above is deferred, enumerating it
            // again could hand out different instances, and the QuerySequence.Equals match
            // further down would then miss the deltas computed in the first pass.
            List<ISequence> querySequenceList = selectedQueries.ToList();

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();
            var deltas = new List<DeltaAlignment>();

            foreach (ISequence refSequence in references)
            {
                this.nucmerAlgo = new NUCmer(refSequence);

                // Only push settings that differ from the defaults; anything else keeps
                // whatever NUCmer itself initializes.
                if (GapOpenCost != DefaultGapOpenCost)
                {
                    this.nucmerAlgo.GapOpenCost = GapOpenCost;
                }

                if (GapExtensionCost != DefaultGapExtensionCost)
                {
                    this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
                }

                if (LengthOfMUM != DefaultLengthOfMUM)
                {
                    this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
                }

                // ClusterBuilder-related overrides.
                if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
                {
                    this.nucmerAlgo.FixedSeparation = FixedSeparation;
                }

                if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
                {
                    this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
                }

                if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
                {
                    this.nucmerAlgo.MinimumScore = MinimumScore;
                }

                if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
                {
                    this.nucmerAlgo.SeparationFactor = SeparationFactor;
                }

                if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
                {
                    this.nucmerAlgo.BreakLength = BreakLength;
                }

                this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
                if (SimilarityMatrix != null)
                {
                    this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
                }

                foreach (ISequence querySequence in querySequenceList)
                {
                    //  Check for parameters that would prevent an alignment from being returned.
                    if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
                    {
                        var msg = "Bad parameter settings for NucmerPairwiseAligner. " +
                                   "Tried to align a reference of length " + refSequence.Count.ToString() +
                                   " to a sequence of length " + querySequence.Count.ToString() +
                                   " while requiring a minimum score of MinimumScore = " + MinimumScore +
                                   ". This will prevent any alignments from being returned.";
                        throw new ArgumentException(msg);
                    }

                    deltas.AddRange(this.nucmerAlgo.GetDeltaAlignments(querySequence, !MaxMatch, querySequence.IsMarkedAsReverseComplement()));
                }
            }

            if (deltas.Count == 0)
            {
                return results;
            }

            // With several references the alignment is reported against their concatenation.
            ISequence concatReference = references[0];
            if (references.Count > 1)
            {
                concatReference = ConcatSequence(references);
            }

            foreach (ISequence querySequence in querySequenceList)
            {
                List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
                IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

                // Convert delta alignments to sequence alignments.
                IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

                foreach (PairwiseAlignedSequence align in alignments)
                {
                    align.Score = CalculateScore(align.FirstSequence, align.SecondSequence);
                    align.Consensus = MakeConsensus(align.FirstSequence, align.SecondSequence);

                    sequenceAlignment.PairwiseAlignedSequences.Add(align);
                }

                // A query without matching deltas still gets an (empty) alignment entry.
                results.Add(sequenceAlignment);
            }

            return results;
        }
        /// <summary>
        /// Aligns reads to reference genome using NUCmer.
        /// </summary>
        /// <param name="nodeName">Name of parent Node which contains the data in xml.</param>
        /// <param name="isFilePath">
        /// True to load reference and query sequences from the FastA files named in the
        /// configuration; false to build them from sequence strings stored in the configuration.
        /// </param>
        /// <returns>
        /// Delta i.e. output from NUCmer: one entry per (reference, query) pair, in
        /// reference-major order.
        /// </returns>
        IList<IEnumerable<DeltaAlignment>> GetDeltaAlignment(string nodeName, bool isFilePath)
        {
            List<ISequence> referenceSeqList = new List<ISequence>();
            List<ISequence> searchSeqList = new List<ISequence>();
            IList<IEnumerable<DeltaAlignment>> results = new List<IEnumerable<DeltaAlignment>>();

            if (isFilePath)
            {
                // Gets the reference sequence from the FastA file
                string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.FilePathNode1);

                Assert.IsNotNull(filePath);
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                    "Comparative BVT : Successfully validated the File Path '{0}'.", filePath));

                using (FastASequencePositionParser parser = new FastASequencePositionParser(filePath))
                {
                    referenceSeqList.AddRange(parser.Parse());

                    // Gets the query sequence from the FastA file
                    string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName,
                        Constants.FilePathNode2);

                    Assert.IsNotNull(queryFilePath);
                    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                        "Comparative BVT : Successfully validated the File Path '{0}'.", queryFilePath));

                    using (FastASequencePositionParser queryParser = new FastASequencePositionParser(queryFilePath))
                    {
                        searchSeqList.AddRange(queryParser.Parse());
                    }
                }
            }
            else
            {
                // Gets the reference & search sequences from the configuration file
                string[] referenceSequences = utilityObj.xmlUtil.GetTextValues(nodeName,
                    Constants.ReferenceSequencesNode);
                string[] searchSequences = utilityObj.xmlUtil.GetTextValues(nodeName,
                    Constants.SearchSequencesNode);

                IAlphabet seqAlphabet = Utility.GetAlphabet(utilityObj.xmlUtil.GetTextValue(nodeName,
                    Constants.AlphabetNameNode));

                foreach (string referenceText in referenceSequences)
                {
                    referenceSeqList.Add(new Sequence(seqAlphabet, encodingObj.GetBytes(referenceText)));
                }

                // Only the first search entry is used; it is split on commas into individual sequences.
                string[] seqArray = searchSequences.ElementAt(0).Split(',');

                searchSeqList.AddRange(seqArray.Select(t => new Sequence(seqAlphabet, encodingObj.GetBytes(t))).Cast<ISequence>());
            }

            // Nothing to align: return before touching the NUCmer settings, as before.
            if (referenceSeqList.Count == 0)
            {
                return results;
            }

            // The NUCmer settings depend only on nodeName, so read and parse them once instead
            // of once per reference. Configuration values are machine-written, hence the
            // invariant culture rather than the culture of the machine running the test.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;
            int fixedSeparation = int.Parse(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FixedSeparationNode), invariant);
            int minimumScore = int.Parse(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MinimumScoreNode), invariant);
            int separationFactor = int.Parse(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeparationFactorNode), invariant);
            int mumLength = int.Parse(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode), invariant);

            foreach (ISequence reference in referenceSeqList)
            {
                NUCmer nucmerAligner = new NUCmer((Sequence)reference);

                nucmerAligner.FixedSeparation = fixedSeparation;
                nucmerAligner.MinimumScore = minimumScore;
                nucmerAligner.SeparationFactor = separationFactor;
                nucmerAligner.LengthOfMUM = mumLength;

                foreach (ISequence querySeq in searchSeqList)
                {
                    results.Add(nucmerAligner.GetDeltaAlignments(querySeq, true));
                }
            }

            return results;
        }