示例#1
0
        /// <summary>
        ///     Checks the k-mers generated for a DNA sequence against the expected
        ///     values stored in the test-data XML. The k-mer length is fixed at 2.
        /// </summary>
        /// <param name="nodeName">XML node holding the input sequence and the expected values.</param>
        /// <param name="IsKmerBuilder">
        ///     True to create the k-mers through <c>SequenceToKmerBuilder</c> (the k-mer count is
        ///     verified as well in that case); false to construct <c>KmersOfSequence</c> directly.
        /// </param>
        private void ValidateKmer(string nodeName, bool IsKmerBuilder)
        {
            const int kmerLength = 2;

            // Input sequence and expected results all come from the XML test data.
            string inputSequence        = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string countExpected        = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmrSeqCountNode);
            string baseSequenceExpected = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerSequenceNode);
            string lengthExpected       = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.PositionsNode);

            ISequence dnaSequence = new Sequence(Alphabets.DNA, inputSequence);

            KmersOfSequence kmers;
            if (IsKmerBuilder)
            {
                kmers = new SequenceToKmerBuilder().Build(dnaSequence, kmerLength);

                // The k-mer count is only checked on the builder path.
                Assert.AreEqual(countExpected, kmers.Kmers.Count.ToString((IFormatProvider)null));
            }
            else
            {
                kmers = new KmersOfSequence(dnaSequence, kmerLength);
            }

            // Checks shared by both construction paths.
            string baseSequenceActual = new String(kmers.BaseSequence.Select(a => (char)a).ToArray());
            Assert.AreEqual(baseSequenceExpected, baseSequenceActual);
            Assert.AreEqual(lengthExpected, kmers.Length.ToString((IFormatProvider)null));
        }
示例#2
0
        /// <summary>
        /// Builds the graph for the given sequences. A k-mer dictionary is created
        /// from the input, one <c>DeBruijnNode</c> is created per dictionary entry
        /// (in parallel), and <c>GenerateAdjacency</c> is then called to add the
        /// edge information between the nodes.
        /// </summary>
        /// <param name="sequences">List of input sequences.</param>
        /// <param name="kmerLength">Length of the k-mers; must be positive.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="kmerLength"/> is zero or negative.</exception>
        public void Build(IList<ISequence> sequences, int kmerLength)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            _baseSequence = new List<ISequence>(sequences);
            var kmerTable = SequenceToKmerBuilder.BuildPaDeNAKmerDictionary(sequences, kmerLength);

            // Each dictionary value gets its node created and stored on the entry itself;
            // the k-mer reference is dropped afterwards since the node now carries the data.
            var createdNodes = kmerTable.Values.AsParallel().Select(entry =>
            {
                var kmer = entry.Kmer;
                if (entry.KeyHasSameOrientation)
                {
                    entry.Node = new DeBruijnNode(kmerLength, kmer.SequenceIndex, kmer.KmerPosition, kmer.Count, kmer.CountRC);
                }
                else
                {
                    // Opposite orientation: the forward and reverse-complement counts swap places.
                    entry.Node = new DeBruijnNode(kmerLength, kmer.SequenceIndex, kmer.KmerPosition, kmer.CountRC, kmer.Count);
                }

                entry.Kmer = null;
                return entry.Node;
            });

            _kmerNodes = new HashSet<DeBruijnNode>(createdNodes);

            // Add edge information
            GenerateAdjacency(kmerTable, kmerLength);
        }
示例#3
0
        /// <summary>
        /// Tags de Bruijn graph nodes with a reference-genome position when the node's
        /// k-mer occurs at exactly one location in the reference genome, then reports
        /// the painting statistics through <c>RaiseMessage</c> and stores the percentage
        /// of painted nodes in <c>PercentNodesPainted</c>.
        /// </summary>
        /// <exception cref="OverflowException">
        /// A uniquely located reference k-mer sits at a position that is negative or larger
        /// than <see cref="short.MaxValue"/> and therefore cannot be stored on the node.
        /// </exception>
        /// <exception cref="InvalidProgramException">
        /// More than 95% of the reference k-mers were skipped because they occur at multiple locations.
        /// </exception>
        protected void PaintKmersWithReference()
        {
            List<int>     missingLocs      = new List<int>();
            var           refKmerPositions = SequenceToKmerBuilder.BuildKmerDictionary(ReferenceGenome.ReferenceSequence, this.KmerLength);
            int           kmersPainted     = 0;
            int           kmersSkipped     = 0;
            DeBruijnGraph graph            = this.Graph;
            long          totalNodes       = graph.NodeCount;

            foreach (var v in refKmerPositions)
            {
                ISequence   seq       = v.Key;
                IList<long> locations = v.Value;

                // K-mers found at several reference locations are ambiguous and are not painted.
                if (locations.Count != 1)
                {
                    kmersSkipped += locations.Count;
                    continue;
                }

                long location = locations[0];
                var  kmerData = new KmerData32();
                kmerData.SetKmerData(seq, 0, this.KmerLength);
                DeBruijnNode matchingNode = graph.KmerManager.SetNewOrGetOld(kmerData, false);
                if (matchingNode == null)
                {
                    missingLocs.Add((int)location);
                    continue;
                }

                // Validate BEFORE narrowing: an unchecked (short) cast wraps silently, so a
                // position such as 70000 would otherwise be stored as a wrong positive value.
                if (location < 0 || location > short.MaxValue)
                {
                    throw new OverflowException(
                        "Reference genome position " + location +
                        " cannot be stored as a 16-bit reference position on a graph node.");
                }

                matchingNode.ReferenceGenomePosition = (short)location;
                kmersPainted++;
            }

            // Diagnostic dump of unmatched positions; deliberately disabled by the constant
            // 'false'. The writer is wrapped in 'using' so it is disposed if this is re-enabled.
            if (false && OutputDiagnosticInformation)
            {
                using (StreamWriter sw = new StreamWriter("OutMissing.csv"))
                {
                    foreach (int i in missingLocs)
                    {
                        sw.WriteLine(i.ToString());
                    }
                }
            }

            double percentKmersSkipped = 100.0 * (kmersSkipped) / ((double)(kmersPainted + kmersSkipped));

            if (percentKmersSkipped > 95.0)
            {
                throw new InvalidProgramException("Reference Genome Skipped over 95% of Kmers");
            }

            double percentHit = kmersPainted / (double)refKmerPositions.Count;

            RaiseMessage("A total of " + (100.0 * percentHit).ToString() + "% nodes in the reference were painted");
            PercentNodesPainted = 100.0 * kmersPainted / (double)totalNodes;
            RaiseMessage(PercentNodesPainted.ToString("n2") + " % of nodes painted, for a total of " + kmersPainted.ToString() + " painted.");
            RaiseMessage(percentKmersSkipped.ToString("n2") + " % of Kmers were skipped for being in multiple locations");
        }
示例#4
0
        /// <summary>
        ///     Runs the k-mer based sequence comparison on two sequences taken from the
        ///     test-data XML and checks the first reported difference feature, as well
        ///     as the total feature count, against the expected values.
        /// </summary>
        /// <param name="node">Xml Node Name</param>
        /// <param name="additionalParameter">
        ///     Selects the alphabet of the test sequences: <c>Assemble</c> creates protein
        ///     sequences, <c>Consensus</c> creates DNA sequences. For any other value the
        ///     sequences stay null.
        /// </param>
        private void ValidateComputeFeature(string node, AssemblyParameters additionalParameter)
        {
            // Inputs and expected results from the XML test data.
            string firstText        = utilityObj.xmlUtil.GetTextValue(node, Constants.SequenceNode1);
            string secondText       = utilityObj.xmlUtil.GetTextValue(node, Constants.SequenceNode2);
            string kmerLengthText   = utilityObj.xmlUtil.GetTextValue(node, Constants.KmerLengthNode);
            string featureCountText = utilityObj.xmlUtil.GetTextValue(node, Constants.FeatureCount);
            string featureName      = utilityObj.xmlUtil.GetTextValue(node, Constants.FeatureName);
            string featureType      = utilityObj.xmlUtil.GetTextValue(node, Constants.FeatureType);
            string startIndexText   = utilityObj.xmlUtil.GetTextValue(node, Constants.StartIndexNode);
            string endIndexText     = utilityObj.xmlUtil.GetTextValue(node, Constants.EndIndexNode);

            ISequence first  = null;
            ISequence second = null;

            // Pick the alphabet according to the requested scenario.
            if (additionalParameter == AssemblyParameters.Assemble)
            {
                first  = new Sequence(Alphabets.Protein, firstText);
                second = new Sequence(Alphabets.Protein, secondText);
            }
            else if (additionalParameter == AssemblyParameters.Consensus)
            {
                first  = new Sequence(Alphabets.DNA, firstText);
                second = new Sequence(Alphabets.DNA, secondText);
            }

            var builder    = new SequenceToKmerBuilder();
            int wordLength = int.Parse(kmerLengthText, null);

            // Comparison pipeline: k-mers -> match table -> minimal matches -> differences -> features.
            KmersOfSequence      kmers          = builder.Build(first, wordLength);
            List<WordMatch>      allMatches     = WordMatch.BuildMatchTable(kmers, second, wordLength);
            List<WordMatch>      minimalMatches = WordMatch.GetMinimalList(allMatches, wordLength);
            List<DifferenceNode> differences    = DifferenceNode.BuildDiffList(minimalMatches, first, second);
            List<DifferenceNode.CompareFeature> features =
                DifferenceNode.OutputDiffList(differences, first, second);

            // Validate difference.
            DifferenceNode.CompareFeature.Equals(null, null);
            Assert.AreEqual(featureCountText, features.Count.ToString((IFormatProvider)null));
            Assert.AreEqual(featureName, features[0].Feature);
            Assert.AreEqual(featureType, features[0].FeatureType);
            Assert.AreEqual(startIndexText, features[0].Start.ToString((IFormatProvider)null));
            Assert.AreEqual(endIndexText, features[0].End.ToString((IFormatProvider)null));
            ApplicationLog.WriteLine(string.Format(null, "Kmer P1 : Validated DifferenceNodes successfully."));
        }
示例#5
0
        /// <summary>
        /// Compares the first sequence of two FASTA files using the k-mer based
        /// difference pipeline (word length 2) and writes every reported difference
        /// feature to the console. If either file is not recognised as FASTA, a
        /// message is printed and nothing is compared.
        /// </summary>
        /// <param name="firstFile">Name of the first FASTA file.</param>
        /// <param name="secondFile">Name of the second FASTA file.</param>
        public static void FindDifferencesInSequences(string firstFile, string secondFile)
        {
            // Both inputs must be FASTA files.
            if (!SequenceParsers.IsFasta(firstFile))
            {
                Console.WriteLine("Nieprawidlowy format pierwszego pliku!");
                return;
            }

            if (!SequenceParsers.IsFasta(secondFile))
            {
                Console.WriteLine("Nieprawidlowy format drugiego pliku!");
                return;
            }

            // Parse the first file.
            var firstParser = SequenceParsers.FindParserByFileName(firstFile);

            firstParser.Alphabet = AmbiguousProteinAlphabet.Instance;
            var firstSequenceList  = firstParser.Parse();
            var firstFileSequences = Helper.ConvertIenumerableToList(firstSequenceList);

            // Parse the second file. The parser must be created from secondFile;
            // using firstFile here would compare the first file with itself.
            var secondParser = SequenceParsers.FindParserByFileName(secondFile);

            secondParser.Alphabet = AmbiguousProteinAlphabet.Instance;
            var secondSequenceList  = secondParser.Parse();
            var secondFileSequences = Helper.ConvertIenumerableToList(secondSequenceList);

            // Only the first sequence of each file takes part in the comparison.
            var firstSequence  = firstFileSequences.First();
            var secondSequence = secondFileSequences.First();

            // Build the k-mer list of the first sequence and match it against the second.
            var kmerBuilder = new SequenceToKmerBuilder();
            var kmerList    = kmerBuilder.Build(firstSequence, 2);
            var nodes       = WordMatch.BuildMatchTable(kmerList, secondSequence, 2);

            var allMatches = new List<WordMatch>(nodes);

            var matchList = WordMatch.GetMinimalList(allMatches, 2);

            var minimalMatches = new List<WordMatch>(matchList);

            // Find the differences between the nodes.
            var diffNode = DifferenceNode.BuildDiffList(minimalMatches, firstSequence, secondSequence);

            var differences = new List<DifferenceNode>(diffNode);

            var features = DifferenceNode.OutputDiffList(differences, firstSequence, secondSequence);

            foreach (var compareFeature in features)
            {
                Console.WriteLine(compareFeature.Feature);
            }
        }
示例#6
0
        /// <summary>
        /// Compares the DNA sequences "AAAAAA" and "AAATAA" with a word length of 2
        /// and checks that four difference features are reported, alternating
        /// between an insertion and a REPLACE feature.
        /// </summary>
        public void SequenceCompare()
        {
            ISequence seq1 = new Sequence(Alphabets.DNA, "AAAAAA");
            ISequence seq2 = new Sequence(Alphabets.DNA, "AAATAA");

            // Comparison pipeline: k-mers -> match table -> minimal matches -> differences -> features.
            SequenceToKmerBuilder kmerBuilder             = new SequenceToKmerBuilder();
            KmersOfSequence       kmers                   = kmerBuilder.Build(seq1, 2);
            List<WordMatch>       nodes                   = WordMatch.BuildMatchTable(kmers, seq1, seq2, 2);
            List<WordMatch>       matchList               = WordMatch.GetMinimalList(nodes, 2);
            List<DifferenceNode>  diffNode                = DifferenceNode.BuildDiffList(matchList, seq1, seq2);
            List<DifferenceNode.CompareFeature> features  = DifferenceNode.OutputDiffList(diffNode, seq1, seq2);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // failure messages report the right value as "expected".
            Assert.AreEqual(4, features.Count);
            Assert.AreEqual("Insertion of 1 bases in 2 ", features[0].Feature);
            Assert.AreEqual("REPLACE", features[1].FeatureType);
            Assert.AreEqual("Insertion of 1 bases in 1 ", features[2].Feature);
            Assert.AreEqual("REPLACE", features[3].FeatureType);
        }
示例#7
0
        /// <summary>
        /// Compares two DNA sequences read from the test-data XML with a word length
        /// of 2 and checks that four difference features are reported, alternating
        /// between an insertion and the expected replace feature type.
        /// </summary>
        public void ValidateSequenceCompare()
        {
            // Input sequences and the expected "replace" feature type from the XML test data.
            string firstSequence = utilityObj.xmlUtil.GetTextValue(Constants.SequenceCompareNode,
                                                                   Constants.SequenceNode1);
            string secondSequence = utilityObj.xmlUtil.GetTextValue(Constants.SequenceCompareNode,
                                                                    Constants.SequenceNode2);
            string replace = utilityObj.xmlUtil.GetTextValue(Constants.SequenceCompareNode,
                                                             Constants.ReplaceNode);

            // Comparison pipeline: k-mers -> match table -> minimal matches -> differences -> features.
            ISequence             seq1                    = new Sequence(Alphabets.DNA, firstSequence);
            ISequence             seq2                    = new Sequence(Alphabets.DNA, secondSequence);
            var                   kmerBuilder             = new SequenceToKmerBuilder();
            KmersOfSequence       kmers                   = kmerBuilder.Build(seq1, 2);
            List<WordMatch>       nodes                   = WordMatch.BuildMatchTable(kmers, seq2, 2);
            List<WordMatch>       matchList               = WordMatch.GetMinimalList(nodes, 2);
            List<DifferenceNode>  diffNode                = DifferenceNode.BuildDiffList(matchList, seq1, seq2);
            List<DifferenceNode.CompareFeature> features  = DifferenceNode.OutputDiffList(diffNode, seq1, seq2);

            // Validate the behavior. Assert.AreEqual takes (expected, actual); keeping
            // that order makes failure messages report the right value as "expected".
            Assert.AreEqual(4, features.Count);
            Assert.AreEqual(Constants.InsertionOfOneBaseIn2, features[0].Feature);
            Assert.AreEqual(replace, features[1].FeatureType);
            Assert.AreEqual(Constants.InsertionOfOneBaseIn1, features[2].Feature);
            Assert.AreEqual(replace, features[3].FeatureType);
        }
示例#8
0
        /// <summary>
        /// Checks the k-mers generated for a DNA sequence against the expected
        /// values stored in the test-data XML. The k-mer length is fixed at 2.
        /// </summary>
        /// <param name="nodeName">XML node holding the input sequence and the expected values.</param>
        /// <param name="IsKmerBuilder">
        /// True to create the k-mers through <c>SequenceToKmerBuilder</c> (the k-mer count is
        /// verified as well in that case); false to construct <c>KmersOfSequence</c> directly.
        /// </param>
        static void ValidateKmer(string nodeName, bool IsKmerBuilder)
        {
            const int kmerLength = 2;

            // Input sequence and expected results all come from the XML test data.
            string inputSequence        = Utility._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
            string countExpected        = Utility._xmlUtil.GetTextValue(nodeName, Constants.KmrSeqCountNode);
            string baseSequenceExpected = Utility._xmlUtil.GetTextValue(nodeName, Constants.KmerSequenceNode);
            string lengthExpected       = Utility._xmlUtil.GetTextValue(nodeName, Constants.PositionsNode);

            ISequence dnaSequence = new Sequence(Alphabets.DNA, inputSequence);

            KmersOfSequence kmers;
            if (IsKmerBuilder)
            {
                kmers = new SequenceToKmerBuilder().Build(dnaSequence, kmerLength);

                // The k-mer count is only checked on the builder path.
                Assert.AreEqual(countExpected, kmers.Kmers.Count.ToString());
            }
            else
            {
                kmers = new KmersOfSequence(dnaSequence, kmerLength);
            }

            // Checks shared by both construction paths.
            Assert.AreEqual(baseSequenceExpected, kmers.BaseSequence.ToString());
            Assert.AreEqual(lengthExpected, kmers.Length.ToString());
        }
示例#9
0
        /// <summary>
        /// Aligns reads to contigs using k-mer matching. A k-mer dictionary is built
        /// from the reads; every k-mer of every contig (or its reverse complement) is
        /// looked up in that dictionary, and for each read that shares a k-mer with a
        /// contig a task running <c>MapRead</c> produces the read-to-contig mappings.
        /// </summary>
        /// <param name="contigs">List of contig sequences.</param>
        /// <param name="reads">List of read sequences.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>
        /// One <c>Contig</c> per input contig, holding the contig sequence as consensus
        /// and one <c>AssembledSequence</c> per mapping returned by <c>MapRead</c>.
        /// NOTE(review): the contigs are processed with AsParallel() and no AsOrdered(),
        /// so the output order presumably is not guaranteed to match the input order - confirm.
        /// </returns>
        public static IList <Contig> ReadContigAlignment(IList <ISequence> contigs, IList <ISequence> reads, int kmerLength)
        {
            // Dictionary of the k-mers found in the reads.
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);
            IList <ContigIndex>   contigDatas;

            // Step 1 (parallel per contig): for every k-mer of the contig record the
            // dictionary entries it matches, either directly or as reverse complement.
            contigDatas = contigs.AsParallel().Select(contig =>
            {
                IEnumerable <ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
                ContigIndex index             = new ContigIndex(contig);
                IList <KmerIndexer> positions;
                foreach (ISequence kmer in kmers)
                {
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        index.ContigReadMatchIndexes.Add(positions);
                    }
                    else
                    {
                        // An empty list is added for unmatched k-mers so that the list keeps
                        // exactly one entry per k-mer of the contig.
                        index.ContigReadMatchIndexes.Add(new List <KmerIndexer>());
                    }
                }

                return(index);
            }).ToList();

            // Step 2 (sequential per contig): map the matching reads and build the output.
            return(contigDatas.Select(contigData =>
            {
                // tasks[i] computes the mappings for the sequence index stored in visitedReads[i].
                IList <Task <IList <ReadMap> > > tasks =
                    new List <Task <IList <ReadMap> > >();

                // Sequence indexes for which a task has already been started. Since the
                // dictionary was built from the reads these are presumably read indexes,
                // despite the local below being named contigIndex - confirm.
                IList <long> visitedReads = new List <long>();

                // Starts one task per distinct sequence index, at the first k-mer entry
                // of this contig in which that index appears.
                for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
                {
                    // Copied into a per-iteration local so the task lambda captures this value.
                    int readPosition = index;
                    foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
                    {
                        long contigIndex = kmer.SequenceIndex;
                        if (!visitedReads.Contains(contigIndex))
                        {
                            visitedReads.Add(contigIndex);
                            // NOTE(review): the lambda takes a parameter (t), so this looks like the
                            // StartNew(Func<object, TResult>, object state) overload, with
                            // AttachedToParent passed as the state object rather than as a
                            // creation option - confirm the intended overload.
                            tasks.Add(
                                Task <IList <ReadMap> > .Factory.StartNew(t => MapRead(readPosition, contigData.ContigReadMatchIndexes, contigIndex, kmerLength), TaskCreationOptions.AttachedToParent));
                        }
                    }
                }

                Contig contigOutputStructure = new Contig();
                contigOutputStructure.Consensus = contigData.ContigSequence;

                // Collect the results; reading Result waits for the corresponding task.
                for (int index = 0; index < visitedReads.Count; index++)
                {
                    foreach (ReadMap maps in tasks[index].Result)
                    {
                        Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                        {
                            Length = maps.Length,
                            Position = maps.StartPositionOfContig,
                            ReadPosition = maps.StartPositionOfRead,
                            Sequence = reads.ElementAt(visitedReads[index])
                        };

                        // If the mapped stretch of the consensus and of the read are textually
                        // identical the read is flagged as forward; any other outcome is
                        // flagged as reversed and complemented.
                        if (new string(
                                contigOutputStructure.Consensus.GetSubSequence(
                                    assembledSeq.Position, assembledSeq.Length).Select(a => (char)a).ToArray()).
                            Equals(new string(assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length)
                                              .Select(a => (char)a).ToArray())))
                        {
                            assembledSeq.IsComplemented = false;
                            assembledSeq.IsReversed = false;
                        }
                        else
                        {
                            assembledSeq.IsComplemented = true;
                            assembledSeq.IsReversed = true;
                        }

                        contigOutputStructure.Sequences.Add(assembledSeq);
                    }
                }

                return contigOutputStructure;
            }).ToList());
        }
        /// <summary>
        /// Maps reads to contigs. A k-mer dictionary is built from the contigs; each
        /// read is processed in parallel, its k-mers (or their reverse complements) are
        /// looked up in the dictionary, and for every contig sharing a k-mer with the
        /// read a task running <c>MapRead</c> produces the overlap mappings.
        /// </summary>
        /// <param name="contigs">List of sequences of contigs.</param>
        /// <param name="reads">List of input reads.</param>
        /// <param name="kmerLength">Length of kmer.</param>
        /// <returns>
        /// Contig Read Map: keyed by read ID, each value maps a contig sequence to the
        /// list of <c>ReadMap</c> entries computed for that read and contig.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Two reads share the same ID. NOTE(review): this is thrown inside the
        /// Parallel.ForEach body, so callers will presumably observe it wrapped in an
        /// AggregateException - confirm.
        /// </exception>
        public ReadContigMap Map(IList <ISequence> contigs, IEnumerable <ISequence> reads, int kmerLength)
        {
            // Dictionary of the k-mers found in the contigs.
            KmerIndexerDictionary map  = SequenceToKmerBuilder.BuildKmerDictionary(contigs, kmerLength);
            ReadContigMap         maps = new ReadContigMap();

            Parallel.ForEach(reads, readSequence =>
            {
                IEnumerable <ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(readSequence, kmerLength);
                ReadIndex read = new ReadIndex(readSequence);

                // Only k-mers that match (directly or as reverse complement) are recorded, so
                // an index into ContigReadMatchIndexes counts matched k-mers only.
                // NOTE(review): verify MapRead expects that index rather than the k-mer's
                // offset within the read.
                foreach (ISequence kmer in kmers)
                {
                    IList <KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        read.ContigReadMatchIndexes.Add(positions);
                    }
                }

                // tasks[i] computes the mappings for the contig index stored in visitedContigs[i].
                IList <Task <IList <ReadMap> > > tasks =
                    new List <Task <IList <ReadMap> > >();

                // Stores the indexes of the contigs for which a task has been started.
                IList <long> visitedContigs = new List <long>();

                // Starts one task per distinct contig index, at the first matched k-mer
                // entry of this read in which that contig appears.
                for (int index = 0; index < read.ContigReadMatchIndexes.Count; index++)
                {
                    // Copied into a per-iteration local so the task lambda captures this value.
                    int readPosition = index;
                    foreach (KmerIndexer kmer in read.ContigReadMatchIndexes[index])
                    {
                        long contigIndex = kmer.SequenceIndex;
                        if (!visitedContigs.Contains(contigIndex))
                        {
                            visitedContigs.Add(contigIndex);
                            // NOTE(review): the lambda takes a parameter (t), so this looks like the
                            // StartNew(Func<object, TResult>, object state) overload, with
                            // AttachedToParent passed as the state object rather than as a
                            // creation option - confirm the intended overload.
                            tasks.Add(
                                Task <IList <ReadMap> > .Factory.StartNew(
                                    t => MapRead(
                                        readPosition,
                                        read.ContigReadMatchIndexes,
                                        contigIndex,
                                        read.ReadSequence.Count,
                                        kmerLength),
                                    TaskCreationOptions.AttachedToParent));
                        }
                    }
                }

                // Collect the results per contig; reading Result waits for the corresponding task.
                var overlapMaps = new Dictionary <ISequence, IList <ReadMap> >();
                for (int index = 0; index < visitedContigs.Count; index++)
                {
                    overlapMaps.Add(contigs.ElementAt(visitedContigs[index]), tasks[index].Result);
                }

                // The result map is shared by all parallel iterations, so additions are serialized.
                lock (maps)
                {
                    if (!maps.ContainsKey(read.ReadSequence.ID))
                    {
                        maps.Add(read.ReadSequence.ID, overlapMaps);
                    }
                    else
                    {
                        throw new ArgumentException(
                            string.Format(CultureInfo.CurrentCulture, Resource.DuplicatingReadIds, read.ReadSequence.ID));
                    }
                }
            });

            return(maps);
        }
示例#11
0
        /// <summary>
        /// Builds a contig graph from the 7-mers of a DNA sequence, creates one scaffold
        /// path over all graph nodes plus eight paths over sub-ranges of the same node
        /// list (one of them empty), and checks that <c>PurgePath</c> leaves exactly one
        /// path, which must match the full node list.
        /// </summary>
        public void PathPurger1()
        {
            const int        KmerLength      = 7;
            ISequence        sequence        = new Sequence(Alphabets.DNA, "GATTCAAGGGCTGGGGG");
            IList<ISequence> contigsSequence = SequenceToKmerBuilder.GetKmerSequences(sequence, KmerLength).ToList();
            ContigGraph      graph           = new ContigGraph();

            graph.BuildContigGraph(contigsSequence, KmerLength);
            List<Node>          contigs = graph.Nodes.ToList();
            IList<ScaffoldPath> paths   = new List<ScaffoldPath>();

            // First path: every node of the graph, in list order.
            ScaffoldPath fullPath = new ScaffoldPath();
            foreach (Node node in contigs)
            {
                fullPath.Add(new KeyValuePair<Node, Edge>(node, null));
            }

            paths.Add(fullPath);

            // Remaining paths: { start index, node count } ranges into the node list, in the
            // order the paths are added. The ranges require the graph to hold at least 11 nodes.
            int[][] subRanges =
            {
                new[] { 2, 5 },
                new[] { 3, 5 },
                new[] { 6, 5 },
                new[] { 0, 11 },
                new[] { 7, 4 },
                new[] { 11, 0 },
                new[] { 2, 9 },
                new[] { 1, 10 }
            };

            foreach (int[] range in subRanges)
            {
                ScaffoldPath subPath = new ScaffoldPath();
                foreach (Node node in contigs.GetRange(range[0], range[1]))
                {
                    subPath.Add(new KeyValuePair<Node, Edge>(node, null));
                }

                paths.Add(subPath);
            }

            PathPurger assembler = new PathPurger();

            assembler.PurgePath(paths);

            // Assert.AreEqual takes (expected, actual).
            Assert.AreEqual(1, paths.Count);
            Assert.IsTrue(Compare(paths.First(), contigs));
        }