Пример #1
0
        /// <summary>
        /// Verifies that a read whose reverse complement occurs in a contig is aligned
        /// to that contig and is reported as both reversed and complemented.
        /// </summary>
        public void MapContigToReverseComplementOfRead()
        {
            // Contig: TCTGATAAGG. The reverse complement of the read CCTTATCAG is
            // CTGATAAGG, which starts at contig position 1.
            Sequence seq = new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray());
            seq.ID = "1";
            IList<ISequence> contigs = new List<ISequence>();
            contigs.Add(seq);

            Sequence read = new Sequence(Alphabets.DNA, "CCTTATCAG".Select(a => (byte)a).ToArray());
            read.ID = "2";
            IList<ISequence> reads = new List<ISequence>();
            reads.Add(read);

            const int kmerLength = 6;
            IList<Contig> alignment = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(contigs.Count, alignment.Count);

            Contig contig = alignment.First();
            Contig.AssembledSequence sequence = contig.Sequences.First();

            Assert.AreEqual(9, sequence.Length);
            Assert.AreEqual(1, sequence.Position);
            Assert.AreEqual(0, sequence.ReadPosition);
            Assert.AreEqual(reads.First(), sequence.Sequence);
            Assert.IsTrue(sequence.IsComplemented);
            Assert.IsTrue(sequence.IsReversed);
        }
Пример #2
0
        /// <summary>
        /// Verifies that a read which occurs verbatim in a contig is aligned to that
        /// contig and is reported as neither reversed nor complemented.
        /// </summary>
        public void MapReadToContig()
        {
            // The read CTGATAAGG is the contig TCTGATAAGG without its first symbol,
            // so it is expected at contig position 1.
            Sequence seq = new Sequence(Alphabets.DNA, "TCTGATAAGG");
            seq.DisplayID = "1";
            IList<ISequence> contigs = new List<ISequence>();
            contigs.Add(seq);

            Sequence read = new Sequence(Alphabets.DNA, "CTGATAAGG");
            read.DisplayID = "2";
            IList<ISequence> reads = new List<ISequence>();
            reads.Add(read);

            const int kmerLength = 6;
            IList<Contig> alignment = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

            // Expected value goes first so that failure messages read correctly.
            Assert.AreEqual(contigs.Count, alignment.Count);

            Contig contig = alignment.First();
            Contig.AssembledSequence sequence = contig.Sequences.First();

            Assert.AreEqual(9, sequence.Length);
            Assert.AreEqual(1, sequence.Position);
            Assert.AreEqual(0, sequence.ReadPosition);
            Assert.AreEqual(reads.First(), sequence.Sequence);
            Assert.IsFalse(sequence.IsComplemented);
            Assert.IsFalse(sequence.IsReversed);
        }
Пример #3
0
        /// <summary>
        /// Reads sparse sequences from the reader for as long as it reports more lines,
        /// wrapping each parsed sequence in a <see cref="Contig.AssembledSequence"/>.
        /// Only the Sequence member of each entry is populated here.
        /// </summary>
        /// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
        /// <returns>The assembled sequences in the order they were parsed.</returns>
        /// <exception cref="ArgumentNullException">Thrown when contigReader is null.</exception>
        protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
        {
            if (contigReader == null)
            {
                throw new ArgumentNullException("contigReader");
            }

            List<Contig.AssembledSequence> parsedSequences = new List<Contig.AssembledSequence>();

            while (contigReader.HasLines)
            {
                parsedSequences.Add(new Contig.AssembledSequence { Sequence = ParseOne(contigReader) });
            }

            return parsedSequences;
        }
Пример #4
0
        /// <summary>
        /// Reads sparse sequences together with their offsets from the reader for as long
        /// as it reports more lines. Each parsed sequence is stored in a
        /// <see cref="Contig.AssembledSequence"/> whose Position is the parsed offset.
        /// </summary>
        /// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
        /// <param name="isReadOnly">
        /// Read-only flag, forwarded unchanged to ParseOneWithOffset for every sequence.
        /// </param>
        /// <returns>The assembled sequences in the order they were parsed.</returns>
        /// <exception cref="ArgumentNullException">Thrown when contigReader is null.</exception>
        protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader, bool isReadOnly)
        {
            if (contigReader == null)
            {
                throw new ArgumentNullException("contigReader");
            }

            List<Contig.AssembledSequence> parsedSequences = new List<Contig.AssembledSequence>();

            while (contigReader.HasLines)
            {
                Contig.AssembledSequence entry = new Contig.AssembledSequence();

                // Item1 is the parsed sequence, Item2 its offset.
                var parsed = ParseOneWithOffset(contigReader, isReadOnly);
                int startOffset = parsed.Item2;
                entry.Sequence = parsed.Item1;
                entry.Position = startOffset;

                parsedSequences.Add(entry);
            }

            return parsedSequences;
        }
Пример #5
0
        /// <summary>
        /// Aligns reads to contigs using kmer method of alignment.
        /// </summary>
        /// <remarks>
        /// A kmer dictionary is built from the reads. Every kmer of every contig (or its
        /// reverse complement) is looked up in that dictionary, and each read that shares
        /// at least one kmer with a contig is mapped against that contig by MapRead.
        /// A mapping whose contig segment differs from the read segment is flagged as
        /// reversed and complemented.
        /// </remarks>
        /// <param name="contigs">List of contig sequences.</param>
        /// <param name="reads">List of read sequences.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>List of Contig, one per input contig and in the same order as <paramref name="contigs"/>.</returns>
        public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
        {
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

            // AsOrdered keeps the resulting list aligned with the order of "contigs";
            // PLINQ does not guarantee ordering otherwise.
            IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
            {
                IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
                ContigIndex index = new ContigIndex(contig);

                foreach (ISequence kmer in kmers)
                {
                    // One entry is added per contig kmer (an empty list when no read
                    // contains it) so that list positions match kmer positions.
                    IList<KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        index.ContigReadMatchIndexes.Add(positions);
                    }
                    else
                    {
                        index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
                    }
                }

                return index;
            }).ToList();

            return contigDatas.Select(contigData =>
            {
                IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

                // Indexes of the reads for which a task has been started;
                // visitedReads[i] is the read handled by tasks[i].
                IList<long> visitedReads = new List<long>();

                // Same indexes as visitedReads, kept for constant-time membership checks.
                HashSet<long> seenReads = new HashSet<long>();

                // Creates one task per distinct read that shares a kmer with this contig.
                for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
                {
                    // Copy of the loop variable so each task closure captures its own value.
                    int contigPosition = index;
                    foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
                    {
                        long readIndex = kmer.SequenceIndex;
                        if (seenReads.Add(readIndex))
                        {
                            visitedReads.Add(readIndex);

                            // Parameterless delegate: with a one-argument lambda the
                            // options value would bind to the "state" parameter of
                            // StartNew instead of being applied as creation options.
                            tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                                () => MapRead(contigPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength),
                                TaskCreationOptions.AttachedToParent));
                        }
                    }
                }

                Contig contigOutputStructure = new Contig();
                contigOutputStructure.Consensus = contigData.ContigSequence;

                for (int index = 0; index < visitedReads.Count; index++)
                {
                    foreach (ReadMap maps in tasks[index].Result)
                    {
                        Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                        {
                            Length = maps.Length,
                            Position = maps.StartPositionOfContig,
                            ReadPosition = maps.StartPositionOfRead,
                            Sequence = reads.ElementAt(visitedReads[index])
                        };

                        // Compare the aligned symbols directly; identical segments mean
                        // the read lies on the same strand as the contig.
                        bool isForwardMatch = contigOutputStructure.Consensus
                            .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                            .SequenceEqual(
                                assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length));

                        assembledSeq.IsComplemented = !isForwardMatch;
                        assembledSeq.IsReversed = !isForwardMatch;

                        contigOutputStructure.Sequences.Add(assembledSeq);
                    }
                }

                return contigOutputStructure;
            }).ToList();
        }
Пример #6
0
        /// <summary>
        /// Serializes a contig with two assembled sequences to the file "Contig.data"
        /// using BinaryFormatter, reads it back, and checks that the consensus, length
        /// and every assembled sequence survive the round trip.
        /// </summary>
        public void TestContigWithBinaryFormatter()
        {
            Sequence seq1 = new Sequence(Alphabets.DNA, "ACGACTTACG");
            Contig.AssembledSequence assembledSeq1 = new Contig.AssembledSequence();
            assembledSeq1.Sequence = seq1;
            assembledSeq1.Position = 0;
            assembledSeq1.IsReversed = false;
            assembledSeq1.IsComplemented = false;

            Sequence seq2 = new Sequence(Alphabets.DNA, "TACGATCCGGAAA");
            Contig.AssembledSequence assembledSeq2 = new Contig.AssembledSequence();
            assembledSeq2.Sequence = seq2;
            assembledSeq2.Position = 6;
            assembledSeq2.IsReversed = false;
            assembledSeq2.IsComplemented = false;

            Sequence consensus = new Sequence(Alphabets.DNA, "ACGACTTACGATCCGGAAA");
            Contig contig = new Contig();
            contig.Sequences.Add(assembledSeq1);
            contig.Sequences.Add(assembledSeq2);
            contig.Consensus = consensus;

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input;
            // it is kept here because this test exists to cover that serialization path.
            BinaryFormatter formatter = new BinaryFormatter();
            Contig deserializedContig;

            // No catch block: any exception fails the test on its own and keeps its
            // message and stack trace, which a bare Assert.Fail() would discard.
            using (Stream stream = File.Open("Contig.data", FileMode.Create))
            {
                formatter.Serialize(stream, contig);
                stream.Seek(0, SeekOrigin.Begin);
                deserializedContig = (Contig)formatter.Deserialize(stream);
            }

            Assert.AreNotSame(contig, deserializedContig);
            Assert.AreEqual(contig.Consensus.ToString(), deserializedContig.Consensus.ToString());
            Assert.AreEqual(contig.Length, deserializedContig.Length);
            Assert.AreEqual(contig.Sequences.Count, deserializedContig.Sequences.Count);

            for (int i = 0; i < contig.Sequences.Count; i++)
            {
                Assert.AreEqual(
                    contig.Sequences[i].Sequence.ToString(),
                    deserializedContig.Sequences[i].Sequence.ToString());

                Assert.AreEqual(
                    contig.Sequences[i].IsComplemented,
                    deserializedContig.Sequences[i].IsComplemented);

                Assert.AreEqual(contig.Sequences[i].IsReversed, deserializedContig.Sequences[i].IsReversed);
                Assert.AreEqual(contig.Sequences[i].Position, deserializedContig.Sequences[i].Position);
            }
        }
Пример #7
0
        /// <summary>
        /// Aligns reads to contigs using kmer method of alignment.
        /// </summary>
        /// <remarks>
        /// A kmer dictionary is built from the reads. Every kmer of every contig (or its
        /// reverse complement) is looked up in that dictionary, and each read that shares
        /// at least one kmer with a contig is mapped against that contig by MapRead.
        /// A mapping whose contig segment differs from the read segment is flagged as
        /// reversed and complemented.
        /// </remarks>
        /// <param name="contigs">List of contig sequences.</param>
        /// <param name="reads">List of read sequences.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>List of Contig, one per input contig and in the same order as <paramref name="contigs"/>.</returns>
        public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
        {
            KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

            // AsOrdered keeps the resulting list aligned with the order of "contigs";
            // PLINQ does not guarantee ordering otherwise.
            IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
            {
                IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
                ContigIndex index = new ContigIndex(contig);

                foreach (ISequence kmer in kmers)
                {
                    // One entry is added per contig kmer (an empty list when no read
                    // contains it) so that list positions match kmer positions.
                    IList<KmerIndexer> positions;
                    if (map.TryGetValue(kmer, out positions) ||
                        map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
                    {
                        index.ContigReadMatchIndexes.Add(positions);
                    }
                    else
                    {
                        index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
                    }
                }

                return index;
            }).ToList();

            return contigDatas.Select(contigData =>
            {
                IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

                // Indexes of the reads for which a task has been started;
                // visitedReads[i] is the read handled by tasks[i].
                IList<long> visitedReads = new List<long>();

                // Same indexes as visitedReads, kept for constant-time membership checks.
                HashSet<long> seenReads = new HashSet<long>();

                // Creates one task per distinct read that shares a kmer with this contig.
                for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
                {
                    // Copy of the loop variable so each task closure captures its own value.
                    int contigPosition = index;
                    foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
                    {
                        long readIndex = kmer.SequenceIndex;
                        if (seenReads.Add(readIndex))
                        {
                            visitedReads.Add(readIndex);

                            // Parameterless delegate: with a one-argument lambda the
                            // options value would bind to the "state" parameter of
                            // StartNew instead of being applied as creation options.
                            tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                                () => MapRead(contigPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength),
                                TaskCreationOptions.AttachedToParent));
                        }
                    }
                }

                Contig contigOutputStructure = new Contig();
                contigOutputStructure.Consensus = contigData.ContigSequence;

                for (int index = 0; index < visitedReads.Count; index++)
                {
                    foreach (ReadMap maps in tasks[index].Result)
                    {
                        Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                        {
                            Length = maps.Length,
                            Position = maps.StartPositionOfContig,
                            ReadPosition = maps.StartPositionOfRead,
                            Sequence = reads.ElementAt(visitedReads[index])
                        };

                        // Compare the aligned symbols directly; identical segments mean
                        // the read lies on the same strand as the contig.
                        bool isForwardMatch = contigOutputStructure.Consensus
                            .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                            .SequenceEqual(
                                assembledSeq.Sequence.GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length));

                        assembledSeq.IsComplemented = !isForwardMatch;
                        assembledSeq.IsReversed = !isForwardMatch;

                        contigOutputStructure.Sequences.Add(assembledSeq);
                    }
                }

                return contigOutputStructure;
            }).ToList();
        }