/// <summary>
/// Verifies that a read which is the reverse complement of part of a contig
/// is aligned to that contig and reported as both reversed and complemented.
/// </summary>
public void MapContigToReverseComplementOfRead()
{
    IList<ISequence> contigs = new List<ISequence>();
    IList<ISequence> reads = new List<ISequence>();

    Sequence seq = new Sequence(Alphabets.DNA, "TCTGATAAGG".Select(a => (byte)a).ToArray());
    seq.ID = "1";
    contigs.Add(seq);

    // "CCTTATCAG" is the reverse complement of "CTGATAAGG", which is the
    // contig from position 1 to its end (9 symbols).
    Sequence read = new Sequence(Alphabets.DNA, "CCTTATCAG".Select(a => (byte)a).ToArray());
    read.ID = "2";
    reads.Add(read);

    const int kmerLength = 6;
    IList<Contig> alignment = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right value as "expected".
    Assert.AreEqual(contigs.Count, alignment.Count);

    Contig contig = alignment.First();
    Contig.AssembledSequence sequence = contig.Sequences.First();

    Assert.AreEqual(9, sequence.Length);
    Assert.AreEqual(1, sequence.Position);
    Assert.AreEqual(0, sequence.ReadPosition);
    Assert.AreEqual(reads.First(), sequence.Sequence);
    Assert.IsTrue(sequence.IsComplemented);
    Assert.IsTrue(sequence.IsReversed);
}
/// <summary>
/// Verifies that a read which is a forward sub-sequence of a contig is aligned
/// to that contig and reported as neither reversed nor complemented.
/// </summary>
public void MapReadToContig()
{
    IList<ISequence> contigs = new List<ISequence>();
    IList<ISequence> reads = new List<ISequence>();

    Sequence seq = new Sequence(Alphabets.DNA, "TCTGATAAGG");
    seq.DisplayID = "1";
    contigs.Add(seq);

    // The read equals the contig from position 1 to its end (9 symbols).
    Sequence read = new Sequence(Alphabets.DNA, "CTGATAAGG");
    read.DisplayID = "2";
    reads.Add(read);

    const int kmerLength = 6;
    IList<Contig> alignment = ReadAlignment.ReadContigAlignment(contigs, reads, kmerLength);

    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the right value as "expected".
    Assert.AreEqual(contigs.Count, alignment.Count);

    Contig contig = alignment.First();
    Contig.AssembledSequence sequence = contig.Sequences.First();

    Assert.AreEqual(9, sequence.Length);
    Assert.AreEqual(1, sequence.Position);
    Assert.AreEqual(0, sequence.ReadPosition);
    Assert.AreEqual(reads.First(), sequence.Sequence);
    Assert.IsFalse(sequence.IsComplemented);
    Assert.IsFalse(sequence.IsReversed);
}
/// <summary>
/// Parses sparse sequences from the reader for as long as it reports more lines,
/// wrapping each parsed sequence in a contig assembled sequence.
/// </summary>
/// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
/// <returns>The assembled sequences, in the order they were parsed.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
{
    if (contigReader == null)
    {
        throw new ArgumentNullException("contigReader");
    }

    var assembled = new List<Contig.AssembledSequence>();

    while (contigReader.HasLines)
    {
        // Only the Sequence member is populated by this overload.
        assembled.Add(new Contig.AssembledSequence { Sequence = ParseOne(contigReader) });
    }

    return assembled;
}
/// <summary>
/// Parses sparse sequences, together with their offsets, from the reader for as
/// long as it reports more lines. Each parsed sequence is wrapped in a contig
/// assembled sequence whose position is set to the parsed offset.
/// </summary>
/// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
/// <param name="isReadOnly">
/// Read-only flag forwarded unchanged to the per-sequence parser for every sequence.
/// </param>
/// <returns>The assembled sequences, in the order they were parsed.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader, bool isReadOnly)
{
    if (contigReader == null)
    {
        throw new ArgumentNullException("contigReader");
    }

    var assembled = new List<Contig.AssembledSequence>();

    while (contigReader.HasLines)
    {
        var entry = new Contig.AssembledSequence();

        // Item1 is the parsed sequence, Item2 the offset used as its position.
        var parsed = ParseOneWithOffset(contigReader, isReadOnly);
        entry.Sequence = parsed.Item1;
        entry.Position = parsed.Item2;

        assembled.Add(entry);
    }

    return assembled;
}
/// <summary>
/// Aligns reads to contigs using kmer method of alignment.
/// </summary>
/// <param name="contigs">List of contig sequences.</param>
/// <param name="reads">List of read sequences.</param>
/// <param name="kmerLength">Kmer Length.</param>
/// <returns>
/// List of Contig, one per input contig and in the same order as <paramref name="contigs"/>.
/// Each contig carries the input sequence as its consensus plus the reads mapped onto it.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
{
    if (contigs == null)
    {
        throw new System.ArgumentNullException("contigs");
    }

    if (reads == null)
    {
        throw new System.ArgumentNullException("reads");
    }

    // Kmer dictionary built from the reads; contig kmers are looked up in it below.
    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

    // AsOrdered keeps the output in the same order as the input contigs;
    // a plain AsParallel query gives no ordering guarantee.
    IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
    {
        IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
        ContigIndex index = new ContigIndex(contig);

        foreach (ISequence kmer in kmers)
        {
            // Exactly one entry is added per contig kmer (possibly an empty list),
            // so a list index identifies the kmer it was produced from.
            IList<KmerIndexer> positions;
            if (map.TryGetValue(kmer, out positions) ||
                map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
            {
                index.ContigReadMatchIndexes.Add(positions);
            }
            else
            {
                index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
            }
        }

        return index;
    }).ToList();

    return contigDatas.Select(contigData =>
    {
        IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

        // Reads for which a mapping task has already been started, in start order:
        // visitedReads[i] is the read handled by tasks[i]. The set mirrors the list
        // and only exists to make the membership test constant time.
        IList<long> visitedReads = new List<long>();
        HashSet<long> startedReads = new HashSet<long>();

        // Starts one task per distinct read that shares at least one kmer with this contig.
        for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
        {
            // Copied into locals so that every task closure captures its own values.
            int firstMatchPosition = index;
            foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
            {
                long readIndex = kmer.SequenceIndex;
                if (startedReads.Add(readIndex))
                {
                    visitedReads.Add(readIndex);

                    // No state object or creation options are passed: every task is
                    // joined through Result below, before this lambda returns.
                    tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                        () => MapRead(firstMatchPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength)));
                }
            }
        }

        Contig contigOutputStructure = new Contig();
        contigOutputStructure.Consensus = contigData.ContigSequence;

        for (int index = 0; index < visitedReads.Count; index++)
        {
            // Result blocks until the corresponding mapping task has finished.
            foreach (ReadMap maps in tasks[index].Result)
            {
                Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                {
                    Length = maps.Length,
                    Position = maps.StartPositionOfContig,
                    ReadPosition = maps.StartPositionOfRead,
                    Sequence = reads.ElementAt(visitedReads[index])
                };

                string contigText = new string(
                    contigOutputStructure.Consensus
                        .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                        .Select(a => (char)a).ToArray());
                string readText = new string(
                    assembledSeq.Sequence
                        .GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length)
                        .Select(a => (char)a).ToArray());

                // If the mapped region of the read is not textually identical to the
                // contig region, the read is reported as reverse complemented.
                bool isForwardMatch = contigText.Equals(readText);
                assembledSeq.IsComplemented = !isForwardMatch;
                assembledSeq.IsReversed = !isForwardMatch;

                contigOutputStructure.Sequences.Add(assembledSeq);
            }
        }

        return contigOutputStructure;
    }).ToList();
}
/// <summary>
/// Serializes a Contig with two assembled sequences to a file using BinaryFormatter,
/// deserializes it again and checks that the copy is a distinct object carrying the
/// same consensus, length and per-sequence data.
/// </summary>
public void TestContigWithBinaryFormatter()
{
    // The using block closes the file even when an assertion fails. There is
    // deliberately no catch-all: letting exceptions propagate makes the test
    // framework report the real exception or assertion message.
    using (Stream stream = File.Open("Contig.data", FileMode.Create))
    {
        // NOTE(review): BinaryFormatter is obsolete in current .NET releases; it is
        // used here only to round-trip data this test has just written itself.
        BinaryFormatter formatter = new BinaryFormatter();

        Sequence seq1 = new Sequence(Alphabets.DNA, "ACGACTTACG");
        Contig.AssembledSequence assembledSeq1 = new Contig.AssembledSequence();
        assembledSeq1.Sequence = seq1;
        assembledSeq1.Position = 0;
        assembledSeq1.IsReversed = false;
        assembledSeq1.IsComplemented = false;

        Sequence seq2 = new Sequence(Alphabets.DNA, "TACGATCCGGAAA");
        Contig.AssembledSequence assembledSeq2 = new Contig.AssembledSequence();
        assembledSeq2.Sequence = seq2;
        assembledSeq2.Position = 6;
        assembledSeq2.IsReversed = false;
        assembledSeq2.IsComplemented = false;

        Sequence consensus = new Sequence(Alphabets.DNA, "ACGACTTACGATCCGGAAA");

        Contig contig = new Contig();
        contig.Sequences.Add(assembledSeq1);
        contig.Sequences.Add(assembledSeq2);
        contig.Consensus = consensus;

        formatter.Serialize(stream, contig);

        // Rewind so deserialization reads what was just written.
        stream.Seek(0, SeekOrigin.Begin);
        Contig deserializedContig = (Contig)formatter.Deserialize(stream);

        Assert.AreNotSame(contig, deserializedContig);
        Assert.AreEqual(contig.Consensus.ToString(), deserializedContig.Consensus.ToString());
        Assert.AreEqual(contig.Length, deserializedContig.Length);
        Assert.AreEqual(contig.Sequences.Count, deserializedContig.Sequences.Count);

        for (int i = 0; i < contig.Sequences.Count; i++)
        {
            Assert.AreEqual(
                contig.Sequences[i].Sequence.ToString(),
                deserializedContig.Sequences[i].Sequence.ToString());
            Assert.AreEqual(
                contig.Sequences[i].IsComplemented,
                deserializedContig.Sequences[i].IsComplemented);
            Assert.AreEqual(contig.Sequences[i].IsReversed, deserializedContig.Sequences[i].IsReversed);
            Assert.AreEqual(contig.Sequences[i].Position, deserializedContig.Sequences[i].Position);
        }
    }
}
/// <summary>
/// Aligns reads to contigs using kmer method of alignment.
/// </summary>
/// <param name="contigs">List of contig sequences.</param>
/// <param name="reads">List of read sequences.</param>
/// <param name="kmerLength">Kmer Length.</param>
/// <returns>
/// List of Contig, one per input contig and in the same order as <paramref name="contigs"/>.
/// Each contig carries the input sequence as its consensus plus the reads mapped onto it.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="contigs"/> or <paramref name="reads"/> is null.
/// </exception>
public static IList<Contig> ReadContigAlignment(IList<ISequence> contigs, IList<ISequence> reads, int kmerLength)
{
    if (contigs == null)
    {
        throw new System.ArgumentNullException("contigs");
    }

    if (reads == null)
    {
        throw new System.ArgumentNullException("reads");
    }

    // Kmer dictionary built from the reads; contig kmers are looked up in it below.
    KmerIndexerDictionary map = SequenceToKmerBuilder.BuildKmerDictionary(reads, kmerLength);

    // AsOrdered keeps the output in the same order as the input contigs;
    // a plain AsParallel query gives no ordering guarantee.
    IList<ContigIndex> contigDatas = contigs.AsParallel().AsOrdered().Select(contig =>
    {
        IEnumerable<ISequence> kmers = SequenceToKmerBuilder.GetKmerSequences(contig, kmerLength);
        ContigIndex index = new ContigIndex(contig);

        foreach (ISequence kmer in kmers)
        {
            // Exactly one entry is added per contig kmer (possibly an empty list),
            // so a list index identifies the kmer it was produced from.
            IList<KmerIndexer> positions;
            if (map.TryGetValue(kmer, out positions) ||
                map.TryGetValue(kmer.GetReverseComplementedSequence(), out positions))
            {
                index.ContigReadMatchIndexes.Add(positions);
            }
            else
            {
                index.ContigReadMatchIndexes.Add(new List<KmerIndexer>());
            }
        }

        return index;
    }).ToList();

    return contigDatas.Select(contigData =>
    {
        IList<Task<IList<ReadMap>>> tasks = new List<Task<IList<ReadMap>>>();

        // Reads for which a mapping task has already been started, in start order:
        // visitedReads[i] is the read handled by tasks[i]. The set mirrors the list
        // and only exists to make the membership test constant time.
        IList<long> visitedReads = new List<long>();
        HashSet<long> startedReads = new HashSet<long>();

        // Starts one task per distinct read that shares at least one kmer with this contig.
        for (int index = 0; index < contigData.ContigReadMatchIndexes.Count; index++)
        {
            // Copied into locals so that every task closure captures its own values.
            int firstMatchPosition = index;
            foreach (KmerIndexer kmer in contigData.ContigReadMatchIndexes[index])
            {
                long readIndex = kmer.SequenceIndex;
                if (startedReads.Add(readIndex))
                {
                    visitedReads.Add(readIndex);

                    // No state object or creation options are passed: every task is
                    // joined through Result below, before this lambda returns.
                    tasks.Add(Task<IList<ReadMap>>.Factory.StartNew(
                        () => MapRead(firstMatchPosition, contigData.ContigReadMatchIndexes, readIndex, kmerLength)));
                }
            }
        }

        Contig contigOutputStructure = new Contig();
        contigOutputStructure.Consensus = contigData.ContigSequence;

        for (int index = 0; index < visitedReads.Count; index++)
        {
            // Result blocks until the corresponding mapping task has finished.
            foreach (ReadMap maps in tasks[index].Result)
            {
                Contig.AssembledSequence assembledSeq = new Contig.AssembledSequence()
                {
                    Length = maps.Length,
                    Position = maps.StartPositionOfContig,
                    ReadPosition = maps.StartPositionOfRead,
                    Sequence = reads.ElementAt(visitedReads[index])
                };

                string contigText = new string(
                    contigOutputStructure.Consensus
                        .GetSubSequence(assembledSeq.Position, assembledSeq.Length)
                        .Select(a => (char)a).ToArray());
                string readText = new string(
                    assembledSeq.Sequence
                        .GetSubSequence(assembledSeq.ReadPosition, assembledSeq.Length)
                        .Select(a => (char)a).ToArray());

                // If the mapped region of the read is not textually identical to the
                // contig region, the read is reported as reverse complemented.
                bool isForwardMatch = contigText.Equals(readText);
                assembledSeq.IsComplemented = !isForwardMatch;
                assembledSeq.IsReversed = !isForwardMatch;

                contigOutputStructure.Sequences.Add(assembledSeq);
            }
        }

        return contigOutputStructure;
    }).ToList();
}